/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/ddi.h>
#include <sys/sysmacros.h>
#include <sys/archsystm.h>
#include <sys/vmsystm.h>
#include <sys/machparam.h>
#include <sys/machsystm.h>
#include <sys/machthread.h>
#include <sys/cpu.h>
#include <sys/cmp.h>
#include <sys/elf_SPARC.h>
#include <vm/vm_dep.h>
#include <vm/hat_sfmmu.h>
#include <vm/seg_kpm.h>
#include <sys/cpuvar.h>
#include <sys/cheetahregs.h>
#include <sys/us3_module.h>
#include <sys/async.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/dditypes.h>
#include <sys/prom_debug.h>
#include <sys/prom_plat.h>
#include <sys/cpu_module.h>
#include <sys/intreg.h>
#include <sys/clock.h>
#include <sys/platform_module.h>
#include <sys/machtrap.h>
#include <sys/ontrap.h>
#include <sys/panic.h>
#include <sys/memlist.h>
#include <sys/bootconf.h>
#include <sys/ivintr.h>
#include <sys/atomic.h>
#include <sys/taskq.h>
#include <sys/note.h>
#include <sys/ndifm.h>
#include <sys/ddifm.h>
#include <sys/fm/protocol.h>
#include <sys/fm/util.h>
#include <sys/fm/cpu/UltraSPARC-III.h>
#include <sys/fpras_impl.h>
#include <sys/dtrace.h>
#include <sys/watchpoint.h>
#include <sys/plat_ecc_unum.h>
#include <sys/cyclic.h>
#include <sys/errorq.h>
#include <sys/errclassify.h>
#include <sys/pghw.h>
#include <sys/clock_impl.h>

#ifdef CHEETAHPLUS_ERRATUM_25
#include <sys/xc_impl.h>
#endif /* CHEETAHPLUS_ERRATUM_25 */

ch_cpu_logout_t clop_before_flush;
ch_cpu_logout_t clop_after_flush;
uint_t flush_retries_done = 0;
/*
 * Note that 'Cheetah PRM' refers to:
 *   SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III
 */

/*
 * Per CPU pointers to physical address of TL>0 logout data areas.
 * These pointers have to be in the kernel nucleus to avoid MMU
 * misses.
 */
uint64_t ch_err_tl1_paddrs[NCPU];

/*
 * One statically allocated structure to use during startup/DR
 * to prevent unnecessary panics.
 */
ch_err_tl1_data_t ch_err_tl1_data;

/*
 * Per CPU pending error at TL>0, used by level15 softint handler
 */
uchar_t ch_err_tl1_pending[NCPU];

/*
 * For deferred CE re-enable after trap.
 */
taskq_t *ch_check_ce_tq;

/*
 * Internal functions.
 */
static int cpu_async_log_err(void *flt, errorq_elem_t *eqep);
static void cpu_log_diag_info(ch_async_flt_t *ch_flt);
static void cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
    ecc_type_to_info_t *eccp, ch_diag_data_t *cdp);
static int cpu_flt_in_memory_one_event(ch_async_flt_t *ch_flt,
    uint64_t t_afsr_bit);
static int clear_ecc(struct async_flt *ecc);
#if defined(CPU_IMP_ECACHE_ASSOC)
static int cpu_ecache_line_valid(ch_async_flt_t *ch_flt);
#endif
int cpu_ecache_set_size(struct cpu *cp);
static int cpu_ectag_line_invalid(int cachesize, uint64_t tag);
int cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr);
uint64_t cpu_ectag_to_pa(int setsize, uint64_t tag);
int cpu_ectag_pa_to_subblk_state(int cachesize,
    uint64_t subaddr, uint64_t tag);
static void cpu_flush_ecache_line(ch_async_flt_t *ch_flt);
static int afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit);
static int afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit);
static int afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit);
static int afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit);
static int synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit);
static int cpu_get_mem_unum_synd(int synd_code, struct async_flt *, char *buf);
static void cpu_uninit_ecache_scrub_dr(struct cpu *cp);
static void cpu_scrubphys(struct async_flt *aflt);
static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *,
    int *, int *);
static void cpu_payload_add_ecache(struct async_flt *, nvlist_t *);
static void cpu_ereport_init(struct async_flt *aflt);
static int cpu_check_secondary_errors(ch_async_flt_t *, uint64_t, uint64_t);
static uint8_t cpu_flt_bit_to_plat_error(struct async_flt *aflt);
static void cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
    uint64_t nceen, ch_cpu_logout_t *clop);
static int cpu_ce_delayed_ec_logout(uint64_t);
static int cpu_matching_ecache_line(uint64_t, void *, int, int *);
static int cpu_error_is_ecache_data(int, uint64_t);
static void cpu_fmri_cpu_set(nvlist_t *, int);
static int cpu_error_to_resource_type(struct async_flt *aflt);

#ifdef CHEETAHPLUS_ERRATUM_25
static int mondo_recover_proc(uint16_t, int);
static void cheetah_nudge_init(void);
static void cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
    cyc_time_t *when);
static void cheetah_nudge_buddy(void);
#endif /* CHEETAHPLUS_ERRATUM_25 */

#if defined(CPU_IMP_L1_CACHE_PARITY)
static void cpu_dcache_parity_info(ch_async_flt_t *ch_flt);
static void cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index);
static void cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
    ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word);
static void cpu_icache_parity_info(ch_async_flt_t *ch_flt);
static void cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index);
static void cpu_pcache_parity_info(ch_async_flt_t *ch_flt);
static void cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index);
static void cpu_payload_add_dcache(struct async_flt *, nvlist_t *);
static void cpu_payload_add_icache(struct async_flt *, nvlist_t *);
#endif /* CPU_IMP_L1_CACHE_PARITY */

int (*p2get_mem_info)(int synd_code, uint64_t paddr,
    uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
    int *segsp, int *banksp, int *mcidp);

/*
 * This table is used to determine which bit or bits are bad when an ECC
 * error occurs.  The array is indexed by a 9-bit syndrome.  The entries
 * of this array have the following semantics:
 *
 *      00-127  The number of the bad bit, when only one bit is bad.
 *      128     ECC bit C0 is bad.
 *      129     ECC bit C1 is bad.
 *      130     ECC bit C2 is bad.
 *      131     ECC bit C3 is bad.
 *      132     ECC bit C4 is bad.
 *      133     ECC bit C5 is bad.
 *      134     ECC bit C6 is bad.
 *      135     ECC bit C7 is bad.
 *      136     ECC bit C8 is bad.
 *      137-143 reserved for Mtag Data and ECC.
 *      144(M2) Two bits are bad within a nibble.
 *      145(M3) Three bits are bad within a nibble.
 *      146(M4) Four bits are bad within a nibble.
 *      147(M)  Multiple bits (5 or more) are bad.
 *      148     NO bits are bad.
 * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-4, 11-5.
 */

#define C0      128
#define C1      129
#define C2      130
#define C3      131
#define C4      132
#define C5      133
#define C6      134
#define C7      135
#define C8      136
#define MT0     137     /* Mtag Data bit 0 */
#define MT1     138
#define MT2     139
#define MTC0    140     /* Mtag Check bit 0 */
#define MTC1    141
#define MTC2    142
#define MTC3    143
#define M2      144
#define M3      145
#define M4      146
#define M       147
#define NA      148
#if defined(JALAPENO) || defined(SERRANO)
#define S003    149     /* Syndrome 0x003 => likely from CPU/EDU:ST/FRU/BP */
#define S003MEM 150     /* Syndrome 0x003 => likely from WDU/WBP */
#define SLAST   S003MEM /* last special syndrome */
#else /* JALAPENO || SERRANO */
#define S003    149     /* Syndrome 0x003 => likely from EDU:ST */
#define S071    150     /* Syndrome 0x071 => likely from WDU/CPU */
#define S11C    151     /* Syndrome 0x11c => likely from BERR/DBERR */
#define SLAST   S11C    /* last special syndrome */
#endif /* JALAPENO || SERRANO */
#if defined(JALAPENO) || defined(SERRANO)
#define BPAR0   152     /* syndromes 152 through 167 for bus parity */
#define BPAR15  167
#endif /* JALAPENO || SERRANO */

static uint8_t ecc_syndrome_tab[] =
{
NA, C0, C1, S003, C2, M2, M3, 47, C3, M2, M2, 53, M2, 41, 29, M,
C4, M, M, 50, M2, 38, 25, M2, M2, 33, 24, M2, 11, M, M2, 16,
C5, M, M, 46, M2, 37, 19, M2, M, 31, 32, M, 7, M2, M2, 10,
M2, 40, 13, M2, 59, M, M2, 66, M, M2, M2, 0, M2, 67, 71, M,
C6, M, M, 43, M, 36, 18, M, M2, 49, 15, M, 63, M2, M2, 6,
M2, 44, 28, M2, M, M2, M2, 52, 68, M2, M2, 62, M2, M3, M3, M4,
M2, 26, 106, M2, 64, M, M2, 2, 120, M, M2, M3, M, M3, M3, M4,
#if defined(JALAPENO) || defined(SERRANO)
116, M2, M2, M3, M2, M3, M, M4, M2, 58, 54, M2, M, M4, M4, M3,
#else /* JALAPENO || SERRANO */
116, S071, M2, M3, M2, M3, M, M4, M2, 58, 54, M2, M, M4, M4, M3,
#endif /* JALAPENO || SERRANO */
C7, M2, M, 42, M, 35, 17, M2, M, 45, 14, M2, 21, M2, M2, 5,
M, 27, M, M, 99, M, M, 3, 114, M2, M2, 20, M2, M3, M3, M,
M2, 23, 113, M2, 112, M2, M, 51, 95, M, M2, M3, M2, M3, M3, M2,
103, M, M2, M3, M2, M3, M3, M4, M2, 48, M, M, 73, M2, M, M3,
M2, 22, 110, M2, 109, M2, M, 9, 108, M2, M, M3, M2, M3, M3, M,
102, M2, M, M, M2, M3, M3, M, M2, M3, M3, M2, M, M4, M, M3,
98, M, M2, M3, M2, M, M3, M4, M2, M3, M3, M4, M3, M, M, M,
M2, M3, M3, M, M3, M, M, M, 56, M4, M, M3, M4, M, M, M,
C8, M, M2, 39, M, 34, 105, M2, M, 30, 104, M, 101, M, M, 4,
#if defined(JALAPENO) || defined(SERRANO)
M, M, 100, M, 83, M, M2, 12, 87, M, M, 57, M2, M, M3, M,
#else /* JALAPENO || SERRANO */
M, M, 100, M, 83, M, M2, 12, 87, M, M, 57, S11C, M, M3, M,
#endif /* JALAPENO || SERRANO */
M2, 97, 82, M2, 78, M2, M2, 1, 96, M, M, M, M, M, M3, M2,
94, M, M2, M3, M2, M, M3, M, M2, M, 79, M, 69, M, M4, M,
M2, 93, 92, M, 91, M, M2, 8, 90, M2, M2, M, M, M, M, M4,
89, M, M, M3, M2, M3, M3, M, M, M, M3, M2, M3, M2, M, M3,
86, M, M2, M3, M2, M, M3, M, M2, M, M3, M, M3, M, M, M3,
M, M, M3, M2, M3, M2, M4, M, 60, M, M2, M3, M4, M, M, M2,
M2, 88, 85, M2, 84, M, M2, 55, 81, M2, M2, M3, M2, M3, M3, M4,
77, M, M, M, M2, M3, M, M, M2, M3, M3, M4, M3, M2, M, M,
74, M, M2, M3, M, M, M3, M, M, M, M3, M, M3, M, M4, M3,
M2, 70, 107, M4, 65, M2, M2, M, 127, M, M, M, M2, M3, M3, M,
80, M2, M2, 72, M, 119, 118, M, M2, 126, 76, M, 125, M, M4, M3,
M2, 115, 124, M, 75, M, M, M3, 61, M, M4, M, M4, M, M, M,
M, 123, 122, M4, 121, M4, M, M3, 117, M2, M2, M3, M4, M3, M, M,
111, M, M, M, M4, M3, M3, M, M, M, M3, M, M3, M2, M, M
};

#define ESYND_TBL_SIZE  (sizeof (ecc_syndrome_tab) / sizeof (uint8_t))
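
/*
 * Editorial sketch (not part of the original source): one way the
 * syndrome table above can be consulted.  A bounds-checked lookup
 * returns the table code, which callers then classify as a single bad
 * data bit (0-127), an ECC check bit (C0-C8), a Mtag code, one of the
 * multi-bit codes (M2/M3/M4/M), or NA.  The guard macro and helper
 * name are hypothetical.
 */
#ifdef ECC_SYNDROME_TAB_EXAMPLE
static int
ecc_synd_to_code(ushort_t synd)
{
        if (synd >= ESYND_TBL_SIZE)
                return (-1);    /* not a valid 9-bit syndrome */
        return (ecc_syndrome_tab[synd]);
}
#endif /* ECC_SYNDROME_TAB_EXAMPLE */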

#if !(defined(JALAPENO) || defined(SERRANO))
/*
 * This table is used to determine which bit or bits are bad when a Mtag
 * error occurs.  The array is indexed by a 4-bit ECC syndrome.  The entries
 * of this array have the following semantics:
 *
 *      -1      Invalid mtag syndrome.
 *      137     Mtag Data 0 is bad.
 *      138     Mtag Data 1 is bad.
 *      139     Mtag Data 2 is bad.
 *      140     Mtag ECC 0 is bad.
 *      141     Mtag ECC 1 is bad.
 *      142     Mtag ECC 2 is bad.
 *      143     Mtag ECC 3 is bad.
 * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Table 11-6.
 */
short mtag_syndrome_tab[] =
{
NA, MTC0, MTC1, M2, MTC2, M2, M2, MT0, MTC3, M2, M2, MT1, M2, MT2, M2, M2
};

#define MSYND_TBL_SIZE  (sizeof (mtag_syndrome_tab) / sizeof (short))
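
/*
 * Editorial sketch (not part of the original source): the 4-bit Mtag
 * syndrome indexes mtag_syndrome_tab the same way, with out-of-range
 * syndromes rejected as the -1 "invalid" code described above.  The
 * guard macro and helper name are hypothetical.
 */
#ifdef MTAG_SYNDROME_TAB_EXAMPLE
static short
mtag_synd_to_code(ushort_t synd)
{
        if (synd >= MSYND_TBL_SIZE)
                return (-1);    /* invalid mtag syndrome */
        return (mtag_syndrome_tab[synd]);
}
#endif /* MTAG_SYNDROME_TAB_EXAMPLE */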

#else /* !(JALAPENO || SERRANO) */

#define BSYND_TBL_SIZE  16

#endif /* !(JALAPENO || SERRANO) */

/*
 * Virtual Address bit flag in the data cache. This is actually bit 2 in the
 * dcache data tag.
 */
#define VA13    INT64_C(0x0000000000000002)

/*
 * Types returned from cpu_error_to_resource_type()
 */
#define ERRTYPE_UNKNOWN         0
#define ERRTYPE_CPU             1
#define ERRTYPE_MEMORY          2
#define ERRTYPE_ECACHE_DATA     3

/*
 * CE initial classification and subsequent action lookup table
 */
static ce_dispact_t ce_disp_table[CE_INITDISPTBL_SIZE];
static int ce_disp_inited;

/*
 * Set to disable leaky and partner check for memory correctables
 */
int ce_xdiag_off;

/*
 * The following are not incremented atomically so are indicative only
 */
static int ce_xdiag_drops;
static int ce_xdiag_lkydrops;
static int ce_xdiag_ptnrdrops;
static int ce_xdiag_bad;

/*
 * CE leaky check callback structure
 */
typedef struct {
        struct async_flt *lkycb_aflt;
        errorq_t *lkycb_eqp;
        errorq_elem_t *lkycb_eqep;
} ce_lkychk_cb_t;

/*
 * defines for various ecache_flush_flag's
 */
#define ECACHE_FLUSH_LINE       1
#define ECACHE_FLUSH_ALL        2

/*
 * STICK sync
 */
#define STICK_ITERATION 10
#define MAX_TSKEW       1
#define EV_A_START      0
#define EV_A_END        1
#define EV_B_START      2
#define EV_B_END        3
#define EVENTS          4

static int64_t stick_iter = STICK_ITERATION;
static int64_t stick_tsk = MAX_TSKEW;

typedef enum {
        EVENT_NULL = 0,
        SLAVE_START,
        SLAVE_CONT,
        MASTER_START
} event_cmd_t;

static volatile event_cmd_t stick_sync_cmd = EVENT_NULL;
static int64_t timestamp[EVENTS];
static volatile int slave_done;

#ifdef DEBUG
#define DSYNC_ATTEMPTS 64
typedef struct {
        int64_t skew_val[DSYNC_ATTEMPTS];
} ss_t;

ss_t stick_sync_stats[NCPU];
#endif /* DEBUG */

uint_t cpu_impl_dual_pgsz = 0;
#if defined(CPU_IMP_DUAL_PAGESIZE)
uint_t disable_dual_pgsz = 0;
#endif /* CPU_IMP_DUAL_PAGESIZE */

/*
 * Save the cache bootup state for use when internal
 * caches are to be re-enabled after an error occurs.
 */
uint64_t cache_boot_state;

/*
 * PA[22:0] represents the displacement in Safari configuration space.
 */
uint_t root_phys_addr_lo_mask = 0x7fffffu;

bus_config_eclk_t bus_config_eclk[] = {
#if defined(JALAPENO) || defined(SERRANO)
        {JBUS_CONFIG_ECLK_1_DIV, JBUS_CONFIG_ECLK_1},
        {JBUS_CONFIG_ECLK_2_DIV, JBUS_CONFIG_ECLK_2},
        {JBUS_CONFIG_ECLK_32_DIV, JBUS_CONFIG_ECLK_32},
#else /* JALAPENO || SERRANO */
        {SAFARI_CONFIG_ECLK_1_DIV, SAFARI_CONFIG_ECLK_1},
        {SAFARI_CONFIG_ECLK_2_DIV, SAFARI_CONFIG_ECLK_2},
        {SAFARI_CONFIG_ECLK_32_DIV, SAFARI_CONFIG_ECLK_32},
#endif /* JALAPENO || SERRANO */
        {0, 0}
};

/*
 * Interval for deferred CEEN reenable
 */
int cpu_ceen_delay_secs = CPU_CEEN_DELAY_SECS;

/*
 * set in /etc/system to control logging of user BERR/TO's
 */
int cpu_berr_to_verbose = 0;

/*
 * set to 0 in /etc/system to defer CEEN reenable for all CEs
 */
uint64_t cpu_ce_not_deferred = CPU_CE_NOT_DEFERRED;
uint64_t cpu_ce_not_deferred_ext = CPU_CE_NOT_DEFERRED_EXT;

/*
 * Set of all offline cpus
 */
cpuset_t cpu_offline_set;

static void cpu_delayed_check_ce_errors(void *);
static void cpu_check_ce_errors(void *);
void cpu_error_ecache_flush(ch_async_flt_t *);
static int cpu_error_ecache_flush_required(ch_async_flt_t *);
static void cpu_log_and_clear_ce(ch_async_flt_t *);
void cpu_ce_detected(ch_cpu_errors_t *, int);

/*
 * CE Leaky check timeout in microseconds.  This is chosen to be twice the
 * memory refresh interval of current DIMMs (64ms).  After the initial fix,
 * this gives at least one full refresh cycle in which the cell can leak
 * (whereafter further refreshes simply reinforce any incorrect bit value).
 */
clock_t cpu_ce_lkychk_timeout_usec = 128000;
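
/*
 * Editorial sketch (hypothetical, not part of the original source): a
 * microsecond tunable like the one above is typically armed with
 * timeout(9F) after conversion to clock ticks via drv_usectohz(9F):
 *
 *      (void) timeout(lkychk_cb_func, lkychk_cb_arg,
 *          drv_usectohz(cpu_ce_lkychk_timeout_usec));
 *
 * where lkychk_cb_func/lkychk_cb_arg stand in for the leaky-check
 * callback and its argument.
 */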

/*
 * CE partner check partner caching period in seconds
 */
int cpu_ce_ptnr_cachetime_sec = 60;

/*
 * Sets trap table entry ttentry by overwriting eight instructions from ttlabel
 */
#define CH_SET_TRAP(ttentry, ttlabel)                   \
                bcopy((const void *)&ttlabel, &ttentry, 32);            \
                flush_instr_mem((caddr_t)&ttentry, 32);
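
/*
 * Editorial sketch (hypothetical names, not part of the original
 * source): CH_SET_TRAP patches a trap table entry in place by copying
 * eight 4-byte instructions (32 bytes) from the replacement handler
 * and flushing the instruction memory so the new text is fetched:
 *
 *      CH_SET_TRAP(tt_pil15, ch_pil15_interrupt_instr);
 *
 * where tt_pil15 names a trap table slot and ch_pil15_interrupt_instr
 * labels the first instruction of the replacement handler.
 */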

static int min_ecache_size;
static uint_t priv_hcl_1;
static uint_t priv_hcl_2;
static uint_t priv_hcl_4;
static uint_t priv_hcl_8;

void
cpu_setup(void)
{
        extern int at_flags;
        extern int cpc_has_overflow_intr;

        /*
         * Setup chip-specific trap handlers.
         */
        cpu_init_trap();

        cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);

        at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;

        /*
         * save the cache bootup state.
         */
        cache_boot_state = get_dcu() & DCU_CACHE;

        /*
         * Due to the number of entries in the fully-associative tlb
         * this may have to be tuned lower than in spitfire.
         */
        pp_slots = MIN(8, MAXPP_SLOTS);

        /*
         * Block stores do not invalidate all pages of the d$, pagecopy
         * et al. need virtual translations with virtual coloring taken
         * into consideration.  prefetch/ldd will pollute the d$ on the
         * load side.
         */
        pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;

        if (use_page_coloring) {
                do_pg_coloring = 1;
        }

        isa_list =
            "sparcv9+vis2 sparcv9+vis sparcv9 "
            "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
            "sparcv8 sparcv8-fsmuld sparcv7 sparc";

        /*
         * On Panther-based machines, this should
         * also include AV_SPARC_POPC too
         */
        cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2;

        /*
         * On cheetah, there's no hole in the virtual address space
         */
        hole_start = hole_end = 0;

        /*
         * The kpm mapping window.
         * kpm_size:
         *      The size of a single kpm range.
         *      The overall size will be: kpm_size * vac_colors.
         * kpm_vbase:
         *      The virtual start address of the kpm range within the kernel
         *      virtual address space. kpm_vbase has to be kpm_size aligned.
         */
        kpm_size = (size_t)(8ull * 1024 * 1024 * 1024 * 1024); /* 8TB */
        kpm_size_shift = 43;
        kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
        kpm_smallpages = 1;

        /*
         * The traptrace code uses either %tick or %stick for
         * timestamping.  We have %stick so we can use it.
         */
        traptrace_use_stick = 1;

        /*
         * Cheetah has a performance counter overflow interrupt
         */
        cpc_has_overflow_intr = 1;

#if defined(CPU_IMP_DUAL_PAGESIZE)
        /*
         * Use Cheetah+ and later dual page size support.
         */
        if (!disable_dual_pgsz) {
                cpu_impl_dual_pgsz = 1;
        }
#endif  /* CPU_IMP_DUAL_PAGESIZE */

        /*
         * Declare that this architecture/cpu combination does fpRAS.
         */
        fpras_implemented = 1;

        /*
         * Setup CE lookup table
         */
        CE_INITDISPTBL_POPULATE(ce_disp_table);
        ce_disp_inited = 1;
}

/*
 * Called by setcpudelay
 */
void
cpu_init_tick_freq(void)
{
        /*
         * For UltraSPARC III and beyond we want to use the
         * system clock rate as the basis for low level timing,
         * due to support of mixed speed CPUs and power management.
         */
        if (system_clock_freq == 0)
                cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");

        sys_tick_freq = system_clock_freq;
}

#ifdef CHEETAHPLUS_ERRATUM_25
/*
 * Tunables
 */
int cheetah_bpe_off = 0;
int cheetah_sendmondo_recover = 1;
int cheetah_sendmondo_fullscan = 0;
int cheetah_sendmondo_recover_delay = 5;

#define CHEETAH_LIVELOCK_MIN_DELAY      1

/*
 * Recovery Statistics
 */
typedef struct cheetah_livelock_entry {
        int cpuid;              /* fallen cpu */
        int buddy;              /* cpu that ran recovery */
        clock_t lbolt;          /* when recovery started */
        hrtime_t recovery_time; /* time spent in recovery */
} cheetah_livelock_entry_t;

#define CHEETAH_LIVELOCK_NENTRY 32

cheetah_livelock_entry_t cheetah_livelock_hist[CHEETAH_LIVELOCK_NENTRY];
int cheetah_livelock_entry_nxt;

#define CHEETAH_LIVELOCK_ENTRY_NEXT(statp) {                            \
        statp = cheetah_livelock_hist + cheetah_livelock_entry_nxt;     \
        if (++cheetah_livelock_entry_nxt >= CHEETAH_LIVELOCK_NENTRY) {  \
                cheetah_livelock_entry_nxt = 0;                         \
        }                                                               \
}

#define CHEETAH_LIVELOCK_ENTRY_SET(statp, item, val)    statp->item = val

struct {
        hrtime_t hrt;           /* maximum recovery time */
        int recovery;           /* recovered */
        int full_claimed;       /* maximum pages claimed in full recovery */
        int proc_entry;         /* attempted to claim TSB */
        int proc_tsb_scan;      /* tsb scanned */
        int proc_tsb_partscan;  /* tsb partially scanned */
        int proc_tsb_fullscan;  /* whole tsb scanned */
        int proc_claimed;       /* maximum pages claimed in tsb scan */
        int proc_user;          /* user thread */
        int proc_kernel;        /* kernel thread */
        int proc_onflt;         /* bad stack */
        int proc_cpu;           /* null cpu */
        int proc_thread;        /* null thread */
        int proc_proc;          /* null proc */
        int proc_as;            /* null as */
        int proc_hat;           /* null hat */
        int proc_hat_inval;     /* hat contents don't make sense */
        int proc_hat_busy;      /* hat is changing TSBs */
        int proc_tsb_reloc;     /* TSB skipped because being relocated */
        int proc_cnum_bad;      /* cnum out of range */
        int proc_cnum;          /* last cnum processed */
        tte_t proc_tte;         /* last tte processed */
} cheetah_livelock_stat;

#define CHEETAH_LIVELOCK_STAT(item)     cheetah_livelock_stat.item++

#define CHEETAH_LIVELOCK_STATSET(item, value)           \
        cheetah_livelock_stat.item = value

#define CHEETAH_LIVELOCK_MAXSTAT(item, value) {         \
        if (value > cheetah_livelock_stat.item)         \
                cheetah_livelock_stat.item = value;     \
}

/*
 * Attempt to recover a cpu by claiming every cache line as saved
 * in the TSB that the non-responsive cpu is using.  Since we can't
 * grab any adaptive lock, this is at best an attempt to do so.  Because
 * we don't grab any locks, we must operate under the protection of
 * on_fault().
 *
 * Return 1 if cpuid could be recovered, 0 if failed.
 */
int
mondo_recover_proc(uint16_t cpuid, int bn)
{
        label_t ljb;
        cpu_t *cp;
        kthread_t *t;
        proc_t *p;
        struct as *as;
        struct hat *hat;
        uint_t cnum;
        struct tsb_info *tsbinfop;
        struct tsbe *tsbep;
        caddr_t tsbp;
        caddr_t end_tsbp;
        uint64_t paddr;
        uint64_t idsr;
        u_longlong_t pahi, palo;
        int pages_claimed = 0;
        tte_t tsbe_tte;
        int tried_kernel_tsb = 0;
        mmu_ctx_t *mmu_ctxp;

        CHEETAH_LIVELOCK_STAT(proc_entry);

        if (on_fault(&ljb)) {
                CHEETAH_LIVELOCK_STAT(proc_onflt);
                goto badstruct;
        }

        if ((cp = cpu[cpuid]) == NULL) {
                CHEETAH_LIVELOCK_STAT(proc_cpu);
                goto badstruct;
        }

        if ((t = cp->cpu_thread) == NULL) {
                CHEETAH_LIVELOCK_STAT(proc_thread);
                goto badstruct;
        }

        if ((p = ttoproc(t)) == NULL) {
                CHEETAH_LIVELOCK_STAT(proc_proc);
                goto badstruct;
        }

        if ((as = p->p_as) == NULL) {
                CHEETAH_LIVELOCK_STAT(proc_as);
                goto badstruct;
        }

        if ((hat = as->a_hat) == NULL) {
                CHEETAH_LIVELOCK_STAT(proc_hat);
                goto badstruct;
        }

        if (hat != ksfmmup) {
                CHEETAH_LIVELOCK_STAT(proc_user);
                if (hat->sfmmu_flags & (HAT_BUSY | HAT_SWAPPED | HAT_SWAPIN)) {
                        CHEETAH_LIVELOCK_STAT(proc_hat_busy);
                        goto badstruct;
                }
                tsbinfop = hat->sfmmu_tsb;
                if (tsbinfop == NULL) {
                        CHEETAH_LIVELOCK_STAT(proc_hat_inval);
                        goto badstruct;
                }
                tsbp = tsbinfop->tsb_va;
                end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
        } else {
                CHEETAH_LIVELOCK_STAT(proc_kernel);
                tsbinfop = NULL;
                tsbp = ktsb_base;
                end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
        }

        /* Verify as */
        if (hat->sfmmu_as != as) {
                CHEETAH_LIVELOCK_STAT(proc_hat_inval);
                goto badstruct;
        }

        mmu_ctxp = CPU_MMU_CTXP(cp);
        ASSERT(mmu_ctxp);
        cnum = hat->sfmmu_ctxs[mmu_ctxp->mmu_idx].cnum;
        CHEETAH_LIVELOCK_STATSET(proc_cnum, cnum);

        if ((cnum < 0) || (cnum == INVALID_CONTEXT) ||
            (cnum >= mmu_ctxp->mmu_nctxs)) {
                CHEETAH_LIVELOCK_STAT(proc_cnum_bad);
                goto badstruct;
        }

        do {
                CHEETAH_LIVELOCK_STAT(proc_tsb_scan);

                /*
                 * Skip TSBs being relocated.  This is important because
                 * we want to avoid the following deadlock scenario:
                 *
                 * 1) when we came in we set ourselves to "in recover" state.
                 * 2) when we try to touch TSB being relocated the mapping
                 *    will be in the suspended state so we'll spin waiting
                 *    for it to be unlocked.
                 * 3) when the CPU that holds the TSB mapping locked tries to
                 *    unlock it it will send a xtrap which will fail to xcall
                 *    us or the CPU we're trying to recover, and will in turn
                 *    enter the mondo code.
                 * 4) since we are still spinning on the locked mapping
                 *    no further progress will be made and the system will
                 *    inevitably hard hang.
                 *
                 * A TSB not being relocated can't begin being relocated
                 * while we're accessing it because we check
                 * sendmondo_in_recover before relocating TSBs.
                 */
                if (hat != ksfmmup &&
                    (tsbinfop->tsb_flags & TSB_RELOC_FLAG) != 0) {
                        CHEETAH_LIVELOCK_STAT(proc_tsb_reloc);
                        goto next_tsbinfo;
                }

                for (tsbep = (struct tsbe *)tsbp;
                    tsbep < (struct tsbe *)end_tsbp; tsbep++) {
                        tsbe_tte = tsbep->tte_data;

                        if (tsbe_tte.tte_val == 0) {
                                /*
                                 * Invalid tte
                                 */
                                continue;
                        }
                        if (tsbe_tte.tte_se) {
                                /*
                                 * Don't want device registers
                                 */
                                continue;
                        }
                        if (tsbe_tte.tte_cp == 0) {
                                /*
                                 * Must be cached in E$
                                 */
                                continue;
                        }
                        if (tsbep->tte_tag.tag_invalid != 0) {
                                /*
                                 * Invalid tag, ignore this entry.
                                 */
                                continue;
                        }
                        CHEETAH_LIVELOCK_STATSET(proc_tte, tsbe_tte);
                        idsr = getidsr();
                        if ((idsr & (IDSR_NACK_BIT(bn) |
                            IDSR_BUSY_BIT(bn))) == 0) {
                                CHEETAH_LIVELOCK_STAT(proc_tsb_partscan);
                                goto done;
                        }
                        pahi = tsbe_tte.tte_pahi;
                        palo = tsbe_tte.tte_palo;
                        paddr = (uint64_t)((pahi << 32) |
                            (palo << MMU_PAGESHIFT));
                        claimlines(paddr, TTEBYTES(TTE_CSZ(&tsbe_tte)),
                            CH_ECACHE_SUBBLK_SIZE);
                        if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
                                shipit(cpuid, bn);
                        }
                        pages_claimed++;
                }
next_tsbinfo:
                if (tsbinfop != NULL)
                        tsbinfop = tsbinfop->tsb_next;
                if (tsbinfop != NULL) {
                        tsbp = tsbinfop->tsb_va;
                        end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
                } else if (tsbp == ktsb_base) {
                        tried_kernel_tsb = 1;
                } else if (!tried_kernel_tsb) {
                        tsbp = ktsb_base;
                        end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
                        hat = ksfmmup;
                        tsbinfop = NULL;
                }
        } while (tsbinfop != NULL ||
            ((tsbp == ktsb_base) && !tried_kernel_tsb));

        CHEETAH_LIVELOCK_STAT(proc_tsb_fullscan);
        CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
        no_fault();
        idsr = getidsr();
        if ((idsr & (IDSR_NACK_BIT(bn) |
            IDSR_BUSY_BIT(bn))) == 0) {
                return (1);
        } else {
                return (0);
        }

done:
        no_fault();
        CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
        return (1);

badstruct:
        no_fault();
        return (0);
}

/*
 * Attempt to claim ownership, temporarily, of every cache line that a
 * non-responsive cpu might be using.  This might kick that cpu out of
 * this state.
 *
 * The return value indicates to the caller if we have exhausted all recovery
 * techniques.  If 1 is returned, it is useless to call this function again
 * even for a different target CPU.
 */
int
mondo_recover(uint16_t cpuid, int bn)
{
        struct memseg *seg;
        uint64_t begin_pa, end_pa, cur_pa;
        hrtime_t begin_hrt, end_hrt;
        int retval = 0;
        int pages_claimed = 0;
        cheetah_livelock_entry_t *histp;
        uint64_t idsr;

        if (cas32(&sendmondo_in_recover, 0, 1) != 0) {
                /*
                 * Wait while recovery takes place
                 */
                while (sendmondo_in_recover) {
                        drv_usecwait(1);
                }
                /*
                 * Assume we didn't claim the whole memory.  If
                 * the target of this caller is not recovered,
                 * it will come back.
                 */
                return (retval);
        }

        CHEETAH_LIVELOCK_ENTRY_NEXT(histp);
        CHEETAH_LIVELOCK_ENTRY_SET(histp, lbolt, LBOLT_WAITFREE);
        CHEETAH_LIVELOCK_ENTRY_SET(histp, cpuid, cpuid);
        CHEETAH_LIVELOCK_ENTRY_SET(histp, buddy, CPU->cpu_id);

        begin_hrt = gethrtime_waitfree();
        /*
         * First try to claim the lines in the TSB the target
         * may have been using.
         */
        if (mondo_recover_proc(cpuid, bn) == 1) {
                /*
                 * Didn't claim the whole memory
                 */
                goto done;
        }

        /*
         * We tried using the TSB.  The target is still
         * not recovered.  Check if complete memory scan is
         * enabled.
         */
        if (cheetah_sendmondo_fullscan == 0) {
                /*
                 * Full memory scan is disabled.
                 */
                retval = 1;
                goto done;
        }

        /*
         * Try claiming the whole memory.
         */
        for (seg = memsegs; seg; seg = seg->next) {
                begin_pa = (uint64_t)(seg->pages_base) << MMU_PAGESHIFT;
                end_pa = (uint64_t)(seg->pages_end) << MMU_PAGESHIFT;
                for (cur_pa = begin_pa; cur_pa < end_pa;
                    cur_pa += MMU_PAGESIZE) {
                        idsr = getidsr();
                        if ((idsr & (IDSR_NACK_BIT(bn) |
                            IDSR_BUSY_BIT(bn))) == 0) {
                                /*
                                 * Didn't claim all memory
                                 */
                                goto done;
                        }
                        claimlines(cur_pa, MMU_PAGESIZE,
                            CH_ECACHE_SUBBLK_SIZE);
                        if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
                                shipit(cpuid, bn);
                        }
                        pages_claimed++;
                }
        }

        /*
         * We did all we could.
         */
        retval = 1;

done:
        /*
         * Update statistics
         */
        end_hrt = gethrtime_waitfree();
        CHEETAH_LIVELOCK_STAT(recovery);
        CHEETAH_LIVELOCK_MAXSTAT(hrt, (end_hrt - begin_hrt));
        CHEETAH_LIVELOCK_MAXSTAT(full_claimed, pages_claimed);
        CHEETAH_LIVELOCK_ENTRY_SET(histp, recovery_time, \
            (end_hrt - begin_hrt));

        while (cas32(&sendmondo_in_recover, 1, 0) != 1)
                ;

        return (retval);
}

/*
 * This is called by the cyclic framework when this CPU becomes online
 */
/*ARGSUSED*/
static void
cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
{

        hdlr->cyh_func = (cyc_func_t)cheetah_nudge_buddy;
        hdlr->cyh_level = CY_LOW_LEVEL;
        hdlr->cyh_arg = NULL;

        /*
         * Stagger the start time
         */
        when->cyt_when = cpu->cpu_id * (NANOSEC / NCPU);
        if (cheetah_sendmondo_recover_delay < CHEETAH_LIVELOCK_MIN_DELAY) {
                cheetah_sendmondo_recover_delay = CHEETAH_LIVELOCK_MIN_DELAY;
        }
        when->cyt_interval = cheetah_sendmondo_recover_delay * NANOSEC;
}

/*
 * Create a low level cyclic to send an xtrap to the next cpu online.
 * However, there's no need to have this running on a uniprocessor system.
 */
static void
cheetah_nudge_init(void)
{
        cyc_omni_handler_t hdlr;

        if (max_ncpus == 1) {
                return;
        }

        hdlr.cyo_online = cheetah_nudge_onln;
        hdlr.cyo_offline = NULL;
        hdlr.cyo_arg = NULL;

        mutex_enter(&cpu_lock);
        (void) cyclic_add_omni(&hdlr);
        mutex_exit(&cpu_lock);
}

/*
 * Cyclic handler to wake up buddy
 */
void
cheetah_nudge_buddy(void)
{
        /*
         * Disable kernel preemption to protect the cpu list
         */
        kpreempt_disable();
        if ((CPU->cpu_next_onln != CPU) && (sendmondo_in_recover == 0)) {
                xt_one(CPU->cpu_next_onln->cpu_id, (xcfunc_t *)xt_sync_tl1,
                    0, 0);
        }
        kpreempt_enable();
}

#endif /* CHEETAHPLUS_ERRATUM_25 */

#ifdef SEND_MONDO_STATS
uint32_t x_one_stimes[64];
uint32_t x_one_ltimes[16];
uint32_t x_set_stimes[64];
uint32_t x_set_ltimes[16];
uint32_t x_set_cpus[NCPU];
uint32_t x_nack_stimes[64];
#endif

/*
 * Note: A version of this function is used by the debugger via the KDI,
 * and must be kept in sync with this version.  Any changes made to this
 * function to support new chips or to accommodate errata must also be
 * included in the KDI-specific version.  See us3_kdi.c.
 */
void
send_one_mondo(int cpuid)
{
        int busy, nack;
        uint64_t idsr, starttick, endtick, tick, lasttick;
        uint64_t busymask;
#ifdef CHEETAHPLUS_ERRATUM_25
        int recovered = 0;
#endif

        CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
        starttick = lasttick = gettick();
        shipit(cpuid, 0);
        endtick = starttick + xc_tick_limit;
        busy = nack = 0;
#if defined(JALAPENO) || defined(SERRANO)
        /*
         * Lower 2 bits of the agent ID determine which BUSY/NACK pair
         * will be used for dispatching interrupt.  For now, assume
         * there are no more than IDSR_BN_SETS CPUs, hence no aliasing
         * issues with respect to BUSY/NACK pair usage.
         */
        busymask = IDSR_BUSY_BIT(cpuid);
#else /* JALAPENO || SERRANO */
        busymask = IDSR_BUSY;
#endif /* JALAPENO || SERRANO */
        for (;;) {
                idsr = getidsr();
                if (idsr == 0)
                        break;

                tick = gettick();
                /*
                 * If there is a big jump between the current tick
                 * count and lasttick, we have probably hit a breakpoint.
                 * Adjust endtick accordingly to avoid panic.
                 */
                if (tick > (lasttick + xc_tick_jump_limit))
                        endtick += (tick - lasttick);
                lasttick = tick;
                if (tick > endtick) {
                        if (panic_quiesce)
                                return;
#ifdef CHEETAHPLUS_ERRATUM_25
                        if (cheetah_sendmondo_recover && recovered == 0) {
                                if (mondo_recover(cpuid, 0)) {
                                        /*
                                         * We claimed the whole memory or
                                         * full scan is disabled.
                                         */
                                        recovered++;
                                }
                                tick = gettick();
                                endtick = tick + xc_tick_limit;
                                lasttick = tick;
                                /*
                                 * Recheck idsr
                                 */
                                continue;
                        } else
#endif /* CHEETAHPLUS_ERRATUM_25 */
                        {
                                cmn_err(CE_PANIC, "send mondo timeout "
                                    "(target 0x%x) [%d NACK %d BUSY]",
                                    cpuid, nack, busy);
                        }
                }

                if (idsr & busymask) {
                        busy++;
                        continue;
                }
                drv_usecwait(1);
                shipit(cpuid, 0);
                nack++;
                busy = 0;
        }
#ifdef SEND_MONDO_STATS
        {
                int n = gettick() - starttick;
                if (n < 8192)
                        x_one_stimes[n >> 7]++;
                else
                        x_one_ltimes[(n >> 13) & 0xf]++;
        }
#endif
}
11580Sstevel@tonic-gate
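/*
 * The following is an illustrative, compiled-out sketch of the retry
 * pattern send_one_mondo() uses above: poll against a deadline derived
 * from a free-running tick counter, and push the deadline out when a
 * large jump between successive reads suggests time spent stopped at a
 * breakpoint. read_tick(), poll_hw() and retry_hw() are hypothetical
 * stand-ins, not kernel interfaces.
 */
#if 0
static int
retry_until_deadline(uint64_t limit, uint64_t jump_limit)
{
        uint64_t start = read_tick();           /* hypothetical tick source */
        uint64_t end = start + limit;
        uint64_t last = start, now;

        for (;;) {
                if (poll_hw() == 0)
                        return (0);             /* target responded */
                now = read_tick();
                if (now > last + jump_limit)    /* debugger stop, etc. */
                        end += now - last;
                last = now;
                if (now > end)
                        return (-1);            /* genuine timeout */
                retry_hw();
        }
}
#endif
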
void
syncfpu(void)
{
}

/*
 * Return the size of the processor-specific async fault structure.
 */
int
cpu_aflt_size(void)
{
        return (sizeof (ch_async_flt_t));
}

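/*
 * Compiled-out usage sketch: generic error code that does not know the
 * ch_async_flt_t type can size a scratch buffer via cpu_aflt_size().
 * The caller shown here is hypothetical; kmem_zalloc()/kmem_free() are
 * the standard kernel allocator entry points.
 */
#if 0
static void
example_aflt_buffer(void)
{
        size_t sz = (size_t)cpu_aflt_size();
        void *buf = kmem_zalloc(sz, KM_SLEEP); /* holds one ch_async_flt_t */

        /* ... pass buf to common error-handling code ... */
        kmem_free(buf, sz);
}
#endif
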
/*
 * Tunable to disable the checking of other cpu logout areas during panic for
 * potential syndrome 71 generating errors.
 */
int enable_check_other_cpus_logout = 1;

/*
 * Check other cpus logout area for potential synd 71 generating
 * errors.
 */
static void
cpu_check_cpu_logout(int cpuid, caddr_t tpc, int tl, int ecc_type,
    ch_cpu_logout_t *clop)
{
        struct async_flt *aflt;
        ch_async_flt_t ch_flt;
        uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;

        if (clop == NULL || clop->clo_data.chd_afar == LOGOUT_INVALID) {
                return;
        }

        bzero(&ch_flt, sizeof (ch_async_flt_t));

        t_afar = clop->clo_data.chd_afar;
        t_afsr = clop->clo_data.chd_afsr;
        t_afsr_ext = clop->clo_data.chd_afsr_ext;
#if defined(SERRANO)
        ch_flt.afar2 = clop->clo_data.chd_afar2;
#endif /* SERRANO */

        /*
         * In order to simplify code, we maintain this afsr_errs
         * variable which holds the aggregate of AFSR and AFSR_EXT
         * sticky bits.
         */
        t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
            (t_afsr & C_AFSR_ALL_ERRS);

        /* Setup the async fault structure */
        aflt = (struct async_flt *)&ch_flt;
        aflt->flt_id = gethrtime_waitfree();
        ch_flt.afsr_ext = t_afsr_ext;
        ch_flt.afsr_errs = t_afsr_errs;
        aflt->flt_stat = t_afsr;
        aflt->flt_addr = t_afar;
        aflt->flt_bus_id = cpuid;
        aflt->flt_inst = cpuid;
        aflt->flt_pc = tpc;
        aflt->flt_prot = AFLT_PROT_NONE;
        aflt->flt_class = CPU_FAULT;
        aflt->flt_priv = ((t_afsr & C_AFSR_PRIV) != 0);
        aflt->flt_tl = tl;
        aflt->flt_status = ecc_type;
        aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);

        /*
         * Queue events on the async event queue, one event per error bit.
         * If no events are queued, queue an event to complain.
         */
        if (cpu_queue_events(&ch_flt, NULL, t_afsr_errs, clop) == 0) {
                ch_flt.flt_type = CPU_INV_AFSR;
                cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
                    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
                    aflt->flt_panic);
        }

        /*
         * Zero out + invalidate CPU logout.
         */
        bzero(clop, sizeof (ch_cpu_logout_t));
        clop->clo_data.chd_afar = LOGOUT_INVALID;
}

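/*
 * Compiled-out sketch of the "one event per error bit" idea behind the
 * cpu_queue_events() call above: peel off the lowest set bit of the
 * aggregated afsr_errs mask and emit one event per bit. emit_event()
 * is a hypothetical hook; the real cpu_queue_events() does
 * considerably more.
 */
#if 0
static int
queue_one_event_per_bit(uint64_t errs)
{
        int nevents = 0;

        while (errs != 0) {
                uint64_t bit = errs & -errs;    /* isolate lowest set bit */

                emit_event(bit);                /* hypothetical */
                errs &= ~bit;
                nevents++;
        }
        return (nevents);                       /* 0 means "complain" */
}
#endif
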
/*
 * Check the logout areas of all other cpus for unlogged errors.
 */
static void
cpu_check_other_cpus_logout(void)
{
        int i, j;
        processorid_t myid;
        struct cpu *cp;
        ch_err_tl1_data_t *cl1p;

        myid = CPU->cpu_id;
        for (i = 0; i < NCPU; i++) {
                cp = cpu[i];

                if ((cp == NULL) || !(cp->cpu_flags & CPU_EXISTS) ||
                    (cp->cpu_id == myid) || (CPU_PRIVATE(cp) == NULL)) {
                        continue;
                }

                /*
                 * Check each of the tl>0 logout areas
                 */
                cl1p = CPU_PRIVATE_PTR(cp, chpr_tl1_err_data[0]);
                for (j = 0; j < CH_ERR_TL1_TLMAX; j++, cl1p++) {
                        if (cl1p->ch_err_tl1_flags == 0)
                                continue;

                        cpu_check_cpu_logout(i, (caddr_t)cl1p->ch_err_tl1_tpc,
                            1, ECC_F_TRAP, &cl1p->ch_err_tl1_logout);
                }

                /*
                 * Check each of the remaining logout areas
                 */
                cpu_check_cpu_logout(i, NULL, 0, ECC_F_TRAP,
                    CPU_PRIVATE_PTR(cp, chpr_fecctl0_logout));
                cpu_check_cpu_logout(i, NULL, 0, ECC_C_TRAP,
                    CPU_PRIVATE_PTR(cp, chpr_cecc_logout));
                cpu_check_cpu_logout(i, NULL, 0, ECC_D_TRAP,
                    CPU_PRIVATE_PTR(cp, chpr_async_logout));
        }
}

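/*
 * Compiled-out sketch of the CPU-walk guard used above: a visitor that
 * skips empty cpu[] slots, CPUs that don't exist, the calling CPU
 * itself, and CPUs whose private (per-CPU logout) area is not yet set
 * up. The visitor callback is hypothetical.
 */
#if 0
static void
for_each_other_cpu(void (*visit)(struct cpu *))
{
        processorid_t myid = CPU->cpu_id;
        int i;

        for (i = 0; i < NCPU; i++) {
                struct cpu *cp = cpu[i];

                if ((cp == NULL) || !(cp->cpu_flags & CPU_EXISTS) ||
                    (cp->cpu_id == myid) || (CPU_PRIVATE(cp) == NULL))
                        continue;
                visit(cp);
        }
}
#endif
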
/*
 * The fast_ecc_err handler transfers control here for UCU, UCC events.
 * Note that we flush Ecache twice, once in the fast_ecc_err handler to
 * flush the error that caused the UCU/UCC, then again here at the end to
 * flush the TL=1 trap handler code out of the Ecache, so we can minimize
 * the probability of getting a TL>1 Fast ECC trap when we're fielding
 * another Fast ECC trap.
 *
 * Cheetah+ also handles: TSCE: No additional processing required.
 * Panther adds L3_UCU and L3_UCC which are reported in AFSR_EXT.
 *
 * Note that the p_clo_flags input is only valid in cases where the
 * cpu_private struct is not yet initialized (since that is the only
 * time that information cannot be obtained from the logout struct.)
 */
/*ARGSUSED*/
void
cpu_fast_ecc_error(struct regs *rp, ulong_t p_clo_flags)
{
        ch_cpu_logout_t *clop;
        uint64_t ceen, nceen;

        /*
         * Get the CPU log out info. If we can't find our CPU private
         * pointer, then we will have to make do without any detailed
         * logout information.
         */
        if (CPU_PRIVATE(CPU) == NULL) {
                clop = NULL;
                ceen = p_clo_flags & EN_REG_CEEN;
                nceen = p_clo_flags & EN_REG_NCEEN;
        } else {
                clop = CPU_PRIVATE_PTR(CPU, chpr_fecctl0_logout);
                ceen = clop->clo_flags & EN_REG_CEEN;
                nceen = clop->clo_flags & EN_REG_NCEEN;
        }

        cpu_log_fast_ecc_error((caddr_t)rp->r_pc,
            (rp->r_tstate & TSTATE_PRIV) ? 1 : 0, 0, ceen, nceen, clop);
}

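/*
 * Compiled-out restatement of the flag-source selection above, for
 * reference: the enable bits come from the caller's p_clo_flags only
 * while CPU_PRIVATE(CPU) is still NULL; once the private logout area
 * exists, the clo_flags word in the logout structure is authoritative.
 */
#if 0
        uint64_t flags = (CPU_PRIVATE(CPU) == NULL) ? p_clo_flags :
            CPU_PRIVATE_PTR(CPU, chpr_fecctl0_logout)->clo_flags;
        uint64_t ceen = flags & EN_REG_CEEN;
        uint64_t nceen = flags & EN_REG_NCEEN;
#endif
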
/*
 * Log fast ecc error, called from either Fast ECC at TL=0 or Fast
 * ECC at TL>0. Need to supply either an error register pointer or a
 * cpu logout structure pointer.
 */
static void
cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
    uint64_t nceen, ch_cpu_logout_t *clop)
{
        struct async_flt *aflt;
        ch_async_flt_t ch_flt;
        uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
        char pr_reason[MAX_REASON_STRING];
        ch_cpu_errors_t cpu_error_regs;

        bzero(&ch_flt, sizeof (ch_async_flt_t));
        /*
         * If no cpu logout data, then we will have to make do without
         * any detailed logout information.
         */
        if (clop == NULL) {
                ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
                get_cpu_error_state(&cpu_error_regs);
                set_cpu_error_state(&cpu_error_regs);
                t_afar = cpu_error_regs.afar;
                t_afsr = cpu_error_regs.afsr;
                t_afsr_ext = cpu_error_regs.afsr_ext;
#if defined(SERRANO)
                ch_flt.afar2 = cpu_error_regs.afar2;
#endif /* SERRANO */
        } else {
                t_afar = clop->clo_data.chd_afar;
                t_afsr = clop->clo_data.chd_afsr;
                t_afsr_ext = clop->clo_data.chd_afsr_ext;
#if defined(SERRANO)
                ch_flt.afar2 = clop->clo_data.chd_afar2;
#endif /* SERRANO */
        }

        /*
         * In order to simplify code, we maintain this afsr_errs
         * variable which holds the aggregate of AFSR and AFSR_EXT
         * sticky bits.
         */
        t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
            (t_afsr & C_AFSR_ALL_ERRS);
        pr_reason[0] = '\0';

        /* Setup the async fault structure */
        aflt = (struct async_flt *)&ch_flt;
        aflt->flt_id = gethrtime_waitfree();
        ch_flt.afsr_ext = t_afsr_ext;
        ch_flt.afsr_errs = t_afsr_errs;
        aflt->flt_stat = t_afsr;
        aflt->flt_addr = t_afar;
        aflt->flt_bus_id = getprocessorid();
        aflt->flt_inst = CPU->cpu_id;
        aflt->flt_pc = tpc;
        aflt->flt_prot = AFLT_PROT_NONE;
        aflt->flt_class = CPU_FAULT;
        aflt->flt_priv = priv;
        aflt->flt_tl = tl;
        aflt->flt_status = ECC_F_TRAP;
        aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);

        /*
         * XXXX - Phenomenal hack to get around Solaris not getting all the
         * cmn_err messages out to the console. The situation is a UCU (in
         * priv mode) which causes a WDU which causes a UE (on the retry).
         * The messages for the UCU and WDU are enqueued and then pulled off
         * the async queue via softint and syslogd starts to process them
         * but doesn't get them to the console. The UE causes a panic, but
         * since the UCU/WDU messages are already in transit, those aren't
         * on the async queue. The hack is to check if we have a matching
         * WDU event for the UCU, and if it matches, we're more than likely
         * going to panic with a UE, unless we're under protection. So, we
         * check to see if we got a matching WDU event and if we're under
         * protection.
         *
         * For Cheetah/Cheetah+/Jaguar/Jalapeno, the sequence we care about
         * looks like this:
         *    UCU->WDU->UE
         * For Panther, it could look like either of these:
         *    UCU---->WDU->L3_WDU->UE
         *    L3_UCU->WDU->L3_WDU->UE
         */
        if ((t_afsr_errs & (C_AFSR_UCU | C_AFSR_L3_UCU)) &&
            aflt->flt_panic == 0 && aflt->flt_priv != 0 &&
            curthread->t_ontrap == NULL && curthread->t_lofault == NULL) {
                get_cpu_error_state(&cpu_error_regs);
                if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
                        aflt->flt_panic |=
                            ((cpu_error_regs.afsr & C_AFSR_WDU) &&
                            (cpu_error_regs.afsr_ext & C_AFSR_L3_WDU) &&
                            (cpu_error_regs.afar == t_afar));
                        aflt->flt_panic |= ((clop == NULL) &&
                            (t_afsr_errs & C_AFSR_WDU) &&
                            (t_afsr_errs & C_AFSR_L3_WDU));
                } else {
                        aflt->flt_panic |=
                            ((cpu_error_regs.afsr & C_AFSR_WDU) &&
                            (cpu_error_regs.afar == t_afar));
                        aflt->flt_panic |= ((clop == NULL) &&
                            (t_afsr_errs & C_AFSR_WDU));
                }
        }

        /*
         * Queue events on the async event queue, one event per error bit.
         * If no events are queued or no Fast ECC events are on in the AFSR,
         * queue an event to complain.
         */
        if (cpu_queue_events(&ch_flt, pr_reason, t_afsr_errs, clop) == 0 ||
            ((t_afsr_errs & (C_AFSR_FECC_ERRS | C_AFSR_EXT_FECC_ERRS)) == 0)) {
                ch_flt.flt_type = CPU_INV_AFSR;
                cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
                    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
                    aflt->flt_panic);
        }

        /*
         * Zero out + invalidate CPU logout.
         */
        if (clop) {
                bzero(clop, sizeof (ch_cpu_logout_t));
                clop->clo_data.chd_afar = LOGOUT_INVALID;
        }

        /*
         * We carefully re-enable NCEEN and CEEN and then check if any deferred
         * or disrupting errors have happened. We do this because if a
         * deferred or disrupting error had occurred with NCEEN/CEEN off, the
         * trap will not be taken when NCEEN/CEEN is re-enabled. Note that
         * CEEN works differently on Cheetah than on Spitfire. Also, we enable
         * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
         * deferred or disrupting error happening between checking the AFSR and
         * enabling NCEEN/CEEN.
         *
         * Note: CEEN and NCEEN are only re-enabled if they were on when the
         * trap was taken.
         */
        set_error_enable(get_error_enable() | (nceen | ceen));
        if (clear_errors(&ch_flt)) {
                aflt->flt_panic |= ((ch_flt.afsr_errs &
                    (C_AFSR_EXT_ASYNC_ERRS | C_AFSR_ASYNC_ERRS)) != 0);
                (void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
                    NULL);
        }

        /*
         * Panic here if aflt->flt_panic has been set. Enqueued errors will
         * be logged as part of the panic flow.
         */
        if (aflt->flt_panic)
                fm_panic("%sError(s)", pr_reason);

        /*
         * Flushing the Ecache here gets the part of the trap handler that
         * is run at TL=1 out of the Ecache.
         */
        cpu_flush_ecache();
}

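/*
 * Compiled-out condensation of the UCU->WDU panic heuristic above
 * (non-Panther case): panic early when the live AFSR already shows a
 * write-back data error (WDU) at the same AFAR as the UCU being logged,
 * since a UE on the retry is then very likely.
 */
#if 0
static int
ucu_wdu_matches(uint64_t live_afsr, uint64_t live_afar, uint64_t ucu_afar)
{
        return ((live_afsr & C_AFSR_WDU) != 0 && live_afar == ucu_afar);
}
#endif
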
/*
 * This is called via sys_trap from pil15_interrupt code if the
 * corresponding entry in ch_err_tl1_pending is set. Checks the
 * various ch_err_tl1_data structures for valid entries based on the bit
 * settings in the ch_err_tl1_flags entry of the structure.
 */
/*ARGSUSED*/
void
cpu_tl1_error(struct regs *rp, int panic)
{
        ch_err_tl1_data_t *cl1p, cl1;
        int i, ncl1ps;
        uint64_t me_flags;
        uint64_t ceen, nceen;

        if (ch_err_tl1_paddrs[CPU->cpu_id] == 0) {
                cl1p = &ch_err_tl1_data;
                ncl1ps = 1;
        } else if (CPU_PRIVATE(CPU) != NULL) {
                cl1p = CPU_PRIVATE_PTR(CPU, chpr_tl1_err_data[0]);
                ncl1ps = CH_ERR_TL1_TLMAX;
        } else {
                ncl1ps = 0;
        }

        for (i = 0; i < ncl1ps; i++, cl1p++) {
                if (cl1p->ch_err_tl1_flags == 0)
                        continue;

                /*
                 * Grab a copy of the logout data and invalidate
                 * the logout area.
                 */
                cl1 = *cl1p;
                bzero(cl1p, sizeof (ch_err_tl1_data_t));
                cl1p->ch_err_tl1_logout.clo_data.chd_afar = LOGOUT_INVALID;
                me_flags = CH_ERR_ME_FLAGS(cl1.ch_err_tl1_flags);

                /*
                 * Log "first error" in ch_err_tl1_data.
                 */
                if (cl1.ch_err_tl1_flags & CH_ERR_FECC) {
                        ceen = get_error_enable() & EN_REG_CEEN;
                        nceen = get_error_enable() & EN_REG_NCEEN;
                        cpu_log_fast_ecc_error((caddr_t)cl1.ch_err_tl1_tpc, 1,
                            1, ceen, nceen, &cl1.ch_err_tl1_logout);
                }
#if defined(CPU_IMP_L1_CACHE_PARITY)
                if (cl1.ch_err_tl1_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
                        cpu_parity_error(rp, cl1.ch_err_tl1_flags,
                            (caddr_t)cl1.ch_err_tl1_tpc);
                }
#endif /* CPU_IMP_L1_CACHE_PARITY */

                /*
                 * Log "multiple events" in ch_err_tl1_data. Note that
                 * we don't read and clear the AFSR/AFAR in the TL>0 code
                 * if the structure is busy, we just do the cache flushing
                 * we have to do and then do the retry. So the AFSR/AFAR
                 * at this point *should* have some relevant info. If there
                 * are no valid errors in the AFSR, we'll assume they've
                 * already been picked up and logged. For I$/D$ parity,
                 * we just log an event with an "Unknown" (NULL) TPC.
                 */
                if (me_flags & CH_ERR_FECC) {
                        ch_cpu_errors_t cpu_error_regs;
                        uint64_t t_afsr_errs;

                        /*
                         * Get the error registers and see if there's
                         * a pending error. If not, don't bother
                         * generating an "Invalid AFSR" error event.
                         */
                        get_cpu_error_state(&cpu_error_regs);
                        t_afsr_errs = (cpu_error_regs.afsr_ext &
                            C_AFSR_EXT_ALL_ERRS) |
                            (cpu_error_regs.afsr & C_AFSR_ALL_ERRS);
                        if (t_afsr_errs != 0) {
                                ceen = get_error_enable() & EN_REG_CEEN;
                                nceen = get_error_enable() & EN_REG_NCEEN;
                                cpu_log_fast_ecc_error((caddr_t)NULL, 1,
                                    1, ceen, nceen, NULL);
                        }
                }
#if defined(CPU_IMP_L1_CACHE_PARITY)
                if (me_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
                        cpu_parity_error(rp, me_flags, (caddr_t)NULL);
                }
#endif /* CPU_IMP_L1_CACHE_PARITY */
        }
}

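/*
 * Compiled-out sketch of the snapshot-and-invalidate step used per
 * ch_err_tl1_data entry above: copy the record to a local, then clear
 * and mark the shared slot invalid so a new TL>0 trap can reuse it
 * while we process the copy at TL=0.
 */
#if 0
        ch_err_tl1_data_t snap = *cl1p;                 /* local copy */

        bzero(cl1p, sizeof (ch_err_tl1_data_t));        /* release the slot */
        cl1p->ch_err_tl1_logout.clo_data.chd_afar = LOGOUT_INVALID;
        /* ... now examine snap.ch_err_tl1_flags, snap.ch_err_tl1_tpc ... */
#endif
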
/*
 * Called from Fast ECC TL>0 handler in case of fatal error.
 * cpu_tl1_error should always find an associated ch_err_tl1_data structure,
 * but if we don't, we'll panic with something reasonable.
 */
/*ARGSUSED*/
void
cpu_tl1_err_panic(struct regs *rp, ulong_t flags)
{
        cpu_tl1_error(rp, 1);
        /*
         * Should never return, but just in case.
         */
        fm_panic("Unsurvivable ECC Error at TL>0");
}

/*
 * The ce_err/ce_err_tl1 handlers transfer control here for CE, EMC, EDU:ST,
 * EDC, WDU, WDC, CPU, CPC, IVU, IVC events.
 * Disrupting errors controlled by NCEEN: EDU:ST, WDU, CPU, IVU
 * Disrupting errors controlled by CEEN: CE, EMC, EDC, WDC, CPC, IVC
 *
 * Cheetah+ also handles (No additional processing required):
 *    DUE, DTO, DBERR (NCEEN controlled)
 *    THCE (CEEN and ET_ECC_en controlled)
 *    TUE (ET_ECC_en controlled)
 *
 * Panther further adds:
 *    IMU, L3_EDU, L3_WDU, L3_CPU (NCEEN controlled)
 *    IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE (CEEN controlled)
 *    TUE_SH, TUE (NCEEN and L2_tag_ECC_en controlled)
 *    L3_TUE, L3_TUE_SH (NCEEN and ET_ECC_en controlled)
 *    THCE (CEEN and L2_tag_ECC_en controlled)
 *    L3_THCE (CEEN and ET_ECC_en controlled)
 *
 * Note that the p_clo_flags input is only valid in cases where the
 * cpu_private struct is not yet initialized (since that is the only
 * time that information cannot be obtained from the logout struct.)
 */
/*ARGSUSED*/
void
cpu_disrupting_error(struct regs *rp, ulong_t p_clo_flags)
{
        struct async_flt *aflt;
        ch_async_flt_t ch_flt;
        char pr_reason[MAX_REASON_STRING];
        ch_cpu_logout_t *clop;
        uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
        ch_cpu_errors_t cpu_error_regs;

        bzero(&ch_flt, sizeof (ch_async_flt_t));
        /*
         * Get the CPU log out info. If we can't find our CPU private
         * pointer, then we will have to make do without any detailed
         * logout information.
         */
        if (CPU_PRIVATE(CPU) == NULL) {
                clop = NULL;
                ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
                get_cpu_error_state(&cpu_error_regs);
                set_cpu_error_state(&cpu_error_regs);
                t_afar = cpu_error_regs.afar;
                t_afsr = cpu_error_regs.afsr;
                t_afsr_ext = cpu_error_regs.afsr_ext;
#if defined(SERRANO)
                ch_flt.afar2 = cpu_error_regs.afar2;
#endif /* SERRANO */
        } else {
                clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
                t_afar = clop->clo_data.chd_afar;
                t_afsr = clop->clo_data.chd_afsr;
                t_afsr_ext = clop->clo_data.chd_afsr_ext;
#if defined(SERRANO)
                ch_flt.afar2 = clop->clo_data.chd_afar2;
#endif /* SERRANO */
        }

        /*
         * In order to simplify code, we maintain this afsr_errs
         * variable which holds the aggregate of AFSR and AFSR_EXT
         * sticky bits.
         */
        t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
            (t_afsr & C_AFSR_ALL_ERRS);

        pr_reason[0] = '\0';
        /* Setup the async fault structure */
        aflt = (struct async_flt *)&ch_flt;
        ch_flt.afsr_ext = t_afsr_ext;
        ch_flt.afsr_errs = t_afsr_errs;
        aflt->flt_stat = t_afsr;
        aflt->flt_addr = t_afar;
        aflt->flt_pc = (caddr_t)rp->r_pc;
        aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0;
        aflt->flt_tl = 0;
        aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);

        /*
         * If this trap is a result of one of the errors not masked
         * by cpu_ce_not_deferred, we don't reenable CEEN. Instead
         * we indicate that a timeout is to be set later.
         */
        if (!(t_afsr_errs & (cpu_ce_not_deferred | cpu_ce_not_deferred_ext)) &&
            !aflt->flt_panic)
                ch_flt.flt_trapped_ce = CE_CEEN_DEFER | CE_CEEN_TRAPPED;
        else
                ch_flt.flt_trapped_ce = CE_CEEN_NODEFER | CE_CEEN_TRAPPED;

        /*
         * log the CE and clean up
         */
        cpu_log_and_clear_ce(&ch_flt);

        /*
         * We re-enable CEEN (if required) and check if any disrupting errors
         * have happened. We do this because if a disrupting error had occurred
         * with CEEN off, the trap will not be taken when CEEN is re-enabled.
         * Note that CEEN works differently on Cheetah than on Spitfire. Also,
         * we enable CEEN *before* checking the AFSR to avoid the small window
         * of an error happening between checking the AFSR and enabling CEEN.
         */
        if (ch_flt.flt_trapped_ce & CE_CEEN_NODEFER)
                set_error_enable(get_error_enable() | EN_REG_CEEN);
        if (clear_errors(&ch_flt)) {
                (void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
                    NULL);
        }

        /*
         * Panic here if aflt->flt_panic has been set. Enqueued errors will
         * be logged as part of the panic flow.
         */
        if (aflt->flt_panic)
                fm_panic("%sError(s)", pr_reason);
}

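/*
 * Compiled-out sketch of the CEEN defer decision above: re-enabling is
 * deferred (left to a later timeout) only when every posted error bit
 * falls outside the cpu_ce_not_deferred masks and we are not panicking.
 */
#if 0
static int
ceen_reenable_deferred(uint64_t errs, int panicking)
{
        uint64_t not_deferred = cpu_ce_not_deferred | cpu_ce_not_deferred_ext;

        return (!(errs & not_deferred) && !panicking);
}
#endif
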
/*
 * The async_err handler transfers control here for UE, EMU, EDU:BLD,
 * L3_EDU:BLD, TO, and BERR events.
 * Deferred errors controlled by NCEEN: UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
 *
 * Cheetah+: No additional errors handled.
 *
 * Note that the p_clo_flags input is only valid in cases where the
 * cpu_private struct is not yet initialized (since that is the only
 * time that information cannot be obtained from the logout struct.)
 */
/*ARGSUSED*/
void
cpu_deferred_error(struct regs *rp, ulong_t p_clo_flags)
{
        ushort_t ttype, tl;
        ch_async_flt_t ch_flt;
        struct async_flt *aflt;
        int trampolined = 0;
        char pr_reason[MAX_REASON_STRING];
        ch_cpu_logout_t *clop;
        uint64_t ceen, clo_flags;
        uint64_t log_afsr;
        uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
        ch_cpu_errors_t cpu_error_regs;
        int expected = DDI_FM_ERR_UNEXPECTED;
        ddi_acc_hdl_t *hp;

        /*
         * We need to look at p_flag to determine if the thread detected an
         * error while dumping core. We can't grab p_lock here, but it's ok
         * because we just need a consistent snapshot and we know that everyone
         * else will store a consistent set of bits while holding p_lock. We
         * don't have to worry about a race because SDOCORE is set once prior
         * to doing i/o from the process's address space and is never cleared.
         */
        uint_t pflag = ttoproc(curthread)->p_flag;

        bzero(&ch_flt, sizeof (ch_async_flt_t));
        /*
         * Get the CPU log out info. If we can't find our CPU private
         * pointer then we will have to make do without any detailed
         * logout information.
         */
        if (CPU_PRIVATE(CPU) == NULL) {
                clop = NULL;
                ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
                get_cpu_error_state(&cpu_error_regs);
                set_cpu_error_state(&cpu_error_regs);
                t_afar = cpu_error_regs.afar;
                t_afsr = cpu_error_regs.afsr;
                t_afsr_ext = cpu_error_regs.afsr_ext;
#if defined(SERRANO)
                ch_flt.afar2 = cpu_error_regs.afar2;
#endif /* SERRANO */
                clo_flags = p_clo_flags;
        } else {
                clop = CPU_PRIVATE_PTR(CPU, chpr_async_logout);
                t_afar = clop->clo_data.chd_afar;
                t_afsr = clop->clo_data.chd_afsr;
                t_afsr_ext = clop->clo_data.chd_afsr_ext;
#if defined(SERRANO)
                ch_flt.afar2 = clop->clo_data.chd_afar2;
#endif /* SERRANO */
                clo_flags = clop->clo_flags;
        }

        /*
         * In order to simplify code, we maintain this afsr_errs
         * variable which holds the aggregate of AFSR and AFSR_EXT
         * sticky bits.
         */
        t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
            (t_afsr & C_AFSR_ALL_ERRS);
        pr_reason[0] = '\0';

        /*
         * Grab information encoded into our clo_flags field.
         */
        ceen = clo_flags & EN_REG_CEEN;
        tl = (clo_flags & CLO_FLAGS_TL_MASK) >> CLO_FLAGS_TL_SHIFT;
        ttype = (clo_flags & CLO_FLAGS_TT_MASK) >> CLO_FLAGS_TT_SHIFT;

        /*
         * handle the specific error
         */
        aflt = (struct async_flt *)&ch_flt;
        aflt->flt_id = gethrtime_waitfree();
        aflt->flt_bus_id = getprocessorid();
        aflt->flt_inst = CPU->cpu_id;
        ch_flt.afsr_ext = t_afsr_ext;
        ch_flt.afsr_errs = t_afsr_errs;
        aflt->flt_stat = t_afsr;
        aflt->flt_addr = t_afar;
        aflt->flt_pc = (caddr_t)rp->r_pc;
        aflt->flt_prot = AFLT_PROT_NONE;
        aflt->flt_class = CPU_FAULT;
        aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0;
        aflt->flt_tl = (uchar_t)tl;
        aflt->flt_panic = ((tl != 0) || (aft_testfatal != 0) ||
            C_AFSR_PANIC(t_afsr_errs));
        aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
        aflt->flt_status = ((ttype == T_DATA_ERROR) ? ECC_D_TRAP : ECC_I_TRAP);

        /*
         * If the trap occurred in privileged mode at TL=0, we need to check to
         * see if we were executing in the kernel under on_trap() or t_lofault
         * protection. If so, modify the saved registers so that we return
         * from the trap to the appropriate trampoline routine.
         */
        if (aflt->flt_priv && tl == 0) {
                if (curthread->t_ontrap != NULL) {
                        on_trap_data_t *otp = curthread->t_ontrap;

                        if (otp->ot_prot & OT_DATA_EC) {
                                aflt->flt_prot = AFLT_PROT_EC;
                                otp->ot_trap |= OT_DATA_EC;
                                rp->r_pc = otp->ot_trampoline;
                                rp->r_npc = rp->r_pc + 4;
                                trampolined = 1;
                        }

                        if ((t_afsr & (C_AFSR_TO | C_AFSR_BERR)) &&
                            (otp->ot_prot & OT_DATA_ACCESS)) {
                                aflt->flt_prot = AFLT_PROT_ACCESS;
                                otp->ot_trap |= OT_DATA_ACCESS;
                                rp->r_pc = otp->ot_trampoline;
                                rp->r_npc = rp->r_pc + 4;
                                trampolined = 1;
                                /*
                                 * for peeks and caut_gets, errors are expected
                                 */
                                hp = (ddi_acc_hdl_t *)otp->ot_handle;
                                if (!hp)
                                        expected = DDI_FM_ERR_PEEK;
                                else if (hp->ah_acc.devacc_attr_access ==
                                    DDI_CAUTIOUS_ACC)
                                        expected = DDI_FM_ERR_EXPECTED;
                        }

                } else if (curthread->t_lofault) {
                        aflt->flt_prot = AFLT_PROT_COPY;
                        rp->r_g1 = EFAULT;
                        rp->r_pc = curthread->t_lofault;
                        rp->r_npc = rp->r_pc + 4;
                        trampolined = 1;
                }
        }

        /*
         * If we're in user mode or we're doing a protected copy, we either
         * want the ASTON code below to send a signal to the user process
         * or we want to panic if aft_panic is set.
         *
         * If we're in privileged mode and we're not doing a copy, then we
         * need to check if we've trampolined. If we haven't trampolined,
         * we should panic.
         */
        if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
                if (t_afsr_errs &
                    ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) &
                    ~(C_AFSR_BERR | C_AFSR_TO)))
                        aflt->flt_panic |= aft_panic;
        } else if (!trampolined) {
                aflt->flt_panic = 1;
        }

        /*
         * If we've trampolined due to a privileged TO or BERR, or if an
         * unprivileged TO or BERR occurred, we don't want to enqueue an
         * event for that TO or BERR. Queue all other events (if any) besides
         * the TO/BERR. Since we may not be enqueueing any events, we need to
         * ignore the number of events queued. If we haven't trampolined due
         * to a TO or BERR, just enqueue events normally.
         */
        log_afsr = t_afsr_errs;
        if (trampolined) {
                log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
        } else if (!aflt->flt_priv) {
                /*
                 * User mode, suppress messages if
                 * cpu_berr_to_verbose is not set.
                 */
                if (!cpu_berr_to_verbose)
                        log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
        }

        /*
         * Log any errors that occurred
         */
        if (((log_afsr &
            ((C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS) & ~C_AFSR_ME)) &&
            cpu_queue_events(&ch_flt, pr_reason, log_afsr, clop) == 0) ||
            (t_afsr_errs & (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) == 0) {
                ch_flt.flt_type = CPU_INV_AFSR;
                cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
                    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
                    aflt->flt_panic);
        }

        /*
         * Zero out + invalidate CPU logout.
         */
        if (clop) {
                bzero(clop, sizeof (ch_cpu_logout_t));
                clop->clo_data.chd_afar = LOGOUT_INVALID;
        }

#if defined(JALAPENO) || defined(SERRANO)
        /*
         * UE/RUE/BERR/TO: Call our bus nexus friends to check for
         * IO errors that may have resulted in this trap.
         */
        if (t_afsr & (C_AFSR_UE|C_AFSR_RUE|C_AFSR_TO|C_AFSR_BERR)) {
                cpu_run_bus_error_handlers(aflt, expected);
        }

        /*
         * UE/RUE: If UE or RUE is in memory, we need to flush the bad
         * line from the Ecache. We also need to query the bus nexus for
         * fatal errors. Attempts to do diagnostic read on caches may
         * introduce more errors (especially when the module is bad).
         */
        if (t_afsr & (C_AFSR_UE|C_AFSR_RUE)) {
                /*
                 * Ask our bus nexus friends if they have any fatal errors. If
                 * so, they will log appropriate error messages.
                 */
                if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
                        aflt->flt_panic = 1;

                /*
                 * We got a UE or RUE and are panicking, save the fault PA in
                 * a known location so that the platform specific panic code
                 * can check for copyback errors.
                 */
                if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
                        panic_aflt = *aflt;
                }
        }

        /*
         * Flush Ecache line or entire Ecache
         */
        if (t_afsr & (C_AFSR_UE | C_AFSR_RUE | C_AFSR_EDU | C_AFSR_BERR))
                cpu_error_ecache_flush(&ch_flt);
#else /* JALAPENO || SERRANO */
        /*
         * UE/BERR/TO: Call our bus nexus friends to check for
         * IO errors that may have resulted in this trap.
         */
        if (t_afsr & (C_AFSR_UE|C_AFSR_TO|C_AFSR_BERR)) {
                cpu_run_bus_error_handlers(aflt, expected);
        }

        /*
         * UE: If the UE is in memory, we need to flush the bad
         * line from the Ecache. We also need to query the bus nexus for
         * fatal errors. Attempts to do diagnostic read on caches may
         * introduce more errors (especially when the module is bad).
         */
        if (t_afsr & C_AFSR_UE) {
                /*
                 * Ask our legacy bus nexus friends if they have any fatal
                 * errors. If so, they will log appropriate error messages.
                 */
                if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
                        aflt->flt_panic = 1;

                /*
                 * We got a UE and are panicking, save the fault PA in a known
                 * location so that the platform specific panic code can check
                 * for copyback errors.
                 */
                if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
                        panic_aflt = *aflt;
                }
        }

        /*
         * Flush Ecache line or entire Ecache
         */
        if (t_afsr_errs &
            (C_AFSR_UE | C_AFSR_EDU | C_AFSR_BERR | C_AFSR_L3_EDU))
                cpu_error_ecache_flush(&ch_flt);
#endif /* JALAPENO || SERRANO */

        /*
         * We carefully re-enable NCEEN and CEEN and then check if any deferred
         * or disrupting errors have happened. We do this because if a
         * deferred or disrupting error had occurred with NCEEN/CEEN off, the
         * trap will not be taken when NCEEN/CEEN is re-enabled. Note that
         * CEEN works differently on Cheetah than on Spitfire. Also, we enable
         * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
         * deferred or disrupting error happening between checking the AFSR and
         * enabling NCEEN/CEEN.
         *
         * Note: CEEN is only re-enabled if it was on when the trap was taken.
         */
        set_error_enable(get_error_enable() | (EN_REG_NCEEN | ceen));
        if (clear_errors(&ch_flt)) {
                /*
                 * Check for secondary errors, and avoid panicking if we
                 * have them
                 */
                if (cpu_check_secondary_errors(&ch_flt, t_afsr_errs,
                    t_afar) == 0) {
                        aflt->flt_panic |= ((ch_flt.afsr_errs &
                            (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) != 0);
                }
                (void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
                    NULL);
        }

        /*
         * Panic here if aflt->flt_panic has been set. Enqueued errors will
         * be logged as part of the panic flow.
         */
        if (aflt->flt_panic)
                fm_panic("%sError(s)", pr_reason);

        /*
         * If we queued an error and we are going to return from the trap and
         * the error was in user mode or inside of a copy routine, set AST flag
         * so the queue will be drained before returning to user mode. The
         * AST processing will also act on our failure policy.
         */
        if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
                int pcb_flag = 0;

                if (t_afsr_errs &
                    ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) &
                    ~(C_AFSR_BERR | C_AFSR_TO)))
                        pcb_flag |= ASYNC_HWERR;

                if (t_afsr & C_AFSR_BERR)
                        pcb_flag |= ASYNC_BERR;

                if (t_afsr & C_AFSR_TO)
                        pcb_flag |= ASYNC_BTO;

                ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
                aston(curthread);
        }
}

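/*
 * Compiled-out sketch of the consumer side of the trampoline logic in
 * cpu_deferred_error() above: a driver brackets a risky access with
 * on_trap(), so a deferred bus error or timeout unwinds to the
 * trampoline instead of panicking the system. This is conceptually
 * what the DDI peek/poke and cautious-access routines do; the function
 * shown here is hypothetical.
 */
#if 0
static int
protected_read32(volatile uint32_t *addr, uint32_t *valp)
{
        on_trap_data_t otd;

        if (on_trap(&otd, OT_DATA_ACCESS) != 0) {
                no_trap();
                return (-1);    /* trampolined here on BERR/TO */
        }
        *valp = *addr;
        no_trap();
        return (0);
}
#endif
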
20700Sstevel@tonic-gate #if defined(CPU_IMP_L1_CACHE_PARITY)
20710Sstevel@tonic-gate /*
20720Sstevel@tonic-gate * Handling of data and instruction parity errors (traps 0x71, 0x72).
20730Sstevel@tonic-gate *
20740Sstevel@tonic-gate * For Panther, P$ data parity errors during floating point load hits
20750Sstevel@tonic-gate * are also detected (reported as TT 0x71) and handled by this trap
20760Sstevel@tonic-gate * handler.
20770Sstevel@tonic-gate *
20780Sstevel@tonic-gate * AFSR/AFAR are not set for parity errors, only TPC (a virtual address)
20790Sstevel@tonic-gate * is available.
20800Sstevel@tonic-gate */
20810Sstevel@tonic-gate /*ARGSUSED*/
20820Sstevel@tonic-gate void
cpu_parity_error(struct regs * rp,uint_t flags,caddr_t tpc)20830Sstevel@tonic-gate cpu_parity_error(struct regs *rp, uint_t flags, caddr_t tpc)
20840Sstevel@tonic-gate {
20850Sstevel@tonic-gate ch_async_flt_t ch_flt;
20860Sstevel@tonic-gate struct async_flt *aflt;
20870Sstevel@tonic-gate uchar_t tl = ((flags & CH_ERR_TL) != 0);
20880Sstevel@tonic-gate uchar_t iparity = ((flags & CH_ERR_IPE) != 0);
20890Sstevel@tonic-gate uchar_t panic = ((flags & CH_ERR_PANIC) != 0);
20900Sstevel@tonic-gate char *error_class;
2091*11991SChristopher.Baumbauer@Sun.COM int index, way, word;
2092*11991SChristopher.Baumbauer@Sun.COM ch_dc_data_t tmp_dcp;
2093*11991SChristopher.Baumbauer@Sun.COM int dc_set_size = dcache_size / CH_DCACHE_NWAY;
2094*11991SChristopher.Baumbauer@Sun.COM uint64_t parity_bits, pbits;
2095*11991SChristopher.Baumbauer@Sun.COM /* The parity bit array corresponds to the result of summing two bits */
2096*11991SChristopher.Baumbauer@Sun.COM static int parity_bits_popc[] = { 0, 1, 1, 0 };
20970Sstevel@tonic-gate
20980Sstevel@tonic-gate /*
20990Sstevel@tonic-gate * Log the error.
21000Sstevel@tonic-gate * For icache parity errors the fault address is the trap PC.
21010Sstevel@tonic-gate * For dcache/pcache parity errors the instruction would have to
21020Sstevel@tonic-gate * be decoded to determine the address and that isn't possible
21030Sstevel@tonic-gate * at high PIL.
21040Sstevel@tonic-gate */
21050Sstevel@tonic-gate bzero(&ch_flt, sizeof (ch_async_flt_t));
21060Sstevel@tonic-gate aflt = (struct async_flt *)&ch_flt;
21070Sstevel@tonic-gate aflt->flt_id = gethrtime_waitfree();
21080Sstevel@tonic-gate aflt->flt_bus_id = getprocessorid();
21090Sstevel@tonic-gate aflt->flt_inst = CPU->cpu_id;
21100Sstevel@tonic-gate aflt->flt_pc = tpc;
21110Sstevel@tonic-gate aflt->flt_addr = iparity ? (uint64_t)tpc : AFLT_INV_ADDR;
21120Sstevel@tonic-gate aflt->flt_prot = AFLT_PROT_NONE;
21130Sstevel@tonic-gate aflt->flt_class = CPU_FAULT;
21140Sstevel@tonic-gate aflt->flt_priv = (tl || (rp->r_tstate & TSTATE_PRIV)) ? 1 : 0;
21150Sstevel@tonic-gate aflt->flt_tl = tl;
21160Sstevel@tonic-gate aflt->flt_panic = panic;
21170Sstevel@tonic-gate aflt->flt_status = iparity ? ECC_IP_TRAP : ECC_DP_TRAP;
21180Sstevel@tonic-gate ch_flt.flt_type = iparity ? CPU_IC_PARITY : CPU_DC_PARITY;
21190Sstevel@tonic-gate
21200Sstevel@tonic-gate if (iparity) {
21210Sstevel@tonic-gate cpu_icache_parity_info(&ch_flt);
21220Sstevel@tonic-gate if (ch_flt.parity_data.ipe.cpl_off != -1)
21230Sstevel@tonic-gate error_class = FM_EREPORT_CPU_USIII_IDSPE;
21240Sstevel@tonic-gate else if (ch_flt.parity_data.ipe.cpl_way != -1)
21250Sstevel@tonic-gate error_class = FM_EREPORT_CPU_USIII_ITSPE;
21260Sstevel@tonic-gate else
21270Sstevel@tonic-gate error_class = FM_EREPORT_CPU_USIII_IPE;
21280Sstevel@tonic-gate aflt->flt_payload = FM_EREPORT_PAYLOAD_ICACHE_PE;
21290Sstevel@tonic-gate } else {
21300Sstevel@tonic-gate cpu_dcache_parity_info(&ch_flt);
2131*11991SChristopher.Baumbauer@Sun.COM if (ch_flt.parity_data.dpe.cpl_off != -1) {
2132*11991SChristopher.Baumbauer@Sun.COM /*
2133*11991SChristopher.Baumbauer@Sun.COM * If not at TL 0 and running on a Jalapeno processor,
2134*11991SChristopher.Baumbauer@Sun.COM * then process as a true ddspe. A true
2135*11991SChristopher.Baumbauer@Sun.COM * ddspe error can only occur if the way == 0
2136*11991SChristopher.Baumbauer@Sun.COM */
2137*11991SChristopher.Baumbauer@Sun.COM way = ch_flt.parity_data.dpe.cpl_way;
2138*11991SChristopher.Baumbauer@Sun.COM if ((tl == 0) && (way != 0) &&
2139*11991SChristopher.Baumbauer@Sun.COM IS_JALAPENO(cpunodes[CPU->cpu_id].implementation)) {
2140*11991SChristopher.Baumbauer@Sun.COM for (index = 0; index < dc_set_size;
2141*11991SChristopher.Baumbauer@Sun.COM index += dcache_linesize) {
2142*11991SChristopher.Baumbauer@Sun.COM get_dcache_dtag(index + way *
2143*11991SChristopher.Baumbauer@Sun.COM dc_set_size,
2144*11991SChristopher.Baumbauer@Sun.COM (uint64_t *)&tmp_dcp);
2145*11991SChristopher.Baumbauer@Sun.COM /*
2146*11991SChristopher.Baumbauer@Sun.COM * Check data array for even parity.
2147*11991SChristopher.Baumbauer@Sun.COM * The 8 parity bits are grouped into
2148*11991SChristopher.Baumbauer@Sun.COM * 4 pairs each of which covers a 64-bit
2149*11991SChristopher.Baumbauer@Sun.COM * word. The endianness is reversed
2150*11991SChristopher.Baumbauer@Sun.COM * -- the low-order parity bits cover
2151*11991SChristopher.Baumbauer@Sun.COM * the high-order data words.
2152*11991SChristopher.Baumbauer@Sun.COM */
2153*11991SChristopher.Baumbauer@Sun.COM parity_bits = tmp_dcp.dc_utag >> 8;
2154*11991SChristopher.Baumbauer@Sun.COM for (word = 0; word < 4; word++) {
2155*11991SChristopher.Baumbauer@Sun.COM pbits = (parity_bits >>
2156*11991SChristopher.Baumbauer@Sun.COM (6 - word * 2)) & 3;
2157*11991SChristopher.Baumbauer@Sun.COM if (((popc64(
2158*11991SChristopher.Baumbauer@Sun.COM tmp_dcp.dc_data[word]) +
2159*11991SChristopher.Baumbauer@Sun.COM parity_bits_popc[pbits]) &
2160*11991SChristopher.Baumbauer@Sun.COM 1) && (tmp_dcp.dc_tag &
2161*11991SChristopher.Baumbauer@Sun.COM VA13)) {
2162*11991SChristopher.Baumbauer@Sun.COM /* cleanup */
2163*11991SChristopher.Baumbauer@Sun.COM correct_dcache_parity(
2164*11991SChristopher.Baumbauer@Sun.COM dcache_size,
2165*11991SChristopher.Baumbauer@Sun.COM dcache_linesize);
2166*11991SChristopher.Baumbauer@Sun.COM if (cache_boot_state &
2167*11991SChristopher.Baumbauer@Sun.COM DCU_DC) {
2168*11991SChristopher.Baumbauer@Sun.COM flush_dcache();
2169*11991SChristopher.Baumbauer@Sun.COM }
2170*11991SChristopher.Baumbauer@Sun.COM
2171*11991SChristopher.Baumbauer@Sun.COM set_dcu(get_dcu() |
2172*11991SChristopher.Baumbauer@Sun.COM cache_boot_state);
2173*11991SChristopher.Baumbauer@Sun.COM return;
2174*11991SChristopher.Baumbauer@Sun.COM }
2175*11991SChristopher.Baumbauer@Sun.COM }
2176*11991SChristopher.Baumbauer@Sun.COM }
2177*11991SChristopher.Baumbauer@Sun.COM } /* (tl == 0) && (way != 0) && IS JALAPENO */
21780Sstevel@tonic-gate error_class = FM_EREPORT_CPU_USIII_DDSPE;
2179*11991SChristopher.Baumbauer@Sun.COM } else if (ch_flt.parity_data.dpe.cpl_way != -1)
21800Sstevel@tonic-gate error_class = FM_EREPORT_CPU_USIII_DTSPE;
21810Sstevel@tonic-gate else
21820Sstevel@tonic-gate error_class = FM_EREPORT_CPU_USIII_DPE;
21830Sstevel@tonic-gate aflt->flt_payload = FM_EREPORT_PAYLOAD_DCACHE_PE;
21840Sstevel@tonic-gate /*
21850Sstevel@tonic-gate * For panther we also need to check the P$ for parity errors.
21860Sstevel@tonic-gate */
21870Sstevel@tonic-gate if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
21880Sstevel@tonic-gate cpu_pcache_parity_info(&ch_flt);
21890Sstevel@tonic-gate if (ch_flt.parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
21900Sstevel@tonic-gate error_class = FM_EREPORT_CPU_USIII_PDSPE;
21910Sstevel@tonic-gate aflt->flt_payload =
21920Sstevel@tonic-gate FM_EREPORT_PAYLOAD_PCACHE_PE;
21930Sstevel@tonic-gate }
21940Sstevel@tonic-gate }
21950Sstevel@tonic-gate }
21960Sstevel@tonic-gate
21970Sstevel@tonic-gate cpu_errorq_dispatch(error_class, (void *)&ch_flt,
21980Sstevel@tonic-gate sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
21990Sstevel@tonic-gate
22000Sstevel@tonic-gate if (iparity) {
22010Sstevel@tonic-gate /*
22020Sstevel@tonic-gate * Invalidate entire I$.
22030Sstevel@tonic-gate * This is required due to the use of diagnostic ASI
22040Sstevel@tonic-gate * accesses that may result in a loss of I$ coherency.
22050Sstevel@tonic-gate */
22060Sstevel@tonic-gate if (cache_boot_state & DCU_IC) {
22070Sstevel@tonic-gate flush_icache();
22080Sstevel@tonic-gate }
22090Sstevel@tonic-gate /*
22100Sstevel@tonic-gate * According to section P.3.1 of the Panther PRM, we
22110Sstevel@tonic-gate * need to do a little more for recovery on those
22120Sstevel@tonic-gate * CPUs after encountering an I$ parity error.
22130Sstevel@tonic-gate */
22140Sstevel@tonic-gate if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
22150Sstevel@tonic-gate flush_ipb();
22160Sstevel@tonic-gate correct_dcache_parity(dcache_size,
22170Sstevel@tonic-gate dcache_linesize);
22180Sstevel@tonic-gate flush_pcache();
22190Sstevel@tonic-gate }
22200Sstevel@tonic-gate } else {
22210Sstevel@tonic-gate /*
22220Sstevel@tonic-gate * Since the valid bit is ignored when checking parity the
22230Sstevel@tonic-gate * D$ data and tag must also be corrected. Set D$ data bits
22240Sstevel@tonic-gate * to zero and set utag to 0, 1, 2, 3.
22250Sstevel@tonic-gate */
22260Sstevel@tonic-gate correct_dcache_parity(dcache_size, dcache_linesize);
22270Sstevel@tonic-gate
22280Sstevel@tonic-gate /*
22290Sstevel@tonic-gate * According to section P.3.3 of the Panther PRM, we
22300Sstevel@tonic-gate * need to do a little more for recovery on those
22310Sstevel@tonic-gate * CPUs after encountering a D$ or P$ parity error.
22320Sstevel@tonic-gate *
22330Sstevel@tonic-gate * As far as clearing P$ parity errors, it is enough to
22340Sstevel@tonic-gate * simply invalidate all entries in the P$ since P$ parity
22350Sstevel@tonic-gate * error traps are only generated for floating point load
22360Sstevel@tonic-gate * hits.
22370Sstevel@tonic-gate */
22380Sstevel@tonic-gate if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
22390Sstevel@tonic-gate flush_icache();
22400Sstevel@tonic-gate flush_ipb();
22410Sstevel@tonic-gate flush_pcache();
22420Sstevel@tonic-gate }
22430Sstevel@tonic-gate }
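	/*
	 * Illustrative sketch (commentary only, not compiled code): the
	 * repair above leaves each way of a corrected D$ index with
	 * zeroed data and a distinct utag so that no two ways alias,
	 * roughly:
	 *
	 *	for (way = 0; way < 4; way++) {
	 *		zero the data words of this way;
	 *		set utag = way;			(0, 1, 2, 3)
	 *	}
	 *
	 * correct_dcache_parity() performs the real work; the loop above
	 * only paraphrases its effect.
	 */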
22440Sstevel@tonic-gate
22450Sstevel@tonic-gate /*
22460Sstevel@tonic-gate * Invalidate entire D$ if it was enabled.
22470Sstevel@tonic-gate * This is done to avoid stale data in the D$ which might
22480Sstevel@tonic-gate * occur with the D$ disabled and the trap handler doing
22490Sstevel@tonic-gate * stores affecting lines already in the D$.
22500Sstevel@tonic-gate */
22510Sstevel@tonic-gate if (cache_boot_state & DCU_DC) {
22520Sstevel@tonic-gate flush_dcache();
22530Sstevel@tonic-gate }
22540Sstevel@tonic-gate
22550Sstevel@tonic-gate /*
22560Sstevel@tonic-gate * Restore caches to their bootup state.
22570Sstevel@tonic-gate */
22580Sstevel@tonic-gate set_dcu(get_dcu() | cache_boot_state);
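	/*
	 * Sketch of the disable/repair/restore pattern this handler
	 * relies on (commentary only). It assumes the DCU semantics used
	 * elsewhere in this file, where DCU_DC and DCU_IC gate the D$
	 * and I$ respectively:
	 *
	 *	uint64_t dcu = get_dcu();
	 *	set_dcu(dcu & ~(DCU_DC | DCU_IC));	caches off
	 *	...correct/flush the suspect lines...
	 *	set_dcu(get_dcu() | cache_boot_state);	boot state back
	 */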
22590Sstevel@tonic-gate
22600Sstevel@tonic-gate /*
22610Sstevel@tonic-gate * Panic here if aflt->flt_panic has been set. Enqueued errors will
22620Sstevel@tonic-gate * be logged as part of the panic flow.
22630Sstevel@tonic-gate */
22640Sstevel@tonic-gate if (aflt->flt_panic)
22650Sstevel@tonic-gate fm_panic("%sError(s)", iparity ? "IPE " : "DPE ");
22660Sstevel@tonic-gate
22670Sstevel@tonic-gate /*
22680Sstevel@tonic-gate * If this error occurred at TL>0 then flush the E$ here to reduce
22690Sstevel@tonic-gate * the chance of getting an unrecoverable Fast ECC error. This
22700Sstevel@tonic-gate * flush will evict the part of the parity trap handler that is run
22710Sstevel@tonic-gate * at TL>1.
22720Sstevel@tonic-gate */
22730Sstevel@tonic-gate if (tl) {
22740Sstevel@tonic-gate cpu_flush_ecache();
22750Sstevel@tonic-gate }
22760Sstevel@tonic-gate }
22770Sstevel@tonic-gate
22780Sstevel@tonic-gate /*
22790Sstevel@tonic-gate * On an I$ parity error, mark the appropriate entries in the ch_async_flt_t
22800Sstevel@tonic-gate * to indicate which portions of the captured data should be in the ereport.
22810Sstevel@tonic-gate */
22820Sstevel@tonic-gate void
22830Sstevel@tonic-gate cpu_async_log_ic_parity_err(ch_async_flt_t *ch_flt)
22840Sstevel@tonic-gate {
22850Sstevel@tonic-gate int way = ch_flt->parity_data.ipe.cpl_way;
22860Sstevel@tonic-gate int offset = ch_flt->parity_data.ipe.cpl_off;
22870Sstevel@tonic-gate int tag_index;
22880Sstevel@tonic-gate struct async_flt *aflt = (struct async_flt *)ch_flt;
22890Sstevel@tonic-gate
22900Sstevel@tonic-gate
22910Sstevel@tonic-gate if ((offset != -1) || (way != -1)) {
22920Sstevel@tonic-gate /*
22930Sstevel@tonic-gate * Parity error in I$ tag or data
22940Sstevel@tonic-gate */
22950Sstevel@tonic-gate tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
22960Sstevel@tonic-gate if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
22970Sstevel@tonic-gate ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
22980Sstevel@tonic-gate PN_ICIDX_TO_WAY(tag_index);
22990Sstevel@tonic-gate else
23000Sstevel@tonic-gate ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
23010Sstevel@tonic-gate CH_ICIDX_TO_WAY(tag_index);
23020Sstevel@tonic-gate ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
23030Sstevel@tonic-gate IC_LOGFLAG_MAGIC;
23040Sstevel@tonic-gate } else {
23050Sstevel@tonic-gate /*
23060Sstevel@tonic-gate * Parity error was not identified.
23070Sstevel@tonic-gate * Log tags and data for all ways.
23080Sstevel@tonic-gate */
23090Sstevel@tonic-gate for (way = 0; way < CH_ICACHE_NWAY; way++) {
23100Sstevel@tonic-gate tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
23110Sstevel@tonic-gate if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
23120Sstevel@tonic-gate ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
23130Sstevel@tonic-gate PN_ICIDX_TO_WAY(tag_index);
23140Sstevel@tonic-gate else
23150Sstevel@tonic-gate ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
23160Sstevel@tonic-gate CH_ICIDX_TO_WAY(tag_index);
23170Sstevel@tonic-gate ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
23180Sstevel@tonic-gate IC_LOGFLAG_MAGIC;
23190Sstevel@tonic-gate }
23200Sstevel@tonic-gate }
23210Sstevel@tonic-gate }
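/*
 * Hedged sketch (commentary only): the way number is recovered from the
 * diagnostic tag index with an implementation-specific macro, because
 * Panther encodes the way differently from other Cheetah variants:
 *
 *	way = IS_PANTHER(impl) ? PN_ICIDX_TO_WAY(idx) : CH_ICIDX_TO_WAY(idx);
 */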
23220Sstevel@tonic-gate
23230Sstevel@tonic-gate /*
23240Sstevel@tonic-gate * On a D$ parity error, mark the appropriate entries in the ch_async_flt_t
23250Sstevel@tonic-gate * to indicate which portions of the captured data should be in the ereport.
23260Sstevel@tonic-gate */
23270Sstevel@tonic-gate void
23280Sstevel@tonic-gate cpu_async_log_dc_parity_err(ch_async_flt_t *ch_flt)
23290Sstevel@tonic-gate {
23300Sstevel@tonic-gate int way = ch_flt->parity_data.dpe.cpl_way;
23310Sstevel@tonic-gate int offset = ch_flt->parity_data.dpe.cpl_off;
23320Sstevel@tonic-gate int tag_index;
23330Sstevel@tonic-gate
23340Sstevel@tonic-gate if (offset != -1) {
23350Sstevel@tonic-gate /*
23360Sstevel@tonic-gate * Parity error in D$ or P$ data array.
23370Sstevel@tonic-gate *
23380Sstevel@tonic-gate * First check to see whether the parity error is in D$ or P$
23390Sstevel@tonic-gate * since P$ data parity errors are reported in Panther using
23400Sstevel@tonic-gate * the same trap.
23410Sstevel@tonic-gate */
23420Sstevel@tonic-gate if (ch_flt->parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
23430Sstevel@tonic-gate tag_index = ch_flt->parity_data.dpe.cpl_pc[way].pc_idx;
23440Sstevel@tonic-gate ch_flt->parity_data.dpe.cpl_pc[way].pc_way =
23450Sstevel@tonic-gate CH_PCIDX_TO_WAY(tag_index);
23460Sstevel@tonic-gate ch_flt->parity_data.dpe.cpl_pc[way].pc_logflag =
23470Sstevel@tonic-gate PC_LOGFLAG_MAGIC;
23480Sstevel@tonic-gate } else {
23490Sstevel@tonic-gate tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
23500Sstevel@tonic-gate ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
23510Sstevel@tonic-gate CH_DCIDX_TO_WAY(tag_index);
23520Sstevel@tonic-gate ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
23530Sstevel@tonic-gate DC_LOGFLAG_MAGIC;
23540Sstevel@tonic-gate }
23550Sstevel@tonic-gate } else if (way != -1) {
23560Sstevel@tonic-gate /*
23570Sstevel@tonic-gate * Parity error in D$ tag.
23580Sstevel@tonic-gate */
23590Sstevel@tonic-gate tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
23600Sstevel@tonic-gate ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
23610Sstevel@tonic-gate CH_DCIDX_TO_WAY(tag_index);
23620Sstevel@tonic-gate ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
23630Sstevel@tonic-gate DC_LOGFLAG_MAGIC;
23640Sstevel@tonic-gate }
23650Sstevel@tonic-gate }
23660Sstevel@tonic-gate #endif /* CPU_IMP_L1_CACHE_PARITY */
23670Sstevel@tonic-gate
23680Sstevel@tonic-gate /*
23690Sstevel@tonic-gate * The cpu_async_log_err() function is called via the [uc]e_drain() function to
23700Sstevel@tonic-gate * post-process CPU events that are dequeued. As such, it can be invoked
23710Sstevel@tonic-gate * from softint context, from AST processing in the trap() flow, or from the
23720Sstevel@tonic-gate * panic flow. We decode the CPU-specific data, and take appropriate actions.
23730Sstevel@tonic-gate * Historically this entry point was used to log the actual cmn_err(9F) text;
23740Sstevel@tonic-gate * now with FMA it is used to prepare 'flt' to be converted into an ereport.
23750Sstevel@tonic-gate * With FMA this function now also returns a flag which indicates to the
23760Sstevel@tonic-gate * caller whether the ereport should be posted (1) or suppressed (0).
23770Sstevel@tonic-gate */
23780Sstevel@tonic-gate static int
23790Sstevel@tonic-gate cpu_async_log_err(void *flt, errorq_elem_t *eqep)
23800Sstevel@tonic-gate {
23810Sstevel@tonic-gate ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt;
23820Sstevel@tonic-gate struct async_flt *aflt = (struct async_flt *)flt;
2383917Selowe uint64_t errors;
23842895Svb70745 extern void memscrub_induced_error(void);
23850Sstevel@tonic-gate
23860Sstevel@tonic-gate switch (ch_flt->flt_type) {
23870Sstevel@tonic-gate case CPU_INV_AFSR:
23880Sstevel@tonic-gate /*
23890Sstevel@tonic-gate * If it is a disrupting trap and the AFSR is zero, then
23900Sstevel@tonic-gate * the event has probably already been noted. Do not post
23910Sstevel@tonic-gate * an ereport.
23920Sstevel@tonic-gate */
23930Sstevel@tonic-gate if ((aflt->flt_status & ECC_C_TRAP) &&
23940Sstevel@tonic-gate (!(aflt->flt_stat & C_AFSR_MASK)))
23950Sstevel@tonic-gate return (0);
23960Sstevel@tonic-gate else
23970Sstevel@tonic-gate return (1);
23980Sstevel@tonic-gate case CPU_TO:
23990Sstevel@tonic-gate case CPU_BERR:
24000Sstevel@tonic-gate case CPU_FATAL:
24010Sstevel@tonic-gate case CPU_FPUERR:
24020Sstevel@tonic-gate return (1);
24030Sstevel@tonic-gate
24040Sstevel@tonic-gate case CPU_UE_ECACHE_RETIRE:
24050Sstevel@tonic-gate cpu_log_err(aflt);
24060Sstevel@tonic-gate cpu_page_retire(ch_flt);
24070Sstevel@tonic-gate return (1);
24080Sstevel@tonic-gate
24090Sstevel@tonic-gate /*
24100Sstevel@tonic-gate * Cases where we may want to suppress logging or perform
24110Sstevel@tonic-gate * extended diagnostics.
24120Sstevel@tonic-gate */
24130Sstevel@tonic-gate case CPU_CE:
24140Sstevel@tonic-gate case CPU_EMC:
24150Sstevel@tonic-gate /*
24160Sstevel@tonic-gate * We want to skip logging and further classification
24170Sstevel@tonic-gate * only if ALL the following conditions are true:
24180Sstevel@tonic-gate *
24190Sstevel@tonic-gate * 1. There is only one error
24200Sstevel@tonic-gate * 2. That error is a correctable memory error
24210Sstevel@tonic-gate * 3. The error is caused by the memory scrubber (in
24220Sstevel@tonic-gate * which case the error will have occurred under
24230Sstevel@tonic-gate * on_trap protection)
24240Sstevel@tonic-gate * 4. The error is on a retired page
24250Sstevel@tonic-gate *
24260Sstevel@tonic-gate * Note: AFLT_PROT_EC is used in places other than the memory
24270Sstevel@tonic-gate * scrubber. However, none of those errors should occur
24280Sstevel@tonic-gate * on a retired page.
24290Sstevel@tonic-gate */
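		/*
		 * As a boolean sketch of the test below (commentary
		 * only):
		 *
		 *	only_ce  = (afsr_errs & all_err_bits) == C_AFSR_CE;
		 *	scrubber = (aflt->flt_prot == AFLT_PROT_EC);
		 *	retired  = page_retire_check(addr, NULL) == 0;
		 *	skip logging iff (only_ce && scrubber && retired)
		 */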
24300Sstevel@tonic-gate if ((ch_flt->afsr_errs &
24310Sstevel@tonic-gate (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_CE &&
24320Sstevel@tonic-gate aflt->flt_prot == AFLT_PROT_EC) {
24330Sstevel@tonic-gate
2434917Selowe if (page_retire_check(aflt->flt_addr, NULL) == 0) {
24355219Skm84432 if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
24360Sstevel@tonic-gate
24370Sstevel@tonic-gate /*
24380Sstevel@tonic-gate * Since we're skipping logging, we'll need
24390Sstevel@tonic-gate * to schedule the re-enabling of CEEN
24400Sstevel@tonic-gate */
24410Sstevel@tonic-gate (void) timeout(cpu_delayed_check_ce_errors,
2442946Smathue (void *)(uintptr_t)aflt->flt_inst,
2443946Smathue drv_usectohz((clock_t)cpu_ceen_delay_secs
24445219Skm84432 * MICROSEC));
24455219Skm84432 }
24465219Skm84432
24472895Svb70745 /*
24482895Svb70745 * Inform memscrubber - scrubbing induced
24492895Svb70745 * CE on a retired page.
24502895Svb70745 */
24512895Svb70745 memscrub_induced_error();
24522895Svb70745 return (0);
24530Sstevel@tonic-gate }
24540Sstevel@tonic-gate }
24550Sstevel@tonic-gate
24560Sstevel@tonic-gate /*
24570Sstevel@tonic-gate * Perform/schedule further classification actions, but
24580Sstevel@tonic-gate * only if the page is healthy (we don't want bad
24590Sstevel@tonic-gate * pages inducing too much diagnostic activity). If we could
24600Sstevel@tonic-gate * not find a page pointer then we also skip this. If
24610Sstevel@tonic-gate * ce_scrub_xdiag_recirc returns nonzero then it has chosen
24620Sstevel@tonic-gate * to copy and recirculate the event (for further diagnostics)
24630Sstevel@tonic-gate * and we should not proceed to log it here.
24640Sstevel@tonic-gate *
24650Sstevel@tonic-gate * This must be the last step here before the cpu_log_err()
24660Sstevel@tonic-gate * below - if an event recirculates, cpu_ce_log_err() will
24670Sstevel@tonic-gate * not call the current function but will proceed directly
24680Sstevel@tonic-gate * to cpu_ereport_post(), skipping the cpu_log_err() below.
24690Sstevel@tonic-gate *
24700Sstevel@tonic-gate * Note: Check cpu_impl_async_log_err if changing this
24710Sstevel@tonic-gate */
2472917Selowe if (page_retire_check(aflt->flt_addr, &errors) == EINVAL) {
2473917Selowe CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2474917Selowe CE_XDIAG_SKIP_NOPP);
2475917Selowe } else {
2476917Selowe if (errors != PR_OK) {
24770Sstevel@tonic-gate CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
24780Sstevel@tonic-gate CE_XDIAG_SKIP_PAGEDET);
24790Sstevel@tonic-gate } else if (ce_scrub_xdiag_recirc(aflt, ce_queue, eqep,
24800Sstevel@tonic-gate offsetof(ch_async_flt_t, cmn_asyncflt))) {
24810Sstevel@tonic-gate return (0);
24820Sstevel@tonic-gate }
24830Sstevel@tonic-gate }
24840Sstevel@tonic-gate /*FALLTHRU*/
24850Sstevel@tonic-gate
24860Sstevel@tonic-gate /*
24870Sstevel@tonic-gate * Cases where we just want to report the error and continue.
24880Sstevel@tonic-gate */
24890Sstevel@tonic-gate case CPU_CE_ECACHE:
24900Sstevel@tonic-gate case CPU_UE_ECACHE:
24910Sstevel@tonic-gate case CPU_IV:
24920Sstevel@tonic-gate case CPU_ORPH:
24930Sstevel@tonic-gate cpu_log_err(aflt);
24940Sstevel@tonic-gate return (1);
24950Sstevel@tonic-gate
24960Sstevel@tonic-gate /*
24970Sstevel@tonic-gate * Cases where we want to fall through to handle panicking.
24980Sstevel@tonic-gate */
24990Sstevel@tonic-gate case CPU_UE:
25000Sstevel@tonic-gate /*
25010Sstevel@tonic-gate * We want to skip logging in the same conditions as the
25020Sstevel@tonic-gate * CE case. In addition, we want to make sure we're not
25030Sstevel@tonic-gate * panicking.
25040Sstevel@tonic-gate */
25050Sstevel@tonic-gate if (!panicstr && (ch_flt->afsr_errs &
25060Sstevel@tonic-gate (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_UE &&
25070Sstevel@tonic-gate aflt->flt_prot == AFLT_PROT_EC) {
2508917Selowe if (page_retire_check(aflt->flt_addr, NULL) == 0) {
25090Sstevel@tonic-gate /* Zero the address to clear the error */
25100Sstevel@tonic-gate softcall(ecc_page_zero, (void *)aflt->flt_addr);
25112895Svb70745 /*
25122895Svb70745 * Inform memscrubber - scrubbing induced
25132895Svb70745 * UE on a retired page.
25142895Svb70745 */
25152895Svb70745 memscrub_induced_error();
25160Sstevel@tonic-gate return (0);
25170Sstevel@tonic-gate }
25180Sstevel@tonic-gate }
25190Sstevel@tonic-gate cpu_log_err(aflt);
25200Sstevel@tonic-gate break;
25210Sstevel@tonic-gate
25220Sstevel@tonic-gate default:
25230Sstevel@tonic-gate /*
25240Sstevel@tonic-gate * If the us3_common.c code doesn't know the flt_type, it may
25250Sstevel@tonic-gate * be an implementation-specific code. Call into the impldep
25260Sstevel@tonic-gate * backend to find out what to do: if it tells us to continue,
25270Sstevel@tonic-gate * break and handle as if falling through from a UE; if not,
25280Sstevel@tonic-gate * the impldep backend has handled the error and we're done.
25290Sstevel@tonic-gate */
25300Sstevel@tonic-gate switch (cpu_impl_async_log_err(flt, eqep)) {
25310Sstevel@tonic-gate case CH_ASYNC_LOG_DONE:
25320Sstevel@tonic-gate return (1);
25330Sstevel@tonic-gate case CH_ASYNC_LOG_RECIRC:
25340Sstevel@tonic-gate return (0);
25350Sstevel@tonic-gate case CH_ASYNC_LOG_CONTINUE:
25360Sstevel@tonic-gate break; /* continue on to handle UE-like error */
25370Sstevel@tonic-gate default:
25380Sstevel@tonic-gate cmn_err(CE_WARN, "discarding error 0x%p with "
25390Sstevel@tonic-gate "invalid fault type (0x%x)",
25400Sstevel@tonic-gate (void *)aflt, ch_flt->flt_type);
25410Sstevel@tonic-gate return (0);
25420Sstevel@tonic-gate }
25430Sstevel@tonic-gate }
25440Sstevel@tonic-gate
25450Sstevel@tonic-gate /* ... fall through from the UE case */
25460Sstevel@tonic-gate
25470Sstevel@tonic-gate if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
25480Sstevel@tonic-gate if (!panicstr) {
25490Sstevel@tonic-gate cpu_page_retire(ch_flt);
25500Sstevel@tonic-gate } else {
25510Sstevel@tonic-gate /*
25520Sstevel@tonic-gate * Clear UEs on panic so that we don't
25530Sstevel@tonic-gate * get haunted by them during panic or
25540Sstevel@tonic-gate * after reboot
25550Sstevel@tonic-gate */
25560Sstevel@tonic-gate cpu_clearphys(aflt);
25570Sstevel@tonic-gate (void) clear_errors(NULL);
25580Sstevel@tonic-gate }
25590Sstevel@tonic-gate }
25600Sstevel@tonic-gate
25610Sstevel@tonic-gate return (1);
25620Sstevel@tonic-gate }
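/*
 * Hedged usage sketch: drain-side callers treat the return value of
 * cpu_async_log_err() as "post an ereport?", as cpu_ce_log_err() below
 * does:
 *
 *	cpu_ereport_init(aflt);
 *	if (cpu_async_log_err(aflt, eqep))
 *		cpu_ereport_post(aflt);
 */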
25630Sstevel@tonic-gate
25640Sstevel@tonic-gate /*
25650Sstevel@tonic-gate * Retire the bad page that may contain the flushed error.
25660Sstevel@tonic-gate */
25670Sstevel@tonic-gate void
25680Sstevel@tonic-gate cpu_page_retire(ch_async_flt_t *ch_flt)
25690Sstevel@tonic-gate {
25700Sstevel@tonic-gate struct async_flt *aflt = (struct async_flt *)ch_flt;
2571917Selowe (void) page_retire(aflt->flt_addr, PR_UE);
25720Sstevel@tonic-gate }
25730Sstevel@tonic-gate
25740Sstevel@tonic-gate /*
25752381Smikechr * Return true if the error specified in the AFSR indicates
25762381Smikechr * an E$ data error (L2$ for Cheetah/Cheetah+/Jaguar, L3$
25772381Smikechr * for Panther, none for Jalapeno/Serrano).
25782381Smikechr */
25792381Smikechr /* ARGSUSED */
25802381Smikechr static int
25812381Smikechr cpu_error_is_ecache_data(int cpuid, uint64_t t_afsr)
25822381Smikechr {
25832381Smikechr #if defined(JALAPENO) || defined(SERRANO)
25842381Smikechr return (0);
25852381Smikechr #elif defined(CHEETAH_PLUS)
25862381Smikechr if (IS_PANTHER(cpunodes[cpuid].implementation))
25872381Smikechr return ((t_afsr & C_AFSR_EXT_L3_DATA_ERRS) != 0);
25882381Smikechr return ((t_afsr & C_AFSR_EC_DATA_ERRS) != 0);
25892381Smikechr #else /* CHEETAH_PLUS */
25902381Smikechr return ((t_afsr & C_AFSR_EC_DATA_ERRS) != 0);
25912381Smikechr #endif
25922381Smikechr }
25932381Smikechr
25942381Smikechr /*
25950Sstevel@tonic-gate * The cpu_log_err() function is called by cpu_async_log_err() to perform the
25960Sstevel@tonic-gate * generic event post-processing for correctable and uncorrectable memory,
25970Sstevel@tonic-gate * E$, and MTag errors. Historically this entry point was used to log bits of
25980Sstevel@tonic-gate * common cmn_err(9F) text; now with FMA it is used to prepare 'flt' to be
25990Sstevel@tonic-gate * converted into an ereport. In addition, it transmits the error to any
26000Sstevel@tonic-gate * platform-specific service-processor FRU logging routines, if available.
26010Sstevel@tonic-gate */
26020Sstevel@tonic-gate void
26030Sstevel@tonic-gate cpu_log_err(struct async_flt *aflt)
26040Sstevel@tonic-gate {
26050Sstevel@tonic-gate char unum[UNUM_NAMLEN];
26060Sstevel@tonic-gate int synd_status, synd_code, afar_status;
26070Sstevel@tonic-gate ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
26080Sstevel@tonic-gate
26092381Smikechr if (cpu_error_is_ecache_data(aflt->flt_inst, ch_flt->flt_bit))
26102381Smikechr aflt->flt_status |= ECC_ECACHE;
26112381Smikechr else
26122381Smikechr aflt->flt_status &= ~ECC_ECACHE;
26130Sstevel@tonic-gate /*
26140Sstevel@tonic-gate * Determine syndrome status.
26150Sstevel@tonic-gate */
26160Sstevel@tonic-gate synd_status = afsr_to_synd_status(aflt->flt_inst,
26170Sstevel@tonic-gate ch_flt->afsr_errs, ch_flt->flt_bit);
26180Sstevel@tonic-gate
26190Sstevel@tonic-gate /*
26200Sstevel@tonic-gate * Determine afar status.
26210Sstevel@tonic-gate */
26220Sstevel@tonic-gate if (pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
26230Sstevel@tonic-gate afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
26245219Skm84432 ch_flt->flt_bit);
26250Sstevel@tonic-gate else
26260Sstevel@tonic-gate afar_status = AFLT_STAT_INVALID;
26270Sstevel@tonic-gate
26282436Smb91622 synd_code = synd_to_synd_code(synd_status,
26292436Smb91622 aflt->flt_synd, ch_flt->flt_bit);
26302436Smb91622
26310Sstevel@tonic-gate /*
26320Sstevel@tonic-gate * If the afar status is not invalid, do a unum lookup.
26330Sstevel@tonic-gate */
26340Sstevel@tonic-gate if (afar_status != AFLT_STAT_INVALID) {
26352436Smb91622 (void) cpu_get_mem_unum_synd(synd_code, aflt, unum);
26360Sstevel@tonic-gate } else {
26370Sstevel@tonic-gate unum[0] = '\0';
26380Sstevel@tonic-gate }
26390Sstevel@tonic-gate
26400Sstevel@tonic-gate /*
26410Sstevel@tonic-gate * Do not send the fruid message (plat_ecc_error_data_t)
26420Sstevel@tonic-gate * to the SC if it can handle the enhanced error information
26430Sstevel@tonic-gate * (plat_ecc_error2_data_t) or when the tunable
26440Sstevel@tonic-gate * ecc_log_fruid_enable is set to 0.
26450Sstevel@tonic-gate */
26460Sstevel@tonic-gate
26470Sstevel@tonic-gate if (&plat_ecc_capability_sc_get &&
26480Sstevel@tonic-gate plat_ecc_capability_sc_get(PLAT_ECC_ERROR_MESSAGE)) {
26490Sstevel@tonic-gate if (&plat_log_fruid_error)
26500Sstevel@tonic-gate plat_log_fruid_error(synd_code, aflt, unum,
26510Sstevel@tonic-gate ch_flt->flt_bit);
26520Sstevel@tonic-gate }
26530Sstevel@tonic-gate
26540Sstevel@tonic-gate if (aflt->flt_func != NULL)
26550Sstevel@tonic-gate aflt->flt_func(aflt, unum);
26560Sstevel@tonic-gate
26570Sstevel@tonic-gate if (afar_status != AFLT_STAT_INVALID)
26580Sstevel@tonic-gate cpu_log_diag_info(ch_flt);
26590Sstevel@tonic-gate
26600Sstevel@tonic-gate /*
26610Sstevel@tonic-gate * If we have a CEEN error, we do not re-enable CEEN until after
26620Sstevel@tonic-gate * we exit the trap handler. Otherwise, another error may
26630Sstevel@tonic-gate * occur causing the handler to be entered recursively.
26640Sstevel@tonic-gate * We set a timeout to trigger in cpu_ceen_delay_secs seconds,
26650Sstevel@tonic-gate * to try and ensure that the CPU makes progress in the face
26660Sstevel@tonic-gate * of a CE storm.
26670Sstevel@tonic-gate */
26680Sstevel@tonic-gate if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
26690Sstevel@tonic-gate (void) timeout(cpu_delayed_check_ce_errors,
2670946Smathue (void *)(uintptr_t)aflt->flt_inst,
26710Sstevel@tonic-gate drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
26720Sstevel@tonic-gate }
26730Sstevel@tonic-gate }
26740Sstevel@tonic-gate
26750Sstevel@tonic-gate /*
26760Sstevel@tonic-gate * Invoked by error_init() early in startup and therefore before
26770Sstevel@tonic-gate * startup_errorq() is called to drain any error Q -
26780Sstevel@tonic-gate *
26790Sstevel@tonic-gate * startup()
26800Sstevel@tonic-gate * startup_end()
26810Sstevel@tonic-gate * error_init()
26820Sstevel@tonic-gate * cpu_error_init()
26830Sstevel@tonic-gate * errorq_init()
26840Sstevel@tonic-gate * errorq_drain()
26850Sstevel@tonic-gate * start_other_cpus()
26860Sstevel@tonic-gate *
26870Sstevel@tonic-gate * The purpose of this routine is to create error-related taskqs. Taskqs
26880Sstevel@tonic-gate * are used for this purpose because cpu_lock can't be grabbed from interrupt
26890Sstevel@tonic-gate * context.
26900Sstevel@tonic-gate */
26910Sstevel@tonic-gate void
26920Sstevel@tonic-gate cpu_error_init(int items)
26930Sstevel@tonic-gate {
26940Sstevel@tonic-gate /*
26950Sstevel@tonic-gate * Create taskq(s) used to re-enable CEEN after CE handling
26960Sstevel@tonic-gate */
26970Sstevel@tonic-gate ch_check_ce_tq = taskq_create("cheetah_check_ce", 1, minclsyspri,
26980Sstevel@tonic-gate items, items, TASKQ_PREPOPULATE);
26990Sstevel@tonic-gate }
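/*
 * Sketch (assumption, for illustration only): work that must hold
 * cpu_lock is later pushed onto the taskq created above from interrupt
 * context, along the lines of
 *
 *	(void) taskq_dispatch(ch_check_ce_tq, check_ce_worker, arg,
 *	    TQ_NOSLEEP);
 *
 * where check_ce_worker is a hypothetical worker function named only
 * for this sketch.
 */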
27000Sstevel@tonic-gate
27010Sstevel@tonic-gate void
27020Sstevel@tonic-gate cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *eqep)
27030Sstevel@tonic-gate {
27040Sstevel@tonic-gate char unum[UNUM_NAMLEN];
27050Sstevel@tonic-gate int len;
27060Sstevel@tonic-gate
27070Sstevel@tonic-gate switch (aflt->flt_class) {
27080Sstevel@tonic-gate case CPU_FAULT:
27090Sstevel@tonic-gate cpu_ereport_init(aflt);
27100Sstevel@tonic-gate if (cpu_async_log_err(aflt, eqep))
27110Sstevel@tonic-gate cpu_ereport_post(aflt);
27120Sstevel@tonic-gate break;
27130Sstevel@tonic-gate
27140Sstevel@tonic-gate case BUS_FAULT:
27150Sstevel@tonic-gate if (aflt->flt_func != NULL) {
27160Sstevel@tonic-gate (void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt,
27170Sstevel@tonic-gate unum, UNUM_NAMLEN, &len);
27180Sstevel@tonic-gate aflt->flt_func(aflt, unum);
27190Sstevel@tonic-gate }
27200Sstevel@tonic-gate break;
27210Sstevel@tonic-gate
27220Sstevel@tonic-gate case RECIRC_CPU_FAULT:
27230Sstevel@tonic-gate aflt->flt_class = CPU_FAULT;
27240Sstevel@tonic-gate cpu_log_err(aflt);
27250Sstevel@tonic-gate cpu_ereport_post(aflt);
27260Sstevel@tonic-gate break;
27270Sstevel@tonic-gate
27280Sstevel@tonic-gate case RECIRC_BUS_FAULT:
27290Sstevel@tonic-gate ASSERT(aflt->flt_class != RECIRC_BUS_FAULT);
27300Sstevel@tonic-gate /*FALLTHRU*/
27310Sstevel@tonic-gate default:
27320Sstevel@tonic-gate cmn_err(CE_WARN, "discarding CE error 0x%p with invalid "
27330Sstevel@tonic-gate "fault class (0x%x)", (void *)aflt, aflt->flt_class);
27340Sstevel@tonic-gate return;
27350Sstevel@tonic-gate }
27360Sstevel@tonic-gate }
27370Sstevel@tonic-gate
27380Sstevel@tonic-gate /*
27390Sstevel@tonic-gate * Scrub and classify a CE. This function must not modify the
27400Sstevel@tonic-gate * fault structure passed to it but instead should return the classification
27410Sstevel@tonic-gate * information.
27420Sstevel@tonic-gate */
27430Sstevel@tonic-gate
27440Sstevel@tonic-gate static uchar_t
27450Sstevel@tonic-gate cpu_ce_scrub_mem_err_common(struct async_flt *ecc, boolean_t logout_tried)
27460Sstevel@tonic-gate {
27470Sstevel@tonic-gate uchar_t disp = CE_XDIAG_EXTALG;
27480Sstevel@tonic-gate on_trap_data_t otd;
27490Sstevel@tonic-gate uint64_t orig_err;
27500Sstevel@tonic-gate ch_cpu_logout_t *clop;
27510Sstevel@tonic-gate
27520Sstevel@tonic-gate /*
27530Sstevel@tonic-gate * Clear CEEN. CPU CE TL > 0 trap handling will already have done
27540Sstevel@tonic-gate * this, but our other callers have not. Disable preemption to
27550Sstevel@tonic-gate * avoid CPU migration so that we restore CEEN on the correct
27560Sstevel@tonic-gate * cpu later.
27570Sstevel@tonic-gate *
27580Sstevel@tonic-gate * CEEN is cleared so that further CEs that our instruction and
27590Sstevel@tonic-gate * data footprint induce do not cause us to either creep down
27600Sstevel@tonic-gate * the kernel stack to the point of overflow, or do so much CE
27610Sstevel@tonic-gate * notification as to make little real forward progress.
27620Sstevel@tonic-gate *
27630Sstevel@tonic-gate * NCEEN must not be cleared. However it is possible that
27640Sstevel@tonic-gate * our accesses to the flt_addr may provoke a bus error or timeout
27650Sstevel@tonic-gate * if the offending address has just been unconfigured as part of
27660Sstevel@tonic-gate * a DR action. So we must operate under on_trap protection.
27670Sstevel@tonic-gate */
27680Sstevel@tonic-gate kpreempt_disable();
27690Sstevel@tonic-gate orig_err = get_error_enable();
27700Sstevel@tonic-gate if (orig_err & EN_REG_CEEN)
27715219Skm84432 set_error_enable(orig_err & ~EN_REG_CEEN);
27720Sstevel@tonic-gate
27730Sstevel@tonic-gate /*
27740Sstevel@tonic-gate * Our classification algorithm includes the line state before
27750Sstevel@tonic-gate * the scrub; we'd like this captured after the detection and
27760Sstevel@tonic-gate * before the algorithm below - the earlier the better.
27770Sstevel@tonic-gate *
27780Sstevel@tonic-gate * If we've come from a cpu CE trap then this info already exists
27790Sstevel@tonic-gate * in the cpu logout area.
27800Sstevel@tonic-gate *
27810Sstevel@tonic-gate * For a CE detected by memscrub for which there was no trap
27820Sstevel@tonic-gate * (running with CEEN off) cpu_log_and_clear_ce has called
27830Sstevel@tonic-gate * cpu_ce_delayed_ec_logout to capture some cache data, and
27840Sstevel@tonic-gate * marked the fault structure as incomplete as a flag to later
27850Sstevel@tonic-gate * logging code.
27860Sstevel@tonic-gate *
27870Sstevel@tonic-gate * If called directly from an IO detected CE there has been
27880Sstevel@tonic-gate * no line data capture. In this case we logout to the cpu logout
27890Sstevel@tonic-gate * area - that's appropriate since it's the cpu cache data we need
27900Sstevel@tonic-gate * for classification. We thus borrow the cpu logout area for a
27910Sstevel@tonic-gate * short time, and cpu_ce_delayed_ec_logout will mark it as busy in
27920Sstevel@tonic-gate * this time (we will invalidate it again below).
27930Sstevel@tonic-gate *
27940Sstevel@tonic-gate * If called from the partner check xcall handler then this cpu
27950Sstevel@tonic-gate * (the partner) has not necessarily experienced a CE at this
27960Sstevel@tonic-gate * address. But we want to capture line state before its scrub
27970Sstevel@tonic-gate * attempt since we use that in our classification.
27980Sstevel@tonic-gate */
27990Sstevel@tonic-gate if (logout_tried == B_FALSE) {
28000Sstevel@tonic-gate if (!cpu_ce_delayed_ec_logout(ecc->flt_addr))
28010Sstevel@tonic-gate disp |= CE_XDIAG_NOLOGOUT;
28020Sstevel@tonic-gate }
28030Sstevel@tonic-gate
28040Sstevel@tonic-gate /*
28050Sstevel@tonic-gate * Scrub memory, then check AFSR for errors. The AFAR we scrub may
28060Sstevel@tonic-gate * no longer be valid (if DR'd since the initial event) so we
28070Sstevel@tonic-gate * perform this scrub under on_trap protection. If this access is
28080Sstevel@tonic-gate * ok then further accesses below will also be ok - DR cannot
28090Sstevel@tonic-gate * proceed while this thread is active (preemption is disabled);
28100Sstevel@tonic-gate * to be safe we'll nonetheless use on_trap again below.
28110Sstevel@tonic-gate */
28120Sstevel@tonic-gate if (!on_trap(&otd, OT_DATA_ACCESS)) {
28130Sstevel@tonic-gate cpu_scrubphys(ecc);
28140Sstevel@tonic-gate } else {
28150Sstevel@tonic-gate no_trap();
28160Sstevel@tonic-gate if (orig_err & EN_REG_CEEN)
28175219Skm84432 set_error_enable(orig_err);
28180Sstevel@tonic-gate kpreempt_enable();
28190Sstevel@tonic-gate return (disp);
28200Sstevel@tonic-gate }
28210Sstevel@tonic-gate no_trap();
28220Sstevel@tonic-gate
28230Sstevel@tonic-gate /*
28240Sstevel@tonic-gate * Did the casx read of the scrub log a CE that matches the AFAR?
28250Sstevel@tonic-gate * Note that it's quite possible that the read sourced the data from
28260Sstevel@tonic-gate * another cpu.
28270Sstevel@tonic-gate */
28280Sstevel@tonic-gate if (clear_ecc(ecc))
28290Sstevel@tonic-gate disp |= CE_XDIAG_CE1;
28300Sstevel@tonic-gate
28310Sstevel@tonic-gate /*
28320Sstevel@tonic-gate * Read the data again. This time the read is very likely to
28330Sstevel@tonic-gate * come from memory since the scrub induced a writeback to memory.
28340Sstevel@tonic-gate */
28350Sstevel@tonic-gate if (!on_trap(&otd, OT_DATA_ACCESS)) {
28360Sstevel@tonic-gate (void) lddphys(P2ALIGN(ecc->flt_addr, 8));
28370Sstevel@tonic-gate } else {
28380Sstevel@tonic-gate no_trap();
28390Sstevel@tonic-gate if (orig_err & EN_REG_CEEN)
28405219Skm84432 set_error_enable(orig_err);
28410Sstevel@tonic-gate kpreempt_enable();
28420Sstevel@tonic-gate return (disp);
28430Sstevel@tonic-gate }
28440Sstevel@tonic-gate no_trap();
28450Sstevel@tonic-gate
28460Sstevel@tonic-gate /* Did that read induce a CE that matches the AFAR? */
28470Sstevel@tonic-gate if (clear_ecc(ecc))
28480Sstevel@tonic-gate disp |= CE_XDIAG_CE2;
28490Sstevel@tonic-gate
28500Sstevel@tonic-gate /*
28510Sstevel@tonic-gate * Look at the logout information and record whether we found the
28520Sstevel@tonic-gate * line in l2/l3 cache. For Panther we are interested in whether
28530Sstevel@tonic-gate * we found it in either cache (it won't reside in both but
28540Sstevel@tonic-gate * it is possible to read it that way given the moving target).
28550Sstevel@tonic-gate */
28560Sstevel@tonic-gate clop = CPU_PRIVATE(CPU) ? CPU_PRIVATE_PTR(CPU, chpr_cecc_logout) : NULL;
28570Sstevel@tonic-gate if (!(disp & CE_XDIAG_NOLOGOUT) && clop &&
28580Sstevel@tonic-gate clop->clo_data.chd_afar != LOGOUT_INVALID) {
28590Sstevel@tonic-gate int hit, level;
28600Sstevel@tonic-gate int state;
28610Sstevel@tonic-gate int totalsize;
28620Sstevel@tonic-gate ch_ec_data_t *ecp;
28630Sstevel@tonic-gate
28640Sstevel@tonic-gate /*
28650Sstevel@tonic-gate * If hit is nonzero then a match was found and hit will
28660Sstevel@tonic-gate * be one greater than the index which hit. For Panther we
28670Sstevel@tonic-gate * also need to pay attention to level to see which of l2$ or
28680Sstevel@tonic-gate * l3$ it hit in.
28690Sstevel@tonic-gate */
28700Sstevel@tonic-gate hit = cpu_matching_ecache_line(ecc->flt_addr, &clop->clo_data,
28710Sstevel@tonic-gate 0, &level);
28720Sstevel@tonic-gate
28730Sstevel@tonic-gate if (hit) {
28740Sstevel@tonic-gate --hit;
28750Sstevel@tonic-gate disp |= CE_XDIAG_AFARMATCH;
28760Sstevel@tonic-gate
28770Sstevel@tonic-gate if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
28780Sstevel@tonic-gate if (level == 2)
28790Sstevel@tonic-gate ecp = &clop->clo_data.chd_l2_data[hit];
28800Sstevel@tonic-gate else
28810Sstevel@tonic-gate ecp = &clop->clo_data.chd_ec_data[hit];
28820Sstevel@tonic-gate } else {
28830Sstevel@tonic-gate ASSERT(level == 2);
28840Sstevel@tonic-gate ecp = &clop->clo_data.chd_ec_data[hit];
28850Sstevel@tonic-gate }
28860Sstevel@tonic-gate totalsize = cpunodes[CPU->cpu_id].ecache_size;
28870Sstevel@tonic-gate state = cpu_ectag_pa_to_subblk_state(totalsize,
28880Sstevel@tonic-gate ecc->flt_addr, ecp->ec_tag);
28890Sstevel@tonic-gate
28900Sstevel@tonic-gate /*
28910Sstevel@tonic-gate * Cheetah variants use different state encodings -
28920Sstevel@tonic-gate * the CH_ECSTATE_* defines vary depending on the
28930Sstevel@tonic-gate * module we're compiled for. Translate into our
28940Sstevel@tonic-gate * one true version. Conflate Owner-Shared state
28950Sstevel@tonic-gate * of SSM mode with Owner as victimisation of such
28960Sstevel@tonic-gate * lines may cause a writeback.
28970Sstevel@tonic-gate */
28980Sstevel@tonic-gate switch (state) {
28990Sstevel@tonic-gate case CH_ECSTATE_MOD:
29000Sstevel@tonic-gate disp |= EC_STATE_M;
29010Sstevel@tonic-gate break;
29020Sstevel@tonic-gate
29030Sstevel@tonic-gate case CH_ECSTATE_OWN:
29040Sstevel@tonic-gate case CH_ECSTATE_OWS:
29050Sstevel@tonic-gate disp |= EC_STATE_O;
29060Sstevel@tonic-gate break;
29070Sstevel@tonic-gate
29080Sstevel@tonic-gate case CH_ECSTATE_EXL:
29090Sstevel@tonic-gate disp |= EC_STATE_E;
29100Sstevel@tonic-gate break;
29110Sstevel@tonic-gate
29120Sstevel@tonic-gate case CH_ECSTATE_SHR:
29130Sstevel@tonic-gate disp |= EC_STATE_S;
29140Sstevel@tonic-gate break;
29150Sstevel@tonic-gate
29160Sstevel@tonic-gate default:
29170Sstevel@tonic-gate disp |= EC_STATE_I;
29180Sstevel@tonic-gate break;
29190Sstevel@tonic-gate }
29200Sstevel@tonic-gate }
29210Sstevel@tonic-gate
29220Sstevel@tonic-gate /*
29230Sstevel@tonic-gate * If we initiated the delayed logout then we are responsible
29240Sstevel@tonic-gate * for invalidating the logout area.
29250Sstevel@tonic-gate */
29260Sstevel@tonic-gate if (logout_tried == B_FALSE) {
29270Sstevel@tonic-gate bzero(clop, sizeof (ch_cpu_logout_t));
29280Sstevel@tonic-gate clop->clo_data.chd_afar = LOGOUT_INVALID;
29290Sstevel@tonic-gate }
29300Sstevel@tonic-gate }
29310Sstevel@tonic-gate
29320Sstevel@tonic-gate /*
29330Sstevel@tonic-gate * Re-enable CEEN if we turned it off.
29340Sstevel@tonic-gate */
29350Sstevel@tonic-gate if (orig_err & EN_REG_CEEN)
29365219Skm84432 set_error_enable(orig_err);
29370Sstevel@tonic-gate kpreempt_enable();
29380Sstevel@tonic-gate
29390Sstevel@tonic-gate return (disp);
29400Sstevel@tonic-gate }
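/*
 * Minimal sketch of the on_trap() protection pattern used above
 * (commentary only): probe a physical address that may have been DR'd
 * out without risking an unhandled bus error or timeout.
 *
 *	on_trap_data_t otd;
 *
 *	if (!on_trap(&otd, OT_DATA_ACCESS))
 *		(void) lddphys(P2ALIGN(pa, 8));	 protected access
 *	no_trap();				 always pop the handler
 */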
29410Sstevel@tonic-gate
29420Sstevel@tonic-gate /*
29430Sstevel@tonic-gate * Scrub a correctable memory error and collect data for classification
29440Sstevel@tonic-gate * of CE type. This function is called in the detection path, i.e. TL=0 handling
29450Sstevel@tonic-gate * of a correctable error trap (cpus) or interrupt (IO) at high PIL.
29460Sstevel@tonic-gate */
29470Sstevel@tonic-gate void
29480Sstevel@tonic-gate cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t logout_tried)
29490Sstevel@tonic-gate {
29500Sstevel@tonic-gate /*
29510Sstevel@tonic-gate * Cheetah CE classification does not set any bits in flt_status.
29520Sstevel@tonic-gate * Instead we will record classification datapoints in flt_disp.
29530Sstevel@tonic-gate */
29540Sstevel@tonic-gate ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
29550Sstevel@tonic-gate
29560Sstevel@tonic-gate /*
29570Sstevel@tonic-gate * Check whether the error detected by IO is persistent, sticky or
29580Sstevel@tonic-gate * intermittent; this is noticed by clear_ecc().
29590Sstevel@tonic-gate */
29600Sstevel@tonic-gate if (ecc->flt_status & ECC_IOBUS)
29610Sstevel@tonic-gate ecc->flt_stat = C_AFSR_MEMORY;
29620Sstevel@tonic-gate
29630Sstevel@tonic-gate /*
29640Sstevel@tonic-gate * Record information from this first part of the algorithm in
29650Sstevel@tonic-gate * flt_disp.
29660Sstevel@tonic-gate */
29670Sstevel@tonic-gate ecc->flt_disp = cpu_ce_scrub_mem_err_common(ecc, logout_tried);
29680Sstevel@tonic-gate }
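/*
 * Sketch (commentary only): classification datapoints accumulate in
 * flt_disp via the CE_XDIAG_* encodings rather than in flt_status,
 * e.g. in the common routine above:
 *
 *	disp = CE_XDIAG_EXTALG;
 *	if (clear_ecc(ecc))
 *		disp |= CE_XDIAG_CE1;	 scrub read saw a matching CE
 */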
29690Sstevel@tonic-gate
29700Sstevel@tonic-gate /*
29710Sstevel@tonic-gate * Select a partner to perform a further CE classification check from.
29720Sstevel@tonic-gate * Must be called with kernel preemption disabled (to stop the cpu list
29730Sstevel@tonic-gate * from changing). The detecting cpu we are partnering has cpuid
29740Sstevel@tonic-gate * aflt->flt_inst; we might not be running on the detecting cpu.
29750Sstevel@tonic-gate *
29760Sstevel@tonic-gate * Restrict choice to active cpus in the same cpu partition as ourselves in
29770Sstevel@tonic-gate * an effort to stop bad cpus in one partition causing other partitions to
29780Sstevel@tonic-gate * perform excessive diagnostic activity. Actually, since the errorq drain
29790Sstevel@tonic-gate * is run from a softint most of the time, and that is a global mechanism,
29800Sstevel@tonic-gate * this isolation is only partial. Return NULL if we fail to find a
29810Sstevel@tonic-gate * suitable partner.
29820Sstevel@tonic-gate *
29830Sstevel@tonic-gate * We prefer a partner that is in a different latency group to ourselves as
29840Sstevel@tonic-gate * we will share fewer datapaths. If such a partner is unavailable then
29850Sstevel@tonic-gate * choose one in the same lgroup but prefer a different chip and only allow
29860Sstevel@tonic-gate * a sibling core if flags includes PTNR_SIBLINGOK. If all else fails and
29870Sstevel@tonic-gate * flags includes PTNR_SELFOK then permit selection of the original detector.
29880Sstevel@tonic-gate *
29890Sstevel@tonic-gate * We keep a cache of the last partner selected for a cpu, and we'll try to
29900Sstevel@tonic-gate * use that previous partner if no more than cpu_ce_ptnr_cachetime_sec seconds
29910Sstevel@tonic-gate * have passed since that selection was made. This provides the benefit
29920Sstevel@tonic-gate * of the point-of-view of different partners over time but without
29930Sstevel@tonic-gate * requiring frequent cpu list traversals.
29940Sstevel@tonic-gate */
29950Sstevel@tonic-gate
29960Sstevel@tonic-gate #define PTNR_SIBLINGOK 0x1 /* Allow selection of sibling core */
29970Sstevel@tonic-gate #define PTNR_SELFOK 0x2 /* Allow selection of cpu to "partner" itself */
29980Sstevel@tonic-gate
29990Sstevel@tonic-gate static cpu_t *
30000Sstevel@tonic-gate ce_ptnr_select(struct async_flt *aflt, int flags, int *typep)
30010Sstevel@tonic-gate {
30020Sstevel@tonic-gate cpu_t *sp, *dtcr, *ptnr, *locptnr, *sibptnr;
30030Sstevel@tonic-gate hrtime_t lasttime, thistime;
30040Sstevel@tonic-gate
30050Sstevel@tonic-gate ASSERT(curthread->t_preempt > 0 || getpil() >= DISP_LEVEL);
30060Sstevel@tonic-gate
30070Sstevel@tonic-gate dtcr = cpu[aflt->flt_inst];
30080Sstevel@tonic-gate
30090Sstevel@tonic-gate /*
30100Sstevel@tonic-gate * Short-circuit for the following cases:
30110Sstevel@tonic-gate * . the dtcr is not flagged active
30120Sstevel@tonic-gate * . there is just one cpu present
30130Sstevel@tonic-gate * . the detector has disappeared
30140Sstevel@tonic-gate * . we were given a bad flt_inst cpuid; this should not happen
30150Sstevel@tonic-gate * (e.g. PCI code now fills flt_inst) but if it does it is no
30160Sstevel@tonic-gate * reason to panic.
30170Sstevel@tonic-gate * . there is just one cpu left online in the cpu partition
30180Sstevel@tonic-gate *
30190Sstevel@tonic-gate * If we return NULL after this point then we do not update the
30200Sstevel@tonic-gate * chpr_ceptnr_seltime which will cause us to perform a full lookup
30210Sstevel@tonic-gate * again next time; this is the case where the only other cpu online
30220Sstevel@tonic-gate * in the detector's partition is on the same chip as the detector
30230Sstevel@tonic-gate * and since CEEN re-enable is throttled even that case should not
30240Sstevel@tonic-gate * hurt performance.
30250Sstevel@tonic-gate */
30260Sstevel@tonic-gate if (dtcr == NULL || !cpu_flagged_active(dtcr->cpu_flags)) {
30270Sstevel@tonic-gate return (NULL);
30280Sstevel@tonic-gate }
30290Sstevel@tonic-gate if (ncpus == 1 || dtcr->cpu_part->cp_ncpus == 1) {
30300Sstevel@tonic-gate if (flags & PTNR_SELFOK) {
30310Sstevel@tonic-gate *typep = CE_XDIAG_PTNR_SELF;
30320Sstevel@tonic-gate return (dtcr);
30330Sstevel@tonic-gate } else {
30340Sstevel@tonic-gate return (NULL);
30350Sstevel@tonic-gate }
30360Sstevel@tonic-gate }
30370Sstevel@tonic-gate
30380Sstevel@tonic-gate thistime = gethrtime();
30390Sstevel@tonic-gate lasttime = CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime);
30400Sstevel@tonic-gate
30410Sstevel@tonic-gate /*
30420Sstevel@tonic-gate * Select a starting point.
30430Sstevel@tonic-gate */
30440Sstevel@tonic-gate if (!lasttime) {
30450Sstevel@tonic-gate /*
30460Sstevel@tonic-gate * We've never selected a partner for this detector before.
30470Sstevel@tonic-gate * Start the scan at the next online cpu in the same cpu
30480Sstevel@tonic-gate * partition.
30490Sstevel@tonic-gate */
30500Sstevel@tonic-gate sp = dtcr->cpu_next_part;
30510Sstevel@tonic-gate } else if (thistime - lasttime < cpu_ce_ptnr_cachetime_sec * NANOSEC) {
30520Sstevel@tonic-gate /*
30530Sstevel@tonic-gate * Our last selection has not aged yet. If this partner:
30540Sstevel@tonic-gate * . is still a valid cpu,
30550Sstevel@tonic-gate * . is still in the same partition as the detector
30560Sstevel@tonic-gate * . is still marked active
30570Sstevel@tonic-gate * . satisfies the 'flags' argument criteria
30580Sstevel@tonic-gate * then select it again without updating the timestamp.
30590Sstevel@tonic-gate */
30600Sstevel@tonic-gate sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
30610Sstevel@tonic-gate if (sp == NULL || sp->cpu_part != dtcr->cpu_part ||
30620Sstevel@tonic-gate !cpu_flagged_active(sp->cpu_flags) ||
30630Sstevel@tonic-gate (sp == dtcr && !(flags & PTNR_SELFOK)) ||
30643434Sesaxe (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP) &&
30650Sstevel@tonic-gate !(flags & PTNR_SIBLINGOK))) {
30660Sstevel@tonic-gate sp = dtcr->cpu_next_part;
30670Sstevel@tonic-gate } else {
30680Sstevel@tonic-gate if (sp->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
30690Sstevel@tonic-gate *typep = CE_XDIAG_PTNR_REMOTE;
30700Sstevel@tonic-gate } else if (sp == dtcr) {
30710Sstevel@tonic-gate *typep = CE_XDIAG_PTNR_SELF;
30723434Sesaxe } else if (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP)) {
30730Sstevel@tonic-gate *typep = CE_XDIAG_PTNR_SIBLING;
30740Sstevel@tonic-gate } else {
30750Sstevel@tonic-gate *typep = CE_XDIAG_PTNR_LOCAL;
30760Sstevel@tonic-gate }
30770Sstevel@tonic-gate return (sp);
30780Sstevel@tonic-gate }
30790Sstevel@tonic-gate } else {
30800Sstevel@tonic-gate /*
30810Sstevel@tonic-gate * Our last selection has aged. If it is nonetheless still a
30820Sstevel@tonic-gate * valid cpu then start the scan at the next cpu in the
30830Sstevel@tonic-gate * partition after our last partner. If the last selection
30840Sstevel@tonic-gate * is no longer a valid cpu then go with our default. In
30850Sstevel@tonic-gate * this way we slowly cycle through possible partners to
30860Sstevel@tonic-gate * obtain multiple viewpoints over time.
30870Sstevel@tonic-gate */
30880Sstevel@tonic-gate sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
30890Sstevel@tonic-gate if (sp == NULL) {
30900Sstevel@tonic-gate sp = dtcr->cpu_next_part;
30910Sstevel@tonic-gate } else {
30920Sstevel@tonic-gate sp = sp->cpu_next_part; /* may be dtcr */
30930Sstevel@tonic-gate if (sp->cpu_part != dtcr->cpu_part)
30940Sstevel@tonic-gate sp = dtcr;
30950Sstevel@tonic-gate }
30960Sstevel@tonic-gate }
30970Sstevel@tonic-gate
30980Sstevel@tonic-gate /*
30990Sstevel@tonic-gate * We have a proposed starting point for our search, but if this
31000Sstevel@tonic-gate * cpu is offline then its cpu_next_part will point to itself
31010Sstevel@tonic-gate * so we can't use that to iterate over cpus in this partition in
31020Sstevel@tonic-gate * the loop below. We still want to avoid iterating over cpus not
31030Sstevel@tonic-gate * in our partition, so in the case that our starting point is offline
31040Sstevel@tonic-gate * we will repoint it to be the detector itself; and if the detector
31050Sstevel@tonic-gate * happens to be offline we'll return NULL from the following loop.
31060Sstevel@tonic-gate */
31070Sstevel@tonic-gate if (!cpu_flagged_active(sp->cpu_flags)) {
31080Sstevel@tonic-gate sp = dtcr;
31090Sstevel@tonic-gate }
31100Sstevel@tonic-gate
31110Sstevel@tonic-gate ptnr = sp;
31120Sstevel@tonic-gate locptnr = NULL;
31130Sstevel@tonic-gate sibptnr = NULL;
31140Sstevel@tonic-gate do {
31150Sstevel@tonic-gate if (ptnr == dtcr || !cpu_flagged_active(ptnr->cpu_flags))
31160Sstevel@tonic-gate continue;
31170Sstevel@tonic-gate if (ptnr->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
31180Sstevel@tonic-gate CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = ptnr->cpu_id;
31190Sstevel@tonic-gate CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
31200Sstevel@tonic-gate *typep = CE_XDIAG_PTNR_REMOTE;
31210Sstevel@tonic-gate return (ptnr);
31220Sstevel@tonic-gate }
31233434Sesaxe if (pg_plat_cpus_share(ptnr, dtcr, PGHW_CHIP)) {
31240Sstevel@tonic-gate if (sibptnr == NULL)
31250Sstevel@tonic-gate sibptnr = ptnr;
31260Sstevel@tonic-gate continue;
31270Sstevel@tonic-gate }
31280Sstevel@tonic-gate if (locptnr == NULL)
31290Sstevel@tonic-gate locptnr = ptnr;
31300Sstevel@tonic-gate } while ((ptnr = ptnr->cpu_next_part) != sp);
31310Sstevel@tonic-gate
31320Sstevel@tonic-gate /*
31330Sstevel@tonic-gate * A foreign partner has already been returned if one was available.
31340Sstevel@tonic-gate *
31350Sstevel@tonic-gate * If locptnr is not NULL it is a cpu in the same lgroup as the
31360Sstevel@tonic-gate * detector, is active, and is not a sibling of the detector.
31370Sstevel@tonic-gate *
31380Sstevel@tonic-gate * If sibptnr is not NULL it is a sibling of the detector, and is
31390Sstevel@tonic-gate * active.
31400Sstevel@tonic-gate *
31410Sstevel@tonic-gate * If we have to resort to using the detector itself we have already
31420Sstevel@tonic-gate * checked that it is active.
31430Sstevel@tonic-gate */
31440Sstevel@tonic-gate if (locptnr) {
31450Sstevel@tonic-gate CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = locptnr->cpu_id;
31460Sstevel@tonic-gate CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
31470Sstevel@tonic-gate *typep = CE_XDIAG_PTNR_LOCAL;
31480Sstevel@tonic-gate return (locptnr);
31490Sstevel@tonic-gate } else if (sibptnr && flags & PTNR_SIBLINGOK) {
31500Sstevel@tonic-gate CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = sibptnr->cpu_id;
31510Sstevel@tonic-gate CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
31520Sstevel@tonic-gate *typep = CE_XDIAG_PTNR_SIBLING;
31530Sstevel@tonic-gate return (sibptnr);
31540Sstevel@tonic-gate } else if (flags & PTNR_SELFOK) {
31550Sstevel@tonic-gate CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = dtcr->cpu_id;
31560Sstevel@tonic-gate CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
31570Sstevel@tonic-gate *typep = CE_XDIAG_PTNR_SELF;
31580Sstevel@tonic-gate return (dtcr);
31590Sstevel@tonic-gate }
31600Sstevel@tonic-gate
31610Sstevel@tonic-gate return (NULL);
31620Sstevel@tonic-gate }
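/*
 * Preference order of the selection above, as a sketch (commentary
 * only):
 *
 *	remote lgroup cpu			-> CE_XDIAG_PTNR_REMOTE
 *	same lgroup, different chip		-> CE_XDIAG_PTNR_LOCAL
 *	sibling core, if PTNR_SIBLINGOK		-> CE_XDIAG_PTNR_SIBLING
 *	detector itself, if PTNR_SELFOK		-> CE_XDIAG_PTNR_SELF
 *	otherwise				-> NULL
 */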
31630Sstevel@tonic-gate
31640Sstevel@tonic-gate /*
31650Sstevel@tonic-gate * Cross call handler that is requested to run on the designated partner of
31660Sstevel@tonic-gate * a cpu that experienced a possibly sticky or possibly persistent CE.
31670Sstevel@tonic-gate */
31680Sstevel@tonic-gate static void
31690Sstevel@tonic-gate ce_ptnrchk_xc(struct async_flt *aflt, uchar_t *dispp)
31700Sstevel@tonic-gate {
31710Sstevel@tonic-gate *dispp = cpu_ce_scrub_mem_err_common(aflt, B_FALSE);
31720Sstevel@tonic-gate }
31730Sstevel@tonic-gate
31740Sstevel@tonic-gate /*
31750Sstevel@tonic-gate * The associated errorqs are never destroyed so we do not need to deal with
31760Sstevel@tonic-gate * them disappearing before this timeout fires. If the affected memory
31770Sstevel@tonic-gate * has been DR'd out since the original event the scrub algorithm will catch
31780Sstevel@tonic-gate * any errors and return null disposition info. If the original detecting
31790Sstevel@tonic-gate * cpu has been DR'd out then ereport detector info will not be able to
31800Sstevel@tonic-gate * look up the CPU type; with a small timeout this is unlikely.
31810Sstevel@tonic-gate */
31820Sstevel@tonic-gate static void
31830Sstevel@tonic-gate ce_lkychk_cb(ce_lkychk_cb_t *cbarg)
31840Sstevel@tonic-gate {
31850Sstevel@tonic-gate struct async_flt *aflt = cbarg->lkycb_aflt;
31860Sstevel@tonic-gate uchar_t disp;
31870Sstevel@tonic-gate cpu_t *cp;
31880Sstevel@tonic-gate int ptnrtype;
31890Sstevel@tonic-gate
31900Sstevel@tonic-gate kpreempt_disable();
31910Sstevel@tonic-gate if (cp = ce_ptnr_select(aflt, PTNR_SIBLINGOK | PTNR_SELFOK,
31920Sstevel@tonic-gate &ptnrtype)) {
31930Sstevel@tonic-gate xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, (uint64_t)aflt,
31940Sstevel@tonic-gate (uint64_t)&disp);
31950Sstevel@tonic-gate CE_XDIAG_SETLKYINFO(aflt->flt_disp, disp);
31960Sstevel@tonic-gate CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
31970Sstevel@tonic-gate CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
31980Sstevel@tonic-gate } else {
31990Sstevel@tonic-gate ce_xdiag_lkydrops++;
32000Sstevel@tonic-gate if (ncpus > 1)
32010Sstevel@tonic-gate CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
32020Sstevel@tonic-gate CE_XDIAG_SKIP_NOPTNR);
32030Sstevel@tonic-gate }
32040Sstevel@tonic-gate kpreempt_enable();
32050Sstevel@tonic-gate
32060Sstevel@tonic-gate errorq_commit(cbarg->lkycb_eqp, cbarg->lkycb_eqep, ERRORQ_ASYNC);
32070Sstevel@tonic-gate kmem_free(cbarg, sizeof (ce_lkychk_cb_t));
32080Sstevel@tonic-gate }
32090Sstevel@tonic-gate
32100Sstevel@tonic-gate /*
32110Sstevel@tonic-gate * Called from errorq drain code when processing a CE error, both from
32120Sstevel@tonic-gate * CPU and PCI drain functions. Decide what further classification actions,
32130Sstevel@tonic-gate * if any, we will perform. Perform immediate actions now, and schedule
32140Sstevel@tonic-gate * delayed actions as required. Note that we are no longer necessarily running
32150Sstevel@tonic-gate * on the detecting cpu, and that the async_flt structure will not persist on
32160Sstevel@tonic-gate * return from this function.
32170Sstevel@tonic-gate *
32180Sstevel@tonic-gate * Calls to this function should aim to be self-throttling in some way. With
32190Sstevel@tonic-gate * the delayed re-enable of CEEN the absolute rate of calls should not
32200Sstevel@tonic-gate * be excessive. Callers should also avoid performing in-depth classification
32210Sstevel@tonic-gate * for events in pages that are already known to be suspect.
32220Sstevel@tonic-gate *
32230Sstevel@tonic-gate * We return nonzero to indicate that the event has been copied and
32240Sstevel@tonic-gate * recirculated for further testing. The caller should not log the event
32250Sstevel@tonic-gate * in this case - it will be logged when further test results are available.
32260Sstevel@tonic-gate *
32270Sstevel@tonic-gate * Our possible contexts are that of errorq_drain: below lock level or from
32280Sstevel@tonic-gate * panic context. We can assume that the cpu we are running on is online.
32290Sstevel@tonic-gate */
32300Sstevel@tonic-gate
32310Sstevel@tonic-gate
32320Sstevel@tonic-gate #ifdef DEBUG
32330Sstevel@tonic-gate static int ce_xdiag_forceaction;
32340Sstevel@tonic-gate #endif
32350Sstevel@tonic-gate
32360Sstevel@tonic-gate int
32370Sstevel@tonic-gate ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp,
32380Sstevel@tonic-gate errorq_elem_t *eqep, size_t afltoffset)
32390Sstevel@tonic-gate {
32400Sstevel@tonic-gate ce_dispact_t dispact, action;
32410Sstevel@tonic-gate cpu_t *cp;
32420Sstevel@tonic-gate uchar_t dtcrinfo, disp;
32430Sstevel@tonic-gate int ptnrtype;
32440Sstevel@tonic-gate
32450Sstevel@tonic-gate if (!ce_disp_inited || panicstr || ce_xdiag_off) {
32460Sstevel@tonic-gate ce_xdiag_drops++;
32470Sstevel@tonic-gate return (0);
32480Sstevel@tonic-gate } else if (!aflt->flt_in_memory) {
32490Sstevel@tonic-gate ce_xdiag_drops++;
32500Sstevel@tonic-gate CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOTMEM);
32510Sstevel@tonic-gate return (0);
32520Sstevel@tonic-gate }
32530Sstevel@tonic-gate
32540Sstevel@tonic-gate dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
32550Sstevel@tonic-gate
32560Sstevel@tonic-gate /*
32570Sstevel@tonic-gate * Some correctable events are not scrubbed/classified, such as those
32580Sstevel@tonic-gate * noticed at the tail of cpu_deferred_error. So if there is no
32590Sstevel@tonic-gate * initial detector classification go no further.
32600Sstevel@tonic-gate */
32610Sstevel@tonic-gate if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) {
32620Sstevel@tonic-gate ce_xdiag_drops++;
32630Sstevel@tonic-gate CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOSCRUB);
32640Sstevel@tonic-gate return (0);
32650Sstevel@tonic-gate }
32660Sstevel@tonic-gate
32670Sstevel@tonic-gate dispact = CE_DISPACT(ce_disp_table,
32680Sstevel@tonic-gate CE_XDIAG_AFARMATCHED(dtcrinfo),
32690Sstevel@tonic-gate CE_XDIAG_STATE(dtcrinfo),
32700Sstevel@tonic-gate CE_XDIAG_CE1SEEN(dtcrinfo),
32710Sstevel@tonic-gate CE_XDIAG_CE2SEEN(dtcrinfo));
32720Sstevel@tonic-gate
32730Sstevel@tonic-gate
32740Sstevel@tonic-gate action = CE_ACT(dispact); /* bad lookup caught below */
32750Sstevel@tonic-gate #ifdef DEBUG
32760Sstevel@tonic-gate if (ce_xdiag_forceaction != 0)
32770Sstevel@tonic-gate action = ce_xdiag_forceaction;
32780Sstevel@tonic-gate #endif
32790Sstevel@tonic-gate
32800Sstevel@tonic-gate switch (action) {
32810Sstevel@tonic-gate case CE_ACT_LKYCHK: {
32820Sstevel@tonic-gate caddr_t ndata;
32830Sstevel@tonic-gate errorq_elem_t *neqep;
32840Sstevel@tonic-gate struct async_flt *ecc;
32850Sstevel@tonic-gate ce_lkychk_cb_t *cbargp;
32860Sstevel@tonic-gate
32870Sstevel@tonic-gate if ((ndata = errorq_elem_dup(eqp, eqep, &neqep)) == NULL) {
32880Sstevel@tonic-gate ce_xdiag_lkydrops++;
32890Sstevel@tonic-gate CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
32900Sstevel@tonic-gate CE_XDIAG_SKIP_DUPFAIL);
32910Sstevel@tonic-gate break;
32920Sstevel@tonic-gate }
32930Sstevel@tonic-gate ecc = (struct async_flt *)(ndata + afltoffset);
32940Sstevel@tonic-gate
32950Sstevel@tonic-gate ASSERT(ecc->flt_class == CPU_FAULT ||
32960Sstevel@tonic-gate ecc->flt_class == BUS_FAULT);
32970Sstevel@tonic-gate ecc->flt_class = (ecc->flt_class == CPU_FAULT) ?
32980Sstevel@tonic-gate RECIRC_CPU_FAULT : RECIRC_BUS_FAULT;
32990Sstevel@tonic-gate
33000Sstevel@tonic-gate cbargp = kmem_alloc(sizeof (ce_lkychk_cb_t), KM_SLEEP);
33010Sstevel@tonic-gate cbargp->lkycb_aflt = ecc;
33020Sstevel@tonic-gate cbargp->lkycb_eqp = eqp;
33030Sstevel@tonic-gate cbargp->lkycb_eqep = neqep;
33040Sstevel@tonic-gate
33050Sstevel@tonic-gate (void) timeout((void (*)(void *))ce_lkychk_cb,
33060Sstevel@tonic-gate (void *)cbargp, drv_usectohz(cpu_ce_lkychk_timeout_usec));
33070Sstevel@tonic-gate return (1);
33080Sstevel@tonic-gate }
33090Sstevel@tonic-gate
33100Sstevel@tonic-gate case CE_ACT_PTNRCHK:
33110Sstevel@tonic-gate kpreempt_disable(); /* stop cpu list changing */
33120Sstevel@tonic-gate if ((cp = ce_ptnr_select(aflt, 0, &ptnrtype)) != NULL) {
33130Sstevel@tonic-gate xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc,
33140Sstevel@tonic-gate (uint64_t)aflt, (uint64_t)&disp);
33150Sstevel@tonic-gate CE_XDIAG_SETPTNRINFO(aflt->flt_disp, disp);
33160Sstevel@tonic-gate CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
33170Sstevel@tonic-gate CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
33180Sstevel@tonic-gate } else if (ncpus > 1) {
33190Sstevel@tonic-gate ce_xdiag_ptnrdrops++;
33200Sstevel@tonic-gate CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
33210Sstevel@tonic-gate CE_XDIAG_SKIP_NOPTNR);
33220Sstevel@tonic-gate } else {
33230Sstevel@tonic-gate ce_xdiag_ptnrdrops++;
33240Sstevel@tonic-gate CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
33250Sstevel@tonic-gate CE_XDIAG_SKIP_UNIPROC);
33260Sstevel@tonic-gate }
33270Sstevel@tonic-gate kpreempt_enable();
33280Sstevel@tonic-gate break;
33290Sstevel@tonic-gate
33300Sstevel@tonic-gate case CE_ACT_DONE:
33310Sstevel@tonic-gate break;
33320Sstevel@tonic-gate
33330Sstevel@tonic-gate case CE_ACT(CE_DISP_BAD):
33340Sstevel@tonic-gate default:
33350Sstevel@tonic-gate #ifdef DEBUG
33360Sstevel@tonic-gate cmn_err(CE_PANIC, "ce_scrub_post: Bad action '%d'", action);
33370Sstevel@tonic-gate #endif
33380Sstevel@tonic-gate ce_xdiag_bad++;
33390Sstevel@tonic-gate CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_ACTBAD);
33400Sstevel@tonic-gate break;
33410Sstevel@tonic-gate }
33420Sstevel@tonic-gate
33430Sstevel@tonic-gate return (0);
33440Sstevel@tonic-gate }
33450Sstevel@tonic-gate
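/*
 * Illustrative sketch (not part of the original source): a caller in the
 * errorq drain path would use the return value of ce_scrub_xdiag_recirc()
 * to decide whether the event is still in flight. A return of 1 means the
 * event was duplicated and parked for the deferred leaky-bucket recheck,
 * so it must not be logged yet; 0 means classification is finished (or was
 * skipped) and normal logging should proceed. The logging callee named
 * below is hypothetical.
 *
 *	if (ce_scrub_xdiag_recirc(aflt, eqp, eqep, afltoffset))
 *		return;			deferred; ce_lkychk_cb recirculates
 *	example_ce_log(aflt);		hypothetical logging routine
 */
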
33460Sstevel@tonic-gate /*
33470Sstevel@tonic-gate * We route all errors through a single switch statement.
33480Sstevel@tonic-gate */
33490Sstevel@tonic-gate void
33500Sstevel@tonic-gate cpu_ue_log_err(struct async_flt *aflt)
33510Sstevel@tonic-gate {
33520Sstevel@tonic-gate switch (aflt->flt_class) {
33530Sstevel@tonic-gate case CPU_FAULT:
33540Sstevel@tonic-gate cpu_ereport_init(aflt);
33550Sstevel@tonic-gate if (cpu_async_log_err(aflt, NULL))
33560Sstevel@tonic-gate cpu_ereport_post(aflt);
33570Sstevel@tonic-gate break;
33580Sstevel@tonic-gate
33590Sstevel@tonic-gate case BUS_FAULT:
33600Sstevel@tonic-gate bus_async_log_err(aflt);
33610Sstevel@tonic-gate break;
33620Sstevel@tonic-gate
33630Sstevel@tonic-gate default:
33640Sstevel@tonic-gate cmn_err(CE_WARN, "discarding async error %p with invalid "
33650Sstevel@tonic-gate "fault class (0x%x)", (void *)aflt, aflt->flt_class);
33660Sstevel@tonic-gate return;
33670Sstevel@tonic-gate }
33680Sstevel@tonic-gate }
33690Sstevel@tonic-gate
33700Sstevel@tonic-gate /*
33710Sstevel@tonic-gate * Routine for panic hook callback from panic_idle().
33720Sstevel@tonic-gate */
33730Sstevel@tonic-gate void
33740Sstevel@tonic-gate cpu_async_panic_callb(void)
33750Sstevel@tonic-gate {
33760Sstevel@tonic-gate ch_async_flt_t ch_flt;
33770Sstevel@tonic-gate struct async_flt *aflt;
33780Sstevel@tonic-gate ch_cpu_errors_t cpu_error_regs;
33790Sstevel@tonic-gate uint64_t afsr_errs;
33800Sstevel@tonic-gate
33810Sstevel@tonic-gate get_cpu_error_state(&cpu_error_regs);
33820Sstevel@tonic-gate
33830Sstevel@tonic-gate afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
33842381Smikechr (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
33850Sstevel@tonic-gate
33860Sstevel@tonic-gate if (afsr_errs) {
33870Sstevel@tonic-gate
33880Sstevel@tonic-gate bzero(&ch_flt, sizeof (ch_async_flt_t));
33890Sstevel@tonic-gate aflt = (struct async_flt *)&ch_flt;
33900Sstevel@tonic-gate aflt->flt_id = gethrtime_waitfree();
33910Sstevel@tonic-gate aflt->flt_bus_id = getprocessorid();
33920Sstevel@tonic-gate aflt->flt_inst = CPU->cpu_id;
33930Sstevel@tonic-gate aflt->flt_stat = cpu_error_regs.afsr;
33940Sstevel@tonic-gate aflt->flt_addr = cpu_error_regs.afar;
33950Sstevel@tonic-gate aflt->flt_prot = AFLT_PROT_NONE;
33960Sstevel@tonic-gate aflt->flt_class = CPU_FAULT;
33970Sstevel@tonic-gate aflt->flt_priv = ((cpu_error_regs.afsr & C_AFSR_PRIV) != 0);
33980Sstevel@tonic-gate aflt->flt_panic = 1;
33990Sstevel@tonic-gate ch_flt.afsr_ext = cpu_error_regs.afsr_ext;
34000Sstevel@tonic-gate ch_flt.afsr_errs = afsr_errs;
34010Sstevel@tonic-gate #if defined(SERRANO)
34020Sstevel@tonic-gate ch_flt.afar2 = cpu_error_regs.afar2;
34030Sstevel@tonic-gate #endif /* SERRANO */
34040Sstevel@tonic-gate (void) cpu_queue_events(&ch_flt, NULL, afsr_errs, NULL);
34050Sstevel@tonic-gate }
34060Sstevel@tonic-gate }
34070Sstevel@tonic-gate
34080Sstevel@tonic-gate /*
34090Sstevel@tonic-gate * Routine to convert a syndrome into a syndrome code.
34100Sstevel@tonic-gate */
34110Sstevel@tonic-gate static int
34120Sstevel@tonic-gate synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit)
34130Sstevel@tonic-gate {
34140Sstevel@tonic-gate if (synd_status == AFLT_STAT_INVALID)
34150Sstevel@tonic-gate return (-1);
34160Sstevel@tonic-gate
34170Sstevel@tonic-gate /*
34180Sstevel@tonic-gate * Use the syndrome to index the appropriate syndrome table,
34190Sstevel@tonic-gate * to get the code indicating which bit(s) are bad.
34200Sstevel@tonic-gate */
34210Sstevel@tonic-gate if (afsr_bit &
34220Sstevel@tonic-gate (C_AFSR_MSYND_ERRS | C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
34230Sstevel@tonic-gate if (afsr_bit & C_AFSR_MSYND_ERRS) {
34240Sstevel@tonic-gate #if defined(JALAPENO) || defined(SERRANO)
34250Sstevel@tonic-gate if ((synd == 0) || (synd >= BSYND_TBL_SIZE))
34260Sstevel@tonic-gate return (-1);
34270Sstevel@tonic-gate else
34280Sstevel@tonic-gate return (BPAR0 + synd);
34290Sstevel@tonic-gate #else /* JALAPENO || SERRANO */
34300Sstevel@tonic-gate if ((synd == 0) || (synd >= MSYND_TBL_SIZE))
34310Sstevel@tonic-gate return (-1);
34320Sstevel@tonic-gate else
34330Sstevel@tonic-gate return (mtag_syndrome_tab[synd]);
34340Sstevel@tonic-gate #endif /* JALAPENO || SERRANO */
34350Sstevel@tonic-gate } else {
34360Sstevel@tonic-gate if ((synd == 0) || (synd >= ESYND_TBL_SIZE))
34370Sstevel@tonic-gate return (-1);
34380Sstevel@tonic-gate else
34390Sstevel@tonic-gate return (ecc_syndrome_tab[synd]);
34400Sstevel@tonic-gate }
34410Sstevel@tonic-gate } else {
34420Sstevel@tonic-gate return (-1);
34430Sstevel@tonic-gate }
34440Sstevel@tonic-gate }
34450Sstevel@tonic-gate
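/*
 * Usage sketch: callers in this file convert the raw AFSR syndrome to a
 * table code before unum lookup, exactly as cpu_get_mem_unum() does below:
 *
 *	synd_code = synd_to_synd_code(synd_status, flt_synd, flt_stat);
 */
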
34461186Sayznaga int
34471186Sayznaga cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
34481186Sayznaga {
34491186Sayznaga if (&plat_get_mem_sid)
34501186Sayznaga return (plat_get_mem_sid(unum, buf, buflen, lenp));
34511186Sayznaga else
34521186Sayznaga return (ENOTSUP);
34531186Sayznaga }
34541186Sayznaga
34551186Sayznaga int
34561186Sayznaga cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
34571186Sayznaga {
34581186Sayznaga if (&plat_get_mem_offset)
34591186Sayznaga return (plat_get_mem_offset(flt_addr, offp));
34601186Sayznaga else
34611186Sayznaga return (ENOTSUP);
34621186Sayznaga }
34631186Sayznaga
34641186Sayznaga int
34651186Sayznaga cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
34661186Sayznaga {
34671186Sayznaga if (&plat_get_mem_addr)
34681186Sayznaga return (plat_get_mem_addr(unum, sid, offset, addrp));
34691186Sayznaga else
34701186Sayznaga return (ENOTSUP);
34711186Sayznaga }
34721186Sayznaga
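/*
 * A note on the "if (&plat_get_mem_sid)" style tests above: the platform
 * hooks are weak symbols, so taking their address yields NULL unless a
 * platform module supplies a strong definition. A minimal sketch of the
 * presumed declaration pattern (an assumption, not quoted from a header):
 *
 *	#pragma weak plat_get_mem_sid
 *	extern int plat_get_mem_sid(char *, char *, int, int *);
 */
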
34730Sstevel@tonic-gate /*
34740Sstevel@tonic-gate * Routine to return a string identifying the physical name
34750Sstevel@tonic-gate * associated with a memory/cache error.
34760Sstevel@tonic-gate */
34770Sstevel@tonic-gate int
34780Sstevel@tonic-gate cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat,
34790Sstevel@tonic-gate uint64_t flt_addr, int flt_bus_id, int flt_in_memory,
34800Sstevel@tonic-gate ushort_t flt_status, char *buf, int buflen, int *lenp)
34810Sstevel@tonic-gate {
34820Sstevel@tonic-gate int synd_code;
34830Sstevel@tonic-gate int ret;
34840Sstevel@tonic-gate
34850Sstevel@tonic-gate /*
34860Sstevel@tonic-gate * An AFSR of -1 defaults to a memory syndrome.
34870Sstevel@tonic-gate */
34880Sstevel@tonic-gate if (flt_stat == (uint64_t)-1)
34890Sstevel@tonic-gate flt_stat = C_AFSR_CE;
34900Sstevel@tonic-gate
34910Sstevel@tonic-gate synd_code = synd_to_synd_code(synd_status, flt_synd, flt_stat);
34920Sstevel@tonic-gate
34930Sstevel@tonic-gate /*
34940Sstevel@tonic-gate * Syndrome code must be either a single-bit error code
34950Sstevel@tonic-gate * (0...143) or -1 for unum lookup.
34960Sstevel@tonic-gate */
34970Sstevel@tonic-gate if (synd_code < 0 || synd_code >= M2)
34980Sstevel@tonic-gate synd_code = -1;
34990Sstevel@tonic-gate if (&plat_get_mem_unum) {
35000Sstevel@tonic-gate if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id,
35010Sstevel@tonic-gate flt_in_memory, flt_status, buf, buflen, lenp)) != 0) {
35020Sstevel@tonic-gate buf[0] = '\0';
35030Sstevel@tonic-gate *lenp = 0;
35040Sstevel@tonic-gate }
35050Sstevel@tonic-gate
35060Sstevel@tonic-gate return (ret);
35070Sstevel@tonic-gate }
35080Sstevel@tonic-gate
35090Sstevel@tonic-gate return (ENOTSUP);
35100Sstevel@tonic-gate }
35110Sstevel@tonic-gate
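/*
 * Usage sketch (hypothetical caller): resolving the unum for a memory CE
 * when only the AFAR is known. Passing an AFSR of -1 makes the routine
 * default to a memory (C_AFSR_CE) syndrome, per the comment above.
 *
 *	char unum[UNUM_NAMLEN];
 *	int len;
 *
 *	(void) cpu_get_mem_unum(AFLT_STAT_INVALID, 0, (uint64_t)-1, afar,
 *	    CPU->cpu_id, 1, 0, unum, sizeof (unum), &len);
 */
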
35120Sstevel@tonic-gate /*
35130Sstevel@tonic-gate * Wrapper for cpu_get_mem_unum() routine that takes an
35140Sstevel@tonic-gate * async_flt struct rather than explicit arguments.
35150Sstevel@tonic-gate */
35160Sstevel@tonic-gate int
35170Sstevel@tonic-gate cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
35180Sstevel@tonic-gate char *buf, int buflen, int *lenp)
35190Sstevel@tonic-gate {
35200Sstevel@tonic-gate /*
35210Sstevel@tonic-gate * If we come through here for an IO bus error, aflt->flt_stat will
35220Sstevel@tonic-gate * not be the CPU AFSR, and we pass in a -1 to cpu_get_mem_unum()
35230Sstevel@tonic-gate * so it will interpret this as a memory error.
35240Sstevel@tonic-gate */
35250Sstevel@tonic-gate return (cpu_get_mem_unum(synd_status, aflt->flt_synd,
35260Sstevel@tonic-gate (aflt->flt_class == BUS_FAULT) ?
35272381Smikechr (uint64_t)-1 : ((ch_async_flt_t *)aflt)->flt_bit,
35280Sstevel@tonic-gate aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory,
35290Sstevel@tonic-gate aflt->flt_status, buf, buflen, lenp));
35300Sstevel@tonic-gate }
35310Sstevel@tonic-gate
35320Sstevel@tonic-gate /*
35332436Smb91622 * Return the unum string for the given synd_code and async_flt in buf,
35342436Smb91622 * which must be at least UNUM_NAMLEN bytes.
35352436Smb91622 */
35362436Smb91622 static int
35372436Smb91622 cpu_get_mem_unum_synd(int synd_code, struct async_flt *aflt, char *buf)
35382436Smb91622 {
35392436Smb91622 int ret, len;
35402436Smb91622
35412436Smb91622 /*
35422436Smb91622 * Syndrome code must be either a single-bit error code
35432436Smb91622 * (0...143) or -1 for unum lookup.
35442436Smb91622 */
35452436Smb91622 if (synd_code < 0 || synd_code >= M2)
35462436Smb91622 synd_code = -1;
35472436Smb91622 if (&plat_get_mem_unum) {
35482436Smb91622 if ((ret = plat_get_mem_unum(synd_code, aflt->flt_addr,
35492436Smb91622 aflt->flt_bus_id, aflt->flt_in_memory,
35502436Smb91622 aflt->flt_status, buf, UNUM_NAMLEN, &len)) != 0) {
35512436Smb91622 buf[0] = '\0';
35522436Smb91622 }
35532436Smb91622 return (ret);
35542436Smb91622 }
35552436Smb91622
35562436Smb91622 buf[0] = '\0';
35572436Smb91622 return (ENOTSUP);
35582436Smb91622 }
35592436Smb91622
35602436Smb91622 /*
35610Sstevel@tonic-gate * This routine is a more generic interface to cpu_get_mem_unum()
35622381Smikechr * that may be used by other modules (e.g. the 'mm' driver, through
35632381Smikechr * the 'MEM_NAME' ioctl, which is used by fmd to resolve unums
35642381Smikechr * for Jalapeno/Serrano FRC/RCE or FRU/RUE paired events).
35650Sstevel@tonic-gate */
35660Sstevel@tonic-gate int
35670Sstevel@tonic-gate cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
35680Sstevel@tonic-gate char *buf, int buflen, int *lenp)
35690Sstevel@tonic-gate {
35700Sstevel@tonic-gate int synd_status, flt_in_memory, ret;
35710Sstevel@tonic-gate ushort_t flt_status = 0;
35720Sstevel@tonic-gate char unum[UNUM_NAMLEN];
35732381Smikechr uint64_t t_afsr_errs;
35740Sstevel@tonic-gate
35750Sstevel@tonic-gate /*
35760Sstevel@tonic-gate * Check for an invalid address.
35770Sstevel@tonic-gate */
35780Sstevel@tonic-gate if (afar == (uint64_t)-1)
35790Sstevel@tonic-gate return (ENXIO);
35800Sstevel@tonic-gate
35810Sstevel@tonic-gate if (synd == (uint64_t)-1)
35820Sstevel@tonic-gate synd_status = AFLT_STAT_INVALID;
35830Sstevel@tonic-gate else
35840Sstevel@tonic-gate synd_status = AFLT_STAT_VALID;
35850Sstevel@tonic-gate
35860Sstevel@tonic-gate flt_in_memory = (*afsr & C_AFSR_MEMORY) &&
35870Sstevel@tonic-gate pf_is_memory(afar >> MMU_PAGESHIFT);
35880Sstevel@tonic-gate
35890Sstevel@tonic-gate /*
35902381Smikechr * Get aggregate AFSR for call to cpu_error_is_ecache_data.
35912381Smikechr */
35922381Smikechr if (*afsr == (uint64_t)-1)
35932381Smikechr t_afsr_errs = C_AFSR_CE;
35942381Smikechr else {
35952381Smikechr t_afsr_errs = (*afsr & C_AFSR_ALL_ERRS);
35962381Smikechr #if defined(CHEETAH_PLUS)
35972381Smikechr if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
35982381Smikechr t_afsr_errs |= (*(afsr + 1) & C_AFSR_EXT_ALL_ERRS);
35992381Smikechr #endif /* CHEETAH_PLUS */
36002381Smikechr }
36012381Smikechr
36022381Smikechr /*
36032381Smikechr * Turn on ECC_ECACHE if error type is E$ Data.
36042381Smikechr */
36052381Smikechr if (cpu_error_is_ecache_data(CPU->cpu_id, t_afsr_errs))
36062381Smikechr flt_status |= ECC_ECACHE;
36072381Smikechr
36082381Smikechr ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, t_afsr_errs, afar,
36090Sstevel@tonic-gate CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp);
36100Sstevel@tonic-gate if (ret != 0)
36110Sstevel@tonic-gate return (ret);
36120Sstevel@tonic-gate
36130Sstevel@tonic-gate if (*lenp >= buflen)
36140Sstevel@tonic-gate return (ENAMETOOLONG);
36150Sstevel@tonic-gate
36160Sstevel@tonic-gate (void) strncpy(buf, unum, buflen);
36170Sstevel@tonic-gate
36180Sstevel@tonic-gate return (0);
36190Sstevel@tonic-gate }
36200Sstevel@tonic-gate
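/*
 * Usage sketch (hypothetical consumer): the 'mm' driver would forward a
 * MEM_NAME ioctl from fmd roughly as below; the m_* member names are
 * illustrative only, not the actual ioctl structure layout.
 *
 *	ret = cpu_get_mem_name(mnp->m_synd, &mnp->m_afsr, mnp->m_afar,
 *	    buf, buflen, &len);
 */
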
36210Sstevel@tonic-gate /*
36220Sstevel@tonic-gate * Routine to return memory information associated
36230Sstevel@tonic-gate * with a physical address and syndrome.
36240Sstevel@tonic-gate */
36250Sstevel@tonic-gate int
36260Sstevel@tonic-gate cpu_get_mem_info(uint64_t synd, uint64_t afar,
36270Sstevel@tonic-gate uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
36280Sstevel@tonic-gate int *segsp, int *banksp, int *mcidp)
36290Sstevel@tonic-gate {
36300Sstevel@tonic-gate int synd_status, synd_code;
36310Sstevel@tonic-gate
36320Sstevel@tonic-gate if (afar == (uint64_t)-1)
36330Sstevel@tonic-gate return (ENXIO);
36340Sstevel@tonic-gate
36350Sstevel@tonic-gate if (synd == (uint64_t)-1)
36360Sstevel@tonic-gate synd_status = AFLT_STAT_INVALID;
36370Sstevel@tonic-gate else
36380Sstevel@tonic-gate synd_status = AFLT_STAT_VALID;
36390Sstevel@tonic-gate
36400Sstevel@tonic-gate synd_code = synd_to_synd_code(synd_status, synd, C_AFSR_CE);
36410Sstevel@tonic-gate
36420Sstevel@tonic-gate if (p2get_mem_info != NULL)
36430Sstevel@tonic-gate return ((p2get_mem_info)(synd_code, afar,
36445219Skm84432 mem_sizep, seg_sizep, bank_sizep,
36455219Skm84432 segsp, banksp, mcidp));
36460Sstevel@tonic-gate else
36470Sstevel@tonic-gate return (ENOTSUP);
36480Sstevel@tonic-gate }
36490Sstevel@tonic-gate
36500Sstevel@tonic-gate /*
36510Sstevel@tonic-gate * Routine to return a string identifying the physical
36520Sstevel@tonic-gate * name associated with a cpuid.
36530Sstevel@tonic-gate */
36540Sstevel@tonic-gate int
36550Sstevel@tonic-gate cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
36560Sstevel@tonic-gate {
36570Sstevel@tonic-gate int ret;
36580Sstevel@tonic-gate char unum[UNUM_NAMLEN];
36590Sstevel@tonic-gate
36600Sstevel@tonic-gate if (&plat_get_cpu_unum) {
36610Sstevel@tonic-gate if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp))
36620Sstevel@tonic-gate != 0)
36630Sstevel@tonic-gate return (ret);
36640Sstevel@tonic-gate } else {
36650Sstevel@tonic-gate return (ENOTSUP);
36660Sstevel@tonic-gate }
36670Sstevel@tonic-gate
36680Sstevel@tonic-gate if (*lenp >= buflen)
36690Sstevel@tonic-gate return (ENAMETOOLONG);
36700Sstevel@tonic-gate
36710Sstevel@tonic-gate (void) strncpy(buf, unum, buflen);
36720Sstevel@tonic-gate
36730Sstevel@tonic-gate return (0);
36740Sstevel@tonic-gate }
36750Sstevel@tonic-gate
36760Sstevel@tonic-gate /*
36770Sstevel@tonic-gate * This routine exports the name buffer size.
36780Sstevel@tonic-gate */
36790Sstevel@tonic-gate size_t
36800Sstevel@tonic-gate cpu_get_name_bufsize()
36810Sstevel@tonic-gate {
36820Sstevel@tonic-gate return (UNUM_NAMLEN);
36830Sstevel@tonic-gate }
36840Sstevel@tonic-gate
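/*
 * Usage sketch: consumers are expected to size unum buffers via
 * cpu_get_name_bufsize() rather than hard-coding UNUM_NAMLEN:
 *
 *	int len;
 *	size_t bufsz = cpu_get_name_bufsize();
 *	char *buf = kmem_alloc(bufsz, KM_SLEEP);
 *
 *	if (cpu_get_cpu_unum(CPU->cpu_id, buf, (int)bufsz, &len) != 0)
 *		(failed: ENOTSUP or ENAMETOOLONG)
 */
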
36850Sstevel@tonic-gate /*
36860Sstevel@tonic-gate * Historical function, apparently not used.
36870Sstevel@tonic-gate */
36880Sstevel@tonic-gate /* ARGSUSED */
36890Sstevel@tonic-gate void
36900Sstevel@tonic-gate cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
36910Sstevel@tonic-gate {}
36920Sstevel@tonic-gate
36930Sstevel@tonic-gate /*
36940Sstevel@tonic-gate * Historical function only called for SBus errors in debugging.
36950Sstevel@tonic-gate */
36960Sstevel@tonic-gate /*ARGSUSED*/
36970Sstevel@tonic-gate void
36980Sstevel@tonic-gate read_ecc_data(struct async_flt *aflt, short verbose, short ce_err)
36990Sstevel@tonic-gate {}
37000Sstevel@tonic-gate
37010Sstevel@tonic-gate /*
37020Sstevel@tonic-gate * Clear the AFSR sticky bits. The routine returns a non-zero value if
37030Sstevel@tonic-gate * any of the AFSR's sticky errors are detected. If a non-null pointer to
37040Sstevel@tonic-gate * an async fault structure argument is passed in, the captured error state
37050Sstevel@tonic-gate * (AFSR, AFAR) info will be returned in the structure.
37060Sstevel@tonic-gate */
37070Sstevel@tonic-gate int
37080Sstevel@tonic-gate clear_errors(ch_async_flt_t *ch_flt)
37090Sstevel@tonic-gate {
37100Sstevel@tonic-gate struct async_flt *aflt = (struct async_flt *)ch_flt;
37110Sstevel@tonic-gate ch_cpu_errors_t cpu_error_regs;
37120Sstevel@tonic-gate
37130Sstevel@tonic-gate get_cpu_error_state(&cpu_error_regs);
37140Sstevel@tonic-gate
37150Sstevel@tonic-gate if (ch_flt != NULL) {
37160Sstevel@tonic-gate aflt->flt_stat = cpu_error_regs.afsr & C_AFSR_MASK;
37170Sstevel@tonic-gate aflt->flt_addr = cpu_error_regs.afar;
37180Sstevel@tonic-gate ch_flt->afsr_ext = cpu_error_regs.afsr_ext;
37190Sstevel@tonic-gate ch_flt->afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
37200Sstevel@tonic-gate (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
37210Sstevel@tonic-gate #if defined(SERRANO)
37220Sstevel@tonic-gate ch_flt->afar2 = cpu_error_regs.afar2;
37230Sstevel@tonic-gate #endif /* SERRANO */
37240Sstevel@tonic-gate }
37250Sstevel@tonic-gate
37260Sstevel@tonic-gate set_cpu_error_state(&cpu_error_regs);
37270Sstevel@tonic-gate
37280Sstevel@tonic-gate return (((cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
37290Sstevel@tonic-gate (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS)) != 0);
37300Sstevel@tonic-gate }
37310Sstevel@tonic-gate
37320Sstevel@tonic-gate /*
37330Sstevel@tonic-gate * Clear any AFSR error bits, and check for persistence.
37340Sstevel@tonic-gate *
37350Sstevel@tonic-gate * It would be desirable to also insist that syndrome match. PCI handling
37360Sstevel@tonic-gate * has already filled flt_synd. For errors trapped by CPU we only fill
37370Sstevel@tonic-gate * flt_synd when we queue the event, so we do not have a valid flt_synd
37380Sstevel@tonic-gate * during initial classification (it is valid if we're called as part of
37390Sstevel@tonic-gate * subsequent low-pil additional classification attempts). We could try
37400Sstevel@tonic-gate * to determine which syndrome to use: we know we're only called for
37410Sstevel@tonic-gate * CE/RCE (Jalapeno & Serrano) and CE/EMC (others) so the syndrome to use
37420Sstevel@tonic-gate * would be esynd/none and esynd/msynd, respectively. If that is
37430Sstevel@tonic-gate * implemented then what do we do in the case that we do experience an
37440Sstevel@tonic-gate * error on the same afar but with different syndrome? At the very least
37450Sstevel@tonic-gate * we should count such occurrences. Anyway, for now, we'll leave it as
37460Sstevel@tonic-gate * it has been for ages.
37470Sstevel@tonic-gate */
37480Sstevel@tonic-gate static int
37490Sstevel@tonic-gate clear_ecc(struct async_flt *aflt)
37500Sstevel@tonic-gate {
37510Sstevel@tonic-gate ch_cpu_errors_t cpu_error_regs;
37520Sstevel@tonic-gate
37530Sstevel@tonic-gate /*
37540Sstevel@tonic-gate * Snapshot the AFSR and AFAR and clear any errors
37550Sstevel@tonic-gate */
37560Sstevel@tonic-gate get_cpu_error_state(&cpu_error_regs);
37570Sstevel@tonic-gate set_cpu_error_state(&cpu_error_regs);
37580Sstevel@tonic-gate
37590Sstevel@tonic-gate /*
37600Sstevel@tonic-gate * If any of the same memory access error bits are still on and
37610Sstevel@tonic-gate * the AFAR matches, return that the error is persistent.
37620Sstevel@tonic-gate */
37630Sstevel@tonic-gate return ((cpu_error_regs.afsr & (C_AFSR_MEMORY & aflt->flt_stat)) != 0 &&
37640Sstevel@tonic-gate cpu_error_regs.afar == aflt->flt_addr);
37650Sstevel@tonic-gate }
37660Sstevel@tonic-gate
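/*
 * Sketch of the intended persistence test (hypothetical caller): scrub
 * the line, then let clear_ecc() report whether the same AFSR memory
 * error bit and a matching AFAR immediately reappear.
 *
 *	cpu_scrubphys(aflt);
 *	persistent = clear_ecc(aflt);	nonzero: the error came back
 */
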
37670Sstevel@tonic-gate /*
37680Sstevel@tonic-gate * Turn off all cpu error detection, normally only used for panics.
37690Sstevel@tonic-gate */
37700Sstevel@tonic-gate void
37710Sstevel@tonic-gate cpu_disable_errors(void)
37720Sstevel@tonic-gate {
37730Sstevel@tonic-gate xt_all(set_error_enable_tl1, EN_REG_DISABLE, EER_SET_ABSOLUTE);
3774960Srscott
3775960Srscott /*
3776960Srscott * With error detection now turned off, check the other cpus
3777960Srscott * logout areas for any unlogged errors.
3778960Srscott */
3779960Srscott if (enable_check_other_cpus_logout) {
3780960Srscott cpu_check_other_cpus_logout();
3781960Srscott /*
3782960Srscott * Make a second pass over the logout areas, in case
3783960Srscott * there is a failing CPU in an error-trap loop which
3784960Srscott * will write to the logout area once it is emptied.
3785960Srscott */
3786960Srscott cpu_check_other_cpus_logout();
3787960Srscott }
37880Sstevel@tonic-gate }
37890Sstevel@tonic-gate
37900Sstevel@tonic-gate /*
37910Sstevel@tonic-gate * Enable errors.
37920Sstevel@tonic-gate */
37930Sstevel@tonic-gate void
37940Sstevel@tonic-gate cpu_enable_errors(void)
37950Sstevel@tonic-gate {
37960Sstevel@tonic-gate xt_all(set_error_enable_tl1, EN_REG_ENABLE, EER_SET_ABSOLUTE);
37970Sstevel@tonic-gate }
37980Sstevel@tonic-gate
37990Sstevel@tonic-gate /*
38000Sstevel@tonic-gate * Flush the entire ecache using displacement flush by reading through a
38010Sstevel@tonic-gate * physical address range twice as large as the Ecache.
38020Sstevel@tonic-gate */
38030Sstevel@tonic-gate void
38040Sstevel@tonic-gate cpu_flush_ecache(void)
38050Sstevel@tonic-gate {
38060Sstevel@tonic-gate flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size,
38070Sstevel@tonic-gate cpunodes[CPU->cpu_id].ecache_linesize);
38080Sstevel@tonic-gate }
38090Sstevel@tonic-gate
38100Sstevel@tonic-gate /*
38110Sstevel@tonic-gate * Return CPU E$ set size - E$ size divided by the associativity.
38120Sstevel@tonic-gate * We use this function in places where the CPU_PRIVATE ptr may not be
38130Sstevel@tonic-gate * initialized yet. Note that for send_mondo and in the Ecache scrubber,
38140Sstevel@tonic-gate * we're guaranteed that CPU_PRIVATE is initialized. Also, cpunodes is set
38150Sstevel@tonic-gate * up before the kernel switches from OBP's to the kernel's trap table, so
38160Sstevel@tonic-gate * we don't have to worry about cpunodes being uninitialized.
38170Sstevel@tonic-gate */
38180Sstevel@tonic-gate int
38190Sstevel@tonic-gate cpu_ecache_set_size(struct cpu *cp)
38200Sstevel@tonic-gate {
38210Sstevel@tonic-gate if (CPU_PRIVATE(cp))
38220Sstevel@tonic-gate return (CPU_PRIVATE_VAL(cp, chpr_ec_set_size));
38230Sstevel@tonic-gate
38240Sstevel@tonic-gate return (cpunodes[cp->cpu_id].ecache_size / cpu_ecache_nway());
38250Sstevel@tonic-gate }
38260Sstevel@tonic-gate
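/*
 * Worked example (sizes assumed for illustration): an 8 MB, 2-way E$
 * gives a set size of 8 MB / 2 = 4 MB; the per-line flush and scrub
 * routines below are handed this set size so they can locate a line's
 * aliases in each way.
 */
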
38270Sstevel@tonic-gate /*
38280Sstevel@tonic-gate * Flush Ecache line.
38290Sstevel@tonic-gate * Uses ASI_EC_DIAG for Cheetah+ and Jalapeno.
38300Sstevel@tonic-gate * Uses normal displacement flush for Cheetah.
38310Sstevel@tonic-gate */
38320Sstevel@tonic-gate static void
38330Sstevel@tonic-gate cpu_flush_ecache_line(ch_async_flt_t *ch_flt)
38340Sstevel@tonic-gate {
38350Sstevel@tonic-gate struct async_flt *aflt = (struct async_flt *)ch_flt;
38360Sstevel@tonic-gate int ec_set_size = cpu_ecache_set_size(CPU);
38370Sstevel@tonic-gate
38380Sstevel@tonic-gate ecache_flush_line(aflt->flt_addr, ec_set_size);
38390Sstevel@tonic-gate }
38400Sstevel@tonic-gate
38410Sstevel@tonic-gate /*
38420Sstevel@tonic-gate * Scrub physical address.
38430Sstevel@tonic-gate * Scrub code is different depending upon whether this a Cheetah+ with 2-way
38440Sstevel@tonic-gate * Ecache or direct-mapped Ecache.
38450Sstevel@tonic-gate */
38460Sstevel@tonic-gate static void
38470Sstevel@tonic-gate cpu_scrubphys(struct async_flt *aflt)
38480Sstevel@tonic-gate {
38490Sstevel@tonic-gate int ec_set_size = cpu_ecache_set_size(CPU);
38500Sstevel@tonic-gate
38510Sstevel@tonic-gate scrubphys(aflt->flt_addr, ec_set_size);
38520Sstevel@tonic-gate }
38530Sstevel@tonic-gate
38540Sstevel@tonic-gate /*
38550Sstevel@tonic-gate * Clear physical address.
38560Sstevel@tonic-gate * Scrub code is different depending upon whether this a Cheetah+ with 2-way
38570Sstevel@tonic-gate * Ecache or direct-mapped Ecache.
38580Sstevel@tonic-gate */
38590Sstevel@tonic-gate void
38600Sstevel@tonic-gate cpu_clearphys(struct async_flt *aflt)
38610Sstevel@tonic-gate {
38620Sstevel@tonic-gate int lsize = cpunodes[CPU->cpu_id].ecache_linesize;
38630Sstevel@tonic-gate int ec_set_size = cpu_ecache_set_size(CPU);
38640Sstevel@tonic-gate
38664567Sanbui clearphys(aflt->flt_addr, ec_set_size, lsize);
38670Sstevel@tonic-gate }
38680Sstevel@tonic-gate
38690Sstevel@tonic-gate #if defined(CPU_IMP_ECACHE_ASSOC)
38700Sstevel@tonic-gate /*
38710Sstevel@tonic-gate * Check for a matching valid line in all the sets.
38720Sstevel@tonic-gate * If found, return set# + 1. Otherwise return 0.
38730Sstevel@tonic-gate */
38740Sstevel@tonic-gate static int
38750Sstevel@tonic-gate cpu_ecache_line_valid(ch_async_flt_t *ch_flt)
38760Sstevel@tonic-gate {
38770Sstevel@tonic-gate struct async_flt *aflt = (struct async_flt *)ch_flt;
38780Sstevel@tonic-gate int totalsize = cpunodes[CPU->cpu_id].ecache_size;
38790Sstevel@tonic-gate int ec_set_size = cpu_ecache_set_size(CPU);
38800Sstevel@tonic-gate ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
38810Sstevel@tonic-gate int nway = cpu_ecache_nway();
38820Sstevel@tonic-gate int i;
38830Sstevel@tonic-gate
38840Sstevel@tonic-gate for (i = 0; i < nway; i++, ecp++) {
38850Sstevel@tonic-gate if (!cpu_ectag_line_invalid(totalsize, ecp->ec_tag) &&
38860Sstevel@tonic-gate (aflt->flt_addr & P2ALIGN(C_AFAR_PA, ec_set_size)) ==
38870Sstevel@tonic-gate cpu_ectag_to_pa(ec_set_size, ecp->ec_tag))
38880Sstevel@tonic-gate return (i+1);
38890Sstevel@tonic-gate }
38900Sstevel@tonic-gate return (0);
38910Sstevel@tonic-gate }
38920Sstevel@tonic-gate #endif /* CPU_IMP_ECACHE_ASSOC */
38930Sstevel@tonic-gate
38940Sstevel@tonic-gate /*
38950Sstevel@tonic-gate * Check whether a line in the given logout info matches the specified
38960Sstevel@tonic-gate * fault address. If reqval is set then the line must not be Invalid.
38970Sstevel@tonic-gate * Returns 0 on failure; on success (way + 1) is returned and *level is
38980Sstevel@tonic-gate * set to 2 for l2$ or 3 for l3$.
38990Sstevel@tonic-gate */
39000Sstevel@tonic-gate static int
39010Sstevel@tonic-gate cpu_matching_ecache_line(uint64_t faddr, void *data, int reqval, int *level)
39020Sstevel@tonic-gate {
39030Sstevel@tonic-gate ch_diag_data_t *cdp = data;
39040Sstevel@tonic-gate ch_ec_data_t *ecp;
39050Sstevel@tonic-gate int totalsize, ec_set_size;
39060Sstevel@tonic-gate int i, ways;
39070Sstevel@tonic-gate int match = 0;
39080Sstevel@tonic-gate int tagvalid;
39090Sstevel@tonic-gate uint64_t addr, tagpa;
39100Sstevel@tonic-gate int ispanther = IS_PANTHER(cpunodes[CPU->cpu_id].implementation);
39110Sstevel@tonic-gate
39120Sstevel@tonic-gate /*
39130Sstevel@tonic-gate * Check the l2$ logout data
39140Sstevel@tonic-gate */
39150Sstevel@tonic-gate if (ispanther) {
39160Sstevel@tonic-gate ecp = &cdp->chd_l2_data[0];
39170Sstevel@tonic-gate ec_set_size = PN_L2_SET_SIZE;
39180Sstevel@tonic-gate ways = PN_L2_NWAYS;
39190Sstevel@tonic-gate } else {
39200Sstevel@tonic-gate ecp = &cdp->chd_ec_data[0];
39210Sstevel@tonic-gate ec_set_size = cpu_ecache_set_size(CPU);
39220Sstevel@tonic-gate ways = cpu_ecache_nway();
39230Sstevel@tonic-gate totalsize = cpunodes[CPU->cpu_id].ecache_size;
39240Sstevel@tonic-gate }
39250Sstevel@tonic-gate /* remove low order PA bits from fault address not used in PA tag */
39260Sstevel@tonic-gate addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
39270Sstevel@tonic-gate for (i = 0; i < ways; i++, ecp++) {
39280Sstevel@tonic-gate if (ispanther) {
39290Sstevel@tonic-gate tagpa = PN_L2TAG_TO_PA(ecp->ec_tag);
39300Sstevel@tonic-gate tagvalid = !PN_L2_LINE_INVALID(ecp->ec_tag);
39310Sstevel@tonic-gate } else {
39320Sstevel@tonic-gate tagpa = cpu_ectag_to_pa(ec_set_size, ecp->ec_tag);
39330Sstevel@tonic-gate tagvalid = !cpu_ectag_line_invalid(totalsize,
39340Sstevel@tonic-gate ecp->ec_tag);
39350Sstevel@tonic-gate }
39360Sstevel@tonic-gate if (tagpa == addr && (!reqval || tagvalid)) {
39370Sstevel@tonic-gate match = i + 1;
39380Sstevel@tonic-gate *level = 2;
39390Sstevel@tonic-gate break;
39400Sstevel@tonic-gate }
39410Sstevel@tonic-gate }
39420Sstevel@tonic-gate
39430Sstevel@tonic-gate if (match || !ispanther)
39440Sstevel@tonic-gate return (match);
39450Sstevel@tonic-gate
39460Sstevel@tonic-gate /* For Panther we also check the l3$ */
39470Sstevel@tonic-gate ecp = &cdp->chd_ec_data[0];
39480Sstevel@tonic-gate ec_set_size = PN_L3_SET_SIZE;
39490Sstevel@tonic-gate ways = PN_L3_NWAYS;
39500Sstevel@tonic-gate addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
39510Sstevel@tonic-gate
39520Sstevel@tonic-gate for (i = 0; i < ways; i++, ecp++) {
39530Sstevel@tonic-gate if (PN_L3TAG_TO_PA(ecp->ec_tag) == addr && (!reqval ||
39540Sstevel@tonic-gate !PN_L3_LINE_INVALID(ecp->ec_tag))) {
39550Sstevel@tonic-gate match = i + 1;
39560Sstevel@tonic-gate *level = 3;
39570Sstevel@tonic-gate break;
39580Sstevel@tonic-gate }
39590Sstevel@tonic-gate }
39600Sstevel@tonic-gate
39610Sstevel@tonic-gate return (match);
39620Sstevel@tonic-gate }
39630Sstevel@tonic-gate
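/*
 * Worked example of the tag match above (values assumed purely for
 * illustration): with an ec_set_size of 1 MB, P2ALIGN(C_AFAR_PA,
 * ec_set_size) clears AFAR bits below bit 20, so a fault address of
 * 0x7f2345678 is compared as 0x7f2300000 against the PA decoded from
 * each way's tag; a way matches when the aligned addresses are equal
 * and, if reqval is set, the line is not Invalid.
 */
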
39640Sstevel@tonic-gate #if defined(CPU_IMP_L1_CACHE_PARITY)
39650Sstevel@tonic-gate /*
39660Sstevel@tonic-gate * Record information related to the source of a Dcache Parity Error.
39670Sstevel@tonic-gate */
39680Sstevel@tonic-gate static void
39690Sstevel@tonic-gate cpu_dcache_parity_info(ch_async_flt_t *ch_flt)
39700Sstevel@tonic-gate {
39710Sstevel@tonic-gate int dc_set_size = dcache_size / CH_DCACHE_NWAY;
39720Sstevel@tonic-gate int index;
39730Sstevel@tonic-gate
39740Sstevel@tonic-gate /*
39750Sstevel@tonic-gate * Since instruction decode cannot be done at high PIL,
39760Sstevel@tonic-gate * just examine the entire Dcache to locate the error.
39770Sstevel@tonic-gate */
39780Sstevel@tonic-gate if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
39790Sstevel@tonic-gate ch_flt->parity_data.dpe.cpl_way = -1;
39800Sstevel@tonic-gate ch_flt->parity_data.dpe.cpl_off = -1;
39810Sstevel@tonic-gate }
39820Sstevel@tonic-gate for (index = 0; index < dc_set_size; index += dcache_linesize)
39830Sstevel@tonic-gate cpu_dcache_parity_check(ch_flt, index);
39840Sstevel@tonic-gate }
39850Sstevel@tonic-gate
39860Sstevel@tonic-gate /*
39870Sstevel@tonic-gate * Check all ways of the Dcache at a specified index for good parity.
39880Sstevel@tonic-gate */
39890Sstevel@tonic-gate static void
39900Sstevel@tonic-gate cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index)
39910Sstevel@tonic-gate {
39920Sstevel@tonic-gate int dc_set_size = dcache_size / CH_DCACHE_NWAY;
39930Sstevel@tonic-gate uint64_t parity_bits, pbits, data_word;
39940Sstevel@tonic-gate static int parity_bits_popc[] = { 0, 1, 1, 0 };
39950Sstevel@tonic-gate int way, word, data_byte;
39960Sstevel@tonic-gate ch_dc_data_t *dcp = &ch_flt->parity_data.dpe.cpl_dc[0];
39970Sstevel@tonic-gate ch_dc_data_t tmp_dcp;
39980Sstevel@tonic-gate
39990Sstevel@tonic-gate for (way = 0; way < CH_DCACHE_NWAY; way++, dcp++) {
40000Sstevel@tonic-gate /*
40010Sstevel@tonic-gate * Perform diagnostic read.
40020Sstevel@tonic-gate */
40030Sstevel@tonic-gate get_dcache_dtag(index + way * dc_set_size,
40045219Skm84432 (uint64_t *)&tmp_dcp);
40050Sstevel@tonic-gate
40060Sstevel@tonic-gate /*
40070Sstevel@tonic-gate * Check tag for even parity.
40080Sstevel@tonic-gate * Sum of 1 bits (including parity bit) should be even.
40090Sstevel@tonic-gate */
40100Sstevel@tonic-gate if (popc64(tmp_dcp.dc_tag & CHP_DCTAG_PARMASK) & 1) {
40110Sstevel@tonic-gate /*
40120Sstevel@tonic-gate * If this is the first error, log detailed information
40130Sstevel@tonic-gate * about it and check the snoop tag. Otherwise just
40140Sstevel@tonic-gate * record the fact that we found another error.
40150Sstevel@tonic-gate */
40160Sstevel@tonic-gate if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
40170Sstevel@tonic-gate ch_flt->parity_data.dpe.cpl_way = way;
40180Sstevel@tonic-gate ch_flt->parity_data.dpe.cpl_cache =
40190Sstevel@tonic-gate CPU_DC_PARITY;
40200Sstevel@tonic-gate ch_flt->parity_data.dpe.cpl_tag |= CHP_DC_TAG;
40210Sstevel@tonic-gate
40220Sstevel@tonic-gate if (popc64(tmp_dcp.dc_sntag &
40235219Skm84432 CHP_DCSNTAG_PARMASK) & 1) {
40240Sstevel@tonic-gate ch_flt->parity_data.dpe.cpl_tag |=
40255219Skm84432 CHP_DC_SNTAG;
40260Sstevel@tonic-gate ch_flt->parity_data.dpe.cpl_lcnt++;
40270Sstevel@tonic-gate }
40280Sstevel@tonic-gate
40290Sstevel@tonic-gate bcopy(&tmp_dcp, dcp, sizeof (ch_dc_data_t));
40300Sstevel@tonic-gate }
40310Sstevel@tonic-gate
40320Sstevel@tonic-gate ch_flt->parity_data.dpe.cpl_lcnt++;
40330Sstevel@tonic-gate }
40340Sstevel@tonic-gate
40350Sstevel@tonic-gate if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
40360Sstevel@tonic-gate /*
40370Sstevel@tonic-gate * Panther has more parity bits than the other
40380Sstevel@tonic-gate * processors for covering dcache data and so each
40390Sstevel@tonic-gate * byte of data in each word has its own parity bit.
40400Sstevel@tonic-gate */
40410Sstevel@tonic-gate parity_bits = tmp_dcp.dc_pn_data_parity;
40420Sstevel@tonic-gate for (word = 0; word < 4; word++) {
40430Sstevel@tonic-gate data_word = tmp_dcp.dc_data[word];
40440Sstevel@tonic-gate pbits = parity_bits & PN_DC_DATA_PARITY_MASK;
40450Sstevel@tonic-gate for (data_byte = 0; data_byte < 8;
40460Sstevel@tonic-gate data_byte++) {
40470Sstevel@tonic-gate if (((popc64(data_word &
40480Sstevel@tonic-gate PN_DC_DATA_PARITY_MASK)) & 1) ^
40490Sstevel@tonic-gate (pbits & 1)) {
40500Sstevel@tonic-gate cpu_record_dc_data_parity(
40515219Skm84432 ch_flt, dcp, &tmp_dcp, way,
40525219Skm84432 word);
40530Sstevel@tonic-gate }
40540Sstevel@tonic-gate pbits >>= 1;
40550Sstevel@tonic-gate data_word >>= 8;
40560Sstevel@tonic-gate }
40570Sstevel@tonic-gate parity_bits >>= 8;
40580Sstevel@tonic-gate }
40590Sstevel@tonic-gate } else {
40600Sstevel@tonic-gate /*
40610Sstevel@tonic-gate * Check data array for even parity.
40620Sstevel@tonic-gate * The 8 parity bits are grouped into 4 pairs each
40630Sstevel@tonic-gate * of which covers a 64-bit word. The endianness is
40640Sstevel@tonic-gate * reversed -- the low-order parity bits cover the
40650Sstevel@tonic-gate * high-order data words.
40660Sstevel@tonic-gate */
40670Sstevel@tonic-gate parity_bits = tmp_dcp.dc_utag >> 8;
40680Sstevel@tonic-gate for (word = 0; word < 4; word++) {
40690Sstevel@tonic-gate pbits = (parity_bits >> (6 - word * 2)) & 3;
40700Sstevel@tonic-gate if ((popc64(tmp_dcp.dc_data[word]) +
40710Sstevel@tonic-gate parity_bits_popc[pbits]) & 1) {
40720Sstevel@tonic-gate cpu_record_dc_data_parity(ch_flt, dcp,
40730Sstevel@tonic-gate &tmp_dcp, way, word);
40740Sstevel@tonic-gate }
40750Sstevel@tonic-gate }
40760Sstevel@tonic-gate }
40770Sstevel@tonic-gate }
40780Sstevel@tonic-gate }
40790Sstevel@tonic-gate
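/*
 * Illustration of the even-parity test used above, as a user-land
 * analogue (popc64() is modeled with the GCC builtin; this is a sketch,
 * not kernel code):
 *
 *	#include <stdint.h>
 *
 *	static int
 *	parity_bad(uint64_t bits, uint64_t parmask)
 *	{
 *		count the 1 bits covered by the mask, parity bit
 *		included; an odd total means a parity error.
 *		return ((int)(__builtin_popcountll(bits & parmask) & 1));
 *	}
 */
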
40800Sstevel@tonic-gate static void
40810Sstevel@tonic-gate cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
40820Sstevel@tonic-gate ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word)
40830Sstevel@tonic-gate {
40840Sstevel@tonic-gate /*
40850Sstevel@tonic-gate * If this is the first error, log detailed information about it.
40860Sstevel@tonic-gate * Otherwise just record the fact that we found another error.
40870Sstevel@tonic-gate */
40880Sstevel@tonic-gate if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
40890Sstevel@tonic-gate ch_flt->parity_data.dpe.cpl_way = way;
40900Sstevel@tonic-gate ch_flt->parity_data.dpe.cpl_cache = CPU_DC_PARITY;
40910Sstevel@tonic-gate ch_flt->parity_data.dpe.cpl_off = word * 8;
40920Sstevel@tonic-gate bcopy(src_dcp, dest_dcp, sizeof (ch_dc_data_t));
40930Sstevel@tonic-gate }
40940Sstevel@tonic-gate ch_flt->parity_data.dpe.cpl_lcnt++;
40950Sstevel@tonic-gate }
40960Sstevel@tonic-gate
40970Sstevel@tonic-gate /*
40980Sstevel@tonic-gate * Record information related to the source of an Icache Parity Error.
40990Sstevel@tonic-gate *
41000Sstevel@tonic-gate * Called with the Icache disabled so any diagnostic accesses are safe.
41010Sstevel@tonic-gate */
41020Sstevel@tonic-gate static void
41030Sstevel@tonic-gate cpu_icache_parity_info(ch_async_flt_t *ch_flt)
41040Sstevel@tonic-gate {
41050Sstevel@tonic-gate int ic_set_size;
41060Sstevel@tonic-gate int ic_linesize;
41070Sstevel@tonic-gate int index;
41080Sstevel@tonic-gate
41090Sstevel@tonic-gate if (CPU_PRIVATE(CPU)) {
41100Sstevel@tonic-gate ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
41110Sstevel@tonic-gate CH_ICACHE_NWAY;
41120Sstevel@tonic-gate ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
41130Sstevel@tonic-gate } else {
41140Sstevel@tonic-gate ic_set_size = icache_size / CH_ICACHE_NWAY;
41150Sstevel@tonic-gate ic_linesize = icache_linesize;
41160Sstevel@tonic-gate }
41170Sstevel@tonic-gate
41180Sstevel@tonic-gate ch_flt->parity_data.ipe.cpl_way = -1;
41190Sstevel@tonic-gate ch_flt->parity_data.ipe.cpl_off = -1;
41200Sstevel@tonic-gate
41210Sstevel@tonic-gate for (index = 0; index < ic_set_size; index += ic_linesize)
41220Sstevel@tonic-gate cpu_icache_parity_check(ch_flt, index);
41230Sstevel@tonic-gate }
41240Sstevel@tonic-gate
41250Sstevel@tonic-gate /*
41260Sstevel@tonic-gate * Check all ways of the Icache at a specified index for good parity.
41270Sstevel@tonic-gate */
41280Sstevel@tonic-gate static void
41290Sstevel@tonic-gate cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index)
41300Sstevel@tonic-gate {
41310Sstevel@tonic-gate uint64_t parmask, pn_inst_parity;
41320Sstevel@tonic-gate int ic_set_size;
41330Sstevel@tonic-gate int ic_linesize;
41340Sstevel@tonic-gate int flt_index, way, instr, num_instr;
41350Sstevel@tonic-gate struct async_flt *aflt = (struct async_flt *)ch_flt;
41360Sstevel@tonic-gate ch_ic_data_t *icp = &ch_flt->parity_data.ipe.cpl_ic[0];
41370Sstevel@tonic-gate ch_ic_data_t tmp_icp;
41380Sstevel@tonic-gate
41390Sstevel@tonic-gate if (CPU_PRIVATE(CPU)) {
41400Sstevel@tonic-gate ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
41410Sstevel@tonic-gate CH_ICACHE_NWAY;
41420Sstevel@tonic-gate ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
41430Sstevel@tonic-gate } else {
41440Sstevel@tonic-gate ic_set_size = icache_size / CH_ICACHE_NWAY;
41450Sstevel@tonic-gate ic_linesize = icache_linesize;
41460Sstevel@tonic-gate }
41470Sstevel@tonic-gate
41480Sstevel@tonic-gate /*
41490Sstevel@tonic-gate * Panther has twice as many instructions per icache line and the
41500Sstevel@tonic-gate * instruction parity bit is in a different location.
41510Sstevel@tonic-gate */
41520Sstevel@tonic-gate if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
41530Sstevel@tonic-gate num_instr = PN_IC_DATA_REG_SIZE / sizeof (uint64_t);
41540Sstevel@tonic-gate pn_inst_parity = PN_ICDATA_PARITY_BIT_MASK;
41550Sstevel@tonic-gate } else {
41560Sstevel@tonic-gate num_instr = CH_IC_DATA_REG_SIZE / sizeof (uint64_t);
41570Sstevel@tonic-gate pn_inst_parity = 0;
41580Sstevel@tonic-gate }
41590Sstevel@tonic-gate
41600Sstevel@tonic-gate /*
41610Sstevel@tonic-gate * Index at which we expect to find the parity error.
41620Sstevel@tonic-gate */
41630Sstevel@tonic-gate flt_index = P2ALIGN(aflt->flt_addr % ic_set_size, ic_linesize);
41640Sstevel@tonic-gate
41650Sstevel@tonic-gate for (way = 0; way < CH_ICACHE_NWAY; way++, icp++) {
41660Sstevel@tonic-gate /*
41670Sstevel@tonic-gate * Diagnostic reads expect address argument in ASI format.
41680Sstevel@tonic-gate */
41690Sstevel@tonic-gate get_icache_dtag(2 * (index + way * ic_set_size),
41705219Skm84432 (uint64_t *)&tmp_icp);
41710Sstevel@tonic-gate
41720Sstevel@tonic-gate /*
41730Sstevel@tonic-gate * If this is the index in which we expect to find the
41740Sstevel@tonic-gate * error, log detailed information about each of the ways.
41750Sstevel@tonic-gate * This information will be displayed later if we can't
41760Sstevel@tonic-gate * determine the exact way in which the error is located.
41770Sstevel@tonic-gate */
41780Sstevel@tonic-gate if (flt_index == index)
41790Sstevel@tonic-gate bcopy(&tmp_icp, icp, sizeof (ch_ic_data_t));
41800Sstevel@tonic-gate
41810Sstevel@tonic-gate /*
41820Sstevel@tonic-gate * Check tag for even parity.
41830Sstevel@tonic-gate * Sum of 1 bits (including parity bit) should be even.
41840Sstevel@tonic-gate */
41850Sstevel@tonic-gate if (popc64(tmp_icp.ic_patag & CHP_ICPATAG_PARMASK) & 1) {
41860Sstevel@tonic-gate /*
41870Sstevel@tonic-gate * If this way is the one in which we expected
41880Sstevel@tonic-gate * to find the error, record the way and check the
41890Sstevel@tonic-gate * snoop tag. Otherwise just record the fact we
41900Sstevel@tonic-gate * found another error.
41910Sstevel@tonic-gate */
41920Sstevel@tonic-gate if (flt_index == index) {
41930Sstevel@tonic-gate ch_flt->parity_data.ipe.cpl_way = way;
41940Sstevel@tonic-gate ch_flt->parity_data.ipe.cpl_tag |= CHP_IC_TAG;
41950Sstevel@tonic-gate
41960Sstevel@tonic-gate if (popc64(tmp_icp.ic_sntag &
41975219Skm84432 CHP_ICSNTAG_PARMASK) & 1) {
41980Sstevel@tonic-gate ch_flt->parity_data.ipe.cpl_tag |=
41995219Skm84432 CHP_IC_SNTAG;
42000Sstevel@tonic-gate ch_flt->parity_data.ipe.cpl_lcnt++;
42010Sstevel@tonic-gate }
42020Sstevel@tonic-gate
42030Sstevel@tonic-gate }
42040Sstevel@tonic-gate ch_flt->parity_data.ipe.cpl_lcnt++;
42050Sstevel@tonic-gate continue;
42060Sstevel@tonic-gate }
42070Sstevel@tonic-gate
42080Sstevel@tonic-gate /*
42090Sstevel@tonic-gate * Check instruction data for even parity.
42100Sstevel@tonic-gate * Bits participating in parity differ for PC-relative
42110Sstevel@tonic-gate * versus non-PC-relative instructions.
42120Sstevel@tonic-gate */
42130Sstevel@tonic-gate for (instr = 0; instr < num_instr; instr++) {
42140Sstevel@tonic-gate parmask = (tmp_icp.ic_data[instr] &
42155219Skm84432 CH_ICDATA_PRED_ISPCREL) ?
42165219Skm84432 (CHP_ICDATA_PCREL_PARMASK | pn_inst_parity) :
42175219Skm84432 (CHP_ICDATA_NPCREL_PARMASK | pn_inst_parity);
42180Sstevel@tonic-gate if (popc64(tmp_icp.ic_data[instr] & parmask) & 1) {
42190Sstevel@tonic-gate /*
42200Sstevel@tonic-gate * If this way is the one in which we expected
42210Sstevel@tonic-gate * to find the error, record the way and offset.
42220Sstevel@tonic-gate * Otherwise just log the fact we found another
42230Sstevel@tonic-gate * error.
42240Sstevel@tonic-gate */
42250Sstevel@tonic-gate if (flt_index == index) {
42260Sstevel@tonic-gate ch_flt->parity_data.ipe.cpl_way = way;
42270Sstevel@tonic-gate ch_flt->parity_data.ipe.cpl_off =
42285219Skm84432 instr * 4;
42290Sstevel@tonic-gate }
42300Sstevel@tonic-gate ch_flt->parity_data.ipe.cpl_lcnt++;
42310Sstevel@tonic-gate continue;
42320Sstevel@tonic-gate }
42330Sstevel@tonic-gate }
42340Sstevel@tonic-gate }
42350Sstevel@tonic-gate }
42360Sstevel@tonic-gate
42370Sstevel@tonic-gate /*
42380Sstevel@tonic-gate * Record information related to the source of a Pcache Parity Error.
42390Sstevel@tonic-gate */
42400Sstevel@tonic-gate static void
42410Sstevel@tonic-gate cpu_pcache_parity_info(ch_async_flt_t *ch_flt)
42420Sstevel@tonic-gate {
42430Sstevel@tonic-gate int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
42440Sstevel@tonic-gate int index;
42450Sstevel@tonic-gate
42460Sstevel@tonic-gate /*
42470Sstevel@tonic-gate * Since instruction decode cannot be done at high PIL, just
42480Sstevel@tonic-gate * examine the entire Pcache to check for any parity errors.
42490Sstevel@tonic-gate */
42500Sstevel@tonic-gate if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
42510Sstevel@tonic-gate ch_flt->parity_data.dpe.cpl_way = -1;
42520Sstevel@tonic-gate ch_flt->parity_data.dpe.cpl_off = -1;
42530Sstevel@tonic-gate }
42540Sstevel@tonic-gate for (index = 0; index < pc_set_size; index += CH_PCACHE_LSIZE)
42550Sstevel@tonic-gate cpu_pcache_parity_check(ch_flt, index);
42560Sstevel@tonic-gate }
42570Sstevel@tonic-gate
42580Sstevel@tonic-gate /*
42590Sstevel@tonic-gate * Check all ways of the Pcache at a specified index for good parity.
42600Sstevel@tonic-gate */
42610Sstevel@tonic-gate static void
42620Sstevel@tonic-gate cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index)
42630Sstevel@tonic-gate {
42640Sstevel@tonic-gate int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
42650Sstevel@tonic-gate int pc_data_words = CH_PC_DATA_REG_SIZE / sizeof (uint64_t);
42660Sstevel@tonic-gate int way, word, pbit, parity_bits;
42670Sstevel@tonic-gate ch_pc_data_t *pcp = &ch_flt->parity_data.dpe.cpl_pc[0];
42680Sstevel@tonic-gate ch_pc_data_t tmp_pcp;
42690Sstevel@tonic-gate
42700Sstevel@tonic-gate for (way = 0; way < CH_PCACHE_NWAY; way++, pcp++) {
42710Sstevel@tonic-gate /*
42720Sstevel@tonic-gate * Perform diagnostic read.
42730Sstevel@tonic-gate */
42740Sstevel@tonic-gate get_pcache_dtag(index + way * pc_set_size,
42755219Skm84432 (uint64_t *)&tmp_pcp);
42760Sstevel@tonic-gate /*
42770Sstevel@tonic-gate * Check data array for odd parity. There are 8 parity
42780Sstevel@tonic-gate * bits (bits 57:50 of ASI_PCACHE_STATUS_DATA) and each
42790Sstevel@tonic-gate * of those bits covers exactly 8 bytes of the data
42800Sstevel@tonic-gate * array:
42810Sstevel@tonic-gate *
42820Sstevel@tonic-gate * parity bit P$ data bytes covered
42830Sstevel@tonic-gate * ---------- ---------------------
42840Sstevel@tonic-gate * 50 63:56
42850Sstevel@tonic-gate * 51 55:48
42860Sstevel@tonic-gate * 52 47:40
42870Sstevel@tonic-gate * 53 39:32
42880Sstevel@tonic-gate * 54 31:24
42890Sstevel@tonic-gate * 55 23:16
42900Sstevel@tonic-gate * 56 15:8
42910Sstevel@tonic-gate * 57 7:0
42920Sstevel@tonic-gate */
42930Sstevel@tonic-gate parity_bits = PN_PC_PARITY_BITS(tmp_pcp.pc_status);
42940Sstevel@tonic-gate for (word = 0; word < pc_data_words; word++) {
42950Sstevel@tonic-gate pbit = (parity_bits >> (pc_data_words - word - 1)) & 1;
42960Sstevel@tonic-gate if ((popc64(tmp_pcp.pc_data[word]) & 1) ^ pbit) {
42970Sstevel@tonic-gate /*
42980Sstevel@tonic-gate * If this is the first error, log detailed
42990Sstevel@tonic-gate * information about it. Otherwise just record
43000Sstevel@tonic-gate * the fact that we found another error.
43010Sstevel@tonic-gate */
43020Sstevel@tonic-gate if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
43030Sstevel@tonic-gate ch_flt->parity_data.dpe.cpl_way = way;
43040Sstevel@tonic-gate ch_flt->parity_data.dpe.cpl_cache =
43050Sstevel@tonic-gate CPU_PC_PARITY;
43060Sstevel@tonic-gate ch_flt->parity_data.dpe.cpl_off =
43070Sstevel@tonic-gate word * sizeof (uint64_t);
43080Sstevel@tonic-gate bcopy(&tmp_pcp, pcp,
43095219Skm84432 sizeof (ch_pc_data_t));
43100Sstevel@tonic-gate }
43110Sstevel@tonic-gate ch_flt->parity_data.dpe.cpl_lcnt++;
43120Sstevel@tonic-gate }
43130Sstevel@tonic-gate }
43140Sstevel@tonic-gate }
43150Sstevel@tonic-gate }
43160Sstevel@tonic-gate
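/*
 * Worked example of the parity-bit extraction above: with pc_data_words
 * equal to 8, word 0 is paired with bit 7 of the field returned by
 * PN_PC_PARITY_BITS() and word 7 with bit 0, i.e. the loop walks the
 * extracted parity bits from most- to least-significant as the data
 * words ascend.
 */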
43170Sstevel@tonic-gate
43180Sstevel@tonic-gate /*
43190Sstevel@tonic-gate * Add L1 Data cache data to the ereport payload.
43200Sstevel@tonic-gate */
43210Sstevel@tonic-gate static void
43220Sstevel@tonic-gate cpu_payload_add_dcache(struct async_flt *aflt, nvlist_t *nvl)
43230Sstevel@tonic-gate {
43240Sstevel@tonic-gate ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
43250Sstevel@tonic-gate ch_dc_data_t *dcp;
43260Sstevel@tonic-gate ch_dc_data_t dcdata[CH_DCACHE_NWAY];
43270Sstevel@tonic-gate uint_t nelem;
43280Sstevel@tonic-gate int i, ways_to_check, ways_logged = 0;
43290Sstevel@tonic-gate
43300Sstevel@tonic-gate /*
43310Sstevel@tonic-gate * If this is a D$ fault then there may be multiple
43320Sstevel@tonic-gate * ways captured in the ch_parity_log_t structure.
43330Sstevel@tonic-gate * Otherwise, there will be at most one way captured
43340Sstevel@tonic-gate * in the ch_diag_data_t struct.
43350Sstevel@tonic-gate * Check each way to see if it should be encoded.
43360Sstevel@tonic-gate */
43370Sstevel@tonic-gate if (ch_flt->flt_type == CPU_DC_PARITY)
43380Sstevel@tonic-gate ways_to_check = CH_DCACHE_NWAY;
43390Sstevel@tonic-gate else
43400Sstevel@tonic-gate ways_to_check = 1;
43410Sstevel@tonic-gate for (i = 0; i < ways_to_check; i++) {
43420Sstevel@tonic-gate if (ch_flt->flt_type == CPU_DC_PARITY)
43430Sstevel@tonic-gate dcp = &ch_flt->parity_data.dpe.cpl_dc[i];
43440Sstevel@tonic-gate else
43450Sstevel@tonic-gate dcp = &ch_flt->flt_diag_data.chd_dc_data;
43460Sstevel@tonic-gate if (dcp->dc_logflag == DC_LOGFLAG_MAGIC) {
43470Sstevel@tonic-gate bcopy(dcp, &dcdata[ways_logged],
43485219Skm84432 sizeof (ch_dc_data_t));
43490Sstevel@tonic-gate ways_logged++;
43500Sstevel@tonic-gate }
43510Sstevel@tonic-gate }
43520Sstevel@tonic-gate
43530Sstevel@tonic-gate /*
43540Sstevel@tonic-gate * Add the dcache data to the payload.
43550Sstevel@tonic-gate */
43560Sstevel@tonic-gate fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_WAYS,
43570Sstevel@tonic-gate DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
43580Sstevel@tonic-gate if (ways_logged != 0) {
43590Sstevel@tonic-gate nelem = sizeof (ch_dc_data_t) / sizeof (uint64_t) * ways_logged;
43600Sstevel@tonic-gate fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_DATA,
43610Sstevel@tonic-gate DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)dcdata, NULL);
43620Sstevel@tonic-gate }
43630Sstevel@tonic-gate }
43640Sstevel@tonic-gate
43650Sstevel@tonic-gate /*
43660Sstevel@tonic-gate * Add L1 Instruction cache data to the ereport payload.
43670Sstevel@tonic-gate */
43680Sstevel@tonic-gate static void
43690Sstevel@tonic-gate cpu_payload_add_icache(struct async_flt *aflt, nvlist_t *nvl)
43700Sstevel@tonic-gate {
43710Sstevel@tonic-gate ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
43720Sstevel@tonic-gate ch_ic_data_t *icp;
43730Sstevel@tonic-gate ch_ic_data_t icdata[CH_ICACHE_NWAY];
43740Sstevel@tonic-gate uint_t nelem;
43750Sstevel@tonic-gate int i, ways_to_check, ways_logged = 0;
43760Sstevel@tonic-gate
43770Sstevel@tonic-gate /*
43780Sstevel@tonic-gate * If this is an I$ fault then there may be multiple
43790Sstevel@tonic-gate * ways captured in the ch_parity_log_t structure.
43800Sstevel@tonic-gate * Otherwise, there will be at most one way captured
43810Sstevel@tonic-gate * in the ch_diag_data_t struct.
43820Sstevel@tonic-gate * Check each way to see if it should be encoded.
43830Sstevel@tonic-gate */
43840Sstevel@tonic-gate if (ch_flt->flt_type == CPU_IC_PARITY)
43850Sstevel@tonic-gate ways_to_check = CH_ICACHE_NWAY;
43860Sstevel@tonic-gate else
43870Sstevel@tonic-gate ways_to_check = 1;
43880Sstevel@tonic-gate for (i = 0; i < ways_to_check; i++) {
43890Sstevel@tonic-gate if (ch_flt->flt_type == CPU_IC_PARITY)
43900Sstevel@tonic-gate icp = &ch_flt->parity_data.ipe.cpl_ic[i];
43910Sstevel@tonic-gate else
43920Sstevel@tonic-gate icp = &ch_flt->flt_diag_data.chd_ic_data;
43930Sstevel@tonic-gate if (icp->ic_logflag == IC_LOGFLAG_MAGIC) {
43940Sstevel@tonic-gate bcopy(icp, &icdata[ways_logged],
43955219Skm84432 sizeof (ch_ic_data_t));
43960Sstevel@tonic-gate ways_logged++;
43970Sstevel@tonic-gate }
43980Sstevel@tonic-gate }
43990Sstevel@tonic-gate
44000Sstevel@tonic-gate /*
44010Sstevel@tonic-gate * Add the icache data to the payload.
44020Sstevel@tonic-gate */
44030Sstevel@tonic-gate fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_WAYS,
44040Sstevel@tonic-gate DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
44050Sstevel@tonic-gate if (ways_logged != 0) {
44060Sstevel@tonic-gate nelem = sizeof (ch_ic_data_t) / sizeof (uint64_t) * ways_logged;
44070Sstevel@tonic-gate fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_DATA,
44080Sstevel@tonic-gate DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)icdata, NULL);
44090Sstevel@tonic-gate }
44100Sstevel@tonic-gate }
44110Sstevel@tonic-gate
44120Sstevel@tonic-gate #endif /* CPU_IMP_L1_CACHE_PARITY */
44130Sstevel@tonic-gate
44140Sstevel@tonic-gate /*
44150Sstevel@tonic-gate * Add ecache data to payload.
44160Sstevel@tonic-gate */
44170Sstevel@tonic-gate static void
44180Sstevel@tonic-gate cpu_payload_add_ecache(struct async_flt *aflt, nvlist_t *nvl)
44190Sstevel@tonic-gate {
44200Sstevel@tonic-gate ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
44210Sstevel@tonic-gate ch_ec_data_t *ecp;
44220Sstevel@tonic-gate ch_ec_data_t ecdata[CHD_EC_DATA_SETS];
44230Sstevel@tonic-gate uint_t nelem;
44240Sstevel@tonic-gate int i, ways_logged = 0;
44250Sstevel@tonic-gate
44260Sstevel@tonic-gate /*
44270Sstevel@tonic-gate * Check each way to see if it should be encoded
44280Sstevel@tonic-gate * and concatenate it into a temporary buffer.
44290Sstevel@tonic-gate */
44300Sstevel@tonic-gate for (i = 0; i < CHD_EC_DATA_SETS; i++) {
44310Sstevel@tonic-gate ecp = &ch_flt->flt_diag_data.chd_ec_data[i];
44320Sstevel@tonic-gate if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
44330Sstevel@tonic-gate bcopy(ecp, &ecdata[ways_logged],
44345219Skm84432 sizeof (ch_ec_data_t));
44350Sstevel@tonic-gate ways_logged++;
44360Sstevel@tonic-gate }
44370Sstevel@tonic-gate }
44380Sstevel@tonic-gate
44390Sstevel@tonic-gate /*
44400Sstevel@tonic-gate * Panther CPUs have an additional level of cache and so
44410Sstevel@tonic-gate * what we just collected was the L3 (ecache) and not the
44420Sstevel@tonic-gate * L2 cache.
44430Sstevel@tonic-gate */
44440Sstevel@tonic-gate if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
44450Sstevel@tonic-gate /*
44460Sstevel@tonic-gate * Add the L3 (ecache) data to the payload.
44470Sstevel@tonic-gate */
44480Sstevel@tonic-gate fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_WAYS,
44490Sstevel@tonic-gate DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
44500Sstevel@tonic-gate if (ways_logged != 0) {
44510Sstevel@tonic-gate nelem = sizeof (ch_ec_data_t) /
44520Sstevel@tonic-gate sizeof (uint64_t) * ways_logged;
44530Sstevel@tonic-gate fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_DATA,
44540Sstevel@tonic-gate DATA_TYPE_UINT64_ARRAY, nelem,
44550Sstevel@tonic-gate (uint64_t *)ecdata, NULL);
44560Sstevel@tonic-gate }
44570Sstevel@tonic-gate
44580Sstevel@tonic-gate /*
44590Sstevel@tonic-gate * Now collect the L2 cache.
44600Sstevel@tonic-gate */
44610Sstevel@tonic-gate ways_logged = 0;
44620Sstevel@tonic-gate for (i = 0; i < PN_L2_NWAYS; i++) {
44630Sstevel@tonic-gate ecp = &ch_flt->flt_diag_data.chd_l2_data[i];
44640Sstevel@tonic-gate if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
44650Sstevel@tonic-gate bcopy(ecp, &ecdata[ways_logged],
44660Sstevel@tonic-gate sizeof (ch_ec_data_t));
44670Sstevel@tonic-gate ways_logged++;
44680Sstevel@tonic-gate }
44690Sstevel@tonic-gate }
44700Sstevel@tonic-gate }
44710Sstevel@tonic-gate
44720Sstevel@tonic-gate /*
44730Sstevel@tonic-gate * Add the L2 cache data to the payload.
44740Sstevel@tonic-gate */
44750Sstevel@tonic-gate fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_WAYS,
44760Sstevel@tonic-gate DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
44770Sstevel@tonic-gate if (ways_logged != 0) {
44780Sstevel@tonic-gate nelem = sizeof (ch_ec_data_t) /
44795219Skm84432 sizeof (uint64_t) * ways_logged;
44800Sstevel@tonic-gate fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_DATA,
44810Sstevel@tonic-gate DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)ecdata, NULL);
44820Sstevel@tonic-gate }
44830Sstevel@tonic-gate }
44840Sstevel@tonic-gate
44850Sstevel@tonic-gate /*
44862381Smikechr * Initialize cpu scheme for specified cpu.
44872381Smikechr */
44882381Smikechr static void
44892381Smikechr cpu_fmri_cpu_set(nvlist_t *cpu_fmri, int cpuid)
44902381Smikechr {
44912381Smikechr 	char sbuf[21]; /* max decimal digits in UINT64_MAX + '\0' */
44922381Smikechr uint8_t mask;
44932381Smikechr
44942381Smikechr mask = cpunodes[cpuid].version;
44952381Smikechr (void) snprintf(sbuf, sizeof (sbuf), "%llX",
44962381Smikechr (u_longlong_t)cpunodes[cpuid].device_id);
44972381Smikechr (void) fm_fmri_cpu_set(cpu_fmri, FM_CPU_SCHEME_VERSION, NULL,
44982381Smikechr cpuid, &mask, (const char *)sbuf);
44992381Smikechr }
45002381Smikechr
45012381Smikechr /*
45022381Smikechr * Returns ereport resource type.
45032381Smikechr */
45042381Smikechr static int
45052381Smikechr cpu_error_to_resource_type(struct async_flt *aflt)
45062381Smikechr {
45072381Smikechr ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
45082381Smikechr
45092381Smikechr switch (ch_flt->flt_type) {
45102381Smikechr
45112381Smikechr case CPU_CE_ECACHE:
45122381Smikechr case CPU_UE_ECACHE:
45132381Smikechr case CPU_UE_ECACHE_RETIRE:
45142381Smikechr case CPU_ORPH:
45152381Smikechr /*
45162381Smikechr * If AFSR error bit indicates L2$ Data for Cheetah,
45172381Smikechr * Cheetah+ or Jaguar, or L3$ Data for Panther, return
45182381Smikechr * E$ Data type, otherwise, return CPU type.
45192381Smikechr */
45202381Smikechr if (cpu_error_is_ecache_data(aflt->flt_inst,
45212381Smikechr ch_flt->flt_bit))
45222381Smikechr return (ERRTYPE_ECACHE_DATA);
45232381Smikechr return (ERRTYPE_CPU);
45242381Smikechr
45252381Smikechr case CPU_CE:
45262381Smikechr case CPU_UE:
45272381Smikechr case CPU_EMC:
45282381Smikechr case CPU_DUE:
45292381Smikechr case CPU_RCE:
45302381Smikechr case CPU_RUE:
45312381Smikechr case CPU_FRC:
45322381Smikechr case CPU_FRU:
45332381Smikechr return (ERRTYPE_MEMORY);
45342381Smikechr
45352381Smikechr case CPU_IC_PARITY:
45362381Smikechr case CPU_DC_PARITY:
45372381Smikechr case CPU_FPUERR:
45382381Smikechr case CPU_PC_PARITY:
45392381Smikechr case CPU_ITLB_PARITY:
45402381Smikechr case CPU_DTLB_PARITY:
45412381Smikechr return (ERRTYPE_CPU);
45422381Smikechr }
45432381Smikechr return (ERRTYPE_UNKNOWN);
45442381Smikechr }
45452381Smikechr
45462381Smikechr /*
45470Sstevel@tonic-gate * Encode the data saved in the ch_async_flt_t struct into
45480Sstevel@tonic-gate * the FM ereport payload.
45490Sstevel@tonic-gate */
45500Sstevel@tonic-gate static void
45510Sstevel@tonic-gate cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload,
45520Sstevel@tonic-gate nvlist_t *resource, int *afar_status, int *synd_status)
45530Sstevel@tonic-gate {
45540Sstevel@tonic-gate ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
45550Sstevel@tonic-gate *synd_status = AFLT_STAT_INVALID;
45560Sstevel@tonic-gate *afar_status = AFLT_STAT_INVALID;
45570Sstevel@tonic-gate
45580Sstevel@tonic-gate if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR) {
45590Sstevel@tonic-gate fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR,
45600Sstevel@tonic-gate DATA_TYPE_UINT64, aflt->flt_stat, NULL);
45610Sstevel@tonic-gate }
45620Sstevel@tonic-gate
45630Sstevel@tonic-gate if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR_EXT) &&
45640Sstevel@tonic-gate IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
45650Sstevel@tonic-gate fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR_EXT,
45660Sstevel@tonic-gate DATA_TYPE_UINT64, ch_flt->afsr_ext, NULL);
45670Sstevel@tonic-gate }
45680Sstevel@tonic-gate
45690Sstevel@tonic-gate if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR_STATUS) {
45700Sstevel@tonic-gate *afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
45710Sstevel@tonic-gate ch_flt->flt_bit);
45720Sstevel@tonic-gate fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS,
45730Sstevel@tonic-gate DATA_TYPE_UINT8, (uint8_t)*afar_status, NULL);
45740Sstevel@tonic-gate }
45750Sstevel@tonic-gate
45760Sstevel@tonic-gate if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR) {
45770Sstevel@tonic-gate fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR,
45780Sstevel@tonic-gate DATA_TYPE_UINT64, aflt->flt_addr, NULL);
45790Sstevel@tonic-gate }
45800Sstevel@tonic-gate
45810Sstevel@tonic-gate if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) {
45820Sstevel@tonic-gate fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC,
45830Sstevel@tonic-gate DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL);
45840Sstevel@tonic-gate }
45850Sstevel@tonic-gate
45860Sstevel@tonic-gate if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) {
45870Sstevel@tonic-gate fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL,
45880Sstevel@tonic-gate DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL);
45890Sstevel@tonic-gate }
45900Sstevel@tonic-gate
45910Sstevel@tonic-gate if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) {
45920Sstevel@tonic-gate fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT,
45930Sstevel@tonic-gate DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL);
45940Sstevel@tonic-gate }
45950Sstevel@tonic-gate
45960Sstevel@tonic-gate if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
45970Sstevel@tonic-gate fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
45980Sstevel@tonic-gate DATA_TYPE_BOOLEAN_VALUE,
45990Sstevel@tonic-gate (aflt->flt_priv ? B_TRUE : B_FALSE), NULL);
46000Sstevel@tonic-gate }
46010Sstevel@tonic-gate
46020Sstevel@tonic-gate if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ME) {
46030Sstevel@tonic-gate fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ME,
46040Sstevel@tonic-gate DATA_TYPE_BOOLEAN_VALUE,
46050Sstevel@tonic-gate (aflt->flt_stat & C_AFSR_ME) ? B_TRUE : B_FALSE, NULL);
46060Sstevel@tonic-gate }
46070Sstevel@tonic-gate
46080Sstevel@tonic-gate if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND_STATUS) {
46090Sstevel@tonic-gate *synd_status = afsr_to_synd_status(aflt->flt_inst,
46100Sstevel@tonic-gate ch_flt->afsr_errs, ch_flt->flt_bit);
46110Sstevel@tonic-gate fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND_STATUS,
46120Sstevel@tonic-gate DATA_TYPE_UINT8, (uint8_t)*synd_status, NULL);
46130Sstevel@tonic-gate }
46140Sstevel@tonic-gate
46150Sstevel@tonic-gate if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND) {
46160Sstevel@tonic-gate fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND,
46170Sstevel@tonic-gate DATA_TYPE_UINT16, (uint16_t)aflt->flt_synd, NULL);
46180Sstevel@tonic-gate }
46190Sstevel@tonic-gate
46200Sstevel@tonic-gate if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_TYPE) {
46210Sstevel@tonic-gate fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_TYPE,
46220Sstevel@tonic-gate DATA_TYPE_STRING, flt_to_error_type(aflt), NULL);
46230Sstevel@tonic-gate }
46240Sstevel@tonic-gate
46250Sstevel@tonic-gate if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_DISP) {
46260Sstevel@tonic-gate fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_DISP,
46270Sstevel@tonic-gate DATA_TYPE_UINT64, aflt->flt_disp, NULL);
46280Sstevel@tonic-gate }
46290Sstevel@tonic-gate
46300Sstevel@tonic-gate if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L2)
46310Sstevel@tonic-gate cpu_payload_add_ecache(aflt, payload);
46320Sstevel@tonic-gate
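	/*
	 * The next two members unpack aflt->flt_status: the low byte
	 * records which copy function was involved, the byte above it
	 * how the error was detected.
	 */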
46330Sstevel@tonic-gate if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_COPYFUNCTION) {
46340Sstevel@tonic-gate fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_COPYFUNCTION,
46350Sstevel@tonic-gate DATA_TYPE_UINT8, (uint8_t)aflt->flt_status & 0xff, NULL);
46360Sstevel@tonic-gate }
46370Sstevel@tonic-gate
46380Sstevel@tonic-gate if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_HOWDETECTED) {
46390Sstevel@tonic-gate fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_HOWDETECTED,
46400Sstevel@tonic-gate DATA_TYPE_UINT8, (uint8_t)(aflt->flt_status >> 8), NULL);
46410Sstevel@tonic-gate }
46420Sstevel@tonic-gate
46430Sstevel@tonic-gate if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_INSTRBLOCK) {
46440Sstevel@tonic-gate fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_INSTRBLOCK,
46450Sstevel@tonic-gate DATA_TYPE_UINT32_ARRAY, 16,
46460Sstevel@tonic-gate (uint32_t *)&ch_flt->flt_fpdata, NULL);
46470Sstevel@tonic-gate }
46480Sstevel@tonic-gate
46490Sstevel@tonic-gate #if defined(CPU_IMP_L1_CACHE_PARITY)
46500Sstevel@tonic-gate if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1D)
46510Sstevel@tonic-gate cpu_payload_add_dcache(aflt, payload);
46520Sstevel@tonic-gate if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1I)
46530Sstevel@tonic-gate cpu_payload_add_icache(aflt, payload);
46540Sstevel@tonic-gate #endif /* CPU_IMP_L1_CACHE_PARITY */
46550Sstevel@tonic-gate
46560Sstevel@tonic-gate #if defined(CHEETAH_PLUS)
46570Sstevel@tonic-gate if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1P)
46580Sstevel@tonic-gate cpu_payload_add_pcache(aflt, payload);
46590Sstevel@tonic-gate if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_TLB)
46600Sstevel@tonic-gate cpu_payload_add_tlb(aflt, payload);
46610Sstevel@tonic-gate #endif /* CHEETAH_PLUS */
46620Sstevel@tonic-gate /*
46630Sstevel@tonic-gate * Create the FMRI that goes into the payload
46640Sstevel@tonic-gate * and contains the unum info if necessary.
46650Sstevel@tonic-gate */
46662381Smikechr if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_RESOURCE) {
46671186Sayznaga char unum[UNUM_NAMLEN] = "";
46681186Sayznaga char sid[DIMM_SERIAL_ID_LEN] = "";
46692436Smb91622 int len, ret, rtype, synd_code;
46702381Smikechr uint64_t offset = (uint64_t)-1;
46712381Smikechr
46722381Smikechr rtype = cpu_error_to_resource_type(aflt);
46732381Smikechr switch (rtype) {
46742381Smikechr
46752381Smikechr case ERRTYPE_MEMORY:
46762381Smikechr case ERRTYPE_ECACHE_DATA:
46772381Smikechr
46782381Smikechr /*
46792381Smikechr * Memory errors, do unum lookup
46802381Smikechr */
46812381Smikechr if (*afar_status == AFLT_STAT_INVALID)
46822381Smikechr break;
46832381Smikechr
46842381Smikechr if (rtype == ERRTYPE_ECACHE_DATA)
46852381Smikechr aflt->flt_status |= ECC_ECACHE;
46862381Smikechr else
46872381Smikechr aflt->flt_status &= ~ECC_ECACHE;
46882381Smikechr
46892436Smb91622 synd_code = synd_to_synd_code(*synd_status,
46902436Smb91622 aflt->flt_synd, ch_flt->flt_bit);
46912436Smb91622
46922436Smb91622 if (cpu_get_mem_unum_synd(synd_code, aflt, unum) != 0)
46932381Smikechr break;
46941186Sayznaga
46951186Sayznaga ret = cpu_get_mem_sid(unum, sid, DIMM_SERIAL_ID_LEN,
46961186Sayznaga &len);
46971186Sayznaga
46981186Sayznaga if (ret == 0) {
46991186Sayznaga (void) cpu_get_mem_offset(aflt->flt_addr,
47001186Sayznaga &offset);
47011186Sayznaga }
47021186Sayznaga
47030Sstevel@tonic-gate fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION,
47041186Sayznaga NULL, unum, (ret == 0) ? sid : NULL, offset);
47050Sstevel@tonic-gate fm_payload_set(payload,
47060Sstevel@tonic-gate FM_EREPORT_PAYLOAD_NAME_RESOURCE,
47070Sstevel@tonic-gate DATA_TYPE_NVLIST, resource, NULL);
47082381Smikechr break;
47092381Smikechr
47102381Smikechr case ERRTYPE_CPU:
47112381Smikechr /*
47122381Smikechr * On-board processor array error, add cpu resource.
47132381Smikechr */
47142381Smikechr cpu_fmri_cpu_set(resource, aflt->flt_inst);
47152381Smikechr fm_payload_set(payload,
47162381Smikechr FM_EREPORT_PAYLOAD_NAME_RESOURCE,
47172381Smikechr DATA_TYPE_NVLIST, resource, NULL);
47182381Smikechr break;
47190Sstevel@tonic-gate }
47200Sstevel@tonic-gate }
47210Sstevel@tonic-gate }
47220Sstevel@tonic-gate
47230Sstevel@tonic-gate /*
47240Sstevel@tonic-gate * Initialize the way info if necessary.
47250Sstevel@tonic-gate */
47260Sstevel@tonic-gate void
47270Sstevel@tonic-gate cpu_ereport_init(struct async_flt *aflt)
47280Sstevel@tonic-gate {
47290Sstevel@tonic-gate ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
47300Sstevel@tonic-gate ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
47310Sstevel@tonic-gate ch_ec_data_t *l2p = &ch_flt->flt_diag_data.chd_l2_data[0];
47320Sstevel@tonic-gate int i;
47330Sstevel@tonic-gate
47340Sstevel@tonic-gate /*
47350Sstevel@tonic-gate * Initialize the info in the CPU logout structure.
47360Sstevel@tonic-gate * The I$/D$ way information is not initialized here
47370Sstevel@tonic-gate * since it is captured in the logout assembly code.
47380Sstevel@tonic-gate */
47390Sstevel@tonic-gate for (i = 0; i < CHD_EC_DATA_SETS; i++)
47400Sstevel@tonic-gate (ecp + i)->ec_way = i;
47410Sstevel@tonic-gate
47420Sstevel@tonic-gate for (i = 0; i < PN_L2_NWAYS; i++)
47430Sstevel@tonic-gate (l2p + i)->ec_way = i;
47440Sstevel@tonic-gate }
47450Sstevel@tonic-gate
47460Sstevel@tonic-gate /*
47470Sstevel@tonic-gate * Returns whether fault address is valid for this error bit and
47480Sstevel@tonic-gate * whether the address is "in memory" (i.e. pf_is_memory returns 1).
47490Sstevel@tonic-gate */
47500Sstevel@tonic-gate int
47510Sstevel@tonic-gate cpu_flt_in_memory(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
47520Sstevel@tonic-gate {
47530Sstevel@tonic-gate struct async_flt *aflt = (struct async_flt *)ch_flt;
47540Sstevel@tonic-gate
47552381Smikechr return ((t_afsr_bit & C_AFSR_MEMORY) &&
47560Sstevel@tonic-gate afsr_to_afar_status(ch_flt->afsr_errs, t_afsr_bit) ==
47570Sstevel@tonic-gate AFLT_STAT_VALID &&
47580Sstevel@tonic-gate pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT));
47590Sstevel@tonic-gate }
47600Sstevel@tonic-gate
47612436Smb91622 /*
47622436Smb91622 * Returns whether fault address is valid based on the error bit for the
47632436Smb91622 * one event being queued and whether the address is "in memory".
47642436Smb91622 */
47652436Smb91622 static int
47662436Smb91622 cpu_flt_in_memory_one_event(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
47672436Smb91622 {
47682436Smb91622 struct async_flt *aflt = (struct async_flt *)ch_flt;
47692436Smb91622 int afar_status;
47702436Smb91622 uint64_t afsr_errs, afsr_ow, *ow_bits;
47712436Smb91622
47722436Smb91622 if (!(t_afsr_bit & C_AFSR_MEMORY) ||
47732436Smb91622 !pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
47742436Smb91622 return (0);
47752436Smb91622
47762436Smb91622 afsr_errs = ch_flt->afsr_errs;
47772436Smb91622 afar_status = afsr_to_afar_status(afsr_errs, t_afsr_bit);
47782436Smb91622
47792436Smb91622 switch (afar_status) {
47802436Smb91622 case AFLT_STAT_VALID:
47812436Smb91622 return (1);
47822436Smb91622
47832436Smb91622 case AFLT_STAT_AMBIGUOUS:
47842436Smb91622 /*
47852436Smb91622 * Status is ambiguous since another error bit (or bits)
47852436Smb91622 	 * of equal priority to the specified bit is on in the afsr,
47872436Smb91622 * so check those bits. Return 1 only if the bits on in the
47882436Smb91622 * same class as the t_afsr_bit are also C_AFSR_MEMORY bits.
47892436Smb91622 * Otherwise not all the equal priority bits are for memory
47902436Smb91622 * errors, so return 0.
47912436Smb91622 */
47922436Smb91622 ow_bits = afar_overwrite;
47932436Smb91622 while ((afsr_ow = *ow_bits++) != 0) {
47942436Smb91622 /*
47952436Smb91622 * Get other bits that are on in t_afsr_bit's priority
47962436Smb91622 * class to check for Memory Error bits only.
47972436Smb91622 */
47982436Smb91622 if (afsr_ow & t_afsr_bit) {
47992436Smb91622 if ((afsr_errs & afsr_ow) & ~C_AFSR_MEMORY)
48002436Smb91622 return (0);
48012436Smb91622 else
48022436Smb91622 return (1);
48032436Smb91622 }
48042436Smb91622 }
48052436Smb91622 /*FALLTHRU*/
48062436Smb91622
48072436Smb91622 default:
48082436Smb91622 return (0);
48092436Smb91622 }
48102436Smb91622 }
48112436Smb91622
48120Sstevel@tonic-gate static void
48130Sstevel@tonic-gate cpu_log_diag_info(ch_async_flt_t *ch_flt)
48140Sstevel@tonic-gate {
48150Sstevel@tonic-gate struct async_flt *aflt = (struct async_flt *)ch_flt;
48160Sstevel@tonic-gate ch_dc_data_t *dcp = &ch_flt->flt_diag_data.chd_dc_data;
48170Sstevel@tonic-gate ch_ic_data_t *icp = &ch_flt->flt_diag_data.chd_ic_data;
48180Sstevel@tonic-gate ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
48190Sstevel@tonic-gate #if defined(CPU_IMP_ECACHE_ASSOC)
48200Sstevel@tonic-gate int i, nway;
48210Sstevel@tonic-gate #endif /* CPU_IMP_ECACHE_ASSOC */
48220Sstevel@tonic-gate
48230Sstevel@tonic-gate /*
48240Sstevel@tonic-gate * Check if the CPU log out captured was valid.
48250Sstevel@tonic-gate */
48260Sstevel@tonic-gate if (ch_flt->flt_diag_data.chd_afar == LOGOUT_INVALID ||
48270Sstevel@tonic-gate ch_flt->flt_data_incomplete)
48280Sstevel@tonic-gate return;
48290Sstevel@tonic-gate
48300Sstevel@tonic-gate #if defined(CPU_IMP_ECACHE_ASSOC)
48310Sstevel@tonic-gate nway = cpu_ecache_nway();
48320Sstevel@tonic-gate i = cpu_ecache_line_valid(ch_flt);
48330Sstevel@tonic-gate if (i == 0 || i > nway) {
48340Sstevel@tonic-gate for (i = 0; i < nway; i++)
48350Sstevel@tonic-gate ecp[i].ec_logflag = EC_LOGFLAG_MAGIC;
48360Sstevel@tonic-gate } else
48370Sstevel@tonic-gate ecp[i - 1].ec_logflag = EC_LOGFLAG_MAGIC;
48380Sstevel@tonic-gate #else /* CPU_IMP_ECACHE_ASSOC */
48390Sstevel@tonic-gate ecp->ec_logflag = EC_LOGFLAG_MAGIC;
48400Sstevel@tonic-gate #endif /* CPU_IMP_ECACHE_ASSOC */
48410Sstevel@tonic-gate
48420Sstevel@tonic-gate #if defined(CHEETAH_PLUS)
48430Sstevel@tonic-gate pn_cpu_log_diag_l2_info(ch_flt);
48440Sstevel@tonic-gate #endif /* CHEETAH_PLUS */
48450Sstevel@tonic-gate
48460Sstevel@tonic-gate if (CH_DCTAG_MATCH(dcp->dc_tag, aflt->flt_addr)) {
48470Sstevel@tonic-gate dcp->dc_way = CH_DCIDX_TO_WAY(dcp->dc_idx);
48480Sstevel@tonic-gate dcp->dc_logflag = DC_LOGFLAG_MAGIC;
48490Sstevel@tonic-gate }
48500Sstevel@tonic-gate
48510Sstevel@tonic-gate if (CH_ICTAG_MATCH(icp, aflt->flt_addr)) {
48520Sstevel@tonic-gate if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
48530Sstevel@tonic-gate icp->ic_way = PN_ICIDX_TO_WAY(icp->ic_idx);
48540Sstevel@tonic-gate else
48550Sstevel@tonic-gate icp->ic_way = CH_ICIDX_TO_WAY(icp->ic_idx);
48560Sstevel@tonic-gate icp->ic_logflag = IC_LOGFLAG_MAGIC;
48570Sstevel@tonic-gate }
48580Sstevel@tonic-gate }
48590Sstevel@tonic-gate
48600Sstevel@tonic-gate /*
48610Sstevel@tonic-gate * Cheetah ECC calculation.
48620Sstevel@tonic-gate *
48630Sstevel@tonic-gate * We only need to do the calculation on the data bits and can ignore check
48640Sstevel@tonic-gate * bit and Mtag bit terms in the calculation.
48650Sstevel@tonic-gate */
48660Sstevel@tonic-gate static uint64_t ch_ecc_table[9][2] = {
48670Sstevel@tonic-gate /*
48680Sstevel@tonic-gate * low order 64-bits high-order 64-bits
48690Sstevel@tonic-gate */
48700Sstevel@tonic-gate { 0x46bffffeccd1177f, 0x488800022100014c },
48710Sstevel@tonic-gate { 0x42fccc81331ff77f, 0x14424f1010249184 },
48720Sstevel@tonic-gate { 0x8898827c222f1ffe, 0x22c1222808184aaf },
48730Sstevel@tonic-gate { 0xf7632203e131ccf1, 0xe1241121848292b8 },
48740Sstevel@tonic-gate { 0x7f5511421b113809, 0x901c88d84288aafe },
48750Sstevel@tonic-gate { 0x1d49412184882487, 0x8f338c87c044c6ef },
48760Sstevel@tonic-gate { 0xf552181014448344, 0x7ff8f4443e411911 },
48770Sstevel@tonic-gate { 0x2189240808f24228, 0xfeeff8cc81333f42 },
48780Sstevel@tonic-gate { 0x3280008440001112, 0xfee88b337ffffd62 },
48790Sstevel@tonic-gate };
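/*
 * Each row of the table above is the 128-bit mask of data bits that
 * participate in one check bit: check bit i is the xor (i.e. the parity)
 * of the data bits selected by ch_ecc_table[i].
 */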
48800Sstevel@tonic-gate
48810Sstevel@tonic-gate /*
48820Sstevel@tonic-gate * 64-bit population count, use well-known popcnt trick.
48830Sstevel@tonic-gate * We could use the UltraSPARC V9 POPC instruction, but some
48840Sstevel@tonic-gate * CPUs including Cheetahplus and Jaguar do not support that
48850Sstevel@tonic-gate * instruction.
48860Sstevel@tonic-gate */
48870Sstevel@tonic-gate int
48880Sstevel@tonic-gate popc64(uint64_t val)
48890Sstevel@tonic-gate {
48900Sstevel@tonic-gate int cnt;
48910Sstevel@tonic-gate
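	/*
	 * Each val &= val - 1 step clears the lowest set bit, so the
	 * loop iterates once per 1 bit in val.
	 */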
48920Sstevel@tonic-gate for (cnt = 0; val != 0; val &= val - 1)
48930Sstevel@tonic-gate cnt++;
48940Sstevel@tonic-gate return (cnt);
48950Sstevel@tonic-gate }
48960Sstevel@tonic-gate
48970Sstevel@tonic-gate /*
48980Sstevel@tonic-gate * Generate the 9 ECC bits for the 128-bit chunk based on the table above.
48990Sstevel@tonic-gate * Note that xor'ing an odd number of 1 bits == 1 and xor'ing an even number
49000Sstevel@tonic-gate * of 1 bits == 0, so we can just use the least significant bit of the popcnt
49010Sstevel@tonic-gate * instead of doing all the xor's.
49020Sstevel@tonic-gate */
49030Sstevel@tonic-gate uint32_t
49040Sstevel@tonic-gate us3_gen_ecc(uint64_t data_low, uint64_t data_high)
49050Sstevel@tonic-gate {
49060Sstevel@tonic-gate int bitno, s;
49070Sstevel@tonic-gate int synd = 0;
49080Sstevel@tonic-gate
49090Sstevel@tonic-gate for (bitno = 0; bitno < 9; bitno++) {
49100Sstevel@tonic-gate s = (popc64(data_low & ch_ecc_table[bitno][0]) +
49110Sstevel@tonic-gate popc64(data_high & ch_ecc_table[bitno][1])) & 1;
49120Sstevel@tonic-gate synd |= (s << bitno);
49130Sstevel@tonic-gate }
49140Sstevel@tonic-gate return (synd);
49150Sstevel@tonic-gate
49160Sstevel@tonic-gate }
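/*
 * For illustration only (a sketch, not code used in this file): a caller
 * holding a 128-bit chunk plus its stored check bits could compute a
 * syndrome by xor'ing the stored bits with the regenerated ones, e.g.
 *
 *	synd = stored_ecc ^ us3_gen_ecc(data_low, data_high);
 *
 * where stored_ecc is hypothetical. A zero syndrome means the data and
 * check bits agree; a non-zero syndrome identifies the flipped bit(s).
 */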
49170Sstevel@tonic-gate
49180Sstevel@tonic-gate /*
49190Sstevel@tonic-gate * Queue one event based on ecc_type_to_info entry. If the event has an AFT1
49200Sstevel@tonic-gate * tag associated with it or is a fatal event (aflt_panic set), it is sent to
49210Sstevel@tonic-gate * the UE event queue. Otherwise it is dispatched to the CE event queue.
49220Sstevel@tonic-gate */
49230Sstevel@tonic-gate static void
49240Sstevel@tonic-gate cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
49250Sstevel@tonic-gate ecc_type_to_info_t *eccp, ch_diag_data_t *cdp)
49260Sstevel@tonic-gate {
49270Sstevel@tonic-gate struct async_flt *aflt = (struct async_flt *)ch_flt;
49280Sstevel@tonic-gate
49290Sstevel@tonic-gate if (reason &&
49300Sstevel@tonic-gate strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) {
49310Sstevel@tonic-gate (void) strcat(reason, eccp->ec_reason);
49320Sstevel@tonic-gate }
49330Sstevel@tonic-gate
49340Sstevel@tonic-gate ch_flt->flt_bit = eccp->ec_afsr_bit;
49350Sstevel@tonic-gate ch_flt->flt_type = eccp->ec_flt_type;
49360Sstevel@tonic-gate if (cdp != NULL && cdp->chd_afar != LOGOUT_INVALID)
49370Sstevel@tonic-gate ch_flt->flt_diag_data = *cdp;
49380Sstevel@tonic-gate else
49390Sstevel@tonic-gate ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
49402436Smb91622 aflt->flt_in_memory =
49412436Smb91622 cpu_flt_in_memory_one_event(ch_flt, ch_flt->flt_bit);
49420Sstevel@tonic-gate
49430Sstevel@tonic-gate if (ch_flt->flt_bit & C_AFSR_MSYND_ERRS)
49440Sstevel@tonic-gate aflt->flt_synd = GET_M_SYND(aflt->flt_stat);
49450Sstevel@tonic-gate else if (ch_flt->flt_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS))
49460Sstevel@tonic-gate aflt->flt_synd = GET_E_SYND(aflt->flt_stat);
49470Sstevel@tonic-gate else
49480Sstevel@tonic-gate aflt->flt_synd = 0;
49490Sstevel@tonic-gate
49500Sstevel@tonic-gate aflt->flt_payload = eccp->ec_err_payload;
49510Sstevel@tonic-gate
49520Sstevel@tonic-gate if (aflt->flt_panic || (eccp->ec_afsr_bit &
49530Sstevel@tonic-gate (C_AFSR_LEVEL1 | C_AFSR_EXT_LEVEL1)))
49540Sstevel@tonic-gate cpu_errorq_dispatch(eccp->ec_err_class,
49550Sstevel@tonic-gate (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
49560Sstevel@tonic-gate aflt->flt_panic);
49570Sstevel@tonic-gate else
49580Sstevel@tonic-gate cpu_errorq_dispatch(eccp->ec_err_class,
49590Sstevel@tonic-gate (void *)ch_flt, sizeof (ch_async_flt_t), ce_queue,
49600Sstevel@tonic-gate aflt->flt_panic);
49610Sstevel@tonic-gate }
49620Sstevel@tonic-gate
49630Sstevel@tonic-gate /*
49640Sstevel@tonic-gate * Queue events on async event queue one event per error bit. First we
49650Sstevel@tonic-gate * queue the events that we "expect" for the given trap, then we queue events
49660Sstevel@tonic-gate * that we may not expect. Return number of events queued.
49670Sstevel@tonic-gate */
49680Sstevel@tonic-gate int
49690Sstevel@tonic-gate cpu_queue_events(ch_async_flt_t *ch_flt, char *reason, uint64_t t_afsr_errs,
49700Sstevel@tonic-gate ch_cpu_logout_t *clop)
49710Sstevel@tonic-gate {
49720Sstevel@tonic-gate struct async_flt *aflt = (struct async_flt *)ch_flt;
49730Sstevel@tonic-gate ecc_type_to_info_t *eccp;
49740Sstevel@tonic-gate int nevents = 0;
49750Sstevel@tonic-gate uint64_t primary_afar = aflt->flt_addr, primary_afsr = aflt->flt_stat;
49760Sstevel@tonic-gate #if defined(CHEETAH_PLUS)
49770Sstevel@tonic-gate uint64_t orig_t_afsr_errs;
49780Sstevel@tonic-gate #endif
49790Sstevel@tonic-gate uint64_t primary_afsr_ext = ch_flt->afsr_ext;
49800Sstevel@tonic-gate uint64_t primary_afsr_errs = ch_flt->afsr_errs;
49810Sstevel@tonic-gate ch_diag_data_t *cdp = NULL;
49820Sstevel@tonic-gate
49830Sstevel@tonic-gate t_afsr_errs &= ((C_AFSR_ALL_ERRS & ~C_AFSR_ME) | C_AFSR_EXT_ALL_ERRS);
49840Sstevel@tonic-gate
49850Sstevel@tonic-gate #if defined(CHEETAH_PLUS)
49860Sstevel@tonic-gate orig_t_afsr_errs = t_afsr_errs;
49870Sstevel@tonic-gate
49880Sstevel@tonic-gate /*
49890Sstevel@tonic-gate * For Cheetah+, log the shadow AFSR/AFAR bits first.
49900Sstevel@tonic-gate */
49910Sstevel@tonic-gate if (clop != NULL) {
49920Sstevel@tonic-gate /*
49930Sstevel@tonic-gate * Set the AFSR and AFAR fields to the shadow registers. The
49940Sstevel@tonic-gate * flt_addr and flt_stat fields will be reset to the primaries
49950Sstevel@tonic-gate * below, but the sdw_addr and sdw_stat will stay as the
49960Sstevel@tonic-gate * secondaries.
49970Sstevel@tonic-gate */
49980Sstevel@tonic-gate cdp = &clop->clo_sdw_data;
49990Sstevel@tonic-gate aflt->flt_addr = ch_flt->flt_sdw_afar = cdp->chd_afar;
50000Sstevel@tonic-gate aflt->flt_stat = ch_flt->flt_sdw_afsr = cdp->chd_afsr;
50010Sstevel@tonic-gate ch_flt->afsr_ext = ch_flt->flt_sdw_afsr_ext = cdp->chd_afsr_ext;
50020Sstevel@tonic-gate ch_flt->afsr_errs = (cdp->chd_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
50030Sstevel@tonic-gate (cdp->chd_afsr & C_AFSR_ALL_ERRS);
50040Sstevel@tonic-gate
50050Sstevel@tonic-gate /*
50060Sstevel@tonic-gate * If the primary and shadow AFSR differ, tag the shadow as
50070Sstevel@tonic-gate * the first fault.
50080Sstevel@tonic-gate */
50090Sstevel@tonic-gate if ((primary_afar != cdp->chd_afar) ||
50100Sstevel@tonic-gate (primary_afsr_errs != ch_flt->afsr_errs)) {
50110Sstevel@tonic-gate aflt->flt_stat |= (1ull << C_AFSR_FIRSTFLT_SHIFT);
50120Sstevel@tonic-gate }
50130Sstevel@tonic-gate
50140Sstevel@tonic-gate /*
50150Sstevel@tonic-gate * Check AFSR bits as well as AFSR_EXT bits in order of
50160Sstevel@tonic-gate * the AFAR overwrite priority. Our stored AFSR_EXT value
50170Sstevel@tonic-gate * is expected to be zero for those CPUs which do not have
50180Sstevel@tonic-gate * an AFSR_EXT register.
50190Sstevel@tonic-gate */
50200Sstevel@tonic-gate for (eccp = ecc_type_to_info; eccp->ec_desc != NULL; eccp++) {
50210Sstevel@tonic-gate if ((eccp->ec_afsr_bit &
50220Sstevel@tonic-gate (ch_flt->afsr_errs & t_afsr_errs)) &&
50230Sstevel@tonic-gate ((eccp->ec_flags & aflt->flt_status) != 0)) {
50240Sstevel@tonic-gate cpu_queue_one_event(ch_flt, reason, eccp, cdp);
50250Sstevel@tonic-gate cdp = NULL;
50260Sstevel@tonic-gate t_afsr_errs &= ~eccp->ec_afsr_bit;
50270Sstevel@tonic-gate nevents++;
50280Sstevel@tonic-gate }
50290Sstevel@tonic-gate }
50300Sstevel@tonic-gate
50310Sstevel@tonic-gate /*
50320Sstevel@tonic-gate * If the ME bit is on in the primary AFSR turn all the
50330Sstevel@tonic-gate * error bits on again that may set the ME bit to make
50340Sstevel@tonic-gate * sure we see the ME AFSR error logs.
50350Sstevel@tonic-gate */
50360Sstevel@tonic-gate if ((primary_afsr & C_AFSR_ME) != 0)
50370Sstevel@tonic-gate t_afsr_errs = (orig_t_afsr_errs & C_AFSR_ALL_ME_ERRS);
50380Sstevel@tonic-gate }
50390Sstevel@tonic-gate #endif /* CHEETAH_PLUS */
50400Sstevel@tonic-gate
50410Sstevel@tonic-gate if (clop != NULL)
50420Sstevel@tonic-gate cdp = &clop->clo_data;
50430Sstevel@tonic-gate
50440Sstevel@tonic-gate /*
50450Sstevel@tonic-gate * Queue expected errors, error bit and fault type must match
50460Sstevel@tonic-gate * in the ecc_type_to_info table.
50470Sstevel@tonic-gate */
50480Sstevel@tonic-gate for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
50490Sstevel@tonic-gate eccp++) {
50500Sstevel@tonic-gate if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 &&
50510Sstevel@tonic-gate (eccp->ec_flags & aflt->flt_status) != 0) {
50520Sstevel@tonic-gate #if defined(SERRANO)
50530Sstevel@tonic-gate /*
50540Sstevel@tonic-gate * For FRC/FRU errors on Serrano the afar2 captures
50550Sstevel@tonic-gate * the address and the associated data is
50560Sstevel@tonic-gate * in the shadow logout area.
50570Sstevel@tonic-gate */
50580Sstevel@tonic-gate if (eccp->ec_afsr_bit & (C_AFSR_FRC | C_AFSR_FRU)) {
50590Sstevel@tonic-gate if (clop != NULL)
50600Sstevel@tonic-gate cdp = &clop->clo_sdw_data;
50610Sstevel@tonic-gate aflt->flt_addr = ch_flt->afar2;
50620Sstevel@tonic-gate } else {
50630Sstevel@tonic-gate if (clop != NULL)
50640Sstevel@tonic-gate cdp = &clop->clo_data;
50650Sstevel@tonic-gate aflt->flt_addr = primary_afar;
50660Sstevel@tonic-gate }
50670Sstevel@tonic-gate #else /* SERRANO */
50680Sstevel@tonic-gate aflt->flt_addr = primary_afar;
50690Sstevel@tonic-gate #endif /* SERRANO */
50700Sstevel@tonic-gate aflt->flt_stat = primary_afsr;
50710Sstevel@tonic-gate ch_flt->afsr_ext = primary_afsr_ext;
50720Sstevel@tonic-gate ch_flt->afsr_errs = primary_afsr_errs;
50730Sstevel@tonic-gate cpu_queue_one_event(ch_flt, reason, eccp, cdp);
50740Sstevel@tonic-gate cdp = NULL;
50750Sstevel@tonic-gate t_afsr_errs &= ~eccp->ec_afsr_bit;
50760Sstevel@tonic-gate nevents++;
50770Sstevel@tonic-gate }
50780Sstevel@tonic-gate }
50790Sstevel@tonic-gate
50800Sstevel@tonic-gate /*
50810Sstevel@tonic-gate * Queue unexpected errors, error bit only match.
50820Sstevel@tonic-gate */
50830Sstevel@tonic-gate for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
50840Sstevel@tonic-gate eccp++) {
50850Sstevel@tonic-gate if (eccp->ec_afsr_bit & t_afsr_errs) {
50860Sstevel@tonic-gate #if defined(SERRANO)
50870Sstevel@tonic-gate /*
50880Sstevel@tonic-gate * For FRC/FRU errors on Serrano the afar2 captures
50890Sstevel@tonic-gate * the address and the associated data is
50900Sstevel@tonic-gate * in the shadow logout area.
50910Sstevel@tonic-gate */
50920Sstevel@tonic-gate if (eccp->ec_afsr_bit & (C_AFSR_FRC | C_AFSR_FRU)) {
50930Sstevel@tonic-gate if (clop != NULL)
50940Sstevel@tonic-gate cdp = &clop->clo_sdw_data;
50950Sstevel@tonic-gate aflt->flt_addr = ch_flt->afar2;
50960Sstevel@tonic-gate } else {
50970Sstevel@tonic-gate if (clop != NULL)
50980Sstevel@tonic-gate cdp = &clop->clo_data;
50990Sstevel@tonic-gate aflt->flt_addr = primary_afar;
51000Sstevel@tonic-gate }
51010Sstevel@tonic-gate #else /* SERRANO */
51020Sstevel@tonic-gate aflt->flt_addr = primary_afar;
51030Sstevel@tonic-gate #endif /* SERRANO */
51040Sstevel@tonic-gate aflt->flt_stat = primary_afsr;
51050Sstevel@tonic-gate ch_flt->afsr_ext = primary_afsr_ext;
51060Sstevel@tonic-gate ch_flt->afsr_errs = primary_afsr_errs;
51070Sstevel@tonic-gate cpu_queue_one_event(ch_flt, reason, eccp, cdp);
51080Sstevel@tonic-gate cdp = NULL;
51090Sstevel@tonic-gate t_afsr_errs &= ~eccp->ec_afsr_bit;
51100Sstevel@tonic-gate nevents++;
51110Sstevel@tonic-gate }
51120Sstevel@tonic-gate }
51130Sstevel@tonic-gate return (nevents);
51140Sstevel@tonic-gate }
51150Sstevel@tonic-gate
51160Sstevel@tonic-gate /*
51170Sstevel@tonic-gate * Return trap type number.
51180Sstevel@tonic-gate */
51190Sstevel@tonic-gate uint8_t
51200Sstevel@tonic-gate flt_to_trap_type(struct async_flt *aflt)
51210Sstevel@tonic-gate {
51220Sstevel@tonic-gate if (aflt->flt_status & ECC_I_TRAP)
51230Sstevel@tonic-gate return (TRAP_TYPE_ECC_I);
51240Sstevel@tonic-gate if (aflt->flt_status & ECC_D_TRAP)
51250Sstevel@tonic-gate return (TRAP_TYPE_ECC_D);
51260Sstevel@tonic-gate if (aflt->flt_status & ECC_F_TRAP)
51270Sstevel@tonic-gate return (TRAP_TYPE_ECC_F);
51280Sstevel@tonic-gate if (aflt->flt_status & ECC_C_TRAP)
51290Sstevel@tonic-gate return (TRAP_TYPE_ECC_C);
51300Sstevel@tonic-gate if (aflt->flt_status & ECC_DP_TRAP)
51310Sstevel@tonic-gate return (TRAP_TYPE_ECC_DP);
51320Sstevel@tonic-gate if (aflt->flt_status & ECC_IP_TRAP)
51330Sstevel@tonic-gate return (TRAP_TYPE_ECC_IP);
51340Sstevel@tonic-gate if (aflt->flt_status & ECC_ITLB_TRAP)
51350Sstevel@tonic-gate return (TRAP_TYPE_ECC_ITLB);
51360Sstevel@tonic-gate if (aflt->flt_status & ECC_DTLB_TRAP)
51370Sstevel@tonic-gate return (TRAP_TYPE_ECC_DTLB);
51380Sstevel@tonic-gate return (TRAP_TYPE_UNKNOWN);
51390Sstevel@tonic-gate }
51400Sstevel@tonic-gate
51410Sstevel@tonic-gate /*
51420Sstevel@tonic-gate * Decide an error type based on detector and leaky/partner tests.
51430Sstevel@tonic-gate * The following array is used for quick translation - it must
51440Sstevel@tonic-gate * stay in sync with ce_dispact_t.
51450Sstevel@tonic-gate */
51460Sstevel@tonic-gate
51470Sstevel@tonic-gate static char *cetypes[] = {
51480Sstevel@tonic-gate CE_DISP_DESC_U,
51490Sstevel@tonic-gate CE_DISP_DESC_I,
51500Sstevel@tonic-gate CE_DISP_DESC_PP,
51510Sstevel@tonic-gate CE_DISP_DESC_P,
51520Sstevel@tonic-gate CE_DISP_DESC_L,
51530Sstevel@tonic-gate CE_DISP_DESC_PS,
51540Sstevel@tonic-gate CE_DISP_DESC_S
51550Sstevel@tonic-gate };
51560Sstevel@tonic-gate
51570Sstevel@tonic-gate char *
51580Sstevel@tonic-gate flt_to_error_type(struct async_flt *aflt)
51590Sstevel@tonic-gate {
51600Sstevel@tonic-gate ce_dispact_t dispact, disp;
51610Sstevel@tonic-gate uchar_t dtcrinfo, ptnrinfo, lkyinfo;
51620Sstevel@tonic-gate
51630Sstevel@tonic-gate /*
51640Sstevel@tonic-gate * The memory payload bundle is shared by some events that do
51650Sstevel@tonic-gate * not perform any classification. For those flt_disp will be
51660Sstevel@tonic-gate * 0 and we will return "unknown".
51670Sstevel@tonic-gate */
51680Sstevel@tonic-gate if (!ce_disp_inited || !aflt->flt_in_memory || aflt->flt_disp == 0)
51690Sstevel@tonic-gate return (cetypes[CE_DISP_UNKNOWN]);
51700Sstevel@tonic-gate
51710Sstevel@tonic-gate dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
51720Sstevel@tonic-gate
51730Sstevel@tonic-gate /*
51740Sstevel@tonic-gate * It is also possible that no scrub/classification was performed
51750Sstevel@tonic-gate * by the detector, for instance where a disrupting error logged
51760Sstevel@tonic-gate * in the AFSR while CEEN was off in cpu_deferred_error.
51770Sstevel@tonic-gate */
51780Sstevel@tonic-gate if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo))
51790Sstevel@tonic-gate return (cetypes[CE_DISP_UNKNOWN]);
51800Sstevel@tonic-gate
51810Sstevel@tonic-gate /*
51820Sstevel@tonic-gate * Lookup type in initial classification/action table
51830Sstevel@tonic-gate */
51840Sstevel@tonic-gate dispact = CE_DISPACT(ce_disp_table,
51850Sstevel@tonic-gate CE_XDIAG_AFARMATCHED(dtcrinfo),
51860Sstevel@tonic-gate CE_XDIAG_STATE(dtcrinfo),
51870Sstevel@tonic-gate CE_XDIAG_CE1SEEN(dtcrinfo),
51880Sstevel@tonic-gate CE_XDIAG_CE2SEEN(dtcrinfo));
51890Sstevel@tonic-gate
51900Sstevel@tonic-gate /*
51910Sstevel@tonic-gate * A bad lookup is not something to panic production systems for.
51920Sstevel@tonic-gate */
51930Sstevel@tonic-gate ASSERT(dispact != CE_DISP_BAD);
51940Sstevel@tonic-gate if (dispact == CE_DISP_BAD)
51950Sstevel@tonic-gate return (cetypes[CE_DISP_UNKNOWN]);
51960Sstevel@tonic-gate
51970Sstevel@tonic-gate disp = CE_DISP(dispact);
51980Sstevel@tonic-gate
51990Sstevel@tonic-gate switch (disp) {
52000Sstevel@tonic-gate case CE_DISP_UNKNOWN:
52010Sstevel@tonic-gate case CE_DISP_INTERMITTENT:
52020Sstevel@tonic-gate break;
52030Sstevel@tonic-gate
52040Sstevel@tonic-gate case CE_DISP_POSS_PERS:
52050Sstevel@tonic-gate /*
52060Sstevel@tonic-gate * "Possible persistent" errors to which we have applied a valid
52070Sstevel@tonic-gate * leaky test can be separated into "persistent" or "leaky".
52080Sstevel@tonic-gate */
52090Sstevel@tonic-gate lkyinfo = CE_XDIAG_LKYINFO(aflt->flt_disp);
52100Sstevel@tonic-gate if (CE_XDIAG_TESTVALID(lkyinfo)) {
52110Sstevel@tonic-gate if (CE_XDIAG_CE1SEEN(lkyinfo) ||
52120Sstevel@tonic-gate CE_XDIAG_CE2SEEN(lkyinfo))
52130Sstevel@tonic-gate disp = CE_DISP_LEAKY;
52140Sstevel@tonic-gate else
52150Sstevel@tonic-gate disp = CE_DISP_PERS;
52160Sstevel@tonic-gate }
52170Sstevel@tonic-gate break;
52180Sstevel@tonic-gate
52190Sstevel@tonic-gate case CE_DISP_POSS_STICKY:
52200Sstevel@tonic-gate /*
52210Sstevel@tonic-gate * Promote "possible sticky" results that have been
52220Sstevel@tonic-gate * confirmed by a partner test to "sticky". Unconfirmed
52230Sstevel@tonic-gate * "possible sticky" events are left at that status - we do not
52240Sstevel@tonic-gate * guess at any bad reader/writer etc status here.
52250Sstevel@tonic-gate */
52260Sstevel@tonic-gate ptnrinfo = CE_XDIAG_PTNRINFO(aflt->flt_disp);
52270Sstevel@tonic-gate if (CE_XDIAG_TESTVALID(ptnrinfo) &&
52280Sstevel@tonic-gate CE_XDIAG_CE1SEEN(ptnrinfo) && CE_XDIAG_CE2SEEN(ptnrinfo))
52290Sstevel@tonic-gate disp = CE_DISP_STICKY;
52300Sstevel@tonic-gate
52310Sstevel@tonic-gate /*
52320Sstevel@tonic-gate * Promote "possible sticky" results on a uniprocessor
52330Sstevel@tonic-gate * to "sticky"
52340Sstevel@tonic-gate */
52350Sstevel@tonic-gate if (disp == CE_DISP_POSS_STICKY &&
52360Sstevel@tonic-gate CE_XDIAG_SKIPCODE(disp) == CE_XDIAG_SKIP_UNIPROC)
52370Sstevel@tonic-gate disp = CE_DISP_STICKY;
52380Sstevel@tonic-gate break;
52390Sstevel@tonic-gate
52400Sstevel@tonic-gate default:
52410Sstevel@tonic-gate disp = CE_DISP_UNKNOWN;
52420Sstevel@tonic-gate break;
52430Sstevel@tonic-gate }
52440Sstevel@tonic-gate
52450Sstevel@tonic-gate return (cetypes[disp]);
52460Sstevel@tonic-gate }
52470Sstevel@tonic-gate
52480Sstevel@tonic-gate /*
52490Sstevel@tonic-gate * Given the entire afsr, the specific bit to check and a prioritized list of
52500Sstevel@tonic-gate * error bits, determine the validity of the various overwrite priority
52510Sstevel@tonic-gate * features of the AFSR/AFAR: AFAR, ESYND and MSYND, each of which have
52520Sstevel@tonic-gate * different overwrite priorities.
52530Sstevel@tonic-gate *
52540Sstevel@tonic-gate * Given a specific afsr error bit and the entire afsr, there are three cases:
52550Sstevel@tonic-gate * INVALID: The specified bit is lower overwrite priority than some other
52560Sstevel@tonic-gate * error bit which is on in the afsr (or IVU/IVC).
52570Sstevel@tonic-gate * VALID: The specified bit is higher priority than all other error bits
52580Sstevel@tonic-gate * which are on in the afsr.
52590Sstevel@tonic-gate * AMBIGUOUS: Another error bit (or bits) of equal priority to the specified
52600Sstevel@tonic-gate * bit is on in the afsr.
52610Sstevel@tonic-gate */
52620Sstevel@tonic-gate int
52630Sstevel@tonic-gate afsr_to_overw_status(uint64_t afsr, uint64_t afsr_bit, uint64_t *ow_bits)
52640Sstevel@tonic-gate {
52650Sstevel@tonic-gate uint64_t afsr_ow;
52660Sstevel@tonic-gate
52670Sstevel@tonic-gate while ((afsr_ow = *ow_bits++) != 0) {
52680Sstevel@tonic-gate /*
52690Sstevel@tonic-gate * If bit is in the priority class, check to see if another
52700Sstevel@tonic-gate * bit in the same class is on => ambiguous. Otherwise,
52710Sstevel@tonic-gate * the value is valid. If the bit is not on at this priority
52720Sstevel@tonic-gate * class, but a higher priority bit is on, then the value is
52730Sstevel@tonic-gate * invalid.
52740Sstevel@tonic-gate */
52750Sstevel@tonic-gate if (afsr_ow & afsr_bit) {
52760Sstevel@tonic-gate /*
52770Sstevel@tonic-gate * If equal pri bit is on, ambiguous.
52780Sstevel@tonic-gate */
52790Sstevel@tonic-gate if (afsr & (afsr_ow & ~afsr_bit))
52800Sstevel@tonic-gate return (AFLT_STAT_AMBIGUOUS);
52810Sstevel@tonic-gate return (AFLT_STAT_VALID);
52820Sstevel@tonic-gate } else if (afsr & afsr_ow)
52830Sstevel@tonic-gate break;
52840Sstevel@tonic-gate }
52850Sstevel@tonic-gate
52860Sstevel@tonic-gate /*
52870Sstevel@tonic-gate * We didn't find a match or a higher priority bit was on. Not
52880Sstevel@tonic-gate * finding a match handles the case of invalid AFAR for IVC, IVU.
52890Sstevel@tonic-gate */
52900Sstevel@tonic-gate return (AFLT_STAT_INVALID);
52910Sstevel@tonic-gate }
52920Sstevel@tonic-gate
52930Sstevel@tonic-gate static int
52940Sstevel@tonic-gate afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit)
52950Sstevel@tonic-gate {
52960Sstevel@tonic-gate #if defined(SERRANO)
52970Sstevel@tonic-gate if (afsr_bit & (C_AFSR_FRC | C_AFSR_FRU))
52980Sstevel@tonic-gate return (afsr_to_overw_status(afsr, afsr_bit, afar2_overwrite));
52990Sstevel@tonic-gate else
53000Sstevel@tonic-gate #endif /* SERRANO */
53010Sstevel@tonic-gate return (afsr_to_overw_status(afsr, afsr_bit, afar_overwrite));
53020Sstevel@tonic-gate }
53030Sstevel@tonic-gate
53040Sstevel@tonic-gate static int
53050Sstevel@tonic-gate afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit)
53060Sstevel@tonic-gate {
53070Sstevel@tonic-gate return (afsr_to_overw_status(afsr, afsr_bit, esynd_overwrite));
53080Sstevel@tonic-gate }
53090Sstevel@tonic-gate
53100Sstevel@tonic-gate static int
53110Sstevel@tonic-gate afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit)
53120Sstevel@tonic-gate {
53130Sstevel@tonic-gate return (afsr_to_overw_status(afsr, afsr_bit, msynd_overwrite));
53140Sstevel@tonic-gate }
53150Sstevel@tonic-gate
53160Sstevel@tonic-gate static int
53170Sstevel@tonic-gate afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit)
53180Sstevel@tonic-gate {
53190Sstevel@tonic-gate #ifdef lint
53200Sstevel@tonic-gate cpuid = cpuid;
53210Sstevel@tonic-gate #endif
53222436Smb91622 #if defined(CHEETAH_PLUS)
53232436Smb91622 /*
53242436Smb91622 * The M_SYND overwrite policy is combined with the E_SYND overwrite
53252436Smb91622 * policy for Cheetah+ and separate for Panther CPUs.
53262436Smb91622 */
53270Sstevel@tonic-gate if (afsr_bit & C_AFSR_MSYND_ERRS) {
53282436Smb91622 if (IS_PANTHER(cpunodes[cpuid].implementation))
53292436Smb91622 return (afsr_to_msynd_status(afsr, afsr_bit));
53302436Smb91622 else
53312436Smb91622 return (afsr_to_esynd_status(afsr, afsr_bit));
53320Sstevel@tonic-gate } else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
53330Sstevel@tonic-gate if (IS_PANTHER(cpunodes[cpuid].implementation))
53340Sstevel@tonic-gate return (afsr_to_pn_esynd_status(afsr, afsr_bit));
53350Sstevel@tonic-gate else
53360Sstevel@tonic-gate return (afsr_to_esynd_status(afsr, afsr_bit));
53370Sstevel@tonic-gate #else /* CHEETAH_PLUS */
53382436Smb91622 if (afsr_bit & C_AFSR_MSYND_ERRS) {
53392436Smb91622 return (afsr_to_msynd_status(afsr, afsr_bit));
53402436Smb91622 } else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
53410Sstevel@tonic-gate return (afsr_to_esynd_status(afsr, afsr_bit));
53420Sstevel@tonic-gate #endif /* CHEETAH_PLUS */
53430Sstevel@tonic-gate } else {
53440Sstevel@tonic-gate return (AFLT_STAT_INVALID);
53450Sstevel@tonic-gate }
53460Sstevel@tonic-gate }
53470Sstevel@tonic-gate
53480Sstevel@tonic-gate /*
53490Sstevel@tonic-gate * Slave CPU stick synchronization.
53500Sstevel@tonic-gate */
53510Sstevel@tonic-gate void
53520Sstevel@tonic-gate sticksync_slave(void)
53530Sstevel@tonic-gate {
53540Sstevel@tonic-gate int i;
53550Sstevel@tonic-gate int tries = 0;
53560Sstevel@tonic-gate int64_t tskew;
53570Sstevel@tonic-gate int64_t av_tskew;
53580Sstevel@tonic-gate
53590Sstevel@tonic-gate kpreempt_disable();
53600Sstevel@tonic-gate /* wait for the master side */
53610Sstevel@tonic-gate while (stick_sync_cmd != SLAVE_START)
53620Sstevel@tonic-gate ;
53630Sstevel@tonic-gate /*
53640Sstevel@tonic-gate * Synchronization should only take a few tries at most. But in the
53650Sstevel@tonic-gate * odd case where the cpu isn't cooperating we'll keep trying. A cpu
53660Sstevel@tonic-gate  * without its stick synchronized wouldn't be a good citizen.
53670Sstevel@tonic-gate */
53680Sstevel@tonic-gate while (slave_done == 0) {
53690Sstevel@tonic-gate /*
53700Sstevel@tonic-gate * Time skew calculation.
53710Sstevel@tonic-gate */
53720Sstevel@tonic-gate av_tskew = tskew = 0;
53730Sstevel@tonic-gate
53740Sstevel@tonic-gate for (i = 0; i < stick_iter; i++) {
53750Sstevel@tonic-gate /* make location hot */
53760Sstevel@tonic-gate timestamp[EV_A_START] = 0;
53770Sstevel@tonic-gate 			stick_timestamp(&timestamp[EV_A_START]);
53780Sstevel@tonic-gate
53790Sstevel@tonic-gate /* tell the master we're ready */
53800Sstevel@tonic-gate stick_sync_cmd = MASTER_START;
53810Sstevel@tonic-gate
53820Sstevel@tonic-gate /* and wait */
53830Sstevel@tonic-gate while (stick_sync_cmd != SLAVE_CONT)
53840Sstevel@tonic-gate ;
53850Sstevel@tonic-gate /* Event B end */
53860Sstevel@tonic-gate 			stick_timestamp(&timestamp[EV_B_END]);
53870Sstevel@tonic-gate
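			/*
			 * With one-way latency L, and the slave's stick
			 * ahead of the master's by d, event A measures
			 * roughly L - d and event B roughly L + d, so
			 * half their difference estimates d.
			 */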
53880Sstevel@tonic-gate /* calculate time skew */
53890Sstevel@tonic-gate tskew = ((timestamp[EV_B_END] - timestamp[EV_B_START])
53905219Skm84432 - (timestamp[EV_A_END] - timestamp[EV_A_START]))
53915219Skm84432 / 2;
53920Sstevel@tonic-gate
53930Sstevel@tonic-gate /* keep running count */
53940Sstevel@tonic-gate av_tskew += tskew;
53950Sstevel@tonic-gate } /* for */
53960Sstevel@tonic-gate
53970Sstevel@tonic-gate /*
53980Sstevel@tonic-gate * Adjust stick for time skew if not within the max allowed;
53990Sstevel@tonic-gate * otherwise we're all done.
54000Sstevel@tonic-gate */
54010Sstevel@tonic-gate if (stick_iter != 0)
54020Sstevel@tonic-gate av_tskew = av_tskew/stick_iter;
54030Sstevel@tonic-gate if (ABS(av_tskew) > stick_tsk) {
54040Sstevel@tonic-gate /*
54050Sstevel@tonic-gate * If the skew is 1 (the slave's STICK register
54060Sstevel@tonic-gate * is 1 STICK ahead of the master's), stick_adj
54070Sstevel@tonic-gate * could fail to adjust the slave's STICK register
54080Sstevel@tonic-gate * if the STICK read on the slave happens to
54090Sstevel@tonic-gate * align with the increment of the STICK.
54100Sstevel@tonic-gate * Therefore, we increment the skew to 2.
54110Sstevel@tonic-gate */
54120Sstevel@tonic-gate if (av_tskew == 1)
54130Sstevel@tonic-gate av_tskew++;
54140Sstevel@tonic-gate stick_adj(-av_tskew);
54150Sstevel@tonic-gate } else
54160Sstevel@tonic-gate slave_done = 1;
54170Sstevel@tonic-gate #ifdef DEBUG
54180Sstevel@tonic-gate if (tries < DSYNC_ATTEMPTS)
54190Sstevel@tonic-gate stick_sync_stats[CPU->cpu_id].skew_val[tries] =
54205219Skm84432 av_tskew;
54210Sstevel@tonic-gate ++tries;
54220Sstevel@tonic-gate #endif /* DEBUG */
54230Sstevel@tonic-gate #ifdef lint
54240Sstevel@tonic-gate tries = tries;
54250Sstevel@tonic-gate #endif
54260Sstevel@tonic-gate
54270Sstevel@tonic-gate } /* while */
54280Sstevel@tonic-gate
54290Sstevel@tonic-gate /* allow the master to finish */
54300Sstevel@tonic-gate stick_sync_cmd = EVENT_NULL;
54310Sstevel@tonic-gate kpreempt_enable();
54320Sstevel@tonic-gate }
54330Sstevel@tonic-gate
54340Sstevel@tonic-gate /*
54350Sstevel@tonic-gate * Master CPU side of stick synchronization.
54360Sstevel@tonic-gate * - timestamp end of Event A
54370Sstevel@tonic-gate * - timestamp beginning of Event B
54380Sstevel@tonic-gate */
54390Sstevel@tonic-gate void
54400Sstevel@tonic-gate sticksync_master(void)
54410Sstevel@tonic-gate {
54420Sstevel@tonic-gate int i;
54430Sstevel@tonic-gate
54440Sstevel@tonic-gate kpreempt_disable();
54450Sstevel@tonic-gate /* tell the slave we've started */
54460Sstevel@tonic-gate slave_done = 0;
54470Sstevel@tonic-gate stick_sync_cmd = SLAVE_START;
54480Sstevel@tonic-gate
54490Sstevel@tonic-gate while (slave_done == 0) {
54500Sstevel@tonic-gate for (i = 0; i < stick_iter; i++) {
54510Sstevel@tonic-gate /* wait for the slave */
54520Sstevel@tonic-gate while (stick_sync_cmd != MASTER_START)
54530Sstevel@tonic-gate ;
54540Sstevel@tonic-gate /* Event A end */
54550Sstevel@tonic-gate 			stick_timestamp(&timestamp[EV_A_END]);
54560Sstevel@tonic-gate
54570Sstevel@tonic-gate /* make location hot */
54580Sstevel@tonic-gate timestamp[EV_B_START] = 0;
54590Sstevel@tonic-gate 			stick_timestamp(&timestamp[EV_B_START]);
54600Sstevel@tonic-gate
54610Sstevel@tonic-gate /* tell the slave to continue */
54620Sstevel@tonic-gate stick_sync_cmd = SLAVE_CONT;
54630Sstevel@tonic-gate } /* for */
54640Sstevel@tonic-gate
54650Sstevel@tonic-gate /* wait while slave calculates time skew */
54660Sstevel@tonic-gate while (stick_sync_cmd == SLAVE_CONT)
54670Sstevel@tonic-gate ;
54680Sstevel@tonic-gate } /* while */
54690Sstevel@tonic-gate kpreempt_enable();
54700Sstevel@tonic-gate }
54710Sstevel@tonic-gate
54720Sstevel@tonic-gate /*
54730Sstevel@tonic-gate * Cheetah/Cheetah+ have disrupting error for copyback's, so we don't need to
54740Sstevel@tonic-gate * do Spitfire hack of xcall'ing all the cpus to ask to check for them. Also,
54750Sstevel@tonic-gate * in cpu_async_panic_callb, each cpu checks for CPU events on its way to
54760Sstevel@tonic-gate * panic idle.
54770Sstevel@tonic-gate */
54780Sstevel@tonic-gate /*ARGSUSED*/
54790Sstevel@tonic-gate void
54800Sstevel@tonic-gate cpu_check_allcpus(struct async_flt *aflt)
54810Sstevel@tonic-gate {}
54820Sstevel@tonic-gate
54830Sstevel@tonic-gate struct kmem_cache *ch_private_cache;
54840Sstevel@tonic-gate
54850Sstevel@tonic-gate /*
54860Sstevel@tonic-gate  * Cpu private uninitialization. Uninitialize the Ecache scrubber and
54870Sstevel@tonic-gate * deallocate the scrubber data structures and cpu_private data structure.
54880Sstevel@tonic-gate */
54890Sstevel@tonic-gate void
54900Sstevel@tonic-gate cpu_uninit_private(struct cpu *cp)
54910Sstevel@tonic-gate {
54920Sstevel@tonic-gate cheetah_private_t *chprp = CPU_PRIVATE(cp);
54930Sstevel@tonic-gate
54940Sstevel@tonic-gate ASSERT(chprp);
54950Sstevel@tonic-gate cpu_uninit_ecache_scrub_dr(cp);
54960Sstevel@tonic-gate CPU_PRIVATE(cp) = NULL;
54970Sstevel@tonic-gate ch_err_tl1_paddrs[cp->cpu_id] = NULL;
54980Sstevel@tonic-gate kmem_cache_free(ch_private_cache, chprp);
54990Sstevel@tonic-gate cmp_delete_cpu(cp->cpu_id);
55000Sstevel@tonic-gate
55010Sstevel@tonic-gate }
55020Sstevel@tonic-gate
55030Sstevel@tonic-gate /*
55040Sstevel@tonic-gate * Cheetah Cache Scrubbing
55050Sstevel@tonic-gate *
55060Sstevel@tonic-gate * The primary purpose of Cheetah cache scrubbing is to reduce the exposure
55070Sstevel@tonic-gate * of E$ tags, D$ data, and I$ data to cosmic ray events since they are not
55080Sstevel@tonic-gate * protected by either parity or ECC.
55090Sstevel@tonic-gate *
55100Sstevel@tonic-gate * We currently default the E$ and D$ scan rate to 100 (scan 10% of the
55110Sstevel@tonic-gate  * cache per second). Due to the specifics of how the I$ control
55120Sstevel@tonic-gate * logic works with respect to the ASI used to scrub I$ lines, the entire
55130Sstevel@tonic-gate * I$ is scanned at once.
55140Sstevel@tonic-gate */
55150Sstevel@tonic-gate
55160Sstevel@tonic-gate /*
55170Sstevel@tonic-gate * Tuneables to enable and disable the scrubbing of the caches, and to tune
55180Sstevel@tonic-gate * scrubbing behavior. These may be changed via /etc/system or using mdb
55190Sstevel@tonic-gate * on a running system.
55200Sstevel@tonic-gate */
55210Sstevel@tonic-gate int dcache_scrub_enable = 1; /* D$ scrubbing is on by default */
55220Sstevel@tonic-gate
55230Sstevel@tonic-gate /*
55240Sstevel@tonic-gate * The following are the PIL levels that the softints/cross traps will fire at.
55250Sstevel@tonic-gate */
55260Sstevel@tonic-gate uint_t ecache_scrub_pil = PIL_9; /* E$ scrub PIL for cross traps */
55270Sstevel@tonic-gate uint_t dcache_scrub_pil = PIL_9; /* D$ scrub PIL for cross traps */
55280Sstevel@tonic-gate uint_t icache_scrub_pil = PIL_9; /* I$ scrub PIL for cross traps */
55290Sstevel@tonic-gate
55300Sstevel@tonic-gate #if defined(JALAPENO)
55310Sstevel@tonic-gate
55320Sstevel@tonic-gate /*
55330Sstevel@tonic-gate * Due to several errata (82, 85, 86), we don't enable the L2$ scrubber
55340Sstevel@tonic-gate * on Jalapeno.
55350Sstevel@tonic-gate */
55360Sstevel@tonic-gate int ecache_scrub_enable = 0;
55370Sstevel@tonic-gate
55380Sstevel@tonic-gate #else /* JALAPENO */
55390Sstevel@tonic-gate
55400Sstevel@tonic-gate /*
55410Sstevel@tonic-gate * With all other cpu types, E$ scrubbing is on by default
55420Sstevel@tonic-gate */
55430Sstevel@tonic-gate int ecache_scrub_enable = 1;
55440Sstevel@tonic-gate
55450Sstevel@tonic-gate #endif /* JALAPENO */
55460Sstevel@tonic-gate
55470Sstevel@tonic-gate
55480Sstevel@tonic-gate #if defined(CHEETAH_PLUS) || defined(JALAPENO) || defined(SERRANO)
55490Sstevel@tonic-gate
55500Sstevel@tonic-gate /*
55510Sstevel@tonic-gate * The I$ scrubber tends to cause latency problems for real-time SW, so it
55520Sstevel@tonic-gate * is disabled by default on non-Cheetah systems
55530Sstevel@tonic-gate */
55540Sstevel@tonic-gate int icache_scrub_enable = 0;
55550Sstevel@tonic-gate
55560Sstevel@tonic-gate /*
55570Sstevel@tonic-gate * Tuneables specifying the scrub calls per second and the scan rate
55580Sstevel@tonic-gate * for each cache
55590Sstevel@tonic-gate *
55600Sstevel@tonic-gate * The cyclic times are set during boot based on the following values.
55610Sstevel@tonic-gate * Changing these values in mdb after this time will have no effect. If
55620Sstevel@tonic-gate * a different value is desired, it must be set in /etc/system before a
55630Sstevel@tonic-gate * reboot.
55640Sstevel@tonic-gate */
55650Sstevel@tonic-gate int ecache_calls_a_sec = 1;
55660Sstevel@tonic-gate int dcache_calls_a_sec = 2;
55670Sstevel@tonic-gate int icache_calls_a_sec = 2;
55680Sstevel@tonic-gate
55690Sstevel@tonic-gate int ecache_scan_rate_idle = 1;
55700Sstevel@tonic-gate int ecache_scan_rate_busy = 1;
55710Sstevel@tonic-gate int dcache_scan_rate_idle = 1;
55720Sstevel@tonic-gate int dcache_scan_rate_busy = 1;
55730Sstevel@tonic-gate int icache_scan_rate_idle = 1;
55740Sstevel@tonic-gate int icache_scan_rate_busy = 1;
55750Sstevel@tonic-gate
55760Sstevel@tonic-gate #else /* CHEETAH_PLUS || JALAPENO || SERRANO */
55770Sstevel@tonic-gate
55780Sstevel@tonic-gate int icache_scrub_enable = 1; /* I$ scrubbing is on by default */
55790Sstevel@tonic-gate
55800Sstevel@tonic-gate int ecache_calls_a_sec = 100;	/* E$ scrub calls per second */
55810Sstevel@tonic-gate int dcache_calls_a_sec = 100;	/* D$ scrub calls per second */
55820Sstevel@tonic-gate int icache_calls_a_sec = 100;	/* I$ scrub calls per second */
55830Sstevel@tonic-gate
55840Sstevel@tonic-gate int ecache_scan_rate_idle = 100; /* E$ scan rate (in tenths of a %) */
55850Sstevel@tonic-gate int ecache_scan_rate_busy = 100; /* E$ scan rate (in tenths of a %) */
55860Sstevel@tonic-gate int dcache_scan_rate_idle = 100; /* D$ scan rate (in tenths of a %) */
55870Sstevel@tonic-gate int dcache_scan_rate_busy = 100; /* D$ scan rate (in tenths of a %) */
55880Sstevel@tonic-gate int icache_scan_rate_idle = 100; /* I$ scan rate (in tenths of a %) */
55890Sstevel@tonic-gate int icache_scan_rate_busy = 100; /* I$ scan rate (in tenths of a %) */
55900Sstevel@tonic-gate
55910Sstevel@tonic-gate #endif /* CHEETAH_PLUS || JALAPENO || SERRANO */
55920Sstevel@tonic-gate
55930Sstevel@tonic-gate /*
55940Sstevel@tonic-gate * In order to scrub on offline cpus, a cross trap is sent. The handler will
55950Sstevel@tonic-gate * increment the outstanding request counter and schedule a softint to run
55960Sstevel@tonic-gate * the scrubber.
55970Sstevel@tonic-gate */
55980Sstevel@tonic-gate extern xcfunc_t cache_scrubreq_tl1;
55990Sstevel@tonic-gate
56000Sstevel@tonic-gate /*
56010Sstevel@tonic-gate * These are the softint functions for each cache scrubber
56020Sstevel@tonic-gate */
56030Sstevel@tonic-gate static uint_t scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2);
56040Sstevel@tonic-gate static uint_t scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2);
56050Sstevel@tonic-gate static uint_t scrub_icache_line_intr(caddr_t arg1, caddr_t arg2);
56060Sstevel@tonic-gate
56070Sstevel@tonic-gate /*
56080Sstevel@tonic-gate * The cache scrub info table contains cache-specific information
56090Sstevel@tonic-gate * and allows some of the scrub code to be table driven, reducing
56100Sstevel@tonic-gate * duplication of similar code across the caches.
56110Sstevel@tonic-gate *
56120Sstevel@tonic-gate * This table keeps a copy of the value in the calls per second variable
56130Sstevel@tonic-gate * (?cache_calls_a_sec). This makes it much more difficult for someone
56140Sstevel@tonic-gate * to cause us problems (for example, by setting ecache_calls_a_sec to 0 in
56150Sstevel@tonic-gate * mdb in a misguided attempt to disable the scrubber).
56160Sstevel@tonic-gate */
56170Sstevel@tonic-gate struct scrub_info {
56180Sstevel@tonic-gate int *csi_enable; /* scrubber enable flag */
56190Sstevel@tonic-gate int csi_freq; /* scrubber calls per second */
56200Sstevel@tonic-gate int csi_index; /* index to chsm_outstanding[] */
56212973Sgovinda uint64_t csi_inum; /* scrubber interrupt number */
56220Sstevel@tonic-gate cyclic_id_t csi_omni_cyc_id; /* omni cyclic ID */
56230Sstevel@tonic-gate cyclic_id_t csi_offline_cyc_id; /* offline cyclic ID */
56240Sstevel@tonic-gate char csi_name[3]; /* cache name for this scrub entry */
56250Sstevel@tonic-gate } cache_scrub_info[] = {
56260Sstevel@tonic-gate { &ecache_scrub_enable, 0, CACHE_SCRUBBER_INFO_E, 0, 0, 0, "E$"},
56270Sstevel@tonic-gate { &dcache_scrub_enable, 0, CACHE_SCRUBBER_INFO_D, 0, 0, 0, "D$"},
56280Sstevel@tonic-gate { &icache_scrub_enable, 0, CACHE_SCRUBBER_INFO_I, 0, 0, 0, "I$"}
56290Sstevel@tonic-gate };
56300Sstevel@tonic-gate
56310Sstevel@tonic-gate /*
56320Sstevel@tonic-gate * If scrubbing is enabled, increment the outstanding request counter. If it
56330Sstevel@tonic-gate * is 1 (meaning there were no previous requests outstanding), call
56340Sstevel@tonic-gate * setsoftint_tl1 through xt_one_unchecked, which eventually ends up doing
56350Sstevel@tonic-gate * a self trap.
56360Sstevel@tonic-gate */
56370Sstevel@tonic-gate static void
56380Sstevel@tonic-gate do_scrub(struct scrub_info *csi)
56390Sstevel@tonic-gate {
56400Sstevel@tonic-gate ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
56410Sstevel@tonic-gate int index = csi->csi_index;
56420Sstevel@tonic-gate uint32_t *outstanding = &csmp->chsm_outstanding[index];
56430Sstevel@tonic-gate
56440Sstevel@tonic-gate if (*(csi->csi_enable) && (csmp->chsm_enable[index])) {
56450Sstevel@tonic-gate if (atomic_add_32_nv(outstanding, 1) == 1) {
56460Sstevel@tonic-gate xt_one_unchecked(CPU->cpu_id, setsoftint_tl1,
56470Sstevel@tonic-gate csi->csi_inum, 0);
56480Sstevel@tonic-gate }
56490Sstevel@tonic-gate }
56500Sstevel@tonic-gate }
56510Sstevel@tonic-gate
56520Sstevel@tonic-gate /*
56530Sstevel@tonic-gate * Omni cyclics don't fire on offline cpus, so we use another cyclic to
56540Sstevel@tonic-gate * cross-trap the offline cpus.
56550Sstevel@tonic-gate */
56560Sstevel@tonic-gate static void
56570Sstevel@tonic-gate do_scrub_offline(struct scrub_info *csi)
56580Sstevel@tonic-gate {
56590Sstevel@tonic-gate ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
56600Sstevel@tonic-gate
56610Sstevel@tonic-gate if (CPUSET_ISNULL(cpu_offline_set)) {
56620Sstevel@tonic-gate /*
56630Sstevel@tonic-gate * No offline cpus - nothing to do
56640Sstevel@tonic-gate */
56650Sstevel@tonic-gate return;
56660Sstevel@tonic-gate }
56670Sstevel@tonic-gate
56680Sstevel@tonic-gate if (*(csi->csi_enable) && (csmp->chsm_enable[csi->csi_index])) {
56690Sstevel@tonic-gate xt_some(cpu_offline_set, cache_scrubreq_tl1, csi->csi_inum,
56700Sstevel@tonic-gate csi->csi_index);
56710Sstevel@tonic-gate }
56720Sstevel@tonic-gate }
56730Sstevel@tonic-gate
56740Sstevel@tonic-gate /*
56750Sstevel@tonic-gate * This is the initial setup for the scrubber cyclics - it sets the
56760Sstevel@tonic-gate * interrupt level, frequency, and function to call.
56770Sstevel@tonic-gate */
56780Sstevel@tonic-gate /*ARGSUSED*/
56790Sstevel@tonic-gate static void
56800Sstevel@tonic-gate cpu_scrub_cyclic_setup(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
56810Sstevel@tonic-gate cyc_time_t *when)
56820Sstevel@tonic-gate {
56830Sstevel@tonic-gate struct scrub_info *csi = (struct scrub_info *)arg;
56840Sstevel@tonic-gate
56850Sstevel@tonic-gate ASSERT(csi != NULL);
56860Sstevel@tonic-gate hdlr->cyh_func = (cyc_func_t)do_scrub;
56870Sstevel@tonic-gate hdlr->cyh_level = CY_LOW_LEVEL;
56880Sstevel@tonic-gate hdlr->cyh_arg = arg;
56890Sstevel@tonic-gate
56900Sstevel@tonic-gate when->cyt_when = 0; /* Start immediately */
56910Sstevel@tonic-gate when->cyt_interval = NANOSEC / csi->csi_freq;
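/* e.g. a csi_freq of 100 calls/sec gives a 10ms cyclic interval */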
56920Sstevel@tonic-gate }
56930Sstevel@tonic-gate
56940Sstevel@tonic-gate /*
56950Sstevel@tonic-gate * Initialization for cache scrubbing.
56960Sstevel@tonic-gate * This routine is called AFTER all cpus have had cpu_init_private called
56970Sstevel@tonic-gate * to initialize their private data areas.
56980Sstevel@tonic-gate */
56990Sstevel@tonic-gate void
57000Sstevel@tonic-gate cpu_init_cache_scrub(void)
57010Sstevel@tonic-gate {
57020Sstevel@tonic-gate int i;
57030Sstevel@tonic-gate struct scrub_info *csi;
57040Sstevel@tonic-gate cyc_omni_handler_t omni_hdlr;
57050Sstevel@tonic-gate cyc_handler_t offline_hdlr;
57060Sstevel@tonic-gate cyc_time_t when;
57070Sstevel@tonic-gate
57080Sstevel@tonic-gate /*
57090Sstevel@tonic-gate * save away the maximum number of lines for the D$
57100Sstevel@tonic-gate */
57110Sstevel@tonic-gate dcache_nlines = dcache_size / dcache_linesize;
57120Sstevel@tonic-gate
57130Sstevel@tonic-gate /*
57140Sstevel@tonic-gate * register the softints for the cache scrubbing
57150Sstevel@tonic-gate */
57160Sstevel@tonic-gate cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_inum =
57170Sstevel@tonic-gate add_softintr(ecache_scrub_pil, scrub_ecache_line_intr,
57182973Sgovinda (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_E], SOFTINT_MT);
57190Sstevel@tonic-gate cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_freq = ecache_calls_a_sec;
57200Sstevel@tonic-gate
57210Sstevel@tonic-gate cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_inum =
57220Sstevel@tonic-gate add_softintr(dcache_scrub_pil, scrub_dcache_line_intr,
57232973Sgovinda (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_D], SOFTINT_MT);
57240Sstevel@tonic-gate cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_freq = dcache_calls_a_sec;
57250Sstevel@tonic-gate
57260Sstevel@tonic-gate cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_inum =
57270Sstevel@tonic-gate add_softintr(icache_scrub_pil, scrub_icache_line_intr,
57282973Sgovinda (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_I], SOFTINT_MT);
57290Sstevel@tonic-gate cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_freq = icache_calls_a_sec;
57300Sstevel@tonic-gate
57310Sstevel@tonic-gate /*
57320Sstevel@tonic-gate * start the scrubbing for all the caches
57330Sstevel@tonic-gate */
57340Sstevel@tonic-gate mutex_enter(&cpu_lock);
57350Sstevel@tonic-gate for (i = 0; i < CACHE_SCRUBBER_COUNT; i++) {
57360Sstevel@tonic-gate
57370Sstevel@tonic-gate csi = &cache_scrub_info[i];
57380Sstevel@tonic-gate
57390Sstevel@tonic-gate if (!(*csi->csi_enable))
57400Sstevel@tonic-gate continue;
57410Sstevel@tonic-gate
57420Sstevel@tonic-gate /*
57430Sstevel@tonic-gate * force the following to be true:
57440Sstevel@tonic-gate * 1 <= calls_a_sec <= hz
57450Sstevel@tonic-gate */
57460Sstevel@tonic-gate if (csi->csi_freq > hz) {
57470Sstevel@tonic-gate cmn_err(CE_NOTE, "%s scrub calls_a_sec set too high "
57485219Skm84432 "(%d); resetting to hz (%d)", csi->csi_name,
57495219Skm84432 csi->csi_freq, hz);
57500Sstevel@tonic-gate csi->csi_freq = hz;
57510Sstevel@tonic-gate } else if (csi->csi_freq < 1) {
57520Sstevel@tonic-gate cmn_err(CE_NOTE, "%s scrub calls_a_sec set too low "
57535219Skm84432 "(%d); resetting to 1", csi->csi_name,
57545219Skm84432 csi->csi_freq);
57550Sstevel@tonic-gate csi->csi_freq = 1;
57560Sstevel@tonic-gate }
57570Sstevel@tonic-gate
57580Sstevel@tonic-gate omni_hdlr.cyo_online = cpu_scrub_cyclic_setup;
57590Sstevel@tonic-gate omni_hdlr.cyo_offline = NULL;
57600Sstevel@tonic-gate omni_hdlr.cyo_arg = (void *)csi;
57610Sstevel@tonic-gate
57620Sstevel@tonic-gate offline_hdlr.cyh_func = (cyc_func_t)do_scrub_offline;
57630Sstevel@tonic-gate offline_hdlr.cyh_arg = (void *)csi;
57640Sstevel@tonic-gate offline_hdlr.cyh_level = CY_LOW_LEVEL;
57650Sstevel@tonic-gate
57660Sstevel@tonic-gate when.cyt_when = 0; /* Start immediately */
57670Sstevel@tonic-gate when.cyt_interval = NANOSEC / csi->csi_freq;
57680Sstevel@tonic-gate
57690Sstevel@tonic-gate csi->csi_omni_cyc_id = cyclic_add_omni(&omni_hdlr);
57700Sstevel@tonic-gate csi->csi_offline_cyc_id = cyclic_add(&offline_hdlr, &when);
57710Sstevel@tonic-gate }
57720Sstevel@tonic-gate register_cpu_setup_func(cpu_scrub_cpu_setup, NULL);
57730Sstevel@tonic-gate mutex_exit(&cpu_lock);
57740Sstevel@tonic-gate }
57750Sstevel@tonic-gate
57760Sstevel@tonic-gate /*
57770Sstevel@tonic-gate * Indicate that the specified cpu is idle.
57780Sstevel@tonic-gate */
57790Sstevel@tonic-gate void
57800Sstevel@tonic-gate cpu_idle_ecache_scrub(struct cpu *cp)
57810Sstevel@tonic-gate {
57820Sstevel@tonic-gate if (CPU_PRIVATE(cp) != NULL) {
57830Sstevel@tonic-gate ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
57840Sstevel@tonic-gate csmp->chsm_ecache_busy = ECACHE_CPU_IDLE;
57850Sstevel@tonic-gate }
57860Sstevel@tonic-gate }
57870Sstevel@tonic-gate
57880Sstevel@tonic-gate /*
57890Sstevel@tonic-gate * Indicate that the specified cpu is busy.
57900Sstevel@tonic-gate */
57910Sstevel@tonic-gate void
57920Sstevel@tonic-gate cpu_busy_ecache_scrub(struct cpu *cp)
57930Sstevel@tonic-gate {
57940Sstevel@tonic-gate if (CPU_PRIVATE(cp) != NULL) {
57950Sstevel@tonic-gate ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
57960Sstevel@tonic-gate csmp->chsm_ecache_busy = ECACHE_CPU_BUSY;
57970Sstevel@tonic-gate }
57980Sstevel@tonic-gate }
57990Sstevel@tonic-gate
58000Sstevel@tonic-gate /*
58010Sstevel@tonic-gate * Initialization for cache scrubbing for the specified cpu.
58020Sstevel@tonic-gate */
58030Sstevel@tonic-gate void
58040Sstevel@tonic-gate cpu_init_ecache_scrub_dr(struct cpu *cp)
58050Sstevel@tonic-gate {
58060Sstevel@tonic-gate ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
58070Sstevel@tonic-gate int cpuid = cp->cpu_id;
58080Sstevel@tonic-gate
58090Sstevel@tonic-gate /* initialize the number of lines in the caches */
58100Sstevel@tonic-gate csmp->chsm_ecache_nlines = cpunodes[cpuid].ecache_size /
58110Sstevel@tonic-gate cpunodes[cpuid].ecache_linesize;
58120Sstevel@tonic-gate csmp->chsm_icache_nlines = CPU_PRIVATE_VAL(cp, chpr_icache_size) /
58130Sstevel@tonic-gate CPU_PRIVATE_VAL(cp, chpr_icache_linesize);
58140Sstevel@tonic-gate
58150Sstevel@tonic-gate /*
58160Sstevel@tonic-gate * do_scrub() and do_scrub_offline() check both the global
58170Sstevel@tonic-gate * ?cache_scrub_enable and this per-cpu enable variable. All scrubbers
58180Sstevel@tonic-gate * check this value before scrubbing. Currently, we use it to
58190Sstevel@tonic-gate * disable the E$ scrubber on multi-core cpus or while running at
58200Sstevel@tonic-gate * slowed speed. For now, just turn everything on and allow
58210Sstevel@tonic-gate * cpu_init_private() to change it if necessary.
58220Sstevel@tonic-gate */
58230Sstevel@tonic-gate csmp->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1;
58240Sstevel@tonic-gate csmp->chsm_enable[CACHE_SCRUBBER_INFO_D] = 1;
58250Sstevel@tonic-gate csmp->chsm_enable[CACHE_SCRUBBER_INFO_I] = 1;
58260Sstevel@tonic-gate
58270Sstevel@tonic-gate cpu_busy_ecache_scrub(cp);
58280Sstevel@tonic-gate }
58290Sstevel@tonic-gate
58300Sstevel@tonic-gate /*
58310Sstevel@tonic-gate * Un-initialization for cache scrubbing for the specified cpu.
58320Sstevel@tonic-gate */
58330Sstevel@tonic-gate static void
58340Sstevel@tonic-gate cpu_uninit_ecache_scrub_dr(struct cpu *cp)
58350Sstevel@tonic-gate {
58360Sstevel@tonic-gate ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
58370Sstevel@tonic-gate
58380Sstevel@tonic-gate /*
58390Sstevel@tonic-gate * un-initialize bookkeeping for cache scrubbing
58400Sstevel@tonic-gate */
58410Sstevel@tonic-gate bzero(csmp, sizeof (ch_scrub_misc_t));
58420Sstevel@tonic-gate
58430Sstevel@tonic-gate cpu_idle_ecache_scrub(cp);
58440Sstevel@tonic-gate }
58450Sstevel@tonic-gate
58460Sstevel@tonic-gate /*
58470Sstevel@tonic-gate * Called periodically on each CPU to scrub the D$.
58480Sstevel@tonic-gate */
58490Sstevel@tonic-gate static void
58500Sstevel@tonic-gate scrub_dcache(int how_many)
58510Sstevel@tonic-gate {
58520Sstevel@tonic-gate int i;
58530Sstevel@tonic-gate ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
58540Sstevel@tonic-gate int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D];
58550Sstevel@tonic-gate
58560Sstevel@tonic-gate /*
58570Sstevel@tonic-gate * scrub the desired number of lines
58580Sstevel@tonic-gate */
58590Sstevel@tonic-gate for (i = 0; i < how_many; i++) {
58600Sstevel@tonic-gate /*
58610Sstevel@tonic-gate * scrub a D$ line
58620Sstevel@tonic-gate */
58630Sstevel@tonic-gate dcache_inval_line(index);
58640Sstevel@tonic-gate
58650Sstevel@tonic-gate /*
58660Sstevel@tonic-gate * calculate the next D$ line to scrub, assumes
58670Sstevel@tonic-gate * that dcache_nlines is a power of 2
58680Sstevel@tonic-gate */
58690Sstevel@tonic-gate index = (index + 1) & (dcache_nlines - 1);
58700Sstevel@tonic-gate }
58710Sstevel@tonic-gate
58720Sstevel@tonic-gate /*
58730Sstevel@tonic-gate * set the scrub index for the next visit
58740Sstevel@tonic-gate */
58750Sstevel@tonic-gate csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D] = index;
58760Sstevel@tonic-gate }
58770Sstevel@tonic-gate
58780Sstevel@tonic-gate /*
58790Sstevel@tonic-gate * Handler for D$ scrub inum softint. Call scrub_dcache until
58800Sstevel@tonic-gate * we decrement the outstanding request count to zero.
58810Sstevel@tonic-gate */
58820Sstevel@tonic-gate /*ARGSUSED*/
58830Sstevel@tonic-gate static uint_t
58840Sstevel@tonic-gate scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2)
58850Sstevel@tonic-gate {
58860Sstevel@tonic-gate int i;
58870Sstevel@tonic-gate int how_many;
58880Sstevel@tonic-gate int outstanding;
58890Sstevel@tonic-gate ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
58900Sstevel@tonic-gate uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_D];
58910Sstevel@tonic-gate struct scrub_info *csi = (struct scrub_info *)arg1;
58920Sstevel@tonic-gate int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
58935219Skm84432 dcache_scan_rate_idle : dcache_scan_rate_busy;
58940Sstevel@tonic-gate
58950Sstevel@tonic-gate /*
58960Sstevel@tonic-gate * The scan rates are expressed in units of tenths of a
58970Sstevel@tonic-gate * percent. A scan rate of 1000 (100%) means the whole
58980Sstevel@tonic-gate * cache is scanned every second.
58990Sstevel@tonic-gate */
59000Sstevel@tonic-gate how_many = (dcache_nlines * scan_rate) / (1000 * csi->csi_freq);
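/*
 * Illustrative arithmetic: with dcache_nlines = 512, scan_rate = 100
 * (i.e. 10%) and csi_freq = 2, how_many = (512 * 100) / (1000 * 2) =
 * 25 lines per call, i.e. about 10% of the D$ scanned each second.
 */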
59010Sstevel@tonic-gate
59020Sstevel@tonic-gate do {
59030Sstevel@tonic-gate outstanding = *countp;
59040Sstevel@tonic-gate for (i = 0; i < outstanding; i++) {
59050Sstevel@tonic-gate scrub_dcache(how_many);
59060Sstevel@tonic-gate }
59070Sstevel@tonic-gate } while (atomic_add_32_nv(countp, -outstanding));
59080Sstevel@tonic-gate
59090Sstevel@tonic-gate return (DDI_INTR_CLAIMED);
59100Sstevel@tonic-gate }
59110Sstevel@tonic-gate
59120Sstevel@tonic-gate /*
59130Sstevel@tonic-gate * Called periodically on each CPU to scrub the I$. The I$ is scrubbed
59140Sstevel@tonic-gate * by invalidating lines. Due to the characteristics of the ASI that
59150Sstevel@tonic-gate * is used to invalidate an I$ line, the entire I$ must be invalidated
59160Sstevel@tonic-gate * rather than an individual I$ line.
59170Sstevel@tonic-gate */
59180Sstevel@tonic-gate static void
59190Sstevel@tonic-gate scrub_icache(int how_many)
59200Sstevel@tonic-gate {
59210Sstevel@tonic-gate int i;
59220Sstevel@tonic-gate ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
59230Sstevel@tonic-gate int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I];
59240Sstevel@tonic-gate int icache_nlines = csmp->chsm_icache_nlines;
59250Sstevel@tonic-gate
59260Sstevel@tonic-gate /*
59270Sstevel@tonic-gate * scrub the desired number of lines
59280Sstevel@tonic-gate */
59290Sstevel@tonic-gate for (i = 0; i < how_many; i++) {
59300Sstevel@tonic-gate /*
59310Sstevel@tonic-gate * since the entire I$ must be scrubbed at once,
59320Sstevel@tonic-gate * wait until the index wraps to zero to invalidate
59330Sstevel@tonic-gate * the entire I$
59340Sstevel@tonic-gate */
59350Sstevel@tonic-gate if (index == 0) {
59360Sstevel@tonic-gate icache_inval_all();
59370Sstevel@tonic-gate }
59380Sstevel@tonic-gate
59390Sstevel@tonic-gate /*
59400Sstevel@tonic-gate * calculate the next I$ line to scrub, assumes
59410Sstevel@tonic-gate * that chsm_icache_nlines is a power of 2
59420Sstevel@tonic-gate */
59430Sstevel@tonic-gate index = (index + 1) & (icache_nlines - 1);
59440Sstevel@tonic-gate }
59450Sstevel@tonic-gate
59460Sstevel@tonic-gate /*
59470Sstevel@tonic-gate * set the scrub index for the next visit
59480Sstevel@tonic-gate */
59490Sstevel@tonic-gate csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I] = index;
59500Sstevel@tonic-gate }
59510Sstevel@tonic-gate
59520Sstevel@tonic-gate /*
59530Sstevel@tonic-gate * Handler for I$ scrub inum softint. Call scrub_icache until
59540Sstevel@tonic-gate * we decrement the outstanding request count to zero.
59550Sstevel@tonic-gate */
59560Sstevel@tonic-gate /*ARGSUSED*/
59570Sstevel@tonic-gate static uint_t
59580Sstevel@tonic-gate scrub_icache_line_intr(caddr_t arg1, caddr_t arg2)
59590Sstevel@tonic-gate {
59600Sstevel@tonic-gate int i;
59610Sstevel@tonic-gate int how_many;
59620Sstevel@tonic-gate int outstanding;
59630Sstevel@tonic-gate ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
59640Sstevel@tonic-gate uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_I];
59650Sstevel@tonic-gate struct scrub_info *csi = (struct scrub_info *)arg1;
59660Sstevel@tonic-gate int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
59670Sstevel@tonic-gate icache_scan_rate_idle : icache_scan_rate_busy;
59680Sstevel@tonic-gate int icache_nlines = csmp->chsm_icache_nlines;
59690Sstevel@tonic-gate
59700Sstevel@tonic-gate /*
59710Sstevel@tonic-gate * The scan rates are expressed in units of tenths of a
59720Sstevel@tonic-gate * percent. A scan rate of 1000 (100%) means the whole
59730Sstevel@tonic-gate * cache is scanned every second.
59740Sstevel@tonic-gate */
59750Sstevel@tonic-gate how_many = (icache_nlines * scan_rate) / (1000 * csi->csi_freq);
59760Sstevel@tonic-gate
59770Sstevel@tonic-gate do {
59780Sstevel@tonic-gate outstanding = *countp;
59790Sstevel@tonic-gate for (i = 0; i < outstanding; i++) {
59800Sstevel@tonic-gate scrub_icache(how_many);
59810Sstevel@tonic-gate }
59820Sstevel@tonic-gate } while (atomic_add_32_nv(countp, -outstanding));
59830Sstevel@tonic-gate
59840Sstevel@tonic-gate return (DDI_INTR_CLAIMED);
59850Sstevel@tonic-gate }
59860Sstevel@tonic-gate
59870Sstevel@tonic-gate /*
59880Sstevel@tonic-gate * Called periodically on each CPU to scrub the E$.
59890Sstevel@tonic-gate */
59900Sstevel@tonic-gate static void
59910Sstevel@tonic-gate scrub_ecache(int how_many)
59920Sstevel@tonic-gate {
59930Sstevel@tonic-gate ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
59940Sstevel@tonic-gate int i;
59950Sstevel@tonic-gate int cpuid = CPU->cpu_id;
59960Sstevel@tonic-gate int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E];
59970Sstevel@tonic-gate int nlines = csmp->chsm_ecache_nlines;
59980Sstevel@tonic-gate int linesize = cpunodes[cpuid].ecache_linesize;
59990Sstevel@tonic-gate int ec_set_size = cpu_ecache_set_size(CPU);
60000Sstevel@tonic-gate
60010Sstevel@tonic-gate /*
60020Sstevel@tonic-gate * scrub the desired number of lines
60030Sstevel@tonic-gate */
60040Sstevel@tonic-gate for (i = 0; i < how_many; i++) {
60050Sstevel@tonic-gate /*
60060Sstevel@tonic-gate * scrub the E$ line
60070Sstevel@tonic-gate */
60080Sstevel@tonic-gate ecache_flush_line(ecache_flushaddr + (index * linesize),
60090Sstevel@tonic-gate ec_set_size);
60100Sstevel@tonic-gate
60110Sstevel@tonic-gate /*
60120Sstevel@tonic-gate * calculate the next E$ line to scrub based on twice
60130Sstevel@tonic-gate * the number of E$ lines (to displace lines containing
60140Sstevel@tonic-gate * flush area data), assumes that the number of lines
60150Sstevel@tonic-gate * is a power of 2
60160Sstevel@tonic-gate */
60170Sstevel@tonic-gate index = (index + 1) & ((nlines << 1) - 1);
60180Sstevel@tonic-gate }
60190Sstevel@tonic-gate
60200Sstevel@tonic-gate /*
60210Sstevel@tonic-gate * set the ecache scrub index for the next visit
60220Sstevel@tonic-gate */
60230Sstevel@tonic-gate csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E] = index;
60240Sstevel@tonic-gate }
60250Sstevel@tonic-gate
60260Sstevel@tonic-gate /*
60270Sstevel@tonic-gate * Handler for E$ scrub inum softint. Call the E$ scrubber until
60280Sstevel@tonic-gate * we decrement the outstanding request count to zero.
6029474Srscott *
6030474Srscott * Due to interactions with cpu_scrub_cpu_setup(), the outstanding count may
6031474Srscott * become negative after the atomic_add_32_nv(). This is not a problem, as
6032474Srscott * the next trip around the loop won't scrub anything, and the next add will
6033474Srscott * reset the count back to zero.
60340Sstevel@tonic-gate */
60350Sstevel@tonic-gate /*ARGSUSED*/
60360Sstevel@tonic-gate static uint_t
60370Sstevel@tonic-gate scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
60380Sstevel@tonic-gate {
60390Sstevel@tonic-gate int i;
60400Sstevel@tonic-gate int how_many;
60410Sstevel@tonic-gate int outstanding;
60420Sstevel@tonic-gate ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
60430Sstevel@tonic-gate uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_E];
60440Sstevel@tonic-gate struct scrub_info *csi = (struct scrub_info *)arg1;
60450Sstevel@tonic-gate int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
60465219Skm84432 ecache_scan_rate_idle : ecache_scan_rate_busy;
60470Sstevel@tonic-gate int ecache_nlines = csmp->chsm_ecache_nlines;
60480Sstevel@tonic-gate
60490Sstevel@tonic-gate /*
60500Sstevel@tonic-gate * The scan rates are expressed in units of tenths of a
60510Sstevel@tonic-gate * percent. A scan rate of 1000 (100%) means the whole
60520Sstevel@tonic-gate * cache is scanned every second.
60530Sstevel@tonic-gate */
60540Sstevel@tonic-gate how_many = (ecache_nlines * scan_rate) / (1000 * csi->csi_freq);
60550Sstevel@tonic-gate
60560Sstevel@tonic-gate do {
60570Sstevel@tonic-gate outstanding = *countp;
60580Sstevel@tonic-gate for (i = 0; i < outstanding; i++) {
60590Sstevel@tonic-gate scrub_ecache(how_many);
60600Sstevel@tonic-gate }
60610Sstevel@tonic-gate } while (atomic_add_32_nv(countp, -outstanding));
60620Sstevel@tonic-gate
60630Sstevel@tonic-gate return (DDI_INTR_CLAIMED);
60640Sstevel@tonic-gate }
60650Sstevel@tonic-gate
60660Sstevel@tonic-gate /*
60670Sstevel@tonic-gate * Timeout function to re-enable CEEN; reschedules itself if the taskq dispatch fails
60680Sstevel@tonic-gate */
60690Sstevel@tonic-gate static void
60700Sstevel@tonic-gate cpu_delayed_check_ce_errors(void *arg)
60710Sstevel@tonic-gate {
60720Sstevel@tonic-gate if (!taskq_dispatch(ch_check_ce_tq, cpu_check_ce_errors, arg,
60730Sstevel@tonic-gate TQ_NOSLEEP)) {
60740Sstevel@tonic-gate (void) timeout(cpu_delayed_check_ce_errors, arg,
60750Sstevel@tonic-gate drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
60760Sstevel@tonic-gate }
60770Sstevel@tonic-gate }
60780Sstevel@tonic-gate
60790Sstevel@tonic-gate /*
60800Sstevel@tonic-gate * CE Deferred Re-enable after trap.
60810Sstevel@tonic-gate *
60820Sstevel@tonic-gate * When the CPU gets a disrupting trap for any of the errors
60830Sstevel@tonic-gate * controlled by the CEEN bit, CEEN is disabled in the trap handler
60840Sstevel@tonic-gate * immediately. To eliminate the possibility of multiple CEs causing
60850Sstevel@tonic-gate * recursive stack overflow in the trap handler, we cannot
60860Sstevel@tonic-gate * reenable CEEN while still running in the trap handler. Instead,
60870Sstevel@tonic-gate * after a CE is logged on a CPU, we schedule a timeout function,
60880Sstevel@tonic-gate * cpu_check_ce_errors(), to trigger after cpu_ceen_delay_secs
60890Sstevel@tonic-gate * seconds. This function will check whether any further CEs
60900Sstevel@tonic-gate * have occurred on that CPU, and if none have, will reenable CEEN.
60910Sstevel@tonic-gate *
60920Sstevel@tonic-gate * If further CEs have occurred while CEEN is disabled, another
60930Sstevel@tonic-gate * timeout will be scheduled. This is to ensure that the CPU can
60940Sstevel@tonic-gate * make progress in the face of CE 'storms', and that it does not
60950Sstevel@tonic-gate * spend all its time logging CE errors.
60960Sstevel@tonic-gate */
60970Sstevel@tonic-gate static void
60980Sstevel@tonic-gate cpu_check_ce_errors(void *arg)
60990Sstevel@tonic-gate {
6100946Smathue int cpuid = (int)(uintptr_t)arg;
61010Sstevel@tonic-gate cpu_t *cp;
61020Sstevel@tonic-gate
61030Sstevel@tonic-gate /*
61040Sstevel@tonic-gate * We acquire cpu_lock below, so we must be running at PIL 0.
61050Sstevel@tonic-gate */
61060Sstevel@tonic-gate ASSERT(curthread->t_pil == 0);
61070Sstevel@tonic-gate
61080Sstevel@tonic-gate /*
61090Sstevel@tonic-gate * verify that the cpu is still around, DR
61100Sstevel@tonic-gate * could have got there first ...
61110Sstevel@tonic-gate */
61120Sstevel@tonic-gate mutex_enter(&cpu_lock);
61130Sstevel@tonic-gate cp = cpu_get(cpuid);
61140Sstevel@tonic-gate if (cp == NULL) {
61150Sstevel@tonic-gate mutex_exit(&cpu_lock);
61160Sstevel@tonic-gate return;
61170Sstevel@tonic-gate }
61180Sstevel@tonic-gate /*
61190Sstevel@tonic-gate * make sure we don't migrate across CPUs
61200Sstevel@tonic-gate * while checking our CE status.
61210Sstevel@tonic-gate */
61220Sstevel@tonic-gate kpreempt_disable();
61230Sstevel@tonic-gate
61240Sstevel@tonic-gate /*
61250Sstevel@tonic-gate * If we are running on the CPU that got the
61260Sstevel@tonic-gate * CE, we can do the checks directly.
61270Sstevel@tonic-gate */
61280Sstevel@tonic-gate if (cp->cpu_id == CPU->cpu_id) {
61290Sstevel@tonic-gate mutex_exit(&cpu_lock);
61300Sstevel@tonic-gate cpu_check_ce(TIMEOUT_CEEN_CHECK, 0, 0, 0);
61310Sstevel@tonic-gate kpreempt_enable();
61320Sstevel@tonic-gate return;
61330Sstevel@tonic-gate }
61340Sstevel@tonic-gate kpreempt_enable();
61350Sstevel@tonic-gate
61360Sstevel@tonic-gate /*
61370Sstevel@tonic-gate * send an x-call to get the CPU that originally
61380Sstevel@tonic-gate * got the CE to do the necessary checks. If we can't
61390Sstevel@tonic-gate * send the x-call, reschedule the timeout, otherwise we
61400Sstevel@tonic-gate * lose CEEN forever on that CPU.
61410Sstevel@tonic-gate */
61420Sstevel@tonic-gate if (CPU_XCALL_READY(cp->cpu_id) && (!(cp->cpu_flags & CPU_QUIESCED))) {
61430Sstevel@tonic-gate xc_one(cp->cpu_id, (xcfunc_t *)cpu_check_ce,
61440Sstevel@tonic-gate TIMEOUT_CEEN_CHECK, 0);
61450Sstevel@tonic-gate mutex_exit(&cpu_lock);
61460Sstevel@tonic-gate } else {
61470Sstevel@tonic-gate /*
61480Sstevel@tonic-gate * When the CPU is not accepting xcalls, or
61490Sstevel@tonic-gate * the processor is offlined, we don't want to
61500Sstevel@tonic-gate * incur the extra overhead of trying to schedule the
61510Sstevel@tonic-gate * CE timeout indefinitely. However, we don't want to lose
61520Sstevel@tonic-gate * CE checking forever.
61530Sstevel@tonic-gate *
61540Sstevel@tonic-gate * Keep rescheduling the timeout, accepting the additional
61550Sstevel@tonic-gate * overhead as the cost of correctness in the case where we get
61560Sstevel@tonic-gate * a CE, disable CEEN, offline the CPU during the
61570Sstevel@tonic-gate * timeout interval, and then online it at some
61580Sstevel@tonic-gate * point in the future. This is unlikely given the short
61590Sstevel@tonic-gate * cpu_ceen_delay_secs.
61600Sstevel@tonic-gate */
61610Sstevel@tonic-gate mutex_exit(&cpu_lock);
6162946Smathue (void) timeout(cpu_delayed_check_ce_errors,
6163946Smathue (void *)(uintptr_t)cp->cpu_id,
61640Sstevel@tonic-gate drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
61650Sstevel@tonic-gate }
61660Sstevel@tonic-gate }
61670Sstevel@tonic-gate
61680Sstevel@tonic-gate /*
61690Sstevel@tonic-gate * This routine will check whether CEs have occurred while
61700Sstevel@tonic-gate * CEEN is disabled. Any CEs detected will be logged and, if
61710Sstevel@tonic-gate * possible, scrubbed.
61720Sstevel@tonic-gate *
61730Sstevel@tonic-gate * The memscrubber will also use this routine to clear any errors
61740Sstevel@tonic-gate * caused by its scrubbing with CEEN disabled.
61750Sstevel@tonic-gate *
61760Sstevel@tonic-gate * flag == SCRUBBER_CEEN_CHECK
61770Sstevel@tonic-gate * called from memscrubber, just check/scrub, no reset
61780Sstevel@tonic-gate * paddr physical addr. for start of scrub pages
61790Sstevel@tonic-gate * vaddr virtual addr. for scrub area
61800Sstevel@tonic-gate * psz page size of area to be scrubbed
61810Sstevel@tonic-gate *
61820Sstevel@tonic-gate * flag == TIMEOUT_CEEN_CHECK
61830Sstevel@tonic-gate * timeout function has triggered, reset timeout or CEEN
61840Sstevel@tonic-gate *
61850Sstevel@tonic-gate * Note: We must not migrate cpus during this function. This can be
61860Sstevel@tonic-gate * achieved by one of:
61870Sstevel@tonic-gate * - invoking as target of an x-call in which case we're at XCALL_PIL
61880Sstevel@tonic-gate * The flag value must be first xcall argument.
61890Sstevel@tonic-gate * - disabling kernel preemption. This should be done for very short
61900Sstevel@tonic-gate * periods, so it is not suitable for SCRUBBER_CEEN_CHECK where we might
61910Sstevel@tonic-gate * scrub an extended area with cpu_check_block. The call for
61920Sstevel@tonic-gate * TIMEOUT_CEEN_CHECK uses this so cpu_check_ce must be kept
61930Sstevel@tonic-gate * brief for this case.
61940Sstevel@tonic-gate * - binding to a cpu, eg with thread_affinity_set(). This is used
61950Sstevel@tonic-gate * in the SCRUBBER_CEEN_CHECK case, but is not practical for
61960Sstevel@tonic-gate * the TIMEOUT_CEEN_CHECK because both need cpu_lock.
61970Sstevel@tonic-gate */
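/*
 * Hypothetical SCRUBBER_CEEN_CHECK caller, sketching the
 * thread_affinity_set() binding described above (argument values are
 * illustrative):
 *
 *	thread_affinity_set(curthread, cpuid);
 *	cpu_check_ce(SCRUBBER_CEEN_CHECK, pa, va, psz);
 *	thread_affinity_clear(curthread);
 */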
61980Sstevel@tonic-gate void
61990Sstevel@tonic-gate cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz)
62000Sstevel@tonic-gate {
62010Sstevel@tonic-gate ch_cpu_errors_t cpu_error_regs;
62020Sstevel@tonic-gate uint64_t ec_err_enable;
62030Sstevel@tonic-gate uint64_t page_offset;
62040Sstevel@tonic-gate
62050Sstevel@tonic-gate /* Read AFSR */
62060Sstevel@tonic-gate get_cpu_error_state(&cpu_error_regs);
62070Sstevel@tonic-gate
62080Sstevel@tonic-gate /*
62090Sstevel@tonic-gate * If no CEEN errors have occurred during the timeout
62100Sstevel@tonic-gate * interval, it is safe to re-enable CEEN and exit.
62110Sstevel@tonic-gate */
62125219Skm84432 if (((cpu_error_regs.afsr & C_AFSR_CECC_ERRS) |
62135219Skm84432 (cpu_error_regs.afsr_ext & C_AFSR_EXT_CECC_ERRS)) == 0) {
62140Sstevel@tonic-gate if (flag == TIMEOUT_CEEN_CHECK &&
62150Sstevel@tonic-gate !((ec_err_enable = get_error_enable()) & EN_REG_CEEN))
62160Sstevel@tonic-gate set_error_enable(ec_err_enable | EN_REG_CEEN);
62170Sstevel@tonic-gate return;
62180Sstevel@tonic-gate }
62190Sstevel@tonic-gate
62200Sstevel@tonic-gate /*
62210Sstevel@tonic-gate * Ensure that CEEN was not reenabled (maybe by DR) before
62220Sstevel@tonic-gate * we log/clear the error.
62230Sstevel@tonic-gate */
62240Sstevel@tonic-gate if ((ec_err_enable = get_error_enable()) & EN_REG_CEEN)
62255219Skm84432 set_error_enable(ec_err_enable & ~EN_REG_CEEN);
62260Sstevel@tonic-gate
62270Sstevel@tonic-gate /*
62280Sstevel@tonic-gate * log/clear the CE. If CE_CEEN_DEFER is passed, the
62290Sstevel@tonic-gate * timeout will be rescheduled when the error is logged.
62300Sstevel@tonic-gate */
62315219Skm84432 if (!((cpu_error_regs.afsr & cpu_ce_not_deferred) |
62325219Skm84432 (cpu_error_regs.afsr_ext & cpu_ce_not_deferred_ext)))
62335219Skm84432 cpu_ce_detected(&cpu_error_regs,
62345219Skm84432 CE_CEEN_DEFER | CE_CEEN_TIMEOUT);
62350Sstevel@tonic-gate else
62365219Skm84432 cpu_ce_detected(&cpu_error_regs, CE_CEEN_TIMEOUT);
62370Sstevel@tonic-gate
62380Sstevel@tonic-gate /*
62390Sstevel@tonic-gate * If the memory scrubber runs while CEEN is
62400Sstevel@tonic-gate * disabled, (or if CEEN is disabled during the
62410Sstevel@tonic-gate * scrub as a result of a CE being triggered by
62420Sstevel@tonic-gate * it), the range being scrubbed will not be
62430Sstevel@tonic-gate * completely cleaned. If there are multiple CEs
62440Sstevel@tonic-gate * in the range at most two of these will be dealt
62450Sstevel@tonic-gate * with, (one by the trap handler and one by the
62460Sstevel@tonic-gate * timeout). It is also possible that none are dealt
62470Sstevel@tonic-gate * with, (CEEN disabled and another CE occurs before
62480Sstevel@tonic-gate * the timeout triggers). So to ensure that the
62490Sstevel@tonic-gate * memory is actually scrubbed, we have to access each
62500Sstevel@tonic-gate * memory location in the range and then check whether
62510Sstevel@tonic-gate * that access causes a CE.
62520Sstevel@tonic-gate */
62530Sstevel@tonic-gate if (flag == SCRUBBER_CEEN_CHECK && va) {
62540Sstevel@tonic-gate if ((cpu_error_regs.afar >= pa) &&
62550Sstevel@tonic-gate (cpu_error_regs.afar < (pa + psz))) {
62560Sstevel@tonic-gate /*
62570Sstevel@tonic-gate * Force a load from physical memory for each
62580Sstevel@tonic-gate * 64-byte block, then check AFSR to determine
62590Sstevel@tonic-gate * whether this access caused an error.
62600Sstevel@tonic-gate *
62610Sstevel@tonic-gate * This is a slow way to do a scrub, but as it will
62620Sstevel@tonic-gate * only be invoked when the memory scrubber actually
62630Sstevel@tonic-gate * triggered a CE, it should not happen too
62640Sstevel@tonic-gate * frequently.
62650Sstevel@tonic-gate *
62660Sstevel@tonic-gate * Cut down what we need to check, as the scrubber
62670Sstevel@tonic-gate * has verified up to AFAR, so get its offset
62680Sstevel@tonic-gate * into the page and start there.
62690Sstevel@tonic-gate */
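/*
 * Worked example (illustrative): for an 8K page (psz = 0x2000) with
 * AFAR at pa + 0x1234, page_offset = 0x1234; va advances by
 * P2ALIGN(0x1234, 64) = 0x1200 and psz shrinks to 0xe00, so only the
 * unverified tail of the page is rechecked.
 */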
62700Sstevel@tonic-gate page_offset = (uint64_t)(cpu_error_regs.afar &
62710Sstevel@tonic-gate (psz - 1));
62720Sstevel@tonic-gate va = (caddr_t)(va + (P2ALIGN(page_offset, 64)));
62730Sstevel@tonic-gate psz -= (uint_t)(P2ALIGN(page_offset, 64));
62740Sstevel@tonic-gate cpu_check_block((caddr_t)(P2ALIGN((uint64_t)va, 64)),
62750Sstevel@tonic-gate psz);
62760Sstevel@tonic-gate }
62770Sstevel@tonic-gate }
62780Sstevel@tonic-gate
62790Sstevel@tonic-gate /*
62800Sstevel@tonic-gate * Reset error enable if this CE is not masked.
62810Sstevel@tonic-gate */
62820Sstevel@tonic-gate if ((flag == TIMEOUT_CEEN_CHECK) &&
62830Sstevel@tonic-gate (cpu_error_regs.afsr & cpu_ce_not_deferred))
62845219Skm84432 set_error_enable(ec_err_enable | EN_REG_CEEN);
62850Sstevel@tonic-gate
62860Sstevel@tonic-gate }
62870Sstevel@tonic-gate
62880Sstevel@tonic-gate /*
62890Sstevel@tonic-gate * Attempt a cpu logout for an error that we did not trap for, such
62900Sstevel@tonic-gate * as a CE noticed with CEEN off. It is assumed that we are still running
62910Sstevel@tonic-gate * on the cpu that took the error and that we cannot migrate. Returns
62920Sstevel@tonic-gate * 0 on success, otherwise nonzero.
62930Sstevel@tonic-gate */
62940Sstevel@tonic-gate static int
62950Sstevel@tonic-gate cpu_ce_delayed_ec_logout(uint64_t afar)
62960Sstevel@tonic-gate {
62970Sstevel@tonic-gate ch_cpu_logout_t *clop;
62980Sstevel@tonic-gate
62990Sstevel@tonic-gate if (CPU_PRIVATE(CPU) == NULL)
63000Sstevel@tonic-gate return (0);
63010Sstevel@tonic-gate
63020Sstevel@tonic-gate clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
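/*
 * cas64() claims the per-cpu logout area only if it is still marked
 * LOGOUT_INVALID; if another error got there first, the logout
 * attempt simply fails.
 */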
63030Sstevel@tonic-gate if (cas64(&clop->clo_data.chd_afar, LOGOUT_INVALID, afar) !=
63040Sstevel@tonic-gate LOGOUT_INVALID)
63050Sstevel@tonic-gate return (0);
63060Sstevel@tonic-gate
63070Sstevel@tonic-gate cpu_delayed_logout(afar, clop);
63080Sstevel@tonic-gate return (1);
63090Sstevel@tonic-gate }
63100Sstevel@tonic-gate
63110Sstevel@tonic-gate /*
63120Sstevel@tonic-gate * We got an error while CEEN was disabled. We
63130Sstevel@tonic-gate * need to clean up after it and log whatever
63140Sstevel@tonic-gate * information we have on the CE.
63150Sstevel@tonic-gate */
63160Sstevel@tonic-gate void
63170Sstevel@tonic-gate cpu_ce_detected(ch_cpu_errors_t *cpu_error_regs, int flag)
63180Sstevel@tonic-gate {
63190Sstevel@tonic-gate ch_async_flt_t ch_flt;
63200Sstevel@tonic-gate struct async_flt *aflt;
63210Sstevel@tonic-gate char pr_reason[MAX_REASON_STRING];
63220Sstevel@tonic-gate
63230Sstevel@tonic-gate bzero(&ch_flt, sizeof (ch_async_flt_t));
63240Sstevel@tonic-gate ch_flt.flt_trapped_ce = flag;
63250Sstevel@tonic-gate aflt = (struct async_flt *)&ch_flt;
63260Sstevel@tonic-gate aflt->flt_stat = cpu_error_regs->afsr & C_AFSR_MASK;
63270Sstevel@tonic-gate ch_flt.afsr_ext = cpu_error_regs->afsr_ext;
63280Sstevel@tonic-gate ch_flt.afsr_errs = (cpu_error_regs->afsr_ext & C_AFSR_EXT_ALL_ERRS) |
63290Sstevel@tonic-gate (cpu_error_regs->afsr & C_AFSR_ALL_ERRS);
63300Sstevel@tonic-gate aflt->flt_addr = cpu_error_regs->afar;
63310Sstevel@tonic-gate #if defined(SERRANO)
63320Sstevel@tonic-gate ch_flt.afar2 = cpu_error_regs->afar2;
63330Sstevel@tonic-gate #endif /* SERRANO */
63340Sstevel@tonic-gate aflt->flt_pc = NULL;
63350Sstevel@tonic-gate aflt->flt_priv = ((cpu_error_regs->afsr & C_AFSR_PRIV) != 0);
63360Sstevel@tonic-gate aflt->flt_tl = 0;
63370Sstevel@tonic-gate aflt->flt_panic = 0;
63380Sstevel@tonic-gate cpu_log_and_clear_ce(&ch_flt);
63390Sstevel@tonic-gate
63400Sstevel@tonic-gate /*
63410Sstevel@tonic-gate * check if we caused any errors during cleanup
63420Sstevel@tonic-gate */
63430Sstevel@tonic-gate if (clear_errors(&ch_flt)) {
63440Sstevel@tonic-gate pr_reason[0] = '\0';
63450Sstevel@tonic-gate (void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
63460Sstevel@tonic-gate NULL);
63470Sstevel@tonic-gate }
63480Sstevel@tonic-gate }
63490Sstevel@tonic-gate
63500Sstevel@tonic-gate /*
63510Sstevel@tonic-gate * Log/clear CEEN-controlled disrupting errors
63520Sstevel@tonic-gate */
63530Sstevel@tonic-gate static void
63540Sstevel@tonic-gate cpu_log_and_clear_ce(ch_async_flt_t *ch_flt)
63550Sstevel@tonic-gate {
63560Sstevel@tonic-gate struct async_flt *aflt;
63570Sstevel@tonic-gate uint64_t afsr, afsr_errs;
63580Sstevel@tonic-gate ch_cpu_logout_t *clop;
63590Sstevel@tonic-gate char pr_reason[MAX_REASON_STRING];
63600Sstevel@tonic-gate on_trap_data_t *otp = curthread->t_ontrap;
63610Sstevel@tonic-gate
63620Sstevel@tonic-gate aflt = (struct async_flt *)ch_flt;
63630Sstevel@tonic-gate afsr = aflt->flt_stat;
63640Sstevel@tonic-gate afsr_errs = ch_flt->afsr_errs;
63650Sstevel@tonic-gate aflt->flt_id = gethrtime_waitfree();
63660Sstevel@tonic-gate aflt->flt_bus_id = getprocessorid();
63670Sstevel@tonic-gate aflt->flt_inst = CPU->cpu_id;
63680Sstevel@tonic-gate aflt->flt_prot = AFLT_PROT_NONE;
63690Sstevel@tonic-gate aflt->flt_class = CPU_FAULT;
63700Sstevel@tonic-gate aflt->flt_status = ECC_C_TRAP;
63710Sstevel@tonic-gate
63720Sstevel@tonic-gate pr_reason[0] = '\0';
63730Sstevel@tonic-gate /*
63740Sstevel@tonic-gate * Get the CPU logout info for the disrupting trap.
63750Sstevel@tonic-gate */
63760Sstevel@tonic-gate if (CPU_PRIVATE(CPU) == NULL) {
63770Sstevel@tonic-gate clop = NULL;
63780Sstevel@tonic-gate ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
63790Sstevel@tonic-gate } else {
63800Sstevel@tonic-gate clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
63810Sstevel@tonic-gate }
63820Sstevel@tonic-gate
63830Sstevel@tonic-gate if (clop && ch_flt->flt_trapped_ce & CE_CEEN_TIMEOUT) {
63840Sstevel@tonic-gate ch_cpu_errors_t cpu_error_regs;
63850Sstevel@tonic-gate
63860Sstevel@tonic-gate get_cpu_error_state(&cpu_error_regs);
63870Sstevel@tonic-gate (void) cpu_ce_delayed_ec_logout(cpu_error_regs.afar);
63880Sstevel@tonic-gate clop->clo_data.chd_afsr = cpu_error_regs.afsr;
63890Sstevel@tonic-gate clop->clo_data.chd_afar = cpu_error_regs.afar;
63900Sstevel@tonic-gate clop->clo_data.chd_afsr_ext = cpu_error_regs.afsr_ext;
63910Sstevel@tonic-gate clop->clo_sdw_data.chd_afsr = cpu_error_regs.shadow_afsr;
63920Sstevel@tonic-gate clop->clo_sdw_data.chd_afar = cpu_error_regs.shadow_afar;
63930Sstevel@tonic-gate clop->clo_sdw_data.chd_afsr_ext =
63940Sstevel@tonic-gate cpu_error_regs.shadow_afsr_ext;
63950Sstevel@tonic-gate #if defined(SERRANO)
63960Sstevel@tonic-gate clop->clo_data.chd_afar2 = cpu_error_regs.afar2;
63970Sstevel@tonic-gate #endif /* SERRANO */
63980Sstevel@tonic-gate ch_flt->flt_data_incomplete = 1;
63990Sstevel@tonic-gate
64000Sstevel@tonic-gate /*
64010Sstevel@tonic-gate * The logging/clear code expects AFSR/AFAR to be cleared.
64020Sstevel@tonic-gate * The trap handler does it for CEEN enabled errors
64030Sstevel@tonic-gate * so we need to do it here.
64040Sstevel@tonic-gate */
64050Sstevel@tonic-gate set_cpu_error_state(&cpu_error_regs);
64060Sstevel@tonic-gate }
64070Sstevel@tonic-gate
64080Sstevel@tonic-gate #if defined(JALAPENO) || defined(SERRANO)
64090Sstevel@tonic-gate /*
64100Sstevel@tonic-gate * FRC: Can't scrub memory as we don't have AFAR for Jalapeno.
64110Sstevel@tonic-gate * For Serrano, even though we do have the AFAR, we still do the
64120Sstevel@tonic-gate * scrub on the RCE side since that's where the error type can
64130Sstevel@tonic-gate * be properly classified as intermittent, persistent, etc.
64140Sstevel@tonic-gate *
64150Sstevel@tonic-gate * CE/RCE: If error is in memory and AFAR is valid, scrub the memory.
64160Sstevel@tonic-gate * Must scrub memory before cpu_queue_events, as scrubbing memory sets
64170Sstevel@tonic-gate * the flt_status bits.
64180Sstevel@tonic-gate */
64190Sstevel@tonic-gate if ((afsr & (C_AFSR_CE|C_AFSR_RCE)) &&
64200Sstevel@tonic-gate (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
64210Sstevel@tonic-gate cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_RCE)))) {
64220Sstevel@tonic-gate cpu_ce_scrub_mem_err(aflt, B_TRUE);
64230Sstevel@tonic-gate }
64240Sstevel@tonic-gate #else /* JALAPENO || SERRANO */
64250Sstevel@tonic-gate /*
64260Sstevel@tonic-gate * CE/EMC: If error is in memory and AFAR is valid, scrub the memory.
64270Sstevel@tonic-gate * Must scrub memory before cpu_queue_events, as scrubbing memory sets
64280Sstevel@tonic-gate * the flt_status bits.
64290Sstevel@tonic-gate */
64300Sstevel@tonic-gate if (afsr & (C_AFSR_CE|C_AFSR_EMC)) {
64310Sstevel@tonic-gate if (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
64320Sstevel@tonic-gate cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_EMC))) {
64330Sstevel@tonic-gate cpu_ce_scrub_mem_err(aflt, B_TRUE);
64340Sstevel@tonic-gate }
64350Sstevel@tonic-gate }
64360Sstevel@tonic-gate
64370Sstevel@tonic-gate #endif /* JALAPENO || SERRANO */
64380Sstevel@tonic-gate
64390Sstevel@tonic-gate /*
64400Sstevel@tonic-gate * Update flt_prot if this error occurred under on_trap protection.
64410Sstevel@tonic-gate */
64420Sstevel@tonic-gate if (otp != NULL && (otp->ot_prot & OT_DATA_EC))
64430Sstevel@tonic-gate aflt->flt_prot = AFLT_PROT_EC;
64440Sstevel@tonic-gate
64450Sstevel@tonic-gate /*
64460Sstevel@tonic-gate * Queue events on the async event queue, one event per error bit.
64470Sstevel@tonic-gate */
64480Sstevel@tonic-gate if (cpu_queue_events(ch_flt, pr_reason, afsr_errs, clop) == 0 ||
64490Sstevel@tonic-gate (afsr_errs & (C_AFSR_CECC_ERRS | C_AFSR_EXT_CECC_ERRS)) == 0) {
64500Sstevel@tonic-gate ch_flt->flt_type = CPU_INV_AFSR;
64510Sstevel@tonic-gate cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
64520Sstevel@tonic-gate (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
64530Sstevel@tonic-gate aflt->flt_panic);
64540Sstevel@tonic-gate }
64550Sstevel@tonic-gate
64560Sstevel@tonic-gate /*
64570Sstevel@tonic-gate * Zero out + invalidate CPU logout.
64580Sstevel@tonic-gate */
64590Sstevel@tonic-gate if (clop) {
64600Sstevel@tonic-gate bzero(clop, sizeof (ch_cpu_logout_t));
64610Sstevel@tonic-gate clop->clo_data.chd_afar = LOGOUT_INVALID;
64620Sstevel@tonic-gate }
64630Sstevel@tonic-gate
64640Sstevel@tonic-gate /*
64650Sstevel@tonic-gate * If either a CPC, WDC or EDC error has occurred while CEEN
64660Sstevel@tonic-gate * was disabled, we need to flush either the entire
64670Sstevel@tonic-gate * E$ or an E$ line.
64680Sstevel@tonic-gate */
64690Sstevel@tonic-gate #if defined(JALAPENO) || defined(SERRANO)
64700Sstevel@tonic-gate if (afsr & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_CPU | C_AFSR_WDC))
64710Sstevel@tonic-gate #else /* JALAPENO || SERRANO */
64720Sstevel@tonic-gate if (afsr_errs & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_WDC | C_AFSR_L3_EDC |
64730Sstevel@tonic-gate C_AFSR_L3_CPC | C_AFSR_L3_WDC))
64740Sstevel@tonic-gate #endif /* JALAPENO || SERRANO */
64750Sstevel@tonic-gate cpu_error_ecache_flush(ch_flt);
64760Sstevel@tonic-gate
64770Sstevel@tonic-gate }
64780Sstevel@tonic-gate
64790Sstevel@tonic-gate /*
64800Sstevel@tonic-gate * Depending on the error type, we determine whether we
64810Sstevel@tonic-gate * need to flush the entire Ecache or just a line.
64820Sstevel@tonic-gate */
64830Sstevel@tonic-gate static int
64840Sstevel@tonic-gate cpu_error_ecache_flush_required(ch_async_flt_t *ch_flt)
64850Sstevel@tonic-gate {
64860Sstevel@tonic-gate struct async_flt *aflt;
64870Sstevel@tonic-gate uint64_t afsr;
64880Sstevel@tonic-gate uint64_t afsr_errs = ch_flt->afsr_errs;
64890Sstevel@tonic-gate
64900Sstevel@tonic-gate aflt = (struct async_flt *)ch_flt;
64910Sstevel@tonic-gate afsr = aflt->flt_stat;
64920Sstevel@tonic-gate
64930Sstevel@tonic-gate /*
64940Sstevel@tonic-gate * If we got multiple errors, no point in trying
64950Sstevel@tonic-gate * the individual cases, just flush the whole cache
64960Sstevel@tonic-gate */
64970Sstevel@tonic-gate if (afsr & C_AFSR_ME) {
64980Sstevel@tonic-gate return (ECACHE_FLUSH_ALL);
64990Sstevel@tonic-gate }
65000Sstevel@tonic-gate
65010Sstevel@tonic-gate /*
65020Sstevel@tonic-gate * If either a CPC, WDC or EDC error has occurred while CEEN
65030Sstevel@tonic-gate * was disabled, we need to flush the entire E$. We can't just
65040Sstevel@tonic-gate * flush the cache line affected as the ME bit
65050Sstevel@tonic-gate * is not set when multiple correctable errors of the same
65060Sstevel@tonic-gate * type occur, so we might have multiple CPC or EDC errors,
65070Sstevel@tonic-gate * with only the first recorded.
65080Sstevel@tonic-gate */
65090Sstevel@tonic-gate #if defined(JALAPENO) || defined(SERRANO)
65100Sstevel@tonic-gate if (afsr & (C_AFSR_CPC | C_AFSR_CPU | C_AFSR_EDC | C_AFSR_WDC)) {
65110Sstevel@tonic-gate #else /* JALAPENO || SERRANO */
65120Sstevel@tonic-gate if (afsr_errs & (C_AFSR_CPC | C_AFSR_EDC | C_AFSR_WDC | C_AFSR_L3_CPC |
65130Sstevel@tonic-gate C_AFSR_L3_EDC | C_AFSR_L3_WDC)) {
65140Sstevel@tonic-gate #endif /* JALAPENO || SERRANO */
65150Sstevel@tonic-gate return (ECACHE_FLUSH_ALL);
65160Sstevel@tonic-gate }
65170Sstevel@tonic-gate
65180Sstevel@tonic-gate #if defined(JALAPENO) || defined(SERRANO)
65190Sstevel@tonic-gate /*
65200Sstevel@tonic-gate * If only UE or RUE is set, flush the Ecache line, otherwise
65210Sstevel@tonic-gate * flush the entire Ecache.
65220Sstevel@tonic-gate */
65230Sstevel@tonic-gate if (afsr & (C_AFSR_UE|C_AFSR_RUE)) {
65240Sstevel@tonic-gate if ((afsr & C_AFSR_ALL_ERRS) == C_AFSR_UE ||
65250Sstevel@tonic-gate (afsr & C_AFSR_ALL_ERRS) == C_AFSR_RUE) {
65260Sstevel@tonic-gate return (ECACHE_FLUSH_LINE);
65270Sstevel@tonic-gate } else {
65280Sstevel@tonic-gate return (ECACHE_FLUSH_ALL);
65290Sstevel@tonic-gate }
65300Sstevel@tonic-gate }
65310Sstevel@tonic-gate #else /* JALAPENO || SERRANO */
65320Sstevel@tonic-gate /*
65330Sstevel@tonic-gate * If UE only is set, flush the Ecache line, otherwise
65340Sstevel@tonic-gate * flush the entire Ecache.
65350Sstevel@tonic-gate */
65360Sstevel@tonic-gate if (afsr_errs & C_AFSR_UE) {
65370Sstevel@tonic-gate if ((afsr_errs & (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) ==
65380Sstevel@tonic-gate C_AFSR_UE) {
65390Sstevel@tonic-gate return (ECACHE_FLUSH_LINE);
65400Sstevel@tonic-gate } else {
65410Sstevel@tonic-gate return (ECACHE_FLUSH_ALL);
65420Sstevel@tonic-gate }
65430Sstevel@tonic-gate }
65440Sstevel@tonic-gate #endif /* JALAPENO || SERRANO */
65450Sstevel@tonic-gate
65460Sstevel@tonic-gate /*
65470Sstevel@tonic-gate * EDU: If EDU only is set, flush the ecache line, otherwise
65480Sstevel@tonic-gate * flush the entire Ecache.
65490Sstevel@tonic-gate */
65500Sstevel@tonic-gate if (afsr_errs & (C_AFSR_EDU | C_AFSR_L3_EDU)) {
65510Sstevel@tonic-gate if (((afsr_errs & ~C_AFSR_EDU) == 0) ||
65520Sstevel@tonic-gate ((afsr_errs & ~C_AFSR_L3_EDU) == 0)) {
65530Sstevel@tonic-gate return (ECACHE_FLUSH_LINE);
65540Sstevel@tonic-gate } else {
65550Sstevel@tonic-gate return (ECACHE_FLUSH_ALL);
65560Sstevel@tonic-gate }
65570Sstevel@tonic-gate }
65580Sstevel@tonic-gate
65590Sstevel@tonic-gate /*
65600Sstevel@tonic-gate * BERR: If BERR only is set, flush the Ecache line, otherwise
65610Sstevel@tonic-gate * flush the entire Ecache.
65620Sstevel@tonic-gate */
65630Sstevel@tonic-gate if (afsr_errs & C_AFSR_BERR) {
65640Sstevel@tonic-gate if ((afsr_errs & ~C_AFSR_BERR) == 0) {
65650Sstevel@tonic-gate return (ECACHE_FLUSH_LINE);
65660Sstevel@tonic-gate } else {
65670Sstevel@tonic-gate return (ECACHE_FLUSH_ALL);
65680Sstevel@tonic-gate }
65690Sstevel@tonic-gate }
65700Sstevel@tonic-gate
65710Sstevel@tonic-gate return (0);
65720Sstevel@tonic-gate }
65730Sstevel@tonic-gate
65740Sstevel@tonic-gate void
65750Sstevel@tonic-gate cpu_error_ecache_flush(ch_async_flt_t *ch_flt)
65760Sstevel@tonic-gate {
65770Sstevel@tonic-gate int ecache_flush_flag =
65780Sstevel@tonic-gate cpu_error_ecache_flush_required(ch_flt);
65790Sstevel@tonic-gate
65800Sstevel@tonic-gate /*
65810Sstevel@tonic-gate * Flush Ecache line or entire Ecache based on above checks.
65820Sstevel@tonic-gate */
65830Sstevel@tonic-gate if (ecache_flush_flag == ECACHE_FLUSH_ALL)
65840Sstevel@tonic-gate cpu_flush_ecache();
65850Sstevel@tonic-gate else if (ecache_flush_flag == ECACHE_FLUSH_LINE) {
65860Sstevel@tonic-gate cpu_flush_ecache_line(ch_flt);
65870Sstevel@tonic-gate }
65880Sstevel@tonic-gate
65890Sstevel@tonic-gate }
65900Sstevel@tonic-gate
65910Sstevel@tonic-gate /*
65920Sstevel@tonic-gate * Extract the PA portion from the E$ tag.
65930Sstevel@tonic-gate */
65940Sstevel@tonic-gate uint64_t
65950Sstevel@tonic-gate cpu_ectag_to_pa(int setsize, uint64_t tag)
65960Sstevel@tonic-gate {
65970Sstevel@tonic-gate if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
65980Sstevel@tonic-gate return (JG_ECTAG_TO_PA(setsize, tag));
65990Sstevel@tonic-gate else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
66000Sstevel@tonic-gate return (PN_L3TAG_TO_PA(tag));
66010Sstevel@tonic-gate else
66020Sstevel@tonic-gate return (CH_ECTAG_TO_PA(setsize, tag));
66030Sstevel@tonic-gate }
66040Sstevel@tonic-gate
66050Sstevel@tonic-gate /*
66060Sstevel@tonic-gate * Convert the E$ tag PA into an E$ subblock index.
66070Sstevel@tonic-gate */
66086330Sjc25722 int
66090Sstevel@tonic-gate cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr)
66100Sstevel@tonic-gate {
66110Sstevel@tonic-gate if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
66120Sstevel@tonic-gate return (JG_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
66130Sstevel@tonic-gate else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
66140Sstevel@tonic-gate /* Panther has only one subblock per line */
66150Sstevel@tonic-gate return (0);
66160Sstevel@tonic-gate else
66170Sstevel@tonic-gate return (CH_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
66180Sstevel@tonic-gate }
66190Sstevel@tonic-gate
66200Sstevel@tonic-gate /*
66210Sstevel@tonic-gate * All subblocks in an E$ line must be invalid for
66220Sstevel@tonic-gate * the line to be invalid.
66230Sstevel@tonic-gate */
66240Sstevel@tonic-gate int
66250Sstevel@tonic-gate cpu_ectag_line_invalid(int cachesize, uint64_t tag)
66260Sstevel@tonic-gate {
66270Sstevel@tonic-gate if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
66280Sstevel@tonic-gate return (JG_ECTAG_LINE_INVALID(cachesize, tag));
66290Sstevel@tonic-gate else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
66300Sstevel@tonic-gate return (PN_L3_LINE_INVALID(tag));
66310Sstevel@tonic-gate else
66320Sstevel@tonic-gate return (CH_ECTAG_LINE_INVALID(cachesize, tag));
66330Sstevel@tonic-gate }
66340Sstevel@tonic-gate
66350Sstevel@tonic-gate /*
66360Sstevel@tonic-gate * Extract state bits for a subblock given the tag. Note that for Panther
66370Sstevel@tonic-gate * this works on both L2 and L3 tags.
66380Sstevel@tonic-gate */
66396330Sjc25722 int
66400Sstevel@tonic-gate cpu_ectag_pa_to_subblk_state(int cachesize, uint64_t subaddr, uint64_t tag)
66410Sstevel@tonic-gate {
66420Sstevel@tonic-gate if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
66430Sstevel@tonic-gate return (JG_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
66440Sstevel@tonic-gate else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
66450Sstevel@tonic-gate return (tag & CH_ECSTATE_MASK);
66460Sstevel@tonic-gate else
66470Sstevel@tonic-gate return (CH_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
66480Sstevel@tonic-gate }
66490Sstevel@tonic-gate
66500Sstevel@tonic-gate /*
66510Sstevel@tonic-gate * Cpu specific initialization.
66520Sstevel@tonic-gate */
66530Sstevel@tonic-gate void
66540Sstevel@tonic-gate cpu_mp_init(void)
66550Sstevel@tonic-gate {
66560Sstevel@tonic-gate #ifdef CHEETAHPLUS_ERRATUM_25
66570Sstevel@tonic-gate if (cheetah_sendmondo_recover) {
66580Sstevel@tonic-gate cheetah_nudge_init();
66590Sstevel@tonic-gate }
66600Sstevel@tonic-gate #endif
66610Sstevel@tonic-gate }
66620Sstevel@tonic-gate
66630Sstevel@tonic-gate void
66640Sstevel@tonic-gate cpu_ereport_post(struct async_flt *aflt)
66650Sstevel@tonic-gate {
66660Sstevel@tonic-gate char *cpu_type, buf[FM_MAX_CLASS];
66670Sstevel@tonic-gate nv_alloc_t *nva = NULL;
66680Sstevel@tonic-gate nvlist_t *ereport, *detector, *resource;
66690Sstevel@tonic-gate errorq_elem_t *eqep;
66700Sstevel@tonic-gate ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
66710Sstevel@tonic-gate char unum[UNUM_NAMLEN];
66722436Smb91622 int synd_code;
66732381Smikechr uint8_t msg_type;
66740Sstevel@tonic-gate plat_ecc_ch_async_flt_t plat_ecc_ch_flt;
66750Sstevel@tonic-gate
66760Sstevel@tonic-gate if (aflt->flt_panic || panicstr) {
66770Sstevel@tonic-gate eqep = errorq_reserve(ereport_errorq);
66780Sstevel@tonic-gate if (eqep == NULL)
66790Sstevel@tonic-gate return;
66800Sstevel@tonic-gate ereport = errorq_elem_nvl(ereport_errorq, eqep);
66810Sstevel@tonic-gate nva = errorq_elem_nva(ereport_errorq, eqep);
66820Sstevel@tonic-gate } else {
66830Sstevel@tonic-gate ereport = fm_nvlist_create(nva);
66840Sstevel@tonic-gate }
66850Sstevel@tonic-gate
66860Sstevel@tonic-gate /*
66870Sstevel@tonic-gate * Create the scheme "cpu" FMRI.
66880Sstevel@tonic-gate */
66890Sstevel@tonic-gate detector = fm_nvlist_create(nva);
66900Sstevel@tonic-gate resource = fm_nvlist_create(nva);
66910Sstevel@tonic-gate switch (cpunodes[aflt->flt_inst].implementation) {
66920Sstevel@tonic-gate case CHEETAH_IMPL:
66930Sstevel@tonic-gate cpu_type = FM_EREPORT_CPU_USIII;
66940Sstevel@tonic-gate break;
66950Sstevel@tonic-gate case CHEETAH_PLUS_IMPL:
66960Sstevel@tonic-gate cpu_type = FM_EREPORT_CPU_USIIIplus;
66970Sstevel@tonic-gate break;
66980Sstevel@tonic-gate case JALAPENO_IMPL:
66990Sstevel@tonic-gate cpu_type = FM_EREPORT_CPU_USIIIi;
67000Sstevel@tonic-gate break;
67010Sstevel@tonic-gate case SERRANO_IMPL:
67020Sstevel@tonic-gate cpu_type = FM_EREPORT_CPU_USIIIiplus;
67030Sstevel@tonic-gate break;
67040Sstevel@tonic-gate case JAGUAR_IMPL:
67050Sstevel@tonic-gate cpu_type = FM_EREPORT_CPU_USIV;
67060Sstevel@tonic-gate break;
67070Sstevel@tonic-gate case PANTHER_IMPL:
67080Sstevel@tonic-gate cpu_type = FM_EREPORT_CPU_USIVplus;
67090Sstevel@tonic-gate break;
67100Sstevel@tonic-gate default:
67110Sstevel@tonic-gate cpu_type = FM_EREPORT_CPU_UNSUPPORTED;
67120Sstevel@tonic-gate break;
67130Sstevel@tonic-gate }
67142381Smikechr
67152381Smikechr cpu_fmri_cpu_set(detector, aflt->flt_inst);
67160Sstevel@tonic-gate
67170Sstevel@tonic-gate /*
67180Sstevel@tonic-gate * Encode all the common data into the ereport.
67190Sstevel@tonic-gate */
67200Sstevel@tonic-gate (void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s",
67215219Skm84432 FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class);
67220Sstevel@tonic-gate
67230Sstevel@tonic-gate fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
67240Sstevel@tonic-gate fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, FM_ENA_FMT1),
67250Sstevel@tonic-gate detector, NULL);
67260Sstevel@tonic-gate
67270Sstevel@tonic-gate /*
67280Sstevel@tonic-gate * Encode the error-specific data that was saved in
67290Sstevel@tonic-gate * the async_flt structure into the ereport.
67300Sstevel@tonic-gate */
67310Sstevel@tonic-gate cpu_payload_add_aflt(aflt, ereport, resource,
67320Sstevel@tonic-gate &plat_ecc_ch_flt.ecaf_afar_status,
67330Sstevel@tonic-gate &plat_ecc_ch_flt.ecaf_synd_status);
67340Sstevel@tonic-gate
67350Sstevel@tonic-gate if (aflt->flt_panic || panicstr) {
67360Sstevel@tonic-gate errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
67370Sstevel@tonic-gate } else {
67380Sstevel@tonic-gate (void) fm_ereport_post(ereport, EVCH_TRYHARD);
67390Sstevel@tonic-gate fm_nvlist_destroy(ereport, FM_NVA_FREE);
67400Sstevel@tonic-gate fm_nvlist_destroy(detector, FM_NVA_FREE);
67410Sstevel@tonic-gate fm_nvlist_destroy(resource, FM_NVA_FREE);
67420Sstevel@tonic-gate }
67430Sstevel@tonic-gate /*
67440Sstevel@tonic-gate * Send the enhanced error information (plat_ecc_error2_data_t)
67450Sstevel@tonic-gate * to the SC only if it can process it.
67460Sstevel@tonic-gate */
67470Sstevel@tonic-gate
67480Sstevel@tonic-gate if (&plat_ecc_capability_sc_get &&
67490Sstevel@tonic-gate plat_ecc_capability_sc_get(PLAT_ECC_ERROR2_MESSAGE)) {
67500Sstevel@tonic-gate msg_type = cpu_flt_bit_to_plat_error(aflt);
67510Sstevel@tonic-gate if (msg_type != PLAT_ECC_ERROR2_NONE) {
67520Sstevel@tonic-gate /*
67530Sstevel@tonic-gate * If the AFAR status is not invalid, do a unum lookup.
67540Sstevel@tonic-gate */
67550Sstevel@tonic-gate if (plat_ecc_ch_flt.ecaf_afar_status !=
67560Sstevel@tonic-gate AFLT_STAT_INVALID) {
67572436Smb91622 synd_code = synd_to_synd_code(
67582436Smb91622 plat_ecc_ch_flt.ecaf_synd_status,
67592436Smb91622 aflt->flt_synd, ch_flt->flt_bit);
67602436Smb91622 (void) cpu_get_mem_unum_synd(synd_code,
67612436Smb91622 aflt, unum);
67620Sstevel@tonic-gate } else {
67630Sstevel@tonic-gate unum[0] = '\0';
67640Sstevel@tonic-gate }
67650Sstevel@tonic-gate plat_ecc_ch_flt.ecaf_sdw_afar = ch_flt->flt_sdw_afar;
67660Sstevel@tonic-gate plat_ecc_ch_flt.ecaf_sdw_afsr = ch_flt->flt_sdw_afsr;
67670Sstevel@tonic-gate plat_ecc_ch_flt.ecaf_afsr_ext = ch_flt->afsr_ext;
67680Sstevel@tonic-gate plat_ecc_ch_flt.ecaf_sdw_afsr_ext =
67690Sstevel@tonic-gate ch_flt->flt_sdw_afsr_ext;
67700Sstevel@tonic-gate
67710Sstevel@tonic-gate if (&plat_log_fruid_error2)
67720Sstevel@tonic-gate plat_log_fruid_error2(msg_type, unum, aflt,
67730Sstevel@tonic-gate &plat_ecc_ch_flt);
67740Sstevel@tonic-gate }
67750Sstevel@tonic-gate }
67760Sstevel@tonic-gate }
67770Sstevel@tonic-gate
67780Sstevel@tonic-gate void
67790Sstevel@tonic-gate cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
67800Sstevel@tonic-gate {
67810Sstevel@tonic-gate int status;
67820Sstevel@tonic-gate ddi_fm_error_t de;
67830Sstevel@tonic-gate
67840Sstevel@tonic-gate bzero(&de, sizeof (ddi_fm_error_t));
67850Sstevel@tonic-gate
67860Sstevel@tonic-gate de.fme_version = DDI_FME_VERSION;
67870Sstevel@tonic-gate de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
67880Sstevel@tonic-gate FM_ENA_FMT1);
67890Sstevel@tonic-gate de.fme_flag = expected;
67900Sstevel@tonic-gate de.fme_bus_specific = (void *)aflt->flt_addr;
67910Sstevel@tonic-gate status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
67920Sstevel@tonic-gate if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
67930Sstevel@tonic-gate aflt->flt_panic = 1;
67940Sstevel@tonic-gate }
67950Sstevel@tonic-gate
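/*
 * Illustrative sketch (hypothetical; no such handler exists in this
 * module): a nexus driver that registered a fault handler via
 * ndi_fm_handler_register() receives the ddi_fm_error_t built above.
 * It can match fme_bus_specific against the addresses it owns and
 * return DDI_FM_FATAL, DDI_FM_NONFATAL, or DDI_FM_UNKNOWN, which
 * ndi_fm_handler_dispatch() folds into the status checked above.
 * The address range below is a stand-in for whatever the driver
 * actually owns.
 */
/*ARGSUSED*/
static int
example_bus_err_handler(dev_info_t *dip, ddi_fm_error_t *dep,
    const void *not_used)
{
	uintptr_t addr = (uintptr_t)dep->fme_bus_specific;
	uintptr_t example_base = 0, example_limit = 0;	/* hypothetical */

	if (addr < example_base || addr >= example_limit)
		return (DDI_FM_UNKNOWN);	/* not our address */
	return (DDI_FM_NONFATAL);		/* claimed, non-fatal */
}
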
67960Sstevel@tonic-gate void
67970Sstevel@tonic-gate cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
67980Sstevel@tonic-gate errorq_t *eqp, uint_t flag)
67990Sstevel@tonic-gate {
68000Sstevel@tonic-gate struct async_flt *aflt = (struct async_flt *)payload;
68010Sstevel@tonic-gate
68020Sstevel@tonic-gate aflt->flt_erpt_class = error_class;
68030Sstevel@tonic-gate errorq_dispatch(eqp, payload, payload_sz, flag);
68040Sstevel@tonic-gate }
68050Sstevel@tonic-gate
68060Sstevel@tonic-gate /*
68070Sstevel@tonic-gate * This routine may be called by the IO module, but does not do
68080Sstevel@tonic-gate * anything in this cpu module. The SERD algorithm is handled by
68090Sstevel@tonic-gate * the cpumem-diagnosis engine instead.
68100Sstevel@tonic-gate */
68110Sstevel@tonic-gate /*ARGSUSED*/
68120Sstevel@tonic-gate void
68130Sstevel@tonic-gate cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
68140Sstevel@tonic-gate {}
68150Sstevel@tonic-gate
68160Sstevel@tonic-gate void
68170Sstevel@tonic-gate adjust_hw_copy_limits(int ecache_size)
68180Sstevel@tonic-gate {
68190Sstevel@tonic-gate /*
68200Sstevel@tonic-gate * Set hw copy limits.
68210Sstevel@tonic-gate *
68220Sstevel@tonic-gate * /etc/system will be parsed later and can override one or more
68230Sstevel@tonic-gate * of these settings.
68240Sstevel@tonic-gate *
68250Sstevel@tonic-gate * At this time, ecache size seems only mildly relevant;
68260Sstevel@tonic-gate * the limits appear to be dominated by d-cache behavior and
68270Sstevel@tonic-gate * the stalls we see on misses.
68280Sstevel@tonic-gate *
68290Sstevel@tonic-gate * Cycle measurement indicates that 2-byte-aligned copies fare
68300Sstevel@tonic-gate * little better than doing things with VIS at around 512 bytes.
68310Sstevel@tonic-gate * 4-byte-aligned copies show promise until around 1024 bytes.
68320Sstevel@tonic-gate * 8-byte-aligned copies are faster whenever the source and
68330Sstevel@tonic-gate * destination data are in cache and the total size is less than
68340Sstevel@tonic-gate * 2 Kbytes; the 2K limit seems to be driven by the 2K write cache.
68350Sstevel@tonic-gate * When more than 2K is copied in non-VIS mode, stores back up
68360Sstevel@tonic-gate * in the write cache. In VIS mode, the write cache is
68370Sstevel@tonic-gate * bypassed, allowing faster cache-line writes aligned on cache
68380Sstevel@tonic-gate * boundaries.
68390Sstevel@tonic-gate *
68400Sstevel@tonic-gate * In addition, in non-VIS mode, there is no prefetching, so
68410Sstevel@tonic-gate * for larger copies, the advantage of prefetching to avoid even
68420Sstevel@tonic-gate * occasional cache misses is enough to justify using the VIS code.
68430Sstevel@tonic-gate *
68440Sstevel@tonic-gate * During testing, it was discovered that netbench ran 3% slower
68450Sstevel@tonic-gate * when hw_copy_limit_8 was 2K or larger. Apparently for server
68460Sstevel@tonic-gate * applications, data is only used once (copied to the output
68470Sstevel@tonic-gate * buffer, then copied by the network device off the system). Using
68480Sstevel@tonic-gate * the VIS copy saves more L2 cache state. Network copies are
68490Sstevel@tonic-gate * around 1.3K to 1.5K in size for historical reasons.
68500Sstevel@tonic-gate *
68510Sstevel@tonic-gate * Therefore, a limit of 1K bytes will be used for the 8-byte-
68520Sstevel@tonic-gate * aligned copy even for large caches and an 8 MB ecache. The
68530Sstevel@tonic-gate * infrastructure to allow different limits for different sized
68540Sstevel@tonic-gate * caches is kept to allow further tuning in later releases.
68550Sstevel@tonic-gate */
68560Sstevel@tonic-gate
68570Sstevel@tonic-gate if (min_ecache_size == 0 && use_hw_bcopy) {
68580Sstevel@tonic-gate /*
68590Sstevel@tonic-gate * First time through; this should be before /etc/system
68600Sstevel@tonic-gate * is read.
68610Sstevel@tonic-gate * Could skip the checks for zero but this lets us
68620Sstevel@tonic-gate * preserve any debugger rewrites.
68630Sstevel@tonic-gate */
68640Sstevel@tonic-gate if (hw_copy_limit_1 == 0) {
68650Sstevel@tonic-gate hw_copy_limit_1 = VIS_COPY_THRESHOLD;
68660Sstevel@tonic-gate priv_hcl_1 = hw_copy_limit_1;
68670Sstevel@tonic-gate }
68680Sstevel@tonic-gate if (hw_copy_limit_2 == 0) {
68690Sstevel@tonic-gate hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD;
68700Sstevel@tonic-gate priv_hcl_2 = hw_copy_limit_2;
68710Sstevel@tonic-gate }
68720Sstevel@tonic-gate if (hw_copy_limit_4 == 0) {
68730Sstevel@tonic-gate hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD;
68740Sstevel@tonic-gate priv_hcl_4 = hw_copy_limit_4;
68750Sstevel@tonic-gate }
68760Sstevel@tonic-gate if (hw_copy_limit_8 == 0) {
68770Sstevel@tonic-gate hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD;
68780Sstevel@tonic-gate priv_hcl_8 = hw_copy_limit_8;
68790Sstevel@tonic-gate }
68800Sstevel@tonic-gate min_ecache_size = ecache_size;
68810Sstevel@tonic-gate } else {
68820Sstevel@tonic-gate /*
68830Sstevel@tonic-gate * MP initialization. Called *after* /etc/system has
68840Sstevel@tonic-gate * been parsed. One CPU has already been initialized.
68850Sstevel@tonic-gate * Need to cater for /etc/system having scragged one
68860Sstevel@tonic-gate * of our values.
68870Sstevel@tonic-gate */
68880Sstevel@tonic-gate if (ecache_size == min_ecache_size) {
68890Sstevel@tonic-gate /*
68900Sstevel@tonic-gate * Same size ecache. We do nothing unless we
68910Sstevel@tonic-gate * have a pessimistic ecache setting. In that
68920Sstevel@tonic-gate * case we become more optimistic (if the cache is
68930Sstevel@tonic-gate * large enough).
68940Sstevel@tonic-gate */
68950Sstevel@tonic-gate if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) {
68960Sstevel@tonic-gate /*
68970Sstevel@tonic-gate * Need to adjust hw_copy_limit* from our
68980Sstevel@tonic-gate * pessimistic uniprocessor value to a more
68990Sstevel@tonic-gate * optimistic MP value *iff* it hasn't been
69000Sstevel@tonic-gate * reset.
69010Sstevel@tonic-gate */
69020Sstevel@tonic-gate if ((ecache_size > 1048576) &&
69030Sstevel@tonic-gate (priv_hcl_8 == hw_copy_limit_8)) {
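/*
 * All three size ranges below currently share the same
 * 4 * VIS_COPY_THRESHOLD limit; the distinct branches are
 * kept, per the block comment above, for future per-size
 * tuning.
 */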
69040Sstevel@tonic-gate if (ecache_size <= 2097152)
69050Sstevel@tonic-gate hw_copy_limit_8 = 4 *
69060Sstevel@tonic-gate VIS_COPY_THRESHOLD;
69070Sstevel@tonic-gate else if (ecache_size <= 4194304)
69080Sstevel@tonic-gate hw_copy_limit_8 = 4 *
69090Sstevel@tonic-gate VIS_COPY_THRESHOLD;
69100Sstevel@tonic-gate else
69110Sstevel@tonic-gate hw_copy_limit_8 = 4 *
69120Sstevel@tonic-gate VIS_COPY_THRESHOLD;
69130Sstevel@tonic-gate priv_hcl_8 = hw_copy_limit_8;
69140Sstevel@tonic-gate }
69150Sstevel@tonic-gate }
69160Sstevel@tonic-gate } else if (ecache_size < min_ecache_size) {
69170Sstevel@tonic-gate /*
69180Sstevel@tonic-gate * A different ecache size. Can this even happen?
69190Sstevel@tonic-gate */
69200Sstevel@tonic-gate if (priv_hcl_8 == hw_copy_limit_8) {
69210Sstevel@tonic-gate /*
69220Sstevel@tonic-gate * The previous value that we set
69230Sstevel@tonic-gate * is unchanged (i.e., it hasn't been
69240Sstevel@tonic-gate * scragged by /etc/system). Rewrite it.
69250Sstevel@tonic-gate */
69260Sstevel@tonic-gate if (ecache_size <= 1048576)
69270Sstevel@tonic-gate hw_copy_limit_8 = 8 *
69280Sstevel@tonic-gate VIS_COPY_THRESHOLD;
69290Sstevel@tonic-gate else if (ecache_size <= 2097152)
69300Sstevel@tonic-gate hw_copy_limit_8 = 8 *
69310Sstevel@tonic-gate VIS_COPY_THRESHOLD;
69320Sstevel@tonic-gate else if (ecache_size <= 4194304)
69330Sstevel@tonic-gate hw_copy_limit_8 = 8 *
69340Sstevel@tonic-gate VIS_COPY_THRESHOLD;
69350Sstevel@tonic-gate else
69360Sstevel@tonic-gate hw_copy_limit_8 = 10 *
69370Sstevel@tonic-gate VIS_COPY_THRESHOLD;
69380Sstevel@tonic-gate priv_hcl_8 = hw_copy_limit_8;
69390Sstevel@tonic-gate min_ecache_size = ecache_size;
69400Sstevel@tonic-gate }
69410Sstevel@tonic-gate }
69420Sstevel@tonic-gate }
69430Sstevel@tonic-gate }
69440Sstevel@tonic-gate
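/*
 * Minimal sketch of the policy the limits above are assumed to drive.
 * The real decision is made in the SPARC assembly copy routines, not
 * in this file, and use_vis_copy() is a hypothetical helper: an
 * N-byte-aligned copy takes the VIS block-copy path only when hardware
 * copy is enabled and its length exceeds hw_copy_limit_N; a limit of
 * zero disables that path for the given alignment.
 */
static int
use_vis_copy(size_t len, uintptr_t alignment)
{
	if (!use_hw_bcopy)
		return (0);
	if ((alignment & 0x7) == 0)
		return (hw_copy_limit_8 != 0 && len > hw_copy_limit_8);
	if ((alignment & 0x3) == 0)
		return (hw_copy_limit_4 != 0 && len > hw_copy_limit_4);
	if ((alignment & 0x1) == 0)
		return (hw_copy_limit_2 != 0 && len > hw_copy_limit_2);
	return (hw_copy_limit_1 != 0 && len > hw_copy_limit_1);
}
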
69450Sstevel@tonic-gate /*
69460Sstevel@tonic-gate * Called from illegal instruction trap handler to see if we can attribute
69470Sstevel@tonic-gate * the trap to an fpras check.
69480Sstevel@tonic-gate */
69490Sstevel@tonic-gate int
69500Sstevel@tonic-gate fpras_chktrap(struct regs *rp)
69510Sstevel@tonic-gate {
69520Sstevel@tonic-gate int op;
69530Sstevel@tonic-gate struct fpras_chkfngrp *cgp;
69540Sstevel@tonic-gate uintptr_t tpc = (uintptr_t)rp->r_pc;
69550Sstevel@tonic-gate
69560Sstevel@tonic-gate if (fpras_chkfngrps == NULL)
69570Sstevel@tonic-gate return (0);
69580Sstevel@tonic-gate
69590Sstevel@tonic-gate cgp = &fpras_chkfngrps[CPU->cpu_id];
69600Sstevel@tonic-gate for (op = 0; op < FPRAS_NCOPYOPS; ++op) {
69610Sstevel@tonic-gate if (tpc >= (uintptr_t)&cgp->fpras_fn[op].fpras_blk0 &&
69620Sstevel@tonic-gate tpc < (uintptr_t)&cgp->fpras_fn[op].fpras_chkresult)
69630Sstevel@tonic-gate break;
69640Sstevel@tonic-gate }
69650Sstevel@tonic-gate if (op == FPRAS_NCOPYOPS)
69660Sstevel@tonic-gate return (0);
69670Sstevel@tonic-gate
69680Sstevel@tonic-gate /*
69690Sstevel@tonic-gate * This is an fpRAS failure caught through an illegal
69700Sstevel@tonic-gate * instruction; resume execution at the trampoline.
69710Sstevel@tonic-gate */
69720Sstevel@tonic-gate rp->r_pc = (uintptr_t)&cgp->fpras_fn[op].fpras_trampoline;
69730Sstevel@tonic-gate rp->r_npc = rp->r_pc + 4;
69740Sstevel@tonic-gate return (1);
69750Sstevel@tonic-gate }
69760Sstevel@tonic-gate
69770Sstevel@tonic-gate /*
69780Sstevel@tonic-gate * fpras_failure is called when an fpras check detects a bad calculation
69790Sstevel@tonic-gate * result or when an illegal instruction trap is attributed to an fpras
69800Sstevel@tonic-gate * check. In all cases we are still bound to the affected CPU.
69810Sstevel@tonic-gate */
69820Sstevel@tonic-gate int
69830Sstevel@tonic-gate fpras_failure(int op, int how)
69840Sstevel@tonic-gate {
69850Sstevel@tonic-gate int use_hw_bcopy_orig, use_hw_bzero_orig;
69860Sstevel@tonic-gate uint_t hcl1_orig, hcl2_orig, hcl4_orig, hcl8_orig;
69870Sstevel@tonic-gate ch_async_flt_t ch_flt;
69880Sstevel@tonic-gate struct async_flt *aflt = (struct async_flt *)&ch_flt;
69890Sstevel@tonic-gate struct fpras_chkfn *sfp, *cfp;
69900Sstevel@tonic-gate uint32_t *sip, *cip;
69910Sstevel@tonic-gate int i;
69920Sstevel@tonic-gate
69930Sstevel@tonic-gate /*
69940Sstevel@tonic-gate * We're running on a sick CPU. Avoid further FPU use at least for
69950Sstevel@tonic-gate * the time in which we dispatch an ereport and (if applicable) panic.
69960Sstevel@tonic-gate */
69970Sstevel@tonic-gate use_hw_bcopy_orig = use_hw_bcopy;
69980Sstevel@tonic-gate use_hw_bzero_orig = use_hw_bzero;
69990Sstevel@tonic-gate hcl1_orig = hw_copy_limit_1;
70000Sstevel@tonic-gate hcl2_orig = hw_copy_limit_2;
70010Sstevel@tonic-gate hcl4_orig = hw_copy_limit_4;
70020Sstevel@tonic-gate hcl8_orig = hw_copy_limit_8;
70030Sstevel@tonic-gate use_hw_bcopy = use_hw_bzero = 0;
70040Sstevel@tonic-gate hw_copy_limit_1 = hw_copy_limit_2 = hw_copy_limit_4 =
70050Sstevel@tonic-gate hw_copy_limit_8 = 0;
70060Sstevel@tonic-gate
70070Sstevel@tonic-gate bzero(&ch_flt, sizeof (ch_async_flt_t));
70080Sstevel@tonic-gate aflt->flt_id = gethrtime_waitfree();
70090Sstevel@tonic-gate aflt->flt_class = CPU_FAULT;
70100Sstevel@tonic-gate aflt->flt_inst = CPU->cpu_id;
70110Sstevel@tonic-gate aflt->flt_status = (how << 8) | op;
70120Sstevel@tonic-gate aflt->flt_payload = FM_EREPORT_PAYLOAD_FPU_HWCOPY;
70130Sstevel@tonic-gate ch_flt.flt_type = CPU_FPUERR;
70140Sstevel@tonic-gate
70150Sstevel@tonic-gate /*
70160Sstevel@tonic-gate * We must panic if the copy operation had no lofault protection -
70170Sstevel@tonic-gate * i.e., don't panic for copyin, copyout, kcopy, and bcopy called
70180Sstevel@tonic-gate * under on_fault and do panic for unprotected bcopy and hwblkpagecopy.
70190Sstevel@tonic-gate */
70200Sstevel@tonic-gate aflt->flt_panic = (curthread->t_lofault == NULL);
70210Sstevel@tonic-gate
70220Sstevel@tonic-gate /*
70230Sstevel@tonic-gate * XOR the source instruction block with the copied instruction
70240Sstevel@tonic-gate * block; this will show us which bit(s) are corrupted.
70250Sstevel@tonic-gate */
70260Sstevel@tonic-gate sfp = (struct fpras_chkfn *)fpras_chkfn_type1;
70270Sstevel@tonic-gate cfp = &fpras_chkfngrps[CPU->cpu_id].fpras_fn[op];
70280Sstevel@tonic-gate if (op == FPRAS_BCOPY || op == FPRAS_COPYOUT) {
70290Sstevel@tonic-gate sip = &sfp->fpras_blk0[0];
70300Sstevel@tonic-gate cip = &cfp->fpras_blk0[0];
70310Sstevel@tonic-gate } else {
70320Sstevel@tonic-gate sip = &sfp->fpras_blk1[0];
70330Sstevel@tonic-gate cip = &cfp->fpras_blk1[0];
70340Sstevel@tonic-gate }
70350Sstevel@tonic-gate for (i = 0; i < 16; ++i, ++sip, ++cip)
70360Sstevel@tonic-gate ch_flt.flt_fpdata[i] = *sip ^ *cip;
70370Sstevel@tonic-gate
70380Sstevel@tonic-gate cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_FPU_HWCOPY, (void *)&ch_flt,
70390Sstevel@tonic-gate sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
70400Sstevel@tonic-gate
70410Sstevel@tonic-gate if (aflt->flt_panic)
70420Sstevel@tonic-gate fm_panic("FPU failure on CPU %d", CPU->cpu_id);
70430Sstevel@tonic-gate
70440Sstevel@tonic-gate /*
70450Sstevel@tonic-gate * We get here for copyin/copyout and kcopy or bcopy where the
70460Sstevel@tonic-gate * caller has used on_fault. We will flag the error so that
70470Sstevel@tonic-gate * the process may be killed. The trap_async_hwerr mechanism will
70480Sstevel@tonic-gate * take appropriate further action (such as a reboot, contract
70490Sstevel@tonic-gate * notification, etc.). Since we may be continuing, we will
70500Sstevel@tonic-gate * restore the global hardware copy acceleration switches.
70510Sstevel@tonic-gate *
70520Sstevel@tonic-gate * When we return from this function to the copy function we want to
70530Sstevel@tonic-gate * avoid potentially bad data being used, i.e., we want the affected
70540Sstevel@tonic-gate * copy function to return an error. The caller should therefore
70550Sstevel@tonic-gate * invoke its lofault handler (which always exists for these functions)
70560Sstevel@tonic-gate * which will return the appropriate error.
70570Sstevel@tonic-gate */
70580Sstevel@tonic-gate ttolwp(curthread)->lwp_pcb.pcb_flags |= ASYNC_HWERR;
70590Sstevel@tonic-gate aston(curthread);
70600Sstevel@tonic-gate
70610Sstevel@tonic-gate use_hw_bcopy = use_hw_bcopy_orig;
70620Sstevel@tonic-gate use_hw_bzero = use_hw_bzero_orig;
70630Sstevel@tonic-gate hw_copy_limit_1 = hcl1_orig;
70640Sstevel@tonic-gate hw_copy_limit_2 = hcl2_orig;
70650Sstevel@tonic-gate hw_copy_limit_4 = hcl4_orig;
70660Sstevel@tonic-gate hw_copy_limit_8 = hcl8_orig;
70670Sstevel@tonic-gate
70680Sstevel@tonic-gate return (1);
70690Sstevel@tonic-gate }
70700Sstevel@tonic-gate
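/*
 * Illustrative sketch (hypothetical helper; nothing calls it): one way
 * a consumer could interpret the XOR diff captured in flt_fpdata by
 * fpras_failure() above.  Each nonzero word pinpoints a corrupted
 * instruction word; counting the set bits gives the number of flipped
 * bits.  This assumes flt_fpdata holds the 16 32-bit words filled in
 * above.
 */
static int
fpras_count_flipped_bits(ch_async_flt_t *ch_flt)
{
	int i, bits = 0;
	uint32_t diff;

	for (i = 0; i < 16; i++) {
		diff = ch_flt->flt_fpdata[i];
		while (diff != 0) {
			diff &= diff - 1;	/* clear lowest set bit */
			bits++;
		}
	}
	return (bits);
}
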
70710Sstevel@tonic-gate #define VIS_BLOCKSIZE 64
70720Sstevel@tonic-gate
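/*
 * Error-path wrapper (the intent is assumed from the _err suffix)
 * around dtrace_blksuword32(): redo the block store of user words
 * with any watchpoints on the target range temporarily disabled.
 */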
70730Sstevel@tonic-gate int
70740Sstevel@tonic-gate dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
70750Sstevel@tonic-gate {
70760Sstevel@tonic-gate int ret, watched;
70770Sstevel@tonic-gate
70780Sstevel@tonic-gate watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
70790Sstevel@tonic-gate ret = dtrace_blksuword32(addr, data, 0);
70800Sstevel@tonic-gate if (watched)
70810Sstevel@tonic-gate watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
70820Sstevel@tonic-gate
70830Sstevel@tonic-gate return (ret);
70840Sstevel@tonic-gate }
70850Sstevel@tonic-gate
70860Sstevel@tonic-gate /*
70870Sstevel@tonic-gate * Called when a cpu enters the CPU_FAULTED state (by the cpu placing the
70880Sstevel@tonic-gate * faulted cpu into that state). Cross-trap to the faulted cpu to clear
70890Sstevel@tonic-gate * CEEN from the EER to disable traps for further disrupting error types
70900Sstevel@tonic-gate * on that cpu. We could cross-call instead, but that has a larger
70910Sstevel@tonic-gate * instruction and data footprint than cross-trapping, and the cpu is known
70920Sstevel@tonic-gate * to be faulted.
70930Sstevel@tonic-gate */
70940Sstevel@tonic-gate
70950Sstevel@tonic-gate void
70960Sstevel@tonic-gate cpu_faulted_enter(struct cpu *cp)
70970Sstevel@tonic-gate {
70980Sstevel@tonic-gate xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_CLRBITS);
70990Sstevel@tonic-gate }
71000Sstevel@tonic-gate
71010Sstevel@tonic-gate /*
71020Sstevel@tonic-gate * Called when a cpu leaves the CPU_FAULTED state to return to one of
71030Sstevel@tonic-gate * offline, spare, or online (by the cpu requesting this state change).
71040Sstevel@tonic-gate * First we cross-call to clear the AFSR (and AFSR_EXT on Panther) of
71050Sstevel@tonic-gate * disrupting error bits that have accumulated without trapping, then
71060Sstevel@tonic-gate * we cross-trap to re-enable CEEN controlled traps.
71070Sstevel@tonic-gate */
71080Sstevel@tonic-gate void
71090Sstevel@tonic-gate cpu_faulted_exit(struct cpu *cp)
71100Sstevel@tonic-gate {
71110Sstevel@tonic-gate ch_cpu_errors_t cpu_error_regs;
71120Sstevel@tonic-gate
71130Sstevel@tonic-gate cpu_error_regs.afsr = C_AFSR_CECC_ERRS;
71140Sstevel@tonic-gate if (IS_PANTHER(cpunodes[cp->cpu_id].implementation))
71150Sstevel@tonic-gate cpu_error_regs.afsr_ext = C_AFSR_EXT_CECC_ERRS;
71160Sstevel@tonic-gate xc_one(cp->cpu_id, (xcfunc_t *)set_cpu_error_state,
71170Sstevel@tonic-gate (uint64_t)&cpu_error_regs, 0);
71180Sstevel@tonic-gate
71190Sstevel@tonic-gate xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_SETBITS);
71200Sstevel@tonic-gate }
71210Sstevel@tonic-gate
71220Sstevel@tonic-gate /*
71230Sstevel@tonic-gate * Return 1 if the errors in ch_flt's AFSR are secondary errors caused by
71240Sstevel@tonic-gate * the errors in the original AFSR, 0 otherwise.
71250Sstevel@tonic-gate *
71260Sstevel@tonic-gate * For all procs, if the initial error was a BERR or TO, then it is possible
71270Sstevel@tonic-gate * that we caused a secondary BERR or TO while logging the
71280Sstevel@tonic-gate * initial error via cpu_run_bus_error_handlers(). If that is the case,
71290Sstevel@tonic-gate * then if the request was protected a panic is still not necessary, and if
71300Sstevel@tonic-gate * it was not protected then aft_panic is already set; either way there is
71310Sstevel@tonic-gate * no need to set aft_panic for the secondary error.
71320Sstevel@tonic-gate *
71330Sstevel@tonic-gate * For Cheetah and Jalapeno, if the original error was a UE which occurred on
71340Sstevel@tonic-gate * a store merge, then the error handling code will call cpu_deferred_error().
71350Sstevel@tonic-gate * When clear_errors() is called, it will determine that secondary errors have
71360Sstevel@tonic-gate * occurred; in particular, the store merge also caused an EDU and a WDU that
71370Sstevel@tonic-gate * weren't discovered until this point.
71380Sstevel@tonic-gate *
71390Sstevel@tonic-gate * We do three checks to verify that we are in this case. If we pass all three
71400Sstevel@tonic-gate * checks, we return 1 to indicate that we should not panic. If any unexpected
71410Sstevel@tonic-gate * errors occur, we return 0.
71420Sstevel@tonic-gate *
71430Sstevel@tonic-gate * For Cheetah+ and derivative procs, the store merge causes a DUE, which is
71440Sstevel@tonic-gate * handled in cpu_disrupting_errors(). Since this function is not even called
71450Sstevel@tonic-gate * in the case we are interested in, we just return 0 for these processors.
71460Sstevel@tonic-gate */
71470Sstevel@tonic-gate /*ARGSUSED*/
71480Sstevel@tonic-gate static int
71490Sstevel@tonic-gate cpu_check_secondary_errors(ch_async_flt_t *ch_flt, uint64_t t_afsr_errs,
71500Sstevel@tonic-gate uint64_t t_afar)
71510Sstevel@tonic-gate {
71520Sstevel@tonic-gate #if !defined(CHEETAH_PLUS)
71540Sstevel@tonic-gate struct async_flt *aflt = (struct async_flt *)ch_flt;
71550Sstevel@tonic-gate #endif /* !CHEETAH_PLUS */
71560Sstevel@tonic-gate
71570Sstevel@tonic-gate /*
71580Sstevel@tonic-gate * Was the original error a BERR or TO and only a BERR or TO
71590Sstevel@tonic-gate * (multiple errors are also OK)?
71600Sstevel@tonic-gate */
71610Sstevel@tonic-gate if ((t_afsr_errs & ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0) {
71620Sstevel@tonic-gate /*
71630Sstevel@tonic-gate * Is the new error a BERR or TO and only a BERR or TO
71640Sstevel@tonic-gate * (multiple errors are also OK)?
71650Sstevel@tonic-gate */
71660Sstevel@tonic-gate if ((ch_flt->afsr_errs &
71670Sstevel@tonic-gate ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0)
71680Sstevel@tonic-gate return (1);
71690Sstevel@tonic-gate }
71700Sstevel@tonic-gate
71710Sstevel@tonic-gate #if defined(CHEETAH_PLUS)
71720Sstevel@tonic-gate return (0);
71730Sstevel@tonic-gate #else /* CHEETAH_PLUS */
71740Sstevel@tonic-gate /*
71750Sstevel@tonic-gate * Now look for secondary effects of a UE on cheetah/jalapeno
71760Sstevel@tonic-gate *
71770Sstevel@tonic-gate * Check that the original error was a UE, and only a UE. Note that
71780Sstevel@tonic-gate * the ME bit will cause us to fail this check.
71790Sstevel@tonic-gate */
71800Sstevel@tonic-gate if (t_afsr_errs != C_AFSR_UE)
71810Sstevel@tonic-gate return (0);
71820Sstevel@tonic-gate
71830Sstevel@tonic-gate /*
71840Sstevel@tonic-gate * Check that the secondary errors were exclusively an EDU and/or a WDU.
71850Sstevel@tonic-gate */
71860Sstevel@tonic-gate if ((ch_flt->afsr_errs & ~(C_AFSR_EDU|C_AFSR_WDU)) != 0)
71870Sstevel@tonic-gate return (0);
71880Sstevel@tonic-gate
71890Sstevel@tonic-gate /*
71900Sstevel@tonic-gate * Check that the AFARs of the original and secondary errors
71910Sstevel@tonic-gate * match to a 64-byte boundary.
71920Sstevel@tonic-gate */
71930Sstevel@tonic-gate if (P2ALIGN(aflt->flt_addr, 64) != P2ALIGN(t_afar, 64))
71940Sstevel@tonic-gate return (0);
71950Sstevel@tonic-gate
71960Sstevel@tonic-gate /*
71970Sstevel@tonic-gate * We've passed all the checks, so it's a secondary error!
71980Sstevel@tonic-gate */
71990Sstevel@tonic-gate return (1);
72000Sstevel@tonic-gate #endif /* CHEETAH_PLUS */
72010Sstevel@tonic-gate }
72020Sstevel@tonic-gate
72030Sstevel@tonic-gate /*
72040Sstevel@tonic-gate * Translate the flt_bit or flt_type into an error type. First, flt_bit
72050Sstevel@tonic-gate * is checked for any valid errors. If found, the error type is
72060Sstevel@tonic-gate * returned. If not found, the flt_type is checked for L1$ parity errors.
72070Sstevel@tonic-gate */
72080Sstevel@tonic-gate /*ARGSUSED*/
72090Sstevel@tonic-gate static uint8_t
72100Sstevel@tonic-gate cpu_flt_bit_to_plat_error(struct async_flt *aflt)
72110Sstevel@tonic-gate {
72120Sstevel@tonic-gate #if defined(JALAPENO)
72130Sstevel@tonic-gate /*
72140Sstevel@tonic-gate * Currently, logging errors to the SC is not supported on Jalapeno.
72150Sstevel@tonic-gate */
72160Sstevel@tonic-gate return (PLAT_ECC_ERROR2_NONE);
72170Sstevel@tonic-gate #else
72180Sstevel@tonic-gate ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
72190Sstevel@tonic-gate
72200Sstevel@tonic-gate switch (ch_flt->flt_bit) {
72210Sstevel@tonic-gate case C_AFSR_CE:
72220Sstevel@tonic-gate return (PLAT_ECC_ERROR2_CE);
72230Sstevel@tonic-gate case C_AFSR_UCC:
72240Sstevel@tonic-gate case C_AFSR_EDC:
72250Sstevel@tonic-gate case C_AFSR_WDC:
72260Sstevel@tonic-gate case C_AFSR_CPC:
72270Sstevel@tonic-gate return (PLAT_ECC_ERROR2_L2_CE);
72280Sstevel@tonic-gate case C_AFSR_EMC:
72290Sstevel@tonic-gate return (PLAT_ECC_ERROR2_EMC);
72300Sstevel@tonic-gate case C_AFSR_IVC:
72310Sstevel@tonic-gate return (PLAT_ECC_ERROR2_IVC);
72320Sstevel@tonic-gate case C_AFSR_UE:
72330Sstevel@tonic-gate return (PLAT_ECC_ERROR2_UE);
72340Sstevel@tonic-gate case C_AFSR_UCU:
72350Sstevel@tonic-gate case C_AFSR_EDU:
72360Sstevel@tonic-gate case C_AFSR_WDU:
72370Sstevel@tonic-gate case C_AFSR_CPU:
72380Sstevel@tonic-gate return (PLAT_ECC_ERROR2_L2_UE);
72390Sstevel@tonic-gate case C_AFSR_IVU:
72400Sstevel@tonic-gate return (PLAT_ECC_ERROR2_IVU);
72410Sstevel@tonic-gate case C_AFSR_TO:
72420Sstevel@tonic-gate return (PLAT_ECC_ERROR2_TO);
72430Sstevel@tonic-gate case C_AFSR_BERR:
72440Sstevel@tonic-gate return (PLAT_ECC_ERROR2_BERR);
72450Sstevel@tonic-gate #if defined(CHEETAH_PLUS)
72460Sstevel@tonic-gate case C_AFSR_L3_EDC:
72470Sstevel@tonic-gate case C_AFSR_L3_UCC:
72480Sstevel@tonic-gate case C_AFSR_L3_CPC:
72490Sstevel@tonic-gate case C_AFSR_L3_WDC:
72500Sstevel@tonic-gate return (PLAT_ECC_ERROR2_L3_CE);
72510Sstevel@tonic-gate case C_AFSR_IMC:
72520Sstevel@tonic-gate return (PLAT_ECC_ERROR2_IMC);
72530Sstevel@tonic-gate case C_AFSR_TSCE:
72540Sstevel@tonic-gate return (PLAT_ECC_ERROR2_L2_TSCE);
72550Sstevel@tonic-gate case C_AFSR_THCE:
72560Sstevel@tonic-gate return (PLAT_ECC_ERROR2_L2_THCE);
72570Sstevel@tonic-gate case C_AFSR_L3_MECC:
72580Sstevel@tonic-gate return (PLAT_ECC_ERROR2_L3_MECC);
72590Sstevel@tonic-gate case C_AFSR_L3_THCE:
72600Sstevel@tonic-gate return (PLAT_ECC_ERROR2_L3_THCE);
72610Sstevel@tonic-gate case C_AFSR_L3_CPU:
72620Sstevel@tonic-gate case C_AFSR_L3_EDU:
72630Sstevel@tonic-gate case C_AFSR_L3_UCU:
72640Sstevel@tonic-gate case C_AFSR_L3_WDU:
72650Sstevel@tonic-gate return (PLAT_ECC_ERROR2_L3_UE);
72660Sstevel@tonic-gate case C_AFSR_DUE:
72670Sstevel@tonic-gate return (PLAT_ECC_ERROR2_DUE);
72680Sstevel@tonic-gate case C_AFSR_DTO:
72690Sstevel@tonic-gate return (PLAT_ECC_ERROR2_DTO);
72700Sstevel@tonic-gate case C_AFSR_DBERR:
72710Sstevel@tonic-gate return (PLAT_ECC_ERROR2_DBERR);
72720Sstevel@tonic-gate #endif /* CHEETAH_PLUS */
72730Sstevel@tonic-gate default:
72740Sstevel@tonic-gate switch (ch_flt->flt_type) {
72750Sstevel@tonic-gate #if defined(CPU_IMP_L1_CACHE_PARITY)
72760Sstevel@tonic-gate case CPU_IC_PARITY:
72770Sstevel@tonic-gate return (PLAT_ECC_ERROR2_IPE);
72780Sstevel@tonic-gate case CPU_DC_PARITY:
72790Sstevel@tonic-gate if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
72800Sstevel@tonic-gate if (ch_flt->parity_data.dpe.cpl_cache ==
72810Sstevel@tonic-gate CPU_PC_PARITY) {
72820Sstevel@tonic-gate return (PLAT_ECC_ERROR2_PCACHE);
72830Sstevel@tonic-gate }
72840Sstevel@tonic-gate }
72850Sstevel@tonic-gate return (PLAT_ECC_ERROR2_DPE);
72860Sstevel@tonic-gate #endif /* CPU_IMP_L1_CACHE_PARITY */
72870Sstevel@tonic-gate case CPU_ITLB_PARITY:
72880Sstevel@tonic-gate return (PLAT_ECC_ERROR2_ITLB);
72890Sstevel@tonic-gate case CPU_DTLB_PARITY:
72900Sstevel@tonic-gate return (PLAT_ECC_ERROR2_DTLB);
72910Sstevel@tonic-gate default:
72920Sstevel@tonic-gate return (PLAT_ECC_ERROR2_NONE);
72930Sstevel@tonic-gate }
72940Sstevel@tonic-gate }
72950Sstevel@tonic-gate #endif /* JALAPENO */
72960Sstevel@tonic-gate }
7297