/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/ddi.h>
#include <sys/sysmacros.h>
#include <sys/archsystm.h>
#include <sys/vmsystm.h>
#include <sys/machparam.h>
#include <sys/machsystm.h>
#include <sys/machthread.h>
#include <sys/cpu.h>
#include <sys/cmp.h>
#include <sys/elf_SPARC.h>
#include <vm/vm_dep.h>
#include <vm/hat_sfmmu.h>
#include <vm/seg_kpm.h>
#include <sys/cpuvar.h>
#include <sys/cheetahregs.h>
#include <sys/us3_module.h>
#include <sys/async.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/dditypes.h>
#include <sys/prom_debug.h>
#include <sys/prom_plat.h>
#include <sys/cpu_module.h>
#include <sys/sysmacros.h>
#include <sys/intreg.h>
#include <sys/clock.h>
#include <sys/platform_module.h>
#include <sys/machtrap.h>
#include <sys/ontrap.h>
#include <sys/panic.h>
#include <sys/memlist.h>
#include <sys/bootconf.h>
#include <sys/ivintr.h>
#include <sys/atomic.h>
#include <sys/taskq.h>
#include <sys/note.h>
#include <sys/ndifm.h>
#include <sys/ddifm.h>
#include <sys/fm/protocol.h>
#include <sys/fm/util.h>
#include <sys/fm/cpu/UltraSPARC-III.h>
#include <sys/fpras_impl.h>
#include <sys/dtrace.h>
#include <sys/watchpoint.h>
#include <sys/plat_ecc_unum.h>
#include <sys/cyclic.h>
#include <sys/errorq.h>
#include <sys/errclassify.h>
#include <sys/pghw.h>

#ifdef	CHEETAHPLUS_ERRATUM_25
#include <sys/xc_impl.h>
#endif	/* CHEETAHPLUS_ERRATUM_25 */

ch_cpu_logout_t	clop_before_flush;
ch_cpu_logout_t	clop_after_flush;
uint_t	flush_retries_done = 0;

/*
 * Note that 'Cheetah PRM' refers to:
 *   SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III
 */

/*
 * Per CPU pointers to physical address of TL>0 logout data areas.
 * These pointers have to be in the kernel nucleus to avoid MMU
 * misses.
 */
uint64_t ch_err_tl1_paddrs[NCPU];

/*
 * One statically allocated structure to use during startup/DR
 * to prevent unnecessary panics.
 */
ch_err_tl1_data_t ch_err_tl1_data;

/*
 * Per CPU pending error at TL>0, used by level15 softint handler
 */
uchar_t ch_err_tl1_pending[NCPU];

/*
 * For deferred CE re-enable after trap.
 */
taskq_t		*ch_check_ce_tq;

/*
 * Internal functions.
 */
static int cpu_async_log_err(void *flt, errorq_elem_t *eqep);
static void cpu_log_diag_info(ch_async_flt_t *ch_flt);
static void cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
    ecc_type_to_info_t *eccp, ch_diag_data_t *cdp);
static int cpu_flt_in_memory_one_event(ch_async_flt_t *ch_flt,
    uint64_t t_afsr_bit);
static int clear_ecc(struct async_flt *ecc);
#if defined(CPU_IMP_ECACHE_ASSOC)
static int cpu_ecache_line_valid(ch_async_flt_t *ch_flt);
#endif
int cpu_ecache_set_size(struct cpu *cp);
static int cpu_ectag_line_invalid(int cachesize, uint64_t tag);
int cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr);
uint64_t cpu_ectag_to_pa(int setsize, uint64_t tag);
int cpu_ectag_pa_to_subblk_state(int cachesize,
    uint64_t subaddr, uint64_t tag);
static void cpu_flush_ecache_line(ch_async_flt_t *ch_flt);
static int afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit);
static int afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit);
static int afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit);
static int afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit);
static int synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit);
static int cpu_get_mem_unum_synd(int synd_code, struct async_flt *, char *buf);
static void cpu_uninit_ecache_scrub_dr(struct cpu *cp);
static void cpu_scrubphys(struct async_flt *aflt);
static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *,
    int *, int *);
static void cpu_payload_add_ecache(struct async_flt *, nvlist_t *);
static void cpu_ereport_init(struct async_flt *aflt);
static int cpu_check_secondary_errors(ch_async_flt_t *, uint64_t, uint64_t);
static uint8_t cpu_flt_bit_to_plat_error(struct async_flt *aflt);
static void cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
    uint64_t nceen, ch_cpu_logout_t *clop);
static int cpu_ce_delayed_ec_logout(uint64_t);
static int cpu_matching_ecache_line(uint64_t, void *, int, int *);
static int cpu_error_is_ecache_data(int, uint64_t);
static void cpu_fmri_cpu_set(nvlist_t *, int);
static int cpu_error_to_resource_type(struct async_flt *aflt);

#ifdef	CHEETAHPLUS_ERRATUM_25
static int mondo_recover_proc(uint16_t, int);
static void cheetah_nudge_init(void);
static void cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
    cyc_time_t *when);
static void cheetah_nudge_buddy(void);
#endif	/* CHEETAHPLUS_ERRATUM_25 */

#if defined(CPU_IMP_L1_CACHE_PARITY)
static void cpu_dcache_parity_info(ch_async_flt_t *ch_flt);
static void cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index);
static void cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
    ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word);
static void cpu_icache_parity_info(ch_async_flt_t *ch_flt);
static void cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index);
static void cpu_pcache_parity_info(ch_async_flt_t *ch_flt);
static void cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index);
static void cpu_payload_add_dcache(struct async_flt *, nvlist_t *);
static void cpu_payload_add_icache(struct async_flt *, nvlist_t *);
#endif	/* CPU_IMP_L1_CACHE_PARITY */

int (*p2get_mem_info)(int synd_code, uint64_t paddr,
    uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
    int *segsp, int *banksp, int *mcidp);

/*
 * This table is used to determine which bit(s) is(are) bad when an ECC
 * error occurs.  The array is indexed by a 9-bit syndrome.  The entries
 * of this array have the following semantics:
 *
 *	00-127	The number of the bad bit, when only one bit is bad.
 *	128	ECC bit C0 is bad.
 *	129	ECC bit C1 is bad.
 *	130	ECC bit C2 is bad.
 *	131	ECC bit C3 is bad.
 *	132	ECC bit C4 is bad.
 *	133	ECC bit C5 is bad.
 *	134	ECC bit C6 is bad.
 *	135	ECC bit C7 is bad.
 *	136	ECC bit C8 is bad.
 *	137-143	reserved for Mtag Data and ECC.
 *	144(M2)	Two bits are bad within a nibble.
 *	145(M3)	Three bits are bad within a nibble.
 *	146(M4)	Four bits are bad within a nibble.
 *	147(M)	Multiple bits (5 or more) are bad.
 *	148	NO bits are bad.
 * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-4,11-5.
 */
#define	C0	128
#define	C1	129
#define	C2	130
#define	C3	131
#define	C4	132
#define	C5	133
#define	C6	134
#define	C7	135
#define	C8	136
#define	MT0	137	/* Mtag Data bit 0 */
#define	MT1	138
#define	MT2	139
#define	MTC0	140	/* Mtag Check bit 0 */
#define	MTC1	141
#define	MTC2	142
#define	MTC3	143
#define	M2	144
#define	M3	145
#define	M4	146
#define	M	147
#define	NA	148
#if defined(JALAPENO) || defined(SERRANO)
#define	S003	149	/* Syndrome 0x003 => likely from CPU/EDU:ST/FRU/BP */
#define	S003MEM	150	/* Syndrome 0x003 => likely from WDU/WBP */
#define	SLAST	S003MEM	/* last special syndrome */
#else /* JALAPENO || SERRANO */
#define	S003	149	/* Syndrome 0x003 => likely from EDU:ST */
#define	S071	150	/* Syndrome 0x071 => likely from WDU/CPU */
#define	S11C	151	/* Syndrome 0x11c => likely from BERR/DBERR */
#define	SLAST	S11C	/* last special syndrome */
#endif /* JALAPENO || SERRANO */
#if defined(JALAPENO) || defined(SERRANO)
#define	BPAR0	152	/* syndromes 152 through 167 for bus parity */
#define	BPAR15	167
#endif /* JALAPENO || SERRANO */

static uint8_t ecc_syndrome_tab[] =
{
	NA, C0, C1, S003, C2, M2, M3, 47, C3, M2, M2, 53, M2, 41, 29, M,
	C4, M, M, 50, M2, 38, 25, M2, M2, 33, 24, M2, 11, M, M2, 16,
	C5, M, M, 46, M2, 37, 19, M2, M, 31, 32, M, 7, M2, M2, 10,
	M2, 40, 13, M2, 59, M, M2, 66, M, M2, M2, 0, M2, 67, 71, M,
	C6, M, M, 43, M, 36, 18, M, M2, 49, 15, M, 63, M2, M2, 6,
	M2, 44, 28, M2, M, M2, M2, 52, 68, M2, M2, 62, M2, M3, M3, M4,
	M2, 26, 106, M2, 64, M, M2, 2, 120, M, M2, M3, M, M3, M3, M4,
#if defined(JALAPENO) || defined(SERRANO)
	116, M2, M2, M3, M2, M3, M, M4, M2, 58, 54, M2, M, M4, M4, M3,
#else /* JALAPENO || SERRANO */
	116, S071, M2, M3, M2, M3, M, M4, M2, 58, 54, M2, M, M4, M4, M3,
#endif /* JALAPENO || SERRANO */
	C7, M2, M, 42, M, 35, 17, M2, M, 45, 14, M2, 21, M2, M2, 5,
	M, 27, M, M, 99, M, M, 3, 114, M2, M2, 20, M2, M3, M3, M,
	M2, 23, 113, M2, 112, M2, M, 51, 95, M, M2, M3, M2, M3, M3, M2,
	103, M, M2, M3, M2, M3, M3, M4, M2, 48, M, M, 73, M2, M, M3,
	M2, 22, 110, M2, 109, M2, M, 9, 108, M2, M, M3, M2, M3, M3, M,
	102, M2, M, M, M2, M3, M3, M, M2, M3, M3, M2, M, M4, M, M3,
	98, M, M2, M3, M2, M, M3, M4, M2, M3, M3, M4, M3, M, M, M,
	M2, M3, M3, M, M3, M, M, M, 56, M4, M, M3, M4, M, M, M,
	C8, M, M2, 39, M, 34, 105, M2, M, 30, 104, M, 101, M, M, 4,
#if defined(JALAPENO) || defined(SERRANO)
	M, M, 100, M, 83, M, M2, 12, 87, M, M, 57, M2, M, M3, M,
#else /* JALAPENO || SERRANO */
	M, M, 100, M, 83, M, M2, 12, 87, M, M, 57, S11C, M, M3, M,
#endif /* JALAPENO || SERRANO */
	M2, 97, 82, M2, 78, M2, M2, 1, 96, M, M, M, M, M, M3, M2,
	94, M, M2, M3, M2, M, M3, M, M2, M, 79, M, 69, M, M4, M,
	M2, 93, 92, M, 91, M, M2, 8, 90, M2, M2, M, M, M, M, M4,
	89, M, M, M3, M2, M3, M3, M, M, M, M3, M2, M3, M2, M, M3,
	86, M, M2, M3, M2, M, M3, M, M2, M, M3, M, M3, M, M, M3,
	M, M, M3, M2, M3, M2, M4, M, 60, M, M2, M3, M4, M, M, M2,
	M2, 88, 85, M2, 84, M, M2, 55, 81, M2, M2, M3, M2, M3, M3, M4,
	77, M, M, M, M2, M3, M, M, M2, M3, M3, M4, M3, M2, M, M,
	74, M, M2, M3, M, M, M3, M, M, M, M3, M, M3, M, M4, M3,
	M2, 70, 107, M4, 65, M2, M2, M, 127, M, M, M, M2, M3, M3, M,
	80, M2, M2, 72, M, 119, 118, M, M2, 126, 76, M, 125, M, M4, M3,
	M2, 115, 124, M, 75, M, M, M3, 61, M, M4, M, M4, M, M, M,
	M, 123, 122, M4, 121, M4, M, M3, 117, M2, M2, M3, M4, M3, M, M,
	111, M, M, M, M4, M3, M3, M, M, M, M3, M, M3, M2, M, M
};

#define	ESYND_TBL_SIZE	(sizeof (ecc_syndrome_tab) / sizeof (uint8_t))
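
/*
 * Illustrative sketch, not part of the original source: one way the
 * table above could be consulted to coarsely classify a 9-bit E$ data
 * syndrome.  The helper name and the US3_EXAMPLE_SKETCHES guard are
 * hypothetical (the guard is never defined); the real decoding is done
 * by synd_to_synd_code() and related routines below.
 */
#ifdef US3_EXAMPLE_SKETCHES
static int
example_classify_esynd(ushort_t synd)
{
	uint8_t ent;

	if (synd >= ESYND_TBL_SIZE)
		return (-1);		/* not a valid 9-bit syndrome */
	ent = ecc_syndrome_tab[synd];
	if (ent == NA)
		return (0);		/* no bits in error */
	if (ent <= 127)
		return (1);		/* single data bit 'ent' is bad */
	if (ent >= C0 && ent <= C8)
		return (2);		/* a single ECC check bit is bad */
	return (3);			/* multi-bit or special syndrome */
}
#endif	/* US3_EXAMPLE_SKETCHES */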
#if !(defined(JALAPENO) || defined(SERRANO))
/*
 * This table is used to determine which bit(s) is(are) bad when a Mtag
 * error occurs.  The array is indexed by a 4-bit ECC syndrome.  The entries
 * of this array have the following semantics:
 *
 *	-1	Invalid mtag syndrome.
 *	137	Mtag Data 0 is bad.
 *	138	Mtag Data 1 is bad.
 *	139	Mtag Data 2 is bad.
 *	140	Mtag ECC 0 is bad.
 *	141	Mtag ECC 1 is bad.
 *	142	Mtag ECC 2 is bad.
 *	143	Mtag ECC 3 is bad.
 * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Table 11-6.
 */
short mtag_syndrome_tab[] =
{
	NA, MTC0, MTC1, M2, MTC2, M2, M2, MT0, MTC3, M2, M2, MT1, M2, MT2,
	M2, M2
};

#define	MSYND_TBL_SIZE	(sizeof (mtag_syndrome_tab) / sizeof (short))

#else /* !(JALAPENO || SERRANO) */

#define	BSYND_TBL_SIZE	16

#endif /* !(JALAPENO || SERRANO) */
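
/*
 * Illustrative sketch, not part of the original source: a bounds-checked
 * lookup in mtag_syndrome_tab for a 4-bit Mtag syndrome.  The helper and
 * the US3_EXAMPLE_SKETCHES guard are hypothetical; the real consumers of
 * this table live in the syndrome-decoding code below.
 */
#if defined(US3_EXAMPLE_SKETCHES) && !(defined(JALAPENO) || defined(SERRANO))
static short
example_lookup_msynd(ushort_t msynd)
{
	if (msynd >= MSYND_TBL_SIZE)
		return (-1);			/* out-of-range syndrome */
	return (mtag_syndrome_tab[msynd]);	/* NA, M2, MT0-2 or MTC0-3 */
}
#endif	/* US3_EXAMPLE_SKETCHES && !(JALAPENO || SERRANO) */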
/*
 * Types returned from cpu_error_to_resource_type()
 */
#define	ERRTYPE_UNKNOWN		0
#define	ERRTYPE_CPU		1
#define	ERRTYPE_MEMORY		2
#define	ERRTYPE_ECACHE_DATA	3

/*
 * CE initial classification and subsequent action lookup table
 */
static ce_dispact_t ce_disp_table[CE_INITDISPTBL_SIZE];
static int ce_disp_inited;

/*
 * Set to disable leaky and partner check for memory correctables
 */
int ce_xdiag_off;

/*
 * The following are not incremented atomically so are indicative only
 */
static int ce_xdiag_drops;
static int ce_xdiag_lkydrops;
static int ce_xdiag_ptnrdrops;
static int ce_xdiag_bad;

/*
 * CE leaky check callback structure
 */
typedef struct {
	struct async_flt *lkycb_aflt;
	errorq_t *lkycb_eqp;
	errorq_elem_t *lkycb_eqep;
} ce_lkychk_cb_t;

/*
 * defines for various ecache_flush_flag's
 */
#define	ECACHE_FLUSH_LINE	1
#define	ECACHE_FLUSH_ALL	2

/*
 * STICK sync
 */
#define	STICK_ITERATION	10
#define	MAX_TSKEW	1
#define	EV_A_START	0
#define	EV_A_END	1
#define	EV_B_START	2
#define	EV_B_END	3
#define	EVENTS		4

static int64_t stick_iter = STICK_ITERATION;
static int64_t stick_tsk = MAX_TSKEW;

typedef enum {
	EVENT_NULL = 0,
	SLAVE_START,
	SLAVE_CONT,
	MASTER_START
} event_cmd_t;

static volatile event_cmd_t stick_sync_cmd = EVENT_NULL;
static int64_t timestamp[EVENTS];
static volatile int slave_done;

#ifdef DEBUG
#define	DSYNC_ATTEMPTS 64
typedef struct {
	int64_t	skew_val[DSYNC_ATTEMPTS];
} ss_t;

ss_t stick_sync_stats[NCPU];
#endif /* DEBUG */

uint_t cpu_impl_dual_pgsz = 0;
#if defined(CPU_IMP_DUAL_PAGESIZE)
uint_t disable_dual_pgsz = 0;
#endif /* CPU_IMP_DUAL_PAGESIZE */

/*
 * Save the cache bootup state for use when internal
 * caches are to be re-enabled after an error occurs.
 */
uint64_t cache_boot_state;

/*
 * PA[22:0] represent Displacement in Safari configuration space.
 */
uint_t	root_phys_addr_lo_mask = 0x7fffffu;

bus_config_eclk_t bus_config_eclk[] = {
#if defined(JALAPENO) || defined(SERRANO)
	{JBUS_CONFIG_ECLK_1_DIV, JBUS_CONFIG_ECLK_1},
	{JBUS_CONFIG_ECLK_2_DIV, JBUS_CONFIG_ECLK_2},
	{JBUS_CONFIG_ECLK_32_DIV, JBUS_CONFIG_ECLK_32},
#else /* JALAPENO || SERRANO */
	{SAFARI_CONFIG_ECLK_1_DIV, SAFARI_CONFIG_ECLK_1},
	{SAFARI_CONFIG_ECLK_2_DIV, SAFARI_CONFIG_ECLK_2},
	{SAFARI_CONFIG_ECLK_32_DIV, SAFARI_CONFIG_ECLK_32},
#endif /* JALAPENO || SERRANO */
	{0, 0}
};

/*
 * Interval for deferred CEEN reenable
 */
int cpu_ceen_delay_secs = CPU_CEEN_DELAY_SECS;

/*
 * set in /etc/system to control logging of user BERR/TO's
 */
int cpu_berr_to_verbose = 0;

/*
 * set to 0 in /etc/system to defer CEEN reenable for all CEs
 */
uint64_t cpu_ce_not_deferred = CPU_CE_NOT_DEFERRED;
uint64_t cpu_ce_not_deferred_ext = CPU_CE_NOT_DEFERRED_EXT;

/*
 * Set of all offline cpus
 */
cpuset_t cpu_offline_set;

static void cpu_delayed_check_ce_errors(void *);
static void cpu_check_ce_errors(void *);
void cpu_error_ecache_flush(ch_async_flt_t *);
static int cpu_error_ecache_flush_required(ch_async_flt_t *);
static void cpu_log_and_clear_ce(ch_async_flt_t *);
void cpu_ce_detected(ch_cpu_errors_t *, int);

/*
 * CE Leaky check timeout in microseconds.  This is chosen to be twice the
 * memory refresh interval of current DIMMs (64ms).  After the initial fix
 * this gives at least one full refresh cycle in which the cell can leak
 * (whereafter further refreshes simply reinforce any incorrect bit value).
 */
clock_t cpu_ce_lkychk_timeout_usec = 128000;

/*
 * CE partner check partner caching period in seconds
 */
int cpu_ce_ptnr_cachetime_sec = 60;

/*
 * Sets trap table entry ttentry by overwriting eight instructions from ttlabel
 */
#define	CH_SET_TRAP(ttentry, ttlabel)				\
		bcopy((const void *)&ttlabel, &ttentry, 32);	\
		flush_instr_mem((caddr_t)&ttentry, 32);
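
/*
 * Illustrative use, not part of the original source (the entry and label
 * names below are hypothetical):
 *
 *	CH_SET_TRAP(some_tt_entry, some_chip_handler_instr);
 *
 * copies 32 bytes, i.e. eight 4-byte SPARC instructions, over the trap
 * table entry and then flushes the I$ so the new handler is fetched.
 */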
static int min_ecache_size;
static uint_t priv_hcl_1;
static uint_t priv_hcl_2;
static uint_t priv_hcl_4;
static uint_t priv_hcl_8;

void
cpu_setup(void)
{
	extern int at_flags;
	extern int cpc_has_overflow_intr;

	/*
	 * Setup chip-specific trap handlers.
	 */
	cpu_init_trap();

	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);

	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;

	/*
	 * save the cache bootup state.
	 */
	cache_boot_state = get_dcu() & DCU_CACHE;

	/*
	 * Due to the number of entries in the fully-associative tlb
	 * this may have to be tuned lower than in spitfire.
	 */
	pp_slots = MIN(8, MAXPP_SLOTS);

	/*
	 * Block stores do not invalidate all pages of the d$, pagecopy
	 * et al. need virtual translations with virtual coloring taken
	 * into consideration.  prefetch/ldd will pollute the d$ on the
	 * load side.
	 */
	pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;

	if (use_page_coloring) {
		do_pg_coloring = 1;
	}

	isa_list =
	    "sparcv9+vis2 sparcv9+vis sparcv9 "
	    "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";

	/*
	 * On Panther-based machines, this should
	 * also include AV_SPARC_POPC
	 */
	cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2;

	/*
	 * On cheetah, there's no hole in the virtual address space
	 */
	hole_start = hole_end = 0;

	/*
	 * The kpm mapping window.
	 * kpm_size:
	 *	The size of a single kpm range.
	 *	The overall size will be: kpm_size * vac_colors.
	 * kpm_vbase:
	 *	The virtual start address of the kpm range within the kernel
	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
	 */
	kpm_size = (size_t)(8ull * 1024 * 1024 * 1024 * 1024); /* 8TB */
	kpm_size_shift = 43;
	kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
	kpm_smallpages = 1;

	/*
	 * The traptrace code uses either %tick or %stick for
	 * timestamping.  We have %stick so we can use it.
	 */
	traptrace_use_stick = 1;

	/*
	 * Cheetah has a performance counter overflow interrupt
	 */
	cpc_has_overflow_intr = 1;

#if defined(CPU_IMP_DUAL_PAGESIZE)
	/*
	 * Use Cheetah+ and later dual page size support.
	 */
	if (!disable_dual_pgsz) {
		cpu_impl_dual_pgsz = 1;
	}
#endif	/* CPU_IMP_DUAL_PAGESIZE */

	/*
	 * Declare that this architecture/cpu combination does fpRAS.
	 */
	fpras_implemented = 1;

	/*
	 * Setup CE lookup table
	 */
	CE_INITDISPTBL_POPULATE(ce_disp_table);
	ce_disp_inited = 1;
}
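
/*
 * Illustrative consistency check, not part of the original source (the
 * US3_EXAMPLE_SKETCHES guard is hypothetical): the kpm_size and
 * kpm_size_shift values set in cpu_setup() above must agree, since
 * 8TB == 2^43 bytes.
 */
#ifdef US3_EXAMPLE_SKETCHES
CTASSERT((8ull * 1024 * 1024 * 1024 * 1024) == (1ull << 43));
#endif	/* US3_EXAMPLE_SKETCHES */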
/*
 * Called by setcpudelay
 */
void
cpu_init_tick_freq(void)
{
	/*
	 * For UltraSPARC III and beyond we want to use the
	 * system clock rate as the basis for low level timing,
	 * due to support of mixed speed CPUs and power management.
	 */
	if (system_clock_freq == 0)
		cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");

	sys_tick_freq = system_clock_freq;
}

#ifdef CHEETAHPLUS_ERRATUM_25
/*
 * Tunables
 */
int cheetah_bpe_off = 0;
int cheetah_sendmondo_recover = 1;
int cheetah_sendmondo_fullscan = 0;
int cheetah_sendmondo_recover_delay = 5;

#define	CHEETAH_LIVELOCK_MIN_DELAY	1

/*
 * Recovery Statistics
 */
typedef struct cheetah_livelock_entry {
	int cpuid;		/* fallen cpu */
	int buddy;		/* cpu that ran recovery */
	clock_t lbolt;		/* when recovery started */
	hrtime_t recovery_time;	/* time spent in recovery */
} cheetah_livelock_entry_t;

#define	CHEETAH_LIVELOCK_NENTRY	32

cheetah_livelock_entry_t cheetah_livelock_hist[CHEETAH_LIVELOCK_NENTRY];
int cheetah_livelock_entry_nxt;

#define	CHEETAH_LIVELOCK_ENTRY_NEXT(statp)	{			\
	statp = cheetah_livelock_hist + cheetah_livelock_entry_nxt;	\
	if (++cheetah_livelock_entry_nxt >= CHEETAH_LIVELOCK_NENTRY) {	\
		cheetah_livelock_entry_nxt = 0;				\
	}								\
}

#define	CHEETAH_LIVELOCK_ENTRY_SET(statp, item, val)	statp->item = val

struct {
	hrtime_t hrt;		/* maximum recovery time */
	int recovery;		/* recovered */
	int full_claimed;	/* maximum pages claimed in full recovery */
	int proc_entry;		/* attempted to claim TSB */
	int proc_tsb_scan;	/* tsb scanned */
	int proc_tsb_partscan;	/* tsb partially scanned */
	int proc_tsb_fullscan;	/* whole tsb scanned */
	int proc_claimed;	/* maximum pages claimed in tsb scan */
	int proc_user;		/* user thread */
	int proc_kernel;	/* kernel thread */
	int proc_onflt;		/* bad stack */
	int proc_cpu;		/* null cpu */
	int proc_thread;	/* null thread */
	int proc_proc;		/* null proc */
	int proc_as;		/* null as */
	int proc_hat;		/* null hat */
	int proc_hat_inval;	/* hat contents don't make sense */
	int proc_hat_busy;	/* hat is changing TSBs */
	int proc_tsb_reloc;	/* TSB skipped because being relocated */
	int proc_cnum_bad;	/* cnum out of range */
	int proc_cnum;		/* last cnum processed */
	tte_t proc_tte;		/* last tte processed */
} cheetah_livelock_stat;
#define	CHEETAH_LIVELOCK_STAT(item)	cheetah_livelock_stat.item++

#define	CHEETAH_LIVELOCK_STATSET(item, value)		\
	cheetah_livelock_stat.item = value

#define	CHEETAH_LIVELOCK_MAXSTAT(item, value)	{	\
	if (value > cheetah_livelock_stat.item)		\
		cheetah_livelock_stat.item = value;	\
}

/*
 * Attempt to recover a cpu by claiming every cache line saved in the TSB
 * that the non-responsive cpu is using.  Since we can't grab any adaptive
 * lock, this is at best a best-effort attempt.  Because we don't grab any
 * locks, we must operate under the protection of on_fault().
 *
 * Return 1 if cpuid could be recovered, 0 if failed.
 */
int
mondo_recover_proc(uint16_t cpuid, int bn)
{
	label_t ljb;
	cpu_t *cp;
	kthread_t *t;
	proc_t *p;
	struct as *as;
	struct hat *hat;
	uint_t  cnum;
	struct tsb_info *tsbinfop;
	struct tsbe *tsbep;
	caddr_t tsbp;
	caddr_t end_tsbp;
	uint64_t paddr;
	uint64_t idsr;
	u_longlong_t pahi, palo;
	int pages_claimed = 0;
	tte_t tsbe_tte;
	int tried_kernel_tsb = 0;
	mmu_ctx_t *mmu_ctxp;

	CHEETAH_LIVELOCK_STAT(proc_entry);

	if (on_fault(&ljb)) {
		CHEETAH_LIVELOCK_STAT(proc_onflt);
		goto badstruct;
	}

	if ((cp = cpu[cpuid]) == NULL) {
		CHEETAH_LIVELOCK_STAT(proc_cpu);
		goto badstruct;
	}

	if ((t = cp->cpu_thread) == NULL) {
		CHEETAH_LIVELOCK_STAT(proc_thread);
		goto badstruct;
	}

	if ((p = ttoproc(t)) == NULL) {
		CHEETAH_LIVELOCK_STAT(proc_proc);
		goto badstruct;
	}

	if ((as = p->p_as) == NULL) {
		CHEETAH_LIVELOCK_STAT(proc_as);
		goto badstruct;
	}

	if ((hat = as->a_hat) == NULL) {
		CHEETAH_LIVELOCK_STAT(proc_hat);
		goto badstruct;
	}

	if (hat != ksfmmup) {
		CHEETAH_LIVELOCK_STAT(proc_user);
		if (hat->sfmmu_flags & (HAT_BUSY | HAT_SWAPPED | HAT_SWAPIN)) {
			CHEETAH_LIVELOCK_STAT(proc_hat_busy);
			goto badstruct;
		}
		tsbinfop = hat->sfmmu_tsb;
		if (tsbinfop == NULL) {
			CHEETAH_LIVELOCK_STAT(proc_hat_inval);
			goto badstruct;
		}
		tsbp = tsbinfop->tsb_va;
		end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
	} else {
		CHEETAH_LIVELOCK_STAT(proc_kernel);
		tsbinfop = NULL;
		tsbp = ktsb_base;
		end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
	}

	/* Verify as */
	if (hat->sfmmu_as != as) {
		CHEETAH_LIVELOCK_STAT(proc_hat_inval);
		goto badstruct;
	}

	mmu_ctxp = CPU_MMU_CTXP(cp);
	ASSERT(mmu_ctxp);
	cnum = hat->sfmmu_ctxs[mmu_ctxp->mmu_idx].cnum;
	CHEETAH_LIVELOCK_STATSET(proc_cnum, cnum);

	if ((cnum < 0) || (cnum == INVALID_CONTEXT) ||
	    (cnum >= mmu_ctxp->mmu_nctxs)) {
		CHEETAH_LIVELOCK_STAT(proc_cnum_bad);
		goto badstruct;
	}

	do {
		CHEETAH_LIVELOCK_STAT(proc_tsb_scan);

		/*
		 * Skip TSBs being relocated.  This is important because
		 * we want to avoid the following deadlock scenario:
		 *
		 * 1) when we came in we set ourselves to "in recover" state.
		 * 2) when we try to touch TSB being relocated the mapping
		 *    will be in the suspended state so we'll spin waiting
		 *    for it to be unlocked.
		 * 3) when the CPU that holds the TSB mapping locked tries to
		 *    unlock it, it will send an xtrap which will fail to
		 *    xcall us or the CPU we're trying to recover, and will
		 *    in turn enter the mondo code.
		 * 4) since we are still spinning on the locked mapping
		 *    no further progress will be made and the system will
		 *    inevitably hard hang.
		 *
		 * A TSB not being relocated can't begin being relocated
		 * while we're accessing it because we check
		 * sendmondo_in_recover before relocating TSBs.
		 */
		if (hat != ksfmmup &&
		    (tsbinfop->tsb_flags & TSB_RELOC_FLAG) != 0) {
			CHEETAH_LIVELOCK_STAT(proc_tsb_reloc);
			goto next_tsbinfo;
		}

		for (tsbep = (struct tsbe *)tsbp;
		    tsbep < (struct tsbe *)end_tsbp; tsbep++) {
			tsbe_tte = tsbep->tte_data;

			if (tsbe_tte.tte_val == 0) {
				/*
				 * Invalid tte
				 */
				continue;
			}
			if (tsbe_tte.tte_se) {
				/*
				 * Don't want device registers
				 */
				continue;
			}
			if (tsbe_tte.tte_cp == 0) {
				/*
				 * Must be cached in E$
				 */
				continue;
			}
			if (tsbep->tte_tag.tag_invalid != 0) {
				/*
				 * Invalid tag, ignore this entry.
				 */
				continue;
			}
			CHEETAH_LIVELOCK_STATSET(proc_tte, tsbe_tte);
			idsr = getidsr();
			if ((idsr & (IDSR_NACK_BIT(bn) |
			    IDSR_BUSY_BIT(bn))) == 0) {
				CHEETAH_LIVELOCK_STAT(proc_tsb_partscan);
				goto done;
			}
			pahi = tsbe_tte.tte_pahi;
			palo = tsbe_tte.tte_palo;
			paddr = (uint64_t)((pahi << 32) |
			    (palo << MMU_PAGESHIFT));
			claimlines(paddr, TTEBYTES(TTE_CSZ(&tsbe_tte)),
			    CH_ECACHE_SUBBLK_SIZE);
			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
				shipit(cpuid, bn);
			}
			pages_claimed++;
		}
next_tsbinfo:
		if (tsbinfop != NULL)
			tsbinfop = tsbinfop->tsb_next;
		if (tsbinfop != NULL) {
			tsbp = tsbinfop->tsb_va;
			end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
		} else if (tsbp == ktsb_base) {
			tried_kernel_tsb = 1;
		} else if (!tried_kernel_tsb) {
			tsbp = ktsb_base;
			end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
			hat = ksfmmup;
			tsbinfop = NULL;
		}
	} while (tsbinfop != NULL ||
	    ((tsbp == ktsb_base) && !tried_kernel_tsb));

	CHEETAH_LIVELOCK_STAT(proc_tsb_fullscan);
	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
	no_fault();
	idsr = getidsr();
	if ((idsr & (IDSR_NACK_BIT(bn) |
	    IDSR_BUSY_BIT(bn))) == 0) {
		return (1);
	} else {
		return (0);
	}

done:
	no_fault();
	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
	return (1);

badstruct:
	no_fault();
	return (0);
}

/*
 * Attempt to claim ownership, temporarily, of every cache line that a
 * non-responsive cpu might be using.  This might kick that cpu out of
 * this state.
 *
 * The return value indicates to the caller if we have exhausted all recovery
 * techniques.  If 1 is returned, it is useless to call this function again
 * even for a different target CPU.
 */
int
mondo_recover(uint16_t cpuid, int bn)
{
	struct memseg *seg;
	uint64_t begin_pa, end_pa, cur_pa;
	hrtime_t begin_hrt, end_hrt;
	int retval = 0;
	int pages_claimed = 0;
	cheetah_livelock_entry_t *histp;
	uint64_t idsr;

	if (cas32(&sendmondo_in_recover, 0, 1) != 0) {
		/*
		 * Wait while recovery takes place
		 */
		while (sendmondo_in_recover) {
			drv_usecwait(1);
		}
		/*
		 * Assume we didn't claim the whole memory.  If
		 * the target of this caller is not recovered,
		 * it will come back.
		 */
		return (retval);
	}

	CHEETAH_LIVELOCK_ENTRY_NEXT(histp)
	CHEETAH_LIVELOCK_ENTRY_SET(histp, lbolt, lbolt);
	CHEETAH_LIVELOCK_ENTRY_SET(histp, cpuid, cpuid);
	CHEETAH_LIVELOCK_ENTRY_SET(histp, buddy, CPU->cpu_id);

	begin_hrt = gethrtime_waitfree();
	/*
	 * First try to claim the lines in the TSB the target
	 * may have been using.
	 */
	if (mondo_recover_proc(cpuid, bn) == 1) {
		/*
		 * Didn't claim the whole memory
		 */
		goto done;
	}

	/*
	 * We tried using the TSB.  The target is still
	 * not recovered.  Check if complete memory scan is
	 * enabled.
	 */
	if (cheetah_sendmondo_fullscan == 0) {
		/*
		 * Full memory scan is disabled.
		 */
		retval = 1;
		goto done;
	}

	/*
	 * Try claiming the whole memory.
	 */
	for (seg = memsegs; seg; seg = seg->next) {
		begin_pa = (uint64_t)(seg->pages_base) << MMU_PAGESHIFT;
		end_pa = (uint64_t)(seg->pages_end) << MMU_PAGESHIFT;
		for (cur_pa = begin_pa; cur_pa < end_pa;
		    cur_pa += MMU_PAGESIZE) {
			idsr = getidsr();
			if ((idsr & (IDSR_NACK_BIT(bn) |
			    IDSR_BUSY_BIT(bn))) == 0) {
				/*
				 * Didn't claim all memory
				 */
				goto done;
			}
			claimlines(cur_pa, MMU_PAGESIZE,
			    CH_ECACHE_SUBBLK_SIZE);
			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
				shipit(cpuid, bn);
			}
			pages_claimed++;
		}
	}

	/*
	 * We did all we could.
	 */
	retval = 1;

done:
	/*
	 * Update statistics
	 */
	end_hrt = gethrtime_waitfree();
	CHEETAH_LIVELOCK_STAT(recovery);
	CHEETAH_LIVELOCK_MAXSTAT(hrt, (end_hrt - begin_hrt));
	CHEETAH_LIVELOCK_MAXSTAT(full_claimed, pages_claimed);
	CHEETAH_LIVELOCK_ENTRY_SET(histp, recovery_time, \
	    (end_hrt - begin_hrt));

	while (cas32(&sendmondo_in_recover, 1, 0) != 1)
		;

	return (retval);
}
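
/*
 * Editorial note, not part of the original source: sendmondo_in_recover
 * acts as a hand-rolled lock around the recovery code above; the
 * cas32(&sendmondo_in_recover, 0, 1) at function entry admits a single
 * recovering CPU and the cas32(..., 1, 0) loop releases it, while
 * cheetah_nudge_buddy() below checks the flag to avoid cross-trapping
 * a CPU that is mid-recovery.
 */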
/*
 * This is called by the cyclic framework when this CPU becomes online
 */
/*ARGSUSED*/
static void
cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
{

	hdlr->cyh_func = (cyc_func_t)cheetah_nudge_buddy;
	hdlr->cyh_level = CY_LOW_LEVEL;
	hdlr->cyh_arg = NULL;

	/*
	 * Stagger the start time
	 */
	when->cyt_when = cpu->cpu_id * (NANOSEC / NCPU);
	if (cheetah_sendmondo_recover_delay < CHEETAH_LIVELOCK_MIN_DELAY) {
		cheetah_sendmondo_recover_delay = CHEETAH_LIVELOCK_MIN_DELAY;
	}
	when->cyt_interval = cheetah_sendmondo_recover_delay * NANOSEC;
}

/*
 * Create a low level cyclic to send an xtrap to the next cpu online.
 * However, there's no need to have this running on a uniprocessor system.
 */
static void
cheetah_nudge_init(void)
{
	cyc_omni_handler_t hdlr;

	if (max_ncpus == 1) {
		return;
	}

	hdlr.cyo_online = cheetah_nudge_onln;
	hdlr.cyo_offline = NULL;
	hdlr.cyo_arg = NULL;

	mutex_enter(&cpu_lock);
	(void) cyclic_add_omni(&hdlr);
	mutex_exit(&cpu_lock);
}

/*
 * Cyclic handler to wake up buddy
 */
void
cheetah_nudge_buddy(void)
{
	/*
	 * Disable kernel preemption to protect the cpu list
	 */
	kpreempt_disable();
	if ((CPU->cpu_next_onln != CPU) && (sendmondo_in_recover == 0)) {
		xt_one(CPU->cpu_next_onln->cpu_id, (xcfunc_t *)xt_sync_tl1,
		    0, 0);
	}
	kpreempt_enable();
}

#endif	/* CHEETAHPLUS_ERRATUM_25 */

#ifdef SEND_MONDO_STATS
uint32_t x_one_stimes[64];
uint32_t x_one_ltimes[16];
uint32_t x_set_stimes[64];
uint32_t x_set_ltimes[16];
uint32_t x_set_cpus[NCPU];
uint32_t x_nack_stimes[64];
#endif
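
/*
 * Editorial note, not part of the original source: when SEND_MONDO_STATS
 * is enabled, the *_stimes[] arrays histogram dispatch times in 128-tick
 * buckets (covering up to 8192 ticks) and the *_ltimes[] arrays capture
 * longer dispatches in sixteen coarse 8192-tick buckets; see the
 * (n >> 7) and ((n >> 13) & 0xf) indexing in send_one_mondo() below.
 */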
/*
 * Note: A version of this function is used by the debugger via the KDI,
 * and must be kept in sync with this version.  Any changes made to this
 * function to support new chips or to accommodate errata must also be
 * included in the KDI-specific version.  See us3_kdi.c.
 */
void
send_one_mondo(int cpuid)
{
	int busy, nack;
	uint64_t idsr, starttick, endtick, tick, lasttick;
	uint64_t busymask;
#ifdef	CHEETAHPLUS_ERRATUM_25
	int recovered = 0;
#endif

	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
	starttick = lasttick = gettick();
	shipit(cpuid, 0);
	endtick = starttick + xc_tick_limit;
	busy = nack = 0;
#if defined(JALAPENO) || defined(SERRANO)
	/*
	 * Lower 2 bits of the agent ID determine which BUSY/NACK pair
	 * will be used for dispatching interrupt.  For now, assume
	 * there are no more than IDSR_BN_SETS CPUs, hence no aliasing
	 * issues with respect to BUSY/NACK pair usage.
	 */
	busymask = IDSR_BUSY_BIT(cpuid);
#else /* JALAPENO || SERRANO */
	busymask = IDSR_BUSY;
#endif /* JALAPENO || SERRANO */
	for (;;) {
		idsr = getidsr();
		if (idsr == 0)
			break;

		tick = gettick();
		/*
		 * If there is a big jump between the current tick
		 * count and lasttick, we have probably hit a break
		 * point.  Adjust endtick accordingly to avoid panic.
		 */
		if (tick > (lasttick + xc_tick_jump_limit))
			endtick += (tick - lasttick);
		lasttick = tick;
		if (tick > endtick) {
			if (panic_quiesce)
				return;
#ifdef	CHEETAHPLUS_ERRATUM_25
			if (cheetah_sendmondo_recover && recovered == 0) {
				if (mondo_recover(cpuid, 0)) {
					/*
					 * We claimed the whole memory or
					 * full scan is disabled.
					 */
					recovered++;
				}
				tick = gettick();
				endtick = tick + xc_tick_limit;
				lasttick = tick;
				/*
				 * Recheck idsr
				 */
				continue;
			} else
#endif	/* CHEETAHPLUS_ERRATUM_25 */
			{
				cmn_err(CE_PANIC, "send mondo timeout "
				    "(target 0x%x) [%d NACK %d BUSY]",
				    cpuid, nack, busy);
			}
		}

		if (idsr & busymask) {
			busy++;
			continue;
		}
		drv_usecwait(1);
		shipit(cpuid, 0);
		nack++;
		busy = 0;
	}
#ifdef SEND_MONDO_STATS
	{
		int n = gettick() - starttick;
		if (n < 8192)
			x_one_stimes[n >> 7]++;
		else
			x_one_ltimes[(n >> 13) & 0xf]++;
	}
#endif
}

void
syncfpu(void)
{
}

/*
 * Return processor specific async error structure
 * size used.
 */
int
cpu_aflt_size(void)
{
	return (sizeof (ch_async_flt_t));
}

void
syncfpu(void)
{
}

/*
 * Return processor specific async error structure
 * size used.
 */
int
cpu_aflt_size(void)
{
	return (sizeof (ch_async_flt_t));
}

/*
 * Tunable to disable the checking of other cpu logout areas during panic for
 * potential syndrome 71 generating errors.
 */
int enable_check_other_cpus_logout = 1;

/*
 * Check another cpu's logout area for potential syndrome 71 generating
 * errors.
 */
static void
cpu_check_cpu_logout(int cpuid, caddr_t tpc, int tl, int ecc_type,
    ch_cpu_logout_t *clop)
{
	struct async_flt *aflt;
	ch_async_flt_t ch_flt;
	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;

	if (clop == NULL || clop->clo_data.chd_afar == LOGOUT_INVALID) {
		return;
	}

	bzero(&ch_flt, sizeof (ch_async_flt_t));

	t_afar = clop->clo_data.chd_afar;
	t_afsr = clop->clo_data.chd_afsr;
	t_afsr_ext = clop->clo_data.chd_afsr_ext;
#if defined(SERRANO)
	ch_flt.afar2 = clop->clo_data.chd_afar2;
#endif	/* SERRANO */

	/*
	 * In order to simplify code, we maintain this afsr_errs
	 * variable which holds the aggregate of AFSR and AFSR_EXT
	 * sticky bits.
	 */
	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
	    (t_afsr & C_AFSR_ALL_ERRS);

	/* Setup the async fault structure */
	aflt = (struct async_flt *)&ch_flt;
	aflt->flt_id = gethrtime_waitfree();
	ch_flt.afsr_ext = t_afsr_ext;
	ch_flt.afsr_errs = t_afsr_errs;
	aflt->flt_stat = t_afsr;
	aflt->flt_addr = t_afar;
	aflt->flt_bus_id = cpuid;
	aflt->flt_inst = cpuid;
	aflt->flt_pc = tpc;
	aflt->flt_prot = AFLT_PROT_NONE;
	aflt->flt_class = CPU_FAULT;
	aflt->flt_priv = ((t_afsr & C_AFSR_PRIV) != 0);
	aflt->flt_tl = tl;
	aflt->flt_status = ecc_type;
	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);

	/*
	 * Queue events on the async event queue, one event per error bit.
	 * If no events are queued, queue an event to complain.
	 */
	if (cpu_queue_events(&ch_flt, NULL, t_afsr_errs, clop) == 0) {
		ch_flt.flt_type = CPU_INV_AFSR;
		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
		    aflt->flt_panic);
	}

	/*
	 * Zero out + invalidate CPU logout.
	 */
	bzero(clop, sizeof (ch_cpu_logout_t));
	clop->clo_data.chd_afar = LOGOUT_INVALID;
}
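
/*
 * Throughout this file the per-trap handlers build the same aggregate:
 *
 *	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
 *	    (t_afsr & C_AFSR_ALL_ERRS);
 *
 * Folding the AFSR_EXT sticky bits (the Panther L3 errors and friends)
 * into one word alongside the base AFSR bits lets a single mask test,
 * such as C_AFSR_PANIC(), classify errors from either register without
 * chip-specific branches.  This presumes the two mask sets occupy
 * disjoint bit positions in the aggregate, which the C_AFSR_* and
 * C_AFSR_EXT_* definitions are arranged to guarantee.
 */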

/*
 * Check the logout areas of all other cpus for unlogged errors.
 */
static void
cpu_check_other_cpus_logout(void)
{
	int i, j;
	processorid_t myid;
	struct cpu *cp;
	ch_err_tl1_data_t *cl1p;

	myid = CPU->cpu_id;
	for (i = 0; i < NCPU; i++) {
		cp = cpu[i];

		if ((cp == NULL) || !(cp->cpu_flags & CPU_EXISTS) ||
		    (cp->cpu_id == myid) || (CPU_PRIVATE(cp) == NULL)) {
			continue;
		}

		/*
		 * Check each of the tl>0 logout areas
		 */
		cl1p = CPU_PRIVATE_PTR(cp, chpr_tl1_err_data[0]);
		for (j = 0; j < CH_ERR_TL1_TLMAX; j++, cl1p++) {
			if (cl1p->ch_err_tl1_flags == 0)
				continue;

			cpu_check_cpu_logout(i, (caddr_t)cl1p->ch_err_tl1_tpc,
			    1, ECC_F_TRAP, &cl1p->ch_err_tl1_logout);
		}

		/*
		 * Check each of the remaining logout areas
		 */
		cpu_check_cpu_logout(i, NULL, 0, ECC_F_TRAP,
		    CPU_PRIVATE_PTR(cp, chpr_fecctl0_logout));
		cpu_check_cpu_logout(i, NULL, 0, ECC_C_TRAP,
		    CPU_PRIVATE_PTR(cp, chpr_cecc_logout));
		cpu_check_cpu_logout(i, NULL, 0, ECC_D_TRAP,
		    CPU_PRIVATE_PTR(cp, chpr_async_logout));
	}
}

/*
 * The fast_ecc_err handler transfers control here for UCU, UCC events.
 * Note that we flush Ecache twice, once in the fast_ecc_err handler to
 * flush the error that caused the UCU/UCC, then again here at the end to
 * flush the TL=1 trap handler code out of the Ecache, so we can minimize
 * the probability of getting a TL>1 Fast ECC trap when we're fielding
 * another Fast ECC trap.
 *
 * Cheetah+ also handles: TSCE: No additional processing required.
 * Panther adds L3_UCU and L3_UCC which are reported in AFSR_EXT.
 *
 * Note that the p_clo_flags input is only valid in cases where the
 * cpu_private struct is not yet initialized (since that is the only
 * time that information cannot be obtained from the logout struct.)
 */
/*ARGSUSED*/
void
cpu_fast_ecc_error(struct regs *rp, ulong_t p_clo_flags)
{
	ch_cpu_logout_t *clop;
	uint64_t ceen, nceen;

	/*
	 * Get the CPU log out info.  If we can't find our CPU private
	 * pointer, then we will have to make do without any detailed
	 * logout information.
	 */
	if (CPU_PRIVATE(CPU) == NULL) {
		clop = NULL;
		ceen = p_clo_flags & EN_REG_CEEN;
		nceen = p_clo_flags & EN_REG_NCEEN;
	} else {
		clop = CPU_PRIVATE_PTR(CPU, chpr_fecctl0_logout);
		ceen = clop->clo_flags & EN_REG_CEEN;
		nceen = clop->clo_flags & EN_REG_NCEEN;
	}

	cpu_log_fast_ecc_error((caddr_t)rp->r_pc,
	    (rp->r_tstate & TSTATE_PRIV) ? 1 : 0, 0, ceen, nceen, clop);
}
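
/*
 * In the handler above, CEEN/NCEEN are sampled from p_clo_flags only on
 * the early-boot/DR path where no cpu_private area exists yet; once the
 * per-CPU logout area is set up, the trap code records the error-enable
 * state in clo_flags at trap time, so the logout copy is the
 * authoritative one.
 */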

/*
 * Log fast ecc error, called from either Fast ECC at TL=0 or Fast
 * ECC at TL>0.  Need to supply either an error register pointer or a
 * cpu logout structure pointer.
 */
static void
cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
    uint64_t nceen, ch_cpu_logout_t *clop)
{
	struct async_flt *aflt;
	ch_async_flt_t ch_flt;
	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
	char pr_reason[MAX_REASON_STRING];
	ch_cpu_errors_t cpu_error_regs;

	bzero(&ch_flt, sizeof (ch_async_flt_t));
	/*
	 * If no cpu logout data, then we will have to make do without
	 * any detailed logout information.
	 */
	if (clop == NULL) {
		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
		get_cpu_error_state(&cpu_error_regs);
		set_cpu_error_state(&cpu_error_regs);
		t_afar = cpu_error_regs.afar;
		t_afsr = cpu_error_regs.afsr;
		t_afsr_ext = cpu_error_regs.afsr_ext;
#if defined(SERRANO)
		ch_flt.afar2 = cpu_error_regs.afar2;
#endif	/* SERRANO */
	} else {
		t_afar = clop->clo_data.chd_afar;
		t_afsr = clop->clo_data.chd_afsr;
		t_afsr_ext = clop->clo_data.chd_afsr_ext;
#if defined(SERRANO)
		ch_flt.afar2 = clop->clo_data.chd_afar2;
#endif	/* SERRANO */
	}

	/*
	 * In order to simplify code, we maintain this afsr_errs
	 * variable which holds the aggregate of AFSR and AFSR_EXT
	 * sticky bits.
	 */
	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
	    (t_afsr & C_AFSR_ALL_ERRS);
	pr_reason[0] = '\0';

	/* Setup the async fault structure */
	aflt = (struct async_flt *)&ch_flt;
	aflt->flt_id = gethrtime_waitfree();
	ch_flt.afsr_ext = t_afsr_ext;
	ch_flt.afsr_errs = t_afsr_errs;
	aflt->flt_stat = t_afsr;
	aflt->flt_addr = t_afar;
	aflt->flt_bus_id = getprocessorid();
	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_pc = tpc;
	aflt->flt_prot = AFLT_PROT_NONE;
	aflt->flt_class = CPU_FAULT;
	aflt->flt_priv = priv;
	aflt->flt_tl = tl;
	aflt->flt_status = ECC_F_TRAP;
	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);

	/*
	 * XXXX - Phenomenal hack to get around Solaris not getting all the
	 * cmn_err messages out to the console.  The situation is a UCU (in
	 * priv mode) which causes a WDU which causes a UE (on the retry).
	 * The messages for the UCU and WDU are enqueued and then pulled off
	 * the async queue via softint and syslogd starts to process them
	 * but doesn't get them to the console.  The UE causes a panic, but
	 * since the UCU/WDU messages are already in transit, those aren't
	 * on the async queue.  The hack is to check if we have a matching
	 * WDU event for the UCU, and if it matches, we're more than likely
	 * going to panic with a UE, unless we're under protection.  So, we
	 * check to see if we got a matching WDU event and if we're under
	 * protection.
	 *
	 * For Cheetah/Cheetah+/Jaguar/Jalapeno, the sequence we care about
	 * looks like this:
	 *	UCU->WDU->UE
	 * For Panther, it could look like either of these:
	 *	UCU---->WDU->L3_WDU->UE
	 *	L3_UCU->WDU->L3_WDU->UE
	 */
	if ((t_afsr_errs & (C_AFSR_UCU | C_AFSR_L3_UCU)) &&
	    aflt->flt_panic == 0 && aflt->flt_priv != 0 &&
	    curthread->t_ontrap == NULL && curthread->t_lofault == NULL) {
		get_cpu_error_state(&cpu_error_regs);
		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
			aflt->flt_panic |=
			    ((cpu_error_regs.afsr & C_AFSR_WDU) &&
			    (cpu_error_regs.afsr_ext & C_AFSR_L3_WDU) &&
			    (cpu_error_regs.afar == t_afar));
			aflt->flt_panic |= ((clop == NULL) &&
			    (t_afsr_errs & C_AFSR_WDU) &&
			    (t_afsr_errs & C_AFSR_L3_WDU));
		} else {
			aflt->flt_panic |=
			    ((cpu_error_regs.afsr & C_AFSR_WDU) &&
			    (cpu_error_regs.afar == t_afar));
			aflt->flt_panic |= ((clop == NULL) &&
			    (t_afsr_errs & C_AFSR_WDU));
		}
	}
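
	/*
	 * In table form, the extra panic forcing above reduces to roughly
	 * the following, all under "UCU/L3_UCU seen, not already panicking,
	 * privileged, and no on_trap/t_lofault protection":
	 *
	 *	chip	evidence of the follow-on WDU			panic
	 *	Panther	AFSR.WDU && AFSR_EXT.L3_WDU && AFAR == t_afar	yes
	 *	Panther	clop == NULL && WDU and L3_WDU sticky		yes
	 *	other	AFSR.WDU && AFAR == t_afar			yes
	 *	other	clop == NULL && WDU sticky			yes
	 */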

	/*
	 * Queue events on the async event queue, one event per error bit.
	 * If no events are queued or no Fast ECC events are on in the AFSR,
	 * queue an event to complain.
	 */
	if (cpu_queue_events(&ch_flt, pr_reason, t_afsr_errs, clop) == 0 ||
	    ((t_afsr_errs & (C_AFSR_FECC_ERRS | C_AFSR_EXT_FECC_ERRS)) == 0)) {
		ch_flt.flt_type = CPU_INV_AFSR;
		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
		    aflt->flt_panic);
	}

	/*
	 * Zero out + invalidate CPU logout.
	 */
	if (clop) {
		bzero(clop, sizeof (ch_cpu_logout_t));
		clop->clo_data.chd_afar = LOGOUT_INVALID;
	}

	/*
	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
	 * or disrupting errors have happened.  We do this because if a
	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
	 * deferred or disrupting error happening between checking the AFSR and
	 * enabling NCEEN/CEEN.
	 *
	 * Note: CEEN and NCEEN are only re-enabled if they were on when the
	 * trap was taken.
	 */
	set_error_enable(get_error_enable() | (nceen | ceen));
	if (clear_errors(&ch_flt)) {
		aflt->flt_panic |= ((ch_flt.afsr_errs &
		    (C_AFSR_EXT_ASYNC_ERRS | C_AFSR_ASYNC_ERRS)) != 0);
		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
		    NULL);
	}

	/*
	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
	 * be logged as part of the panic flow.
	 */
	if (aflt->flt_panic)
		fm_panic("%sError(s)", pr_reason);

	/*
	 * Flushing the Ecache here gets the part of the trap handler that
	 * is run at TL=1 out of the Ecache.
	 */
	cpu_flush_ecache();
}

/*
 * This is called via sys_trap from pil15_interrupt code if the
 * corresponding entry in ch_err_tl1_pending is set.  Checks the
 * various ch_err_tl1_data structures for valid entries based on the bit
 * settings in the ch_err_tl1_flags entry of the structure.
 */
/*ARGSUSED*/
void
cpu_tl1_error(struct regs *rp, int panic)
{
	ch_err_tl1_data_t *cl1p, cl1;
	int i, ncl1ps;
	uint64_t me_flags;
	uint64_t ceen, nceen;

	if (ch_err_tl1_paddrs[CPU->cpu_id] == 0) {
		cl1p = &ch_err_tl1_data;
		ncl1ps = 1;
	} else if (CPU_PRIVATE(CPU) != NULL) {
		cl1p = CPU_PRIVATE_PTR(CPU, chpr_tl1_err_data[0]);
		ncl1ps = CH_ERR_TL1_TLMAX;
	} else {
		ncl1ps = 0;
	}

	for (i = 0; i < ncl1ps; i++, cl1p++) {
		if (cl1p->ch_err_tl1_flags == 0)
			continue;

		/*
		 * Grab a copy of the logout data and invalidate
		 * the logout area.
		 */
		cl1 = *cl1p;
		bzero(cl1p, sizeof (ch_err_tl1_data_t));
		cl1p->ch_err_tl1_logout.clo_data.chd_afar = LOGOUT_INVALID;
		me_flags = CH_ERR_ME_FLAGS(cl1.ch_err_tl1_flags);

		/*
		 * Log "first error" in ch_err_tl1_data.
		 */
		if (cl1.ch_err_tl1_flags & CH_ERR_FECC) {
			ceen = get_error_enable() & EN_REG_CEEN;
			nceen = get_error_enable() & EN_REG_NCEEN;
			cpu_log_fast_ecc_error((caddr_t)cl1.ch_err_tl1_tpc, 1,
			    1, ceen, nceen, &cl1.ch_err_tl1_logout);
		}
#if defined(CPU_IMP_L1_CACHE_PARITY)
		if (cl1.ch_err_tl1_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
			cpu_parity_error(rp, cl1.ch_err_tl1_flags,
			    (caddr_t)cl1.ch_err_tl1_tpc);
		}
#endif	/* CPU_IMP_L1_CACHE_PARITY */

		/*
		 * Log "multiple events" in ch_err_tl1_data.  Note that
		 * we don't read and clear the AFSR/AFAR in the TL>0 code
		 * if the structure is busy, we just do the cache flushing
		 * we have to do and then do the retry.  So the AFSR/AFAR
		 * at this point *should* have some relevant info.  If there
		 * are no valid errors in the AFSR, we'll assume they've
		 * already been picked up and logged.  For I$/D$ parity,
		 * we just log an event with an "Unknown" (NULL) TPC.
		 */
		if (me_flags & CH_ERR_FECC) {
			ch_cpu_errors_t cpu_error_regs;
			uint64_t t_afsr_errs;

			/*
			 * Get the error registers and see if there's
			 * a pending error.  If not, don't bother
			 * generating an "Invalid AFSR" error event.
			 */
			get_cpu_error_state(&cpu_error_regs);
			t_afsr_errs = (cpu_error_regs.afsr_ext &
			    C_AFSR_EXT_ALL_ERRS) |
			    (cpu_error_regs.afsr & C_AFSR_ALL_ERRS);
			if (t_afsr_errs != 0) {
				ceen = get_error_enable() & EN_REG_CEEN;
				nceen = get_error_enable() & EN_REG_NCEEN;
				cpu_log_fast_ecc_error((caddr_t)NULL, 1,
				    1, ceen, nceen, NULL);
			}
		}
#if defined(CPU_IMP_L1_CACHE_PARITY)
		if (me_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
			cpu_parity_error(rp, me_flags, (caddr_t)NULL);
		}
#endif	/* CPU_IMP_L1_CACHE_PARITY */
	}
}
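
/*
 * A note on the flow above: the "first error" fields of ch_err_tl1_data
 * describe the event that claimed the structure, while CH_ERR_ME_FLAGS
 * carries any additional events that arrived while the structure was
 * busy.  First errors are logged with the saved TPC and logout copy;
 * the multiple-event cases fall back to whatever is still live in the
 * AFSR/AFAR, or to a NULL ("Unknown") TPC for I$/D$ parity, as the
 * in-line comments describe.
 */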

/*
 * Called from Fast ECC TL>0 handler in case of fatal error.
 * cpu_tl1_error should always find an associated ch_err_tl1_data structure,
 * but if we don't, we'll panic with something reasonable.
 */
/*ARGSUSED*/
void
cpu_tl1_err_panic(struct regs *rp, ulong_t flags)
{
	cpu_tl1_error(rp, 1);
	/*
	 * Should never return, but just in case.
	 */
	fm_panic("Unsurvivable ECC Error at TL>0");
}

/*
 * The ce_err/ce_err_tl1 handlers transfer control here for CE, EMC, EDU:ST,
 * EDC, WDU, WDC, CPU, CPC, IVU, IVC events.
 * Disrupting errors controlled by NCEEN: EDU:ST, WDU, CPU, IVU
 * Disrupting errors controlled by CEEN:  CE, EMC, EDC, WDC, CPC, IVC
 *
 * Cheetah+ also handles (No additional processing required):
 *    DUE, DTO, DBERR	(NCEEN controlled)
 *    THCE		(CEEN and ET_ECC_en controlled)
 *    TUE		(ET_ECC_en controlled)
 *
 * Panther further adds:
 *    IMU, L3_EDU, L3_WDU, L3_CPU		(NCEEN controlled)
 *    IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE	(CEEN controlled)
 *    TUE_SH, TUE				(NCEEN and L2_tag_ECC_en)
 *    L3_TUE, L3_TUE_SH				(NCEEN and ET_ECC_en)
 *    THCE					(CEEN and L2_tag_ECC_en)
 *    L3_THCE					(CEEN and ET_ECC_en)
 *
 * Note that the p_clo_flags input is only valid in cases where the
 * cpu_private struct is not yet initialized (since that is the only
 * time that information cannot be obtained from the logout struct.)
 */
/*ARGSUSED*/
void
cpu_disrupting_error(struct regs *rp, ulong_t p_clo_flags)
{
	struct async_flt *aflt;
	ch_async_flt_t ch_flt;
	char pr_reason[MAX_REASON_STRING];
	ch_cpu_logout_t *clop;
	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
	ch_cpu_errors_t cpu_error_regs;

	bzero(&ch_flt, sizeof (ch_async_flt_t));
	/*
	 * Get the CPU log out info.  If we can't find our CPU private
	 * pointer, then we will have to make do without any detailed
	 * logout information.
	 */
	if (CPU_PRIVATE(CPU) == NULL) {
		clop = NULL;
		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
		get_cpu_error_state(&cpu_error_regs);
		set_cpu_error_state(&cpu_error_regs);
		t_afar = cpu_error_regs.afar;
		t_afsr = cpu_error_regs.afsr;
		t_afsr_ext = cpu_error_regs.afsr_ext;
#if defined(SERRANO)
		ch_flt.afar2 = cpu_error_regs.afar2;
#endif	/* SERRANO */
	} else {
		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
		t_afar = clop->clo_data.chd_afar;
		t_afsr = clop->clo_data.chd_afsr;
		t_afsr_ext = clop->clo_data.chd_afsr_ext;
#if defined(SERRANO)
		ch_flt.afar2 = clop->clo_data.chd_afar2;
#endif	/* SERRANO */
	}

	/*
	 * In order to simplify code, we maintain this afsr_errs
	 * variable which holds the aggregate of AFSR and AFSR_EXT
	 * sticky bits.
	 */
	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
	    (t_afsr & C_AFSR_ALL_ERRS);

	pr_reason[0] = '\0';
	/* Setup the async fault structure */
	aflt = (struct async_flt *)&ch_flt;
	ch_flt.afsr_ext = t_afsr_ext;
	ch_flt.afsr_errs = t_afsr_errs;
	aflt->flt_stat = t_afsr;
	aflt->flt_addr = t_afar;
	aflt->flt_pc = (caddr_t)rp->r_pc;
	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0;
	aflt->flt_tl = 0;
	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);

	/*
	 * If this trap is a result of one of the errors not masked
	 * by cpu_ce_not_deferred, we don't reenable CEEN.  Instead
	 * indicate that a timeout is to be set later.
	 */
	if (!(t_afsr_errs & (cpu_ce_not_deferred | cpu_ce_not_deferred_ext)) &&
	    !aflt->flt_panic)
		ch_flt.flt_trapped_ce = CE_CEEN_DEFER | CE_CEEN_TRAPPED;
	else
		ch_flt.flt_trapped_ce = CE_CEEN_NODEFER | CE_CEEN_TRAPPED;

	/*
	 * log the CE and clean up
	 */
	cpu_log_and_clear_ce(&ch_flt);

	/*
	 * We re-enable CEEN (if required) and check if any disrupting errors
	 * have happened.  We do this because if a disrupting error had
	 * occurred with CEEN off, the trap will not be taken when CEEN is
	 * re-enabled.  Note that CEEN works differently on Cheetah than on
	 * Spitfire.
	 * Also, we enable CEEN *before* checking the AFSR to avoid the small
	 * window of an error happening between checking the AFSR and enabling
	 * CEEN.
	 */
	if (ch_flt.flt_trapped_ce & CE_CEEN_NODEFER)
		set_error_enable(get_error_enable() | EN_REG_CEEN);
	if (clear_errors(&ch_flt)) {
		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
		    NULL);
	}

	/*
	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
	 * be logged as part of the panic flow.
	 */
	if (aflt->flt_panic)
		fm_panic("%sError(s)", pr_reason);
}
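
/*
 * The CE_CEEN_DEFER flag set above is consumed downstream: when the trap
 * was caused only by errors masked in cpu_ce_not_deferred, CEEN is left
 * off here and the drain-side code (see cpu_async_log_err() below, which
 * schedules cpu_delayed_check_ce_errors() via timeout(9F)) turns it back
 * on after cpu_ceen_delay_secs.  The apparent intent is to throttle
 * back-to-back correctable-error traps from a sick component.
 */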

/*
 * The async_err handler transfers control here for UE, EMU, EDU:BLD,
 * L3_EDU:BLD, TO, and BERR events.
 * Deferred errors controlled by NCEEN: UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
 *
 * Cheetah+: No additional errors handled.
 *
 * Note that the p_clo_flags input is only valid in cases where the
 * cpu_private struct is not yet initialized (since that is the only
 * time that information cannot be obtained from the logout struct.)
 */
/*ARGSUSED*/
void
cpu_deferred_error(struct regs *rp, ulong_t p_clo_flags)
{
	ushort_t ttype, tl;
	ch_async_flt_t ch_flt;
	struct async_flt *aflt;
	int trampolined = 0;
	char pr_reason[MAX_REASON_STRING];
	ch_cpu_logout_t *clop;
	uint64_t ceen, clo_flags;
	uint64_t log_afsr;
	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
	ch_cpu_errors_t cpu_error_regs;
	int expected = DDI_FM_ERR_UNEXPECTED;
	ddi_acc_hdl_t *hp;

	/*
	 * We need to look at p_flag to determine if the thread detected an
	 * error while dumping core.  We can't grab p_lock here, but it's ok
	 * because we just need a consistent snapshot and we know that everyone
	 * else will store a consistent set of bits while holding p_lock.  We
	 * don't have to worry about a race because SDOCORE is set once prior
	 * to doing i/o from the process's address space and is never cleared.
	 */
	uint_t pflag = ttoproc(curthread)->p_flag;

	bzero(&ch_flt, sizeof (ch_async_flt_t));
	/*
	 * Get the CPU log out info.  If we can't find our CPU private
	 * pointer then we will have to make do without any detailed
	 * logout information.
	 */
	if (CPU_PRIVATE(CPU) == NULL) {
		clop = NULL;
		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
		get_cpu_error_state(&cpu_error_regs);
		set_cpu_error_state(&cpu_error_regs);
		t_afar = cpu_error_regs.afar;
		t_afsr = cpu_error_regs.afsr;
		t_afsr_ext = cpu_error_regs.afsr_ext;
#if defined(SERRANO)
		ch_flt.afar2 = cpu_error_regs.afar2;
#endif	/* SERRANO */
		clo_flags = p_clo_flags;
	} else {
		clop = CPU_PRIVATE_PTR(CPU, chpr_async_logout);
		t_afar = clop->clo_data.chd_afar;
		t_afsr = clop->clo_data.chd_afsr;
		t_afsr_ext = clop->clo_data.chd_afsr_ext;
#if defined(SERRANO)
		ch_flt.afar2 = clop->clo_data.chd_afar2;
#endif	/* SERRANO */
		clo_flags = clop->clo_flags;
	}

	/*
	 * In order to simplify code, we maintain this afsr_errs
	 * variable which holds the aggregate of AFSR and AFSR_EXT
	 * sticky bits.
	 */
	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
	    (t_afsr & C_AFSR_ALL_ERRS);
	pr_reason[0] = '\0';

	/*
	 * Grab information encoded into our clo_flags field.
	 */
	ceen = clo_flags & EN_REG_CEEN;
	tl = (clo_flags & CLO_FLAGS_TL_MASK) >> CLO_FLAGS_TL_SHIFT;
	ttype = (clo_flags & CLO_FLAGS_TT_MASK) >> CLO_FLAGS_TT_SHIFT;

	/*
	 * handle the specific error
	 */
	aflt = (struct async_flt *)&ch_flt;
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_bus_id = getprocessorid();
	aflt->flt_inst = CPU->cpu_id;
	ch_flt.afsr_ext = t_afsr_ext;
	ch_flt.afsr_errs = t_afsr_errs;
	aflt->flt_stat = t_afsr;
	aflt->flt_addr = t_afar;
	aflt->flt_pc = (caddr_t)rp->r_pc;
	aflt->flt_prot = AFLT_PROT_NONE;
	aflt->flt_class = CPU_FAULT;
	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0;
	aflt->flt_tl = (uchar_t)tl;
	aflt->flt_panic = ((tl != 0) || (aft_testfatal != 0) ||
	    C_AFSR_PANIC(t_afsr_errs));
	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
	aflt->flt_status = ((ttype == T_DATA_ERROR) ? ECC_D_TRAP : ECC_I_TRAP);

	/*
	 * If the trap occurred in privileged mode at TL=0, we need to check
	 * to see if we were executing in the kernel under on_trap() or
	 * t_lofault protection.
	 * If so, modify the saved registers so that we return from the trap
	 * to the appropriate trampoline routine.
	 */
	if (aflt->flt_priv && tl == 0) {
		if (curthread->t_ontrap != NULL) {
			on_trap_data_t *otp = curthread->t_ontrap;

			if (otp->ot_prot & OT_DATA_EC) {
				aflt->flt_prot = AFLT_PROT_EC;
				otp->ot_trap |= OT_DATA_EC;
				rp->r_pc = otp->ot_trampoline;
				rp->r_npc = rp->r_pc + 4;
				trampolined = 1;
			}

			if ((t_afsr & (C_AFSR_TO | C_AFSR_BERR)) &&
			    (otp->ot_prot & OT_DATA_ACCESS)) {
				aflt->flt_prot = AFLT_PROT_ACCESS;
				otp->ot_trap |= OT_DATA_ACCESS;
				rp->r_pc = otp->ot_trampoline;
				rp->r_npc = rp->r_pc + 4;
				trampolined = 1;
				/*
				 * for peeks and caut_gets errors are expected
				 */
				hp = (ddi_acc_hdl_t *)otp->ot_handle;
				if (!hp)
					expected = DDI_FM_ERR_PEEK;
				else if (hp->ah_acc.devacc_attr_access ==
				    DDI_CAUTIOUS_ACC)
					expected = DDI_FM_ERR_EXPECTED;
			}

		} else if (curthread->t_lofault) {
			aflt->flt_prot = AFLT_PROT_COPY;
			rp->r_g1 = EFAULT;
			rp->r_pc = curthread->t_lofault;
			rp->r_npc = rp->r_pc + 4;
			trampolined = 1;
		}
	}
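
	/*
	 * The net effect of the block above, as a rough sketch:
	 *
	 *	if (privileged && tl == 0) {
	 *		if (an on_trap(OT_DATA_EC/OT_DATA_ACCESS) is active)
	 *			rewrite r_pc/r_npc to ot_trampoline;
	 *		else if (t_lofault is set)
	 *			set %g1 = EFAULT, resume at t_lofault;
	 *	}
	 *
	 * Any of these redirections sets "trampolined", which suppresses
	 * the panic decision below and the TO/BERR event enqueue; for peeks
	 * and DDI_CAUTIOUS_ACC accesses the error is reported as expected.
	 */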

	/*
	 * If we're in user mode or we're doing a protected copy, we either
	 * want the ASTON code below to send a signal to the user process
	 * or we want to panic if aft_panic is set.
	 *
	 * If we're in privileged mode and we're not doing a copy, then we
	 * need to check if we've trampolined.  If we haven't trampolined,
	 * we should panic.
	 */
	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
		if (t_afsr_errs &
		    ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) &
		    ~(C_AFSR_BERR | C_AFSR_TO)))
			aflt->flt_panic |= aft_panic;
	} else if (!trampolined) {
		aflt->flt_panic = 1;
	}

	/*
	 * If we've trampolined due to a privileged TO or BERR, or if an
	 * unprivileged TO or BERR occurred, we don't want to enqueue an
	 * event for that TO or BERR.  Queue all other events (if any) besides
	 * the TO/BERR.  Since we may not be enqueueing any events, we need to
	 * ignore the number of events queued.  If we haven't trampolined due
	 * to a TO or BERR, just enqueue events normally.
	 */
	log_afsr = t_afsr_errs;
	if (trampolined) {
		log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
	} else if (!aflt->flt_priv) {
		/*
		 * User mode, suppress messages if
		 * cpu_berr_to_verbose is not set.
		 */
		if (!cpu_berr_to_verbose)
			log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
	}

	/*
	 * Log any errors that occurred
	 */
	if (((log_afsr &
	    ((C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS) & ~C_AFSR_ME)) &&
	    cpu_queue_events(&ch_flt, pr_reason, log_afsr, clop) == 0) ||
	    (t_afsr_errs & (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) == 0) {
		ch_flt.flt_type = CPU_INV_AFSR;
		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
		    aflt->flt_panic);
	}

	/*
	 * Zero out + invalidate CPU logout.
	 */
	if (clop) {
		bzero(clop, sizeof (ch_cpu_logout_t));
		clop->clo_data.chd_afar = LOGOUT_INVALID;
	}

#if defined(JALAPENO) || defined(SERRANO)
	/*
	 * UE/RUE/BERR/TO: Call our bus nexus friends to check for
	 * IO errors that may have resulted in this trap.
	 */
	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE|C_AFSR_TO|C_AFSR_BERR)) {
		cpu_run_bus_error_handlers(aflt, expected);
	}

	/*
	 * UE/RUE: If UE or RUE is in memory, we need to flush the bad
	 * line from the Ecache.  We also need to query the bus nexus for
	 * fatal errors.  Attempts to do diagnostic read on caches may
	 * introduce more errors (especially when the module is bad).
	 */
	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE)) {
		/*
		 * Ask our bus nexus friends if they have any fatal errors. If
		 * so, they will log appropriate error messages.
		 */
		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
			aflt->flt_panic = 1;

		/*
		 * We got a UE or RUE and are panicking, save the fault PA in
		 * a known location so that the platform specific panic code
		 * can check for copyback errors.
		 */
		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
			panic_aflt = *aflt;
		}
	}

	/*
	 * Flush Ecache line or entire Ecache
	 */
	if (t_afsr & (C_AFSR_UE | C_AFSR_RUE | C_AFSR_EDU | C_AFSR_BERR))
		cpu_error_ecache_flush(&ch_flt);
#else /* JALAPENO || SERRANO */
	/*
	 * UE/BERR/TO: Call our bus nexus friends to check for
	 * IO errors that may have resulted in this trap.
	 */
	if (t_afsr & (C_AFSR_UE|C_AFSR_TO|C_AFSR_BERR)) {
		cpu_run_bus_error_handlers(aflt, expected);
	}

	/*
	 * UE: If the UE is in memory, we need to flush the bad
	 * line from the Ecache.  We also need to query the bus nexus for
	 * fatal errors.  Attempts to do diagnostic read on caches may
	 * introduce more errors (especially when the module is bad).
	 */
	if (t_afsr & C_AFSR_UE) {
		/*
		 * Ask our legacy bus nexus friends if they have any fatal
		 * errors.  If so, they will log appropriate error messages.
		 */
		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
			aflt->flt_panic = 1;

		/*
		 * We got a UE and are panicking, save the fault PA in a known
		 * location so that the platform specific panic code can check
		 * for copyback errors.
		 */
		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
			panic_aflt = *aflt;
		}
	}

	/*
	 * Flush Ecache line or entire Ecache
	 */
	if (t_afsr_errs &
	    (C_AFSR_UE | C_AFSR_EDU | C_AFSR_BERR | C_AFSR_L3_EDU))
		cpu_error_ecache_flush(&ch_flt);
#endif /* JALAPENO || SERRANO */

	/*
	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
	 * or disrupting errors have happened.  We do this because if a
	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
	 * deferred or disrupting error happening between checking the AFSR and
	 * enabling NCEEN/CEEN.
	 *
	 * Note: CEEN is only re-enabled if it was on when the trap was taken.
	 */
	set_error_enable(get_error_enable() | (EN_REG_NCEEN | ceen));
	if (clear_errors(&ch_flt)) {
		/*
		 * Check for secondary errors, and avoid panicking if we
		 * have them
		 */
		if (cpu_check_secondary_errors(&ch_flt, t_afsr_errs,
		    t_afar) == 0) {
			aflt->flt_panic |= ((ch_flt.afsr_errs &
			    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) != 0);
		}
		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
		    NULL);
	}

	/*
	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
	 * be logged as part of the panic flow.
	 */
	if (aflt->flt_panic)
		fm_panic("%sError(s)", pr_reason);

	/*
	 * If we queued an error and we are going to return from the trap and
	 * the error was in user mode or inside of a copy routine, set AST flag
	 * so the queue will be drained before returning to user mode.  The
	 * AST processing will also act on our failure policy.
	 */
	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
		int pcb_flag = 0;

		if (t_afsr_errs &
		    ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) &
		    ~(C_AFSR_BERR | C_AFSR_TO)))
			pcb_flag |= ASYNC_HWERR;

		if (t_afsr & C_AFSR_BERR)
			pcb_flag |= ASYNC_BERR;

		if (t_afsr & C_AFSR_TO)
			pcb_flag |= ASYNC_BTO;

		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
		aston(curthread);
	}
}
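
/*
 * Order matters in the handler above: events are queued and the E$
 * flushed before NCEEN/CEEN are re-enabled, the AFSR is then re-checked
 * for errors that arrived while traps were off (discounting secondary
 * errors), and only after that do we either fm_panic() or post an AST
 * so that the [uc]e_drain() queues run before the thread returns to
 * user mode.
 */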

#if defined(CPU_IMP_L1_CACHE_PARITY)
/*
 * Handling of data and instruction parity errors (traps 0x71, 0x72).
 *
 * For Panther, P$ data parity errors during floating point load hits
 * are also detected (reported as TT 0x71) and handled by this trap
 * handler.
 *
 * AFSR/AFAR are not set for parity errors, only TPC (a virtual address)
 * is available.
 */
/*ARGSUSED*/
void
cpu_parity_error(struct regs *rp, uint_t flags, caddr_t tpc)
{
	ch_async_flt_t ch_flt;
	struct async_flt *aflt;
	uchar_t tl = ((flags & CH_ERR_TL) != 0);
	uchar_t iparity = ((flags & CH_ERR_IPE) != 0);
	uchar_t panic = ((flags & CH_ERR_PANIC) != 0);
	char *error_class;

	/*
	 * Log the error.
	 * For icache parity errors the fault address is the trap PC.
	 * For dcache/pcache parity errors the instruction would have to
	 * be decoded to determine the address and that isn't possible
	 * at high PIL.
	 */
	bzero(&ch_flt, sizeof (ch_async_flt_t));
	aflt = (struct async_flt *)&ch_flt;
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_bus_id = getprocessorid();
	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_pc = tpc;
	aflt->flt_addr = iparity ? (uint64_t)tpc : AFLT_INV_ADDR;
	aflt->flt_prot = AFLT_PROT_NONE;
	aflt->flt_class = CPU_FAULT;
	aflt->flt_priv = (tl || (rp->r_tstate & TSTATE_PRIV)) ? 1 : 0;
	aflt->flt_tl = tl;
	aflt->flt_panic = panic;
	aflt->flt_status = iparity ? ECC_IP_TRAP : ECC_DP_TRAP;
	ch_flt.flt_type = iparity ? CPU_IC_PARITY : CPU_DC_PARITY;

	if (iparity) {
		cpu_icache_parity_info(&ch_flt);
		if (ch_flt.parity_data.ipe.cpl_off != -1)
			error_class = FM_EREPORT_CPU_USIII_IDSPE;
		else if (ch_flt.parity_data.ipe.cpl_way != -1)
			error_class = FM_EREPORT_CPU_USIII_ITSPE;
		else
			error_class = FM_EREPORT_CPU_USIII_IPE;
		aflt->flt_payload = FM_EREPORT_PAYLOAD_ICACHE_PE;
	} else {
		cpu_dcache_parity_info(&ch_flt);
		if (ch_flt.parity_data.dpe.cpl_off != -1)
			error_class = FM_EREPORT_CPU_USIII_DDSPE;
		else if (ch_flt.parity_data.dpe.cpl_way != -1)
			error_class = FM_EREPORT_CPU_USIII_DTSPE;
		else
			error_class = FM_EREPORT_CPU_USIII_DPE;
		aflt->flt_payload = FM_EREPORT_PAYLOAD_DCACHE_PE;
		/*
		 * For Panther we also need to check the P$ for parity errors.
		 */
		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
			cpu_pcache_parity_info(&ch_flt);
			if (ch_flt.parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
				error_class = FM_EREPORT_CPU_USIII_PDSPE;
				aflt->flt_payload =
				    FM_EREPORT_PAYLOAD_PCACHE_PE;
			}
		}
	}

	cpu_errorq_dispatch(error_class, (void *)&ch_flt,
	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);

	if (iparity) {
		/*
		 * Invalidate entire I$.
		 * This is required due to the use of diagnostic ASI
		 * accesses that may result in a loss of I$ coherency.
		 */
		if (cache_boot_state & DCU_IC) {
			flush_icache();
		}
		/*
		 * According to section P.3.1 of the Panther PRM, we
		 * need to do a little more for recovery on those
		 * CPUs after encountering an I$ parity error.
		 */
		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
			flush_ipb();
			correct_dcache_parity(dcache_size,
			    dcache_linesize);
			flush_pcache();
		}
	} else {
		/*
		 * Since the valid bit is ignored when checking parity the
		 * D$ data and tag must also be corrected.  Set D$ data bits
		 * to zero and set utag to 0, 1, 2, 3.
		 */
		correct_dcache_parity(dcache_size, dcache_linesize);

		/*
		 * According to section P.3.3 of the Panther PRM, we
		 * need to do a little more for recovery on those
		 * CPUs after encountering a D$ or P$ parity error.
		 *
		 * As far as clearing P$ parity errors, it is enough to
		 * simply invalidate all entries in the P$ since P$ parity
		 * error traps are only generated for floating point load
		 * hits.
		 */
		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
			flush_icache();
			flush_ipb();
			flush_pcache();
		}
	}
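
	/*
	 * Summary of the recovery actions just performed (the Panther
	 * steps follow PRM sections P.3.1 and P.3.3):
	 *
	 *	trap	all chips			Panther, additionally
	 *	I$	flush_icache() if enabled	flush_ipb(),
	 *						correct_dcache_parity(),
	 *						flush_pcache()
	 *	D$/P$	correct_dcache_parity()		flush_icache(),
	 *						flush_ipb(),
	 *						flush_pcache()
	 */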

	/*
	 * Invalidate entire D$ if it was enabled.
	 * This is done to avoid stale data in the D$ which might
	 * occur with the D$ disabled and the trap handler doing
	 * stores affecting lines already in the D$.
	 */
	if (cache_boot_state & DCU_DC) {
		flush_dcache();
	}

	/*
	 * Restore caches to their bootup state.
	 */
	set_dcu(get_dcu() | cache_boot_state);

	/*
	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
	 * be logged as part of the panic flow.
	 */
	if (aflt->flt_panic)
		fm_panic("%sError(s)", iparity ? "IPE " : "DPE ");

	/*
	 * If this error occurred at TL>0 then flush the E$ here to reduce
	 * the chance of getting an unrecoverable Fast ECC error.  This
	 * flush will evict the part of the parity trap handler that is run
	 * at TL>1.
	 */
	if (tl) {
		cpu_flush_ecache();
	}
}

/*
 * On an I$ parity error, mark the appropriate entries in the ch_async_flt_t
 * to indicate which portions of the captured data should be in the ereport.
 */
void
cpu_async_log_ic_parity_err(ch_async_flt_t *ch_flt)
{
	int way = ch_flt->parity_data.ipe.cpl_way;
	int offset = ch_flt->parity_data.ipe.cpl_off;
	int tag_index;
	struct async_flt *aflt = (struct async_flt *)ch_flt;

	if ((offset != -1) || (way != -1)) {
		/*
		 * Parity error in I$ tag or data
		 */
		tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
			    PN_ICIDX_TO_WAY(tag_index);
		else
			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
			    CH_ICIDX_TO_WAY(tag_index);
		ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
		    IC_LOGFLAG_MAGIC;
	} else {
		/*
		 * Parity error was not identified.
		 * Log tags and data for all ways.
		 */
		for (way = 0; way < CH_ICACHE_NWAY; way++) {
			tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
			if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
				    PN_ICIDX_TO_WAY(tag_index);
			else
				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
				    CH_ICIDX_TO_WAY(tag_index);
			ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
			    IC_LOGFLAG_MAGIC;
		}
	}
}

/*
 * On a D$ parity error, mark the appropriate entries in the ch_async_flt_t
 * to indicate which portions of the captured data should be in the ereport.
22690Sstevel@tonic-gate */ 22700Sstevel@tonic-gate void 22710Sstevel@tonic-gate cpu_async_log_dc_parity_err(ch_async_flt_t *ch_flt) 22720Sstevel@tonic-gate { 22730Sstevel@tonic-gate int way = ch_flt->parity_data.dpe.cpl_way; 22740Sstevel@tonic-gate int offset = ch_flt->parity_data.dpe.cpl_off; 22750Sstevel@tonic-gate int tag_index; 22760Sstevel@tonic-gate 22770Sstevel@tonic-gate if (offset != -1) { 22780Sstevel@tonic-gate /* 22790Sstevel@tonic-gate * Parity error in D$ or P$ data array. 22800Sstevel@tonic-gate * 22810Sstevel@tonic-gate * First check to see whether the parity error is in D$ or P$ 22820Sstevel@tonic-gate * since P$ data parity errors are reported in Panther using 22830Sstevel@tonic-gate * the same trap. 22840Sstevel@tonic-gate */ 22850Sstevel@tonic-gate if (ch_flt->parity_data.dpe.cpl_cache == CPU_PC_PARITY) { 22860Sstevel@tonic-gate tag_index = ch_flt->parity_data.dpe.cpl_pc[way].pc_idx; 22870Sstevel@tonic-gate ch_flt->parity_data.dpe.cpl_pc[way].pc_way = 22880Sstevel@tonic-gate CH_PCIDX_TO_WAY(tag_index); 22890Sstevel@tonic-gate ch_flt->parity_data.dpe.cpl_pc[way].pc_logflag = 22900Sstevel@tonic-gate PC_LOGFLAG_MAGIC; 22910Sstevel@tonic-gate } else { 22920Sstevel@tonic-gate tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx; 22930Sstevel@tonic-gate ch_flt->parity_data.dpe.cpl_dc[way].dc_way = 22940Sstevel@tonic-gate CH_DCIDX_TO_WAY(tag_index); 22950Sstevel@tonic-gate ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag = 22960Sstevel@tonic-gate DC_LOGFLAG_MAGIC; 22970Sstevel@tonic-gate } 22980Sstevel@tonic-gate } else if (way != -1) { 22990Sstevel@tonic-gate /* 23000Sstevel@tonic-gate * Parity error in D$ tag. 23010Sstevel@tonic-gate */ 23020Sstevel@tonic-gate tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx; 23030Sstevel@tonic-gate ch_flt->parity_data.dpe.cpl_dc[way].dc_way = 23040Sstevel@tonic-gate CH_DCIDX_TO_WAY(tag_index); 23050Sstevel@tonic-gate ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag = 23060Sstevel@tonic-gate DC_LOGFLAG_MAGIC; 23070Sstevel@tonic-gate } 23080Sstevel@tonic-gate } 23090Sstevel@tonic-gate #endif /* CPU_IMP_L1_CACHE_PARITY */ 23100Sstevel@tonic-gate 23110Sstevel@tonic-gate /* 23120Sstevel@tonic-gate * The cpu_async_log_err() function is called via the [uc]e_drain() function to 23130Sstevel@tonic-gate * post-process CPU events that are dequeued. As such, it can be invoked 23140Sstevel@tonic-gate * from softint context, from AST processing in the trap() flow, or from the 23150Sstevel@tonic-gate * panic flow. We decode the CPU-specific data, and take appropriate actions. 23160Sstevel@tonic-gate * Historically this entry point was used to log the actual cmn_err(9F) text; 23170Sstevel@tonic-gate * now with FMA it is used to prepare 'flt' to be converted into an ereport. 23180Sstevel@tonic-gate * With FMA this function now also returns a flag which indicates to the 23190Sstevel@tonic-gate * caller whether the ereport should be posted (1) or suppressed (0). 
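 *
 * A minimal sketch of the caller contract (this mirrors cpu_ce_log_err()
 * and cpu_ue_log_err() later in this file; it is not new code):
 *
 *	cpu_ereport_init(aflt);
 *	if (cpu_async_log_err(aflt, eqep))
 *		cpu_ereport_post(aflt);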
23200Sstevel@tonic-gate  */
23210Sstevel@tonic-gate static int
23220Sstevel@tonic-gate cpu_async_log_err(void *flt, errorq_elem_t *eqep)
23230Sstevel@tonic-gate {
23240Sstevel@tonic-gate 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt;
23250Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)flt;
2326917Selowe 	uint64_t errors;
23272895Svb70745 	extern void memscrub_induced_error(void);
23280Sstevel@tonic-gate 
23290Sstevel@tonic-gate 	switch (ch_flt->flt_type) {
23300Sstevel@tonic-gate 	case CPU_INV_AFSR:
23310Sstevel@tonic-gate 		/*
23320Sstevel@tonic-gate 		 * If it is a disrupting trap and the AFSR is zero, then
23330Sstevel@tonic-gate 		 * the event has probably already been noted. Do not post
23340Sstevel@tonic-gate 		 * an ereport.
23350Sstevel@tonic-gate 		 */
23360Sstevel@tonic-gate 		if ((aflt->flt_status & ECC_C_TRAP) &&
23370Sstevel@tonic-gate 		    (!(aflt->flt_stat & C_AFSR_MASK)))
23380Sstevel@tonic-gate 			return (0);
23390Sstevel@tonic-gate 		else
23400Sstevel@tonic-gate 			return (1);
23410Sstevel@tonic-gate 	case CPU_TO:
23420Sstevel@tonic-gate 	case CPU_BERR:
23430Sstevel@tonic-gate 	case CPU_FATAL:
23440Sstevel@tonic-gate 	case CPU_FPUERR:
23450Sstevel@tonic-gate 		return (1);
23460Sstevel@tonic-gate 
23470Sstevel@tonic-gate 	case CPU_UE_ECACHE_RETIRE:
23480Sstevel@tonic-gate 		cpu_log_err(aflt);
23490Sstevel@tonic-gate 		cpu_page_retire(ch_flt);
23500Sstevel@tonic-gate 		return (1);
23510Sstevel@tonic-gate 
23520Sstevel@tonic-gate 	/*
23530Sstevel@tonic-gate 	 * Cases where we may want to suppress logging or perform
23540Sstevel@tonic-gate 	 * extended diagnostics.
23550Sstevel@tonic-gate 	 */
23560Sstevel@tonic-gate 	case CPU_CE:
23570Sstevel@tonic-gate 	case CPU_EMC:
23580Sstevel@tonic-gate 		/*
23590Sstevel@tonic-gate 		 * We want to skip logging and further classification
23600Sstevel@tonic-gate 		 * only if ALL the following conditions are true:
23610Sstevel@tonic-gate 		 *
23620Sstevel@tonic-gate 		 * 1. There is only one error
23630Sstevel@tonic-gate 		 * 2. That error is a correctable memory error
23640Sstevel@tonic-gate 		 * 3. The error is caused by the memory scrubber (in
23650Sstevel@tonic-gate 		 *    which case the error will have occurred under
23660Sstevel@tonic-gate 		 *    on_trap protection)
23670Sstevel@tonic-gate 		 * 4. The error is on a retired page
23680Sstevel@tonic-gate 		 *
23690Sstevel@tonic-gate 		 * Note: AFLT_PROT_EC is used in places other than the memory
23700Sstevel@tonic-gate 		 * scrubber. However, none of those errors should occur
23710Sstevel@tonic-gate 		 * on a retired page.
23720Sstevel@tonic-gate 		 */
23730Sstevel@tonic-gate 		if ((ch_flt->afsr_errs &
23740Sstevel@tonic-gate 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_CE &&
23750Sstevel@tonic-gate 		    aflt->flt_prot == AFLT_PROT_EC) {
23760Sstevel@tonic-gate 
2377917Selowe 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
23785219Skm84432 				if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
23790Sstevel@tonic-gate 
23800Sstevel@tonic-gate 					/*
23810Sstevel@tonic-gate 					 * Since we're skipping logging, we'll need
23820Sstevel@tonic-gate 					 * to schedule the re-enabling of CEEN
23830Sstevel@tonic-gate 					 */
23840Sstevel@tonic-gate 					(void) timeout(cpu_delayed_check_ce_errors,
2385946Smathue 					    (void *)(uintptr_t)aflt->flt_inst,
2386946Smathue 					    drv_usectohz((clock_t)cpu_ceen_delay_secs
23875219Skm84432 					    * MICROSEC));
23885219Skm84432 				}
23895219Skm84432 
23902895Svb70745 				/*
23912895Svb70745 				 * Inform memscrubber - scrubbing induced
23922895Svb70745 				 * CE on a retired page.
23932895Svb70745 				 */
23942895Svb70745 				memscrub_induced_error();
23952895Svb70745 				return (0);
23960Sstevel@tonic-gate 			}
23970Sstevel@tonic-gate 		}
23980Sstevel@tonic-gate 
23990Sstevel@tonic-gate 		/*
24000Sstevel@tonic-gate 		 * Perform/schedule further classification actions, but
24010Sstevel@tonic-gate 		 * only if the page is healthy (we don't want bad
24020Sstevel@tonic-gate 		 * pages inducing too much diagnostic activity). If we could
24030Sstevel@tonic-gate 		 * not find a page pointer then we also skip this. If
24040Sstevel@tonic-gate 		 * ce_scrub_xdiag_recirc returns nonzero then it has chosen
24050Sstevel@tonic-gate 		 * to copy and recirculate the event (for further diagnostics)
24060Sstevel@tonic-gate 		 * and we should not proceed to log it here.
24070Sstevel@tonic-gate 		 *
24080Sstevel@tonic-gate 		 * This must be the last step here before the cpu_log_err()
24090Sstevel@tonic-gate 		 * below - if an event recirculates, cpu_ce_log_err() will not
24100Sstevel@tonic-gate 		 * call the current function again but will proceed directly to
24110Sstevel@tonic-gate 		 * cpu_ereport_post(), skipping the cpu_log_err() below.
24120Sstevel@tonic-gate 		 *
24130Sstevel@tonic-gate 		 * Note: Check cpu_impl_async_log_err if changing this
24140Sstevel@tonic-gate 		 */
2415917Selowe 		if (page_retire_check(aflt->flt_addr, &errors) == EINVAL) {
2416917Selowe 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2417917Selowe 			    CE_XDIAG_SKIP_NOPP);
2418917Selowe 		} else {
2419917Selowe 			if (errors != PR_OK) {
24200Sstevel@tonic-gate 				CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
24210Sstevel@tonic-gate 				    CE_XDIAG_SKIP_PAGEDET);
24220Sstevel@tonic-gate 			} else if (ce_scrub_xdiag_recirc(aflt, ce_queue, eqep,
24230Sstevel@tonic-gate 			    offsetof(ch_async_flt_t, cmn_asyncflt))) {
24240Sstevel@tonic-gate 				return (0);
24250Sstevel@tonic-gate 			}
24260Sstevel@tonic-gate 		}
24270Sstevel@tonic-gate 		/*FALLTHRU*/
24280Sstevel@tonic-gate 
24290Sstevel@tonic-gate 	/*
24300Sstevel@tonic-gate 	 * Cases where we just want to report the error and continue.
24310Sstevel@tonic-gate 	 */
24320Sstevel@tonic-gate 	case CPU_CE_ECACHE:
24330Sstevel@tonic-gate 	case CPU_UE_ECACHE:
24340Sstevel@tonic-gate 	case CPU_IV:
24350Sstevel@tonic-gate 	case CPU_ORPH:
24360Sstevel@tonic-gate 		cpu_log_err(aflt);
24370Sstevel@tonic-gate 		return (1);
24380Sstevel@tonic-gate 
24390Sstevel@tonic-gate 	/*
24400Sstevel@tonic-gate 	 * Cases where we want to fall through to handle panicking.
24410Sstevel@tonic-gate 	 */
24420Sstevel@tonic-gate 	case CPU_UE:
24430Sstevel@tonic-gate 		/*
24440Sstevel@tonic-gate 		 * We want to skip logging in the same conditions as the
24450Sstevel@tonic-gate 		 * CE case. In addition, we want to make sure we're not
24460Sstevel@tonic-gate 		 * panicking.
24470Sstevel@tonic-gate 		 */
24480Sstevel@tonic-gate 		if (!panicstr && (ch_flt->afsr_errs &
24490Sstevel@tonic-gate 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_UE &&
24500Sstevel@tonic-gate 		    aflt->flt_prot == AFLT_PROT_EC) {
2451917Selowe 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
24520Sstevel@tonic-gate 				/* Zero the address to clear the error */
24530Sstevel@tonic-gate 				softcall(ecc_page_zero, (void *)aflt->flt_addr);
24542895Svb70745 				/*
24552895Svb70745 				 * Inform memscrubber - scrubbing induced
24562895Svb70745 				 * UE on a retired page.
24572895Svb70745 */ 24582895Svb70745 memscrub_induced_error(); 24590Sstevel@tonic-gate return (0); 24600Sstevel@tonic-gate } 24610Sstevel@tonic-gate } 24620Sstevel@tonic-gate cpu_log_err(aflt); 24630Sstevel@tonic-gate break; 24640Sstevel@tonic-gate 24650Sstevel@tonic-gate default: 24660Sstevel@tonic-gate /* 24670Sstevel@tonic-gate * If the us3_common.c code doesn't know the flt_type, it may 24680Sstevel@tonic-gate * be an implementation-specific code. Call into the impldep 24690Sstevel@tonic-gate * backend to find out what to do: if it tells us to continue, 24700Sstevel@tonic-gate * break and handle as if falling through from a UE; if not, 24710Sstevel@tonic-gate * the impldep backend has handled the error and we're done. 24720Sstevel@tonic-gate */ 24730Sstevel@tonic-gate switch (cpu_impl_async_log_err(flt, eqep)) { 24740Sstevel@tonic-gate case CH_ASYNC_LOG_DONE: 24750Sstevel@tonic-gate return (1); 24760Sstevel@tonic-gate case CH_ASYNC_LOG_RECIRC: 24770Sstevel@tonic-gate return (0); 24780Sstevel@tonic-gate case CH_ASYNC_LOG_CONTINUE: 24790Sstevel@tonic-gate break; /* continue on to handle UE-like error */ 24800Sstevel@tonic-gate default: 24810Sstevel@tonic-gate cmn_err(CE_WARN, "discarding error 0x%p with " 24820Sstevel@tonic-gate "invalid fault type (0x%x)", 24830Sstevel@tonic-gate (void *)aflt, ch_flt->flt_type); 24840Sstevel@tonic-gate return (0); 24850Sstevel@tonic-gate } 24860Sstevel@tonic-gate } 24870Sstevel@tonic-gate 24880Sstevel@tonic-gate /* ... fall through from the UE case */ 24890Sstevel@tonic-gate 24900Sstevel@tonic-gate if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) { 24910Sstevel@tonic-gate if (!panicstr) { 24920Sstevel@tonic-gate cpu_page_retire(ch_flt); 24930Sstevel@tonic-gate } else { 24940Sstevel@tonic-gate /* 24950Sstevel@tonic-gate * Clear UEs on panic so that we don't 24960Sstevel@tonic-gate * get haunted by them during panic or 24970Sstevel@tonic-gate * after reboot 24980Sstevel@tonic-gate */ 24990Sstevel@tonic-gate cpu_clearphys(aflt); 25000Sstevel@tonic-gate (void) clear_errors(NULL); 25010Sstevel@tonic-gate } 25020Sstevel@tonic-gate } 25030Sstevel@tonic-gate 25040Sstevel@tonic-gate return (1); 25050Sstevel@tonic-gate } 25060Sstevel@tonic-gate 25070Sstevel@tonic-gate /* 25080Sstevel@tonic-gate * Retire the bad page that may contain the flushed error. 25090Sstevel@tonic-gate */ 25100Sstevel@tonic-gate void 25110Sstevel@tonic-gate cpu_page_retire(ch_async_flt_t *ch_flt) 25120Sstevel@tonic-gate { 25130Sstevel@tonic-gate struct async_flt *aflt = (struct async_flt *)ch_flt; 2514917Selowe (void) page_retire(aflt->flt_addr, PR_UE); 25150Sstevel@tonic-gate } 25160Sstevel@tonic-gate 25170Sstevel@tonic-gate /* 25182381Smikechr * Return true if the error specified in the AFSR indicates 25192381Smikechr * an E$ data error (L2$ for Cheetah/Cheetah+/Jaguar, L3$ 25202381Smikechr * for Panther, none for Jalapeno/Serrano). 
25212381Smikechr */ 25222381Smikechr /* ARGSUSED */ 25232381Smikechr static int 25242381Smikechr cpu_error_is_ecache_data(int cpuid, uint64_t t_afsr) 25252381Smikechr { 25262381Smikechr #if defined(JALAPENO) || defined(SERRANO) 25272381Smikechr return (0); 25282381Smikechr #elif defined(CHEETAH_PLUS) 25292381Smikechr if (IS_PANTHER(cpunodes[cpuid].implementation)) 25302381Smikechr return ((t_afsr & C_AFSR_EXT_L3_DATA_ERRS) != 0); 25312381Smikechr return ((t_afsr & C_AFSR_EC_DATA_ERRS) != 0); 25322381Smikechr #else /* CHEETAH_PLUS */ 25332381Smikechr return ((t_afsr & C_AFSR_EC_DATA_ERRS) != 0); 25342381Smikechr #endif 25352381Smikechr } 25362381Smikechr 25372381Smikechr /* 25380Sstevel@tonic-gate * The cpu_log_err() function is called by cpu_async_log_err() to perform the 25390Sstevel@tonic-gate * generic event post-processing for correctable and uncorrectable memory, 25400Sstevel@tonic-gate * E$, and MTag errors. Historically this entry point was used to log bits of 25410Sstevel@tonic-gate * common cmn_err(9F) text; now with FMA it is used to prepare 'flt' to be 25420Sstevel@tonic-gate * converted into an ereport. In addition, it transmits the error to any 25430Sstevel@tonic-gate * platform-specific service-processor FRU logging routines, if available. 25440Sstevel@tonic-gate */ 25450Sstevel@tonic-gate void 25460Sstevel@tonic-gate cpu_log_err(struct async_flt *aflt) 25470Sstevel@tonic-gate { 25480Sstevel@tonic-gate char unum[UNUM_NAMLEN]; 25490Sstevel@tonic-gate int synd_status, synd_code, afar_status; 25500Sstevel@tonic-gate ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 25510Sstevel@tonic-gate 25522381Smikechr if (cpu_error_is_ecache_data(aflt->flt_inst, ch_flt->flt_bit)) 25532381Smikechr aflt->flt_status |= ECC_ECACHE; 25542381Smikechr else 25552381Smikechr aflt->flt_status &= ~ECC_ECACHE; 25560Sstevel@tonic-gate /* 25570Sstevel@tonic-gate * Determine syndrome status. 25580Sstevel@tonic-gate */ 25590Sstevel@tonic-gate synd_status = afsr_to_synd_status(aflt->flt_inst, 25600Sstevel@tonic-gate ch_flt->afsr_errs, ch_flt->flt_bit); 25610Sstevel@tonic-gate 25620Sstevel@tonic-gate /* 25630Sstevel@tonic-gate * Determine afar status. 25640Sstevel@tonic-gate */ 25650Sstevel@tonic-gate if (pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT)) 25660Sstevel@tonic-gate afar_status = afsr_to_afar_status(ch_flt->afsr_errs, 25675219Skm84432 ch_flt->flt_bit); 25680Sstevel@tonic-gate else 25690Sstevel@tonic-gate afar_status = AFLT_STAT_INVALID; 25700Sstevel@tonic-gate 25712436Smb91622 synd_code = synd_to_synd_code(synd_status, 25722436Smb91622 aflt->flt_synd, ch_flt->flt_bit); 25732436Smb91622 25740Sstevel@tonic-gate /* 25750Sstevel@tonic-gate * If afar status is not invalid do a unum lookup. 25760Sstevel@tonic-gate */ 25770Sstevel@tonic-gate if (afar_status != AFLT_STAT_INVALID) { 25782436Smb91622 (void) cpu_get_mem_unum_synd(synd_code, aflt, unum); 25790Sstevel@tonic-gate } else { 25800Sstevel@tonic-gate unum[0] = '\0'; 25810Sstevel@tonic-gate } 25820Sstevel@tonic-gate 25830Sstevel@tonic-gate /* 25840Sstevel@tonic-gate * Do not send the fruid message (plat_ecc_error_data_t) 25850Sstevel@tonic-gate * to the SC if it can handle the enhanced error information 25860Sstevel@tonic-gate * (plat_ecc_error2_data_t) or when the tunable 25870Sstevel@tonic-gate * ecc_log_fruid_enable is set to 0. 
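 *
 * Note the idiom used for both checks below: taking the address of a
 * routine that the platform module may or may not provide tests for its
 * presence, e.g.
 *
 *	if (&plat_log_fruid_error)
 *		plat_log_fruid_error(synd_code, aflt, unum, ch_flt->flt_bit);
 *
 * so platforms lacking the routine simply skip the call.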
25880Sstevel@tonic-gate  */
25890Sstevel@tonic-gate 
25900Sstevel@tonic-gate 	if (&plat_ecc_capability_sc_get &&
25910Sstevel@tonic-gate 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR_MESSAGE)) {
25920Sstevel@tonic-gate 		if (&plat_log_fruid_error)
25930Sstevel@tonic-gate 			plat_log_fruid_error(synd_code, aflt, unum,
25940Sstevel@tonic-gate 			    ch_flt->flt_bit);
25950Sstevel@tonic-gate 	}
25960Sstevel@tonic-gate 
25970Sstevel@tonic-gate 	if (aflt->flt_func != NULL)
25980Sstevel@tonic-gate 		aflt->flt_func(aflt, unum);
25990Sstevel@tonic-gate 
26000Sstevel@tonic-gate 	if (afar_status != AFLT_STAT_INVALID)
26010Sstevel@tonic-gate 		cpu_log_diag_info(ch_flt);
26020Sstevel@tonic-gate 
26030Sstevel@tonic-gate 	/*
26040Sstevel@tonic-gate 	 * If we have a CEEN error, we do not reenable CEEN until after
26050Sstevel@tonic-gate 	 * we exit the trap handler. Otherwise, another error may
26060Sstevel@tonic-gate 	 * occur causing the handler to be entered recursively.
26070Sstevel@tonic-gate 	 * We set a timeout to trigger in cpu_ceen_delay_secs seconds,
26080Sstevel@tonic-gate 	 * to try to ensure that the CPU makes progress in the face
26090Sstevel@tonic-gate 	 * of a CE storm.
26100Sstevel@tonic-gate 	 */
26110Sstevel@tonic-gate 	if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
26120Sstevel@tonic-gate 		(void) timeout(cpu_delayed_check_ce_errors,
2613946Smathue 		    (void *)(uintptr_t)aflt->flt_inst,
26140Sstevel@tonic-gate 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
26150Sstevel@tonic-gate 	}
26160Sstevel@tonic-gate }
26170Sstevel@tonic-gate 
26180Sstevel@tonic-gate /*
26190Sstevel@tonic-gate  * Invoked by error_init() early in startup and therefore before
26200Sstevel@tonic-gate  * startup_errorq() is called to drain any error Q -
26210Sstevel@tonic-gate  *
26220Sstevel@tonic-gate  * startup()
26230Sstevel@tonic-gate  *   startup_end()
26240Sstevel@tonic-gate  *     error_init()
26250Sstevel@tonic-gate  *       cpu_error_init()
26260Sstevel@tonic-gate  * errorq_init()
26270Sstevel@tonic-gate  *   errorq_drain()
26280Sstevel@tonic-gate  * start_other_cpus()
26290Sstevel@tonic-gate  *
26300Sstevel@tonic-gate  * The purpose of this routine is to create error-related taskqs. Taskqs
26310Sstevel@tonic-gate  * are used for this purpose because cpu_lock can't be grabbed from interrupt
26320Sstevel@tonic-gate  * context.
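 *
 * A hedged sketch of how the taskq created below is consumed: work is
 * later handed to it with taskq_dispatch(9F), which runs the function in
 * kernel thread context where cpu_lock may safely be taken. The function
 * and argument names here are illustrative only, not the exact caller:
 *
 *	(void) taskq_dispatch(ch_check_ce_tq, cpu_check_ce_work,
 *	    (void *)(uintptr_t)cpuid, TQ_SLEEP);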
26330Sstevel@tonic-gate  */
26340Sstevel@tonic-gate void
26350Sstevel@tonic-gate cpu_error_init(int items)
26360Sstevel@tonic-gate {
26370Sstevel@tonic-gate 	/*
26380Sstevel@tonic-gate 	 * Create taskq(s) to reenable CE
26390Sstevel@tonic-gate 	 */
26400Sstevel@tonic-gate 	ch_check_ce_tq = taskq_create("cheetah_check_ce", 1, minclsyspri,
26410Sstevel@tonic-gate 	    items, items, TASKQ_PREPOPULATE);
26420Sstevel@tonic-gate }
26430Sstevel@tonic-gate 
26440Sstevel@tonic-gate void
26450Sstevel@tonic-gate cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *eqep)
26460Sstevel@tonic-gate {
26470Sstevel@tonic-gate 	char unum[UNUM_NAMLEN];
26480Sstevel@tonic-gate 	int len;
26490Sstevel@tonic-gate 
26500Sstevel@tonic-gate 	switch (aflt->flt_class) {
26510Sstevel@tonic-gate 	case CPU_FAULT:
26520Sstevel@tonic-gate 		cpu_ereport_init(aflt);
26530Sstevel@tonic-gate 		if (cpu_async_log_err(aflt, eqep))
26540Sstevel@tonic-gate 			cpu_ereport_post(aflt);
26550Sstevel@tonic-gate 		break;
26560Sstevel@tonic-gate 
26570Sstevel@tonic-gate 	case BUS_FAULT:
26580Sstevel@tonic-gate 		if (aflt->flt_func != NULL) {
26590Sstevel@tonic-gate 			(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt,
26600Sstevel@tonic-gate 			    unum, UNUM_NAMLEN, &len);
26610Sstevel@tonic-gate 			aflt->flt_func(aflt, unum);
26620Sstevel@tonic-gate 		}
26630Sstevel@tonic-gate 		break;
26640Sstevel@tonic-gate 
26650Sstevel@tonic-gate 	case RECIRC_CPU_FAULT:
26660Sstevel@tonic-gate 		aflt->flt_class = CPU_FAULT;
26670Sstevel@tonic-gate 		cpu_log_err(aflt);
26680Sstevel@tonic-gate 		cpu_ereport_post(aflt);
26690Sstevel@tonic-gate 		break;
26700Sstevel@tonic-gate 
26710Sstevel@tonic-gate 	case RECIRC_BUS_FAULT:
26720Sstevel@tonic-gate 		ASSERT(aflt->flt_class != RECIRC_BUS_FAULT);
26730Sstevel@tonic-gate 		/*FALLTHRU*/
26740Sstevel@tonic-gate 	default:
26750Sstevel@tonic-gate 		cmn_err(CE_WARN, "discarding CE error 0x%p with invalid "
26760Sstevel@tonic-gate 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
26770Sstevel@tonic-gate 		return;
26780Sstevel@tonic-gate 	}
26790Sstevel@tonic-gate }
26800Sstevel@tonic-gate 
26810Sstevel@tonic-gate /*
26820Sstevel@tonic-gate  * Scrub and classify a CE. This function must not modify the
26830Sstevel@tonic-gate  * fault structure passed to it but instead should return the classification
26840Sstevel@tonic-gate  * information.
26850Sstevel@tonic-gate  */
26860Sstevel@tonic-gate 
26870Sstevel@tonic-gate static uchar_t
26880Sstevel@tonic-gate cpu_ce_scrub_mem_err_common(struct async_flt *ecc, boolean_t logout_tried)
26890Sstevel@tonic-gate {
26900Sstevel@tonic-gate 	uchar_t disp = CE_XDIAG_EXTALG;
26910Sstevel@tonic-gate 	on_trap_data_t otd;
26920Sstevel@tonic-gate 	uint64_t orig_err;
26930Sstevel@tonic-gate 	ch_cpu_logout_t *clop;
26940Sstevel@tonic-gate 
26950Sstevel@tonic-gate 	/*
26960Sstevel@tonic-gate 	 * Clear CEEN. CPU CE TL > 0 trap handling will already have done
26970Sstevel@tonic-gate 	 * this, but our other callers have not. Disable preemption to
26980Sstevel@tonic-gate 	 * avoid CPU migration so that we restore CEEN on the correct
26990Sstevel@tonic-gate 	 * cpu later.
27000Sstevel@tonic-gate 	 *
27010Sstevel@tonic-gate 	 * CEEN is cleared so that further CEs that our instruction and
27020Sstevel@tonic-gate 	 * data footprint induce do not cause us to either creep down the
27030Sstevel@tonic-gate 	 * kernel stack to the point of overflow, or do so much CE
27040Sstevel@tonic-gate 	 * notification as to make little real forward progress.
27050Sstevel@tonic-gate 	 *
27060Sstevel@tonic-gate 	 * NCEEN must not be cleared. However it is possible that
27070Sstevel@tonic-gate 	 * our accesses to the flt_addr may provoke a bus error or timeout
27080Sstevel@tonic-gate 	 * if the offending address has just been unconfigured as part of
27090Sstevel@tonic-gate 	 * a DR action. So we must operate under on_trap protection.
27100Sstevel@tonic-gate 	 */
27110Sstevel@tonic-gate 	kpreempt_disable();
27120Sstevel@tonic-gate 	orig_err = get_error_enable();
27130Sstevel@tonic-gate 	if (orig_err & EN_REG_CEEN)
27145219Skm84432 		set_error_enable(orig_err & ~EN_REG_CEEN);
27150Sstevel@tonic-gate 
27160Sstevel@tonic-gate 	/*
27170Sstevel@tonic-gate 	 * Our classification algorithm includes the line state before
27180Sstevel@tonic-gate 	 * the scrub; we'd like this captured after the detection and
27190Sstevel@tonic-gate 	 * before the algorithm below - the earlier the better.
27200Sstevel@tonic-gate 	 *
27210Sstevel@tonic-gate 	 * If we've come from a cpu CE trap then this info already exists
27220Sstevel@tonic-gate 	 * in the cpu logout area.
27230Sstevel@tonic-gate 	 *
27240Sstevel@tonic-gate 	 * For a CE detected by memscrub for which there was no trap
27250Sstevel@tonic-gate 	 * (running with CEEN off) cpu_log_and_clear_ce has called
27260Sstevel@tonic-gate 	 * cpu_ce_delayed_ec_logout to capture some cache data, and
27270Sstevel@tonic-gate 	 * marked the fault structure as incomplete as a flag to later
27280Sstevel@tonic-gate 	 * logging code.
27290Sstevel@tonic-gate 	 *
27300Sstevel@tonic-gate 	 * If called directly from an IO detected CE there has been
27310Sstevel@tonic-gate 	 * no line data capture. In this case we logout to the cpu logout
27320Sstevel@tonic-gate 	 * area - that's appropriate since it's the cpu cache data we need
27330Sstevel@tonic-gate 	 * for classification. We thus borrow the cpu logout area for a
27340Sstevel@tonic-gate 	 * short time, and cpu_ce_delayed_ec_logout will mark it as busy
27350Sstevel@tonic-gate 	 * during this time (we will invalidate it again below).
27360Sstevel@tonic-gate 	 *
27370Sstevel@tonic-gate 	 * If called from the partner check xcall handler then this cpu
27380Sstevel@tonic-gate 	 * (the partner) has not necessarily experienced a CE at this
27390Sstevel@tonic-gate 	 * address. But we want to capture line state before its scrub
27400Sstevel@tonic-gate 	 * attempt since we use that in our classification.
27410Sstevel@tonic-gate 	 */
27420Sstevel@tonic-gate 	if (logout_tried == B_FALSE) {
27430Sstevel@tonic-gate 		if (!cpu_ce_delayed_ec_logout(ecc->flt_addr))
27440Sstevel@tonic-gate 			disp |= CE_XDIAG_NOLOGOUT;
27450Sstevel@tonic-gate 	}
27460Sstevel@tonic-gate 
27470Sstevel@tonic-gate 	/*
27480Sstevel@tonic-gate 	 * Scrub memory, then check AFSR for errors. The AFAR we scrub may
27490Sstevel@tonic-gate 	 * no longer be valid (if DR'd since the initial event) so we
27500Sstevel@tonic-gate 	 * perform this scrub under on_trap protection. If this access is
27510Sstevel@tonic-gate 	 * ok then further accesses below will also be ok - DR cannot
27520Sstevel@tonic-gate 	 * proceed while this thread is active (preemption is disabled);
27530Sstevel@tonic-gate 	 * to be safe we'll nonetheless use on_trap again below.
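	 *
	 * In sketch form, the protection idiom used twice below (otd is the
	 * on_trap_data_t declared above):
	 *
	 *	if (!on_trap(&otd, OT_DATA_ACCESS)) {
	 *		(protected access to the possibly-unconfigured address)
	 *	} else {
	 *		(trap taken: restore CEEN and bail out)
	 *	}
	 *	no_trap();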
27540Sstevel@tonic-gate */ 27550Sstevel@tonic-gate if (!on_trap(&otd, OT_DATA_ACCESS)) { 27560Sstevel@tonic-gate cpu_scrubphys(ecc); 27570Sstevel@tonic-gate } else { 27580Sstevel@tonic-gate no_trap(); 27590Sstevel@tonic-gate if (orig_err & EN_REG_CEEN) 27605219Skm84432 set_error_enable(orig_err); 27610Sstevel@tonic-gate kpreempt_enable(); 27620Sstevel@tonic-gate return (disp); 27630Sstevel@tonic-gate } 27640Sstevel@tonic-gate no_trap(); 27650Sstevel@tonic-gate 27660Sstevel@tonic-gate /* 27670Sstevel@tonic-gate * Did the casx read of the scrub log a CE that matches the AFAR? 27680Sstevel@tonic-gate * Note that it's quite possible that the read sourced the data from 27690Sstevel@tonic-gate * another cpu. 27700Sstevel@tonic-gate */ 27710Sstevel@tonic-gate if (clear_ecc(ecc)) 27720Sstevel@tonic-gate disp |= CE_XDIAG_CE1; 27730Sstevel@tonic-gate 27740Sstevel@tonic-gate /* 27750Sstevel@tonic-gate * Read the data again. This time the read is very likely to 27760Sstevel@tonic-gate * come from memory since the scrub induced a writeback to memory. 27770Sstevel@tonic-gate */ 27780Sstevel@tonic-gate if (!on_trap(&otd, OT_DATA_ACCESS)) { 27790Sstevel@tonic-gate (void) lddphys(P2ALIGN(ecc->flt_addr, 8)); 27800Sstevel@tonic-gate } else { 27810Sstevel@tonic-gate no_trap(); 27820Sstevel@tonic-gate if (orig_err & EN_REG_CEEN) 27835219Skm84432 set_error_enable(orig_err); 27840Sstevel@tonic-gate kpreempt_enable(); 27850Sstevel@tonic-gate return (disp); 27860Sstevel@tonic-gate } 27870Sstevel@tonic-gate no_trap(); 27880Sstevel@tonic-gate 27890Sstevel@tonic-gate /* Did that read induce a CE that matches the AFAR? */ 27900Sstevel@tonic-gate if (clear_ecc(ecc)) 27910Sstevel@tonic-gate disp |= CE_XDIAG_CE2; 27920Sstevel@tonic-gate 27930Sstevel@tonic-gate /* 27940Sstevel@tonic-gate * Look at the logout information and record whether we found the 27950Sstevel@tonic-gate * line in l2/l3 cache. For Panther we are interested in whether 27960Sstevel@tonic-gate * we found it in either cache (it won't reside in both but 27970Sstevel@tonic-gate * it is possible to read it that way given the moving target). 27980Sstevel@tonic-gate */ 27990Sstevel@tonic-gate clop = CPU_PRIVATE(CPU) ? CPU_PRIVATE_PTR(CPU, chpr_cecc_logout) : NULL; 28000Sstevel@tonic-gate if (!(disp & CE_XDIAG_NOLOGOUT) && clop && 28010Sstevel@tonic-gate clop->clo_data.chd_afar != LOGOUT_INVALID) { 28020Sstevel@tonic-gate int hit, level; 28030Sstevel@tonic-gate int state; 28040Sstevel@tonic-gate int totalsize; 28050Sstevel@tonic-gate ch_ec_data_t *ecp; 28060Sstevel@tonic-gate 28070Sstevel@tonic-gate /* 28080Sstevel@tonic-gate * If hit is nonzero then a match was found and hit will 28090Sstevel@tonic-gate * be one greater than the index which hit. For Panther we 28100Sstevel@tonic-gate * also need to pay attention to level to see which of l2$ or 28110Sstevel@tonic-gate * l3$ it hit in. 
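		 *
		 * That is, cpu_matching_ecache_line() returns 0 for no match
		 * and (index + 1) on a match, which is why the code below
		 * recovers the matching index with:
		 *
		 *	if (hit) {
		 *		--hit;		(hit is now the matching index)
		 *		...
		 *	}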
28120Sstevel@tonic-gate */ 28130Sstevel@tonic-gate hit = cpu_matching_ecache_line(ecc->flt_addr, &clop->clo_data, 28140Sstevel@tonic-gate 0, &level); 28150Sstevel@tonic-gate 28160Sstevel@tonic-gate if (hit) { 28170Sstevel@tonic-gate --hit; 28180Sstevel@tonic-gate disp |= CE_XDIAG_AFARMATCH; 28190Sstevel@tonic-gate 28200Sstevel@tonic-gate if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { 28210Sstevel@tonic-gate if (level == 2) 28220Sstevel@tonic-gate ecp = &clop->clo_data.chd_l2_data[hit]; 28230Sstevel@tonic-gate else 28240Sstevel@tonic-gate ecp = &clop->clo_data.chd_ec_data[hit]; 28250Sstevel@tonic-gate } else { 28260Sstevel@tonic-gate ASSERT(level == 2); 28270Sstevel@tonic-gate ecp = &clop->clo_data.chd_ec_data[hit]; 28280Sstevel@tonic-gate } 28290Sstevel@tonic-gate totalsize = cpunodes[CPU->cpu_id].ecache_size; 28300Sstevel@tonic-gate state = cpu_ectag_pa_to_subblk_state(totalsize, 28310Sstevel@tonic-gate ecc->flt_addr, ecp->ec_tag); 28320Sstevel@tonic-gate 28330Sstevel@tonic-gate /* 28340Sstevel@tonic-gate * Cheetah variants use different state encodings - 28350Sstevel@tonic-gate * the CH_ECSTATE_* defines vary depending on the 28360Sstevel@tonic-gate * module we're compiled for. Translate into our 28370Sstevel@tonic-gate * one true version. Conflate Owner-Shared state 28380Sstevel@tonic-gate * of SSM mode with Owner as victimisation of such 28390Sstevel@tonic-gate * lines may cause a writeback. 28400Sstevel@tonic-gate */ 28410Sstevel@tonic-gate switch (state) { 28420Sstevel@tonic-gate case CH_ECSTATE_MOD: 28430Sstevel@tonic-gate disp |= EC_STATE_M; 28440Sstevel@tonic-gate break; 28450Sstevel@tonic-gate 28460Sstevel@tonic-gate case CH_ECSTATE_OWN: 28470Sstevel@tonic-gate case CH_ECSTATE_OWS: 28480Sstevel@tonic-gate disp |= EC_STATE_O; 28490Sstevel@tonic-gate break; 28500Sstevel@tonic-gate 28510Sstevel@tonic-gate case CH_ECSTATE_EXL: 28520Sstevel@tonic-gate disp |= EC_STATE_E; 28530Sstevel@tonic-gate break; 28540Sstevel@tonic-gate 28550Sstevel@tonic-gate case CH_ECSTATE_SHR: 28560Sstevel@tonic-gate disp |= EC_STATE_S; 28570Sstevel@tonic-gate break; 28580Sstevel@tonic-gate 28590Sstevel@tonic-gate default: 28600Sstevel@tonic-gate disp |= EC_STATE_I; 28610Sstevel@tonic-gate break; 28620Sstevel@tonic-gate } 28630Sstevel@tonic-gate } 28640Sstevel@tonic-gate 28650Sstevel@tonic-gate /* 28660Sstevel@tonic-gate * If we initiated the delayed logout then we are responsible 28670Sstevel@tonic-gate * for invalidating the logout area. 28680Sstevel@tonic-gate */ 28690Sstevel@tonic-gate if (logout_tried == B_FALSE) { 28700Sstevel@tonic-gate bzero(clop, sizeof (ch_cpu_logout_t)); 28710Sstevel@tonic-gate clop->clo_data.chd_afar = LOGOUT_INVALID; 28720Sstevel@tonic-gate } 28730Sstevel@tonic-gate } 28740Sstevel@tonic-gate 28750Sstevel@tonic-gate /* 28760Sstevel@tonic-gate * Re-enable CEEN if we turned it off. 28770Sstevel@tonic-gate */ 28780Sstevel@tonic-gate if (orig_err & EN_REG_CEEN) 28795219Skm84432 set_error_enable(orig_err); 28800Sstevel@tonic-gate kpreempt_enable(); 28810Sstevel@tonic-gate 28820Sstevel@tonic-gate return (disp); 28830Sstevel@tonic-gate } 28840Sstevel@tonic-gate 28850Sstevel@tonic-gate /* 28860Sstevel@tonic-gate * Scrub a correctable memory error and collect data for classification 28870Sstevel@tonic-gate * of CE type. This function is called in the detection path, ie tl0 handling 28880Sstevel@tonic-gate * of a correctable error trap (cpus) or interrupt (IO) at high PIL. 
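 *
 * As a worked example of the flt_disp encoding this produces (purely
 * illustrative; the bit definitions are the CE_XDIAG_* and EC_STATE_*
 * values used above): a CE whose scrub matched the AFAR in a Modified
 * E$ line, and which reappeared on the first re-read, would yield
 *
 *	flt_disp = CE_XDIAG_EXTALG | CE_XDIAG_AFARMATCH |
 *	    EC_STATE_M | CE_XDIAG_CE1;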
28890Sstevel@tonic-gate  */
28900Sstevel@tonic-gate void
28910Sstevel@tonic-gate cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t logout_tried)
28920Sstevel@tonic-gate {
28930Sstevel@tonic-gate 	/*
28940Sstevel@tonic-gate 	 * Cheetah CE classification does not set any bits in flt_status.
28950Sstevel@tonic-gate 	 * Instead we will record classification datapoints in flt_disp.
28960Sstevel@tonic-gate 	 */
28970Sstevel@tonic-gate 	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
28980Sstevel@tonic-gate 
28990Sstevel@tonic-gate 	/*
29000Sstevel@tonic-gate 	 * So that clear_ecc() can classify an error detected by IO as
29010Sstevel@tonic-gate 	 * persistent, sticky or intermittent, flag it as a memory error.
29020Sstevel@tonic-gate 	 */
29030Sstevel@tonic-gate 	if (ecc->flt_status & ECC_IOBUS)
29040Sstevel@tonic-gate 		ecc->flt_stat = C_AFSR_MEMORY;
29050Sstevel@tonic-gate 
29060Sstevel@tonic-gate 	/*
29070Sstevel@tonic-gate 	 * Record information from this first part of the algorithm in
29080Sstevel@tonic-gate 	 * flt_disp.
29090Sstevel@tonic-gate 	 */
29100Sstevel@tonic-gate 	ecc->flt_disp = cpu_ce_scrub_mem_err_common(ecc, logout_tried);
29110Sstevel@tonic-gate }
29120Sstevel@tonic-gate 
29130Sstevel@tonic-gate /*
29140Sstevel@tonic-gate  * Select a partner to perform a further CE classification check from.
29150Sstevel@tonic-gate  * Must be called with kernel preemption disabled (to stop the cpu list
29160Sstevel@tonic-gate  * from changing). The detecting cpu we are partnering has cpuid
29170Sstevel@tonic-gate  * aflt->flt_inst; we might not be running on the detecting cpu.
29180Sstevel@tonic-gate  *
29190Sstevel@tonic-gate  * Restrict choice to active cpus in the same cpu partition as ourselves in
29200Sstevel@tonic-gate  * an effort to stop bad cpus in one partition causing other partitions to
29210Sstevel@tonic-gate  * perform excessive diagnostic activity. Actually since the errorq drain
29220Sstevel@tonic-gate  * is run from a softint most of the time and that is a global mechanism
29230Sstevel@tonic-gate  * this isolation is only partial. Return NULL if we fail to find a
29240Sstevel@tonic-gate  * suitable partner.
29250Sstevel@tonic-gate  *
29260Sstevel@tonic-gate  * We prefer a partner that is in a different latency group to ourselves as
29270Sstevel@tonic-gate  * we will share fewer datapaths. If such a partner is unavailable then
29280Sstevel@tonic-gate  * choose one in the same lgroup but prefer a different chip and only allow
29290Sstevel@tonic-gate  * a sibling core if flags includes PTNR_SIBLINGOK. If all else fails and
29300Sstevel@tonic-gate  * flags includes PTNR_SELFOK then permit selection of the original detector.
29310Sstevel@tonic-gate  *
29320Sstevel@tonic-gate  * We keep a cache of the last partner selected for a cpu, and we'll try to
29330Sstevel@tonic-gate  * use that previous partner if no more than cpu_ce_ptnr_cachetime_sec seconds
29340Sstevel@tonic-gate  * have passed since that selection was made. This provides the benefit
29350Sstevel@tonic-gate  * of the point-of-view of different partners over time but without
29360Sstevel@tonic-gate  * requiring frequent cpu list traversals.
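 *
 * Intended usage, in sketch form (this mirrors the real callers,
 * ce_lkychk_cb() and ce_scrub_xdiag_recirc(), below):
 *
 *	kpreempt_disable();
 *	if ((cp = ce_ptnr_select(aflt, flags, &ptnrtype)) != NULL)
 *		xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc,
 *		    (uint64_t)aflt, (uint64_t)&disp);
 *	kpreempt_enable();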
29370Sstevel@tonic-gate */ 29380Sstevel@tonic-gate 29390Sstevel@tonic-gate #define PTNR_SIBLINGOK 0x1 /* Allow selection of sibling core */ 29400Sstevel@tonic-gate #define PTNR_SELFOK 0x2 /* Allow selection of cpu to "partner" itself */ 29410Sstevel@tonic-gate 29420Sstevel@tonic-gate static cpu_t * 29430Sstevel@tonic-gate ce_ptnr_select(struct async_flt *aflt, int flags, int *typep) 29440Sstevel@tonic-gate { 29450Sstevel@tonic-gate cpu_t *sp, *dtcr, *ptnr, *locptnr, *sibptnr; 29460Sstevel@tonic-gate hrtime_t lasttime, thistime; 29470Sstevel@tonic-gate 29480Sstevel@tonic-gate ASSERT(curthread->t_preempt > 0 || getpil() >= DISP_LEVEL); 29490Sstevel@tonic-gate 29500Sstevel@tonic-gate dtcr = cpu[aflt->flt_inst]; 29510Sstevel@tonic-gate 29520Sstevel@tonic-gate /* 29530Sstevel@tonic-gate * Short-circuit for the following cases: 29540Sstevel@tonic-gate * . the dtcr is not flagged active 29550Sstevel@tonic-gate * . there is just one cpu present 29560Sstevel@tonic-gate * . the detector has disappeared 29570Sstevel@tonic-gate * . we were given a bad flt_inst cpuid; this should not happen 29580Sstevel@tonic-gate * (eg PCI code now fills flt_inst) but if it does it is no 29590Sstevel@tonic-gate * reason to panic. 29600Sstevel@tonic-gate * . there is just one cpu left online in the cpu partition 29610Sstevel@tonic-gate * 29620Sstevel@tonic-gate * If we return NULL after this point then we do not update the 29630Sstevel@tonic-gate * chpr_ceptnr_seltime which will cause us to perform a full lookup 29640Sstevel@tonic-gate * again next time; this is the case where the only other cpu online 29650Sstevel@tonic-gate * in the detector's partition is on the same chip as the detector 29660Sstevel@tonic-gate * and since CEEN re-enable is throttled even that case should not 29670Sstevel@tonic-gate * hurt performance. 29680Sstevel@tonic-gate */ 29690Sstevel@tonic-gate if (dtcr == NULL || !cpu_flagged_active(dtcr->cpu_flags)) { 29700Sstevel@tonic-gate return (NULL); 29710Sstevel@tonic-gate } 29720Sstevel@tonic-gate if (ncpus == 1 || dtcr->cpu_part->cp_ncpus == 1) { 29730Sstevel@tonic-gate if (flags & PTNR_SELFOK) { 29740Sstevel@tonic-gate *typep = CE_XDIAG_PTNR_SELF; 29750Sstevel@tonic-gate return (dtcr); 29760Sstevel@tonic-gate } else { 29770Sstevel@tonic-gate return (NULL); 29780Sstevel@tonic-gate } 29790Sstevel@tonic-gate } 29800Sstevel@tonic-gate 29810Sstevel@tonic-gate thistime = gethrtime(); 29820Sstevel@tonic-gate lasttime = CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime); 29830Sstevel@tonic-gate 29840Sstevel@tonic-gate /* 29850Sstevel@tonic-gate * Select a starting point. 29860Sstevel@tonic-gate */ 29870Sstevel@tonic-gate if (!lasttime) { 29880Sstevel@tonic-gate /* 29890Sstevel@tonic-gate * We've never selected a partner for this detector before. 29900Sstevel@tonic-gate * Start the scan at the next online cpu in the same cpu 29910Sstevel@tonic-gate * partition. 29920Sstevel@tonic-gate */ 29930Sstevel@tonic-gate sp = dtcr->cpu_next_part; 29940Sstevel@tonic-gate } else if (thistime - lasttime < cpu_ce_ptnr_cachetime_sec * NANOSEC) { 29950Sstevel@tonic-gate /* 29960Sstevel@tonic-gate * Our last selection has not aged yet. If this partner: 29970Sstevel@tonic-gate * . is still a valid cpu, 29980Sstevel@tonic-gate * . is still in the same partition as the detector 29990Sstevel@tonic-gate * . is still marked active 30000Sstevel@tonic-gate * . satisfies the 'flags' argument criteria 30010Sstevel@tonic-gate * then select it again without updating the timestamp. 
30020Sstevel@tonic-gate */ 30030Sstevel@tonic-gate sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)]; 30040Sstevel@tonic-gate if (sp == NULL || sp->cpu_part != dtcr->cpu_part || 30050Sstevel@tonic-gate !cpu_flagged_active(sp->cpu_flags) || 30060Sstevel@tonic-gate (sp == dtcr && !(flags & PTNR_SELFOK)) || 30073434Sesaxe (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP) && 30080Sstevel@tonic-gate !(flags & PTNR_SIBLINGOK))) { 30090Sstevel@tonic-gate sp = dtcr->cpu_next_part; 30100Sstevel@tonic-gate } else { 30110Sstevel@tonic-gate if (sp->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) { 30120Sstevel@tonic-gate *typep = CE_XDIAG_PTNR_REMOTE; 30130Sstevel@tonic-gate } else if (sp == dtcr) { 30140Sstevel@tonic-gate *typep = CE_XDIAG_PTNR_SELF; 30153434Sesaxe } else if (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP)) { 30160Sstevel@tonic-gate *typep = CE_XDIAG_PTNR_SIBLING; 30170Sstevel@tonic-gate } else { 30180Sstevel@tonic-gate *typep = CE_XDIAG_PTNR_LOCAL; 30190Sstevel@tonic-gate } 30200Sstevel@tonic-gate return (sp); 30210Sstevel@tonic-gate } 30220Sstevel@tonic-gate } else { 30230Sstevel@tonic-gate /* 30240Sstevel@tonic-gate * Our last selection has aged. If it is nonetheless still a 30250Sstevel@tonic-gate * valid cpu then start the scan at the next cpu in the 30260Sstevel@tonic-gate * partition after our last partner. If the last selection 30270Sstevel@tonic-gate * is no longer a valid cpu then go with our default. In 30280Sstevel@tonic-gate * this way we slowly cycle through possible partners to 30290Sstevel@tonic-gate * obtain multiple viewpoints over time. 30300Sstevel@tonic-gate */ 30310Sstevel@tonic-gate sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)]; 30320Sstevel@tonic-gate if (sp == NULL) { 30330Sstevel@tonic-gate sp = dtcr->cpu_next_part; 30340Sstevel@tonic-gate } else { 30350Sstevel@tonic-gate sp = sp->cpu_next_part; /* may be dtcr */ 30360Sstevel@tonic-gate if (sp->cpu_part != dtcr->cpu_part) 30370Sstevel@tonic-gate sp = dtcr; 30380Sstevel@tonic-gate } 30390Sstevel@tonic-gate } 30400Sstevel@tonic-gate 30410Sstevel@tonic-gate /* 30420Sstevel@tonic-gate * We have a proposed starting point for our search, but if this 30430Sstevel@tonic-gate * cpu is offline then its cpu_next_part will point to itself 30440Sstevel@tonic-gate * so we can't use that to iterate over cpus in this partition in 30450Sstevel@tonic-gate * the loop below. We still want to avoid iterating over cpus not 30460Sstevel@tonic-gate * in our partition, so in the case that our starting point is offline 30470Sstevel@tonic-gate * we will repoint it to be the detector itself; and if the detector 30480Sstevel@tonic-gate * happens to be offline we'll return NULL from the following loop. 
30490Sstevel@tonic-gate 	 */
30500Sstevel@tonic-gate 	if (!cpu_flagged_active(sp->cpu_flags)) {
30510Sstevel@tonic-gate 		sp = dtcr;
30520Sstevel@tonic-gate 	}
30530Sstevel@tonic-gate 
30540Sstevel@tonic-gate 	ptnr = sp;
30550Sstevel@tonic-gate 	locptnr = NULL;
30560Sstevel@tonic-gate 	sibptnr = NULL;
30570Sstevel@tonic-gate 	do {
30580Sstevel@tonic-gate 		if (ptnr == dtcr || !cpu_flagged_active(ptnr->cpu_flags))
30590Sstevel@tonic-gate 			continue;
30600Sstevel@tonic-gate 		if (ptnr->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
30610Sstevel@tonic-gate 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = ptnr->cpu_id;
30620Sstevel@tonic-gate 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
30630Sstevel@tonic-gate 			*typep = CE_XDIAG_PTNR_REMOTE;
30640Sstevel@tonic-gate 			return (ptnr);
30650Sstevel@tonic-gate 		}
30663434Sesaxe 		if (pg_plat_cpus_share(ptnr, dtcr, PGHW_CHIP)) {
30670Sstevel@tonic-gate 			if (sibptnr == NULL)
30680Sstevel@tonic-gate 				sibptnr = ptnr;
30690Sstevel@tonic-gate 			continue;
30700Sstevel@tonic-gate 		}
30710Sstevel@tonic-gate 		if (locptnr == NULL)
30720Sstevel@tonic-gate 			locptnr = ptnr;
30730Sstevel@tonic-gate 	} while ((ptnr = ptnr->cpu_next_part) != sp);
30740Sstevel@tonic-gate 
30750Sstevel@tonic-gate 	/*
30760Sstevel@tonic-gate 	 * A foreign partner has already been returned if one was available.
30770Sstevel@tonic-gate 	 *
30780Sstevel@tonic-gate 	 * If locptnr is not NULL it is a cpu in the same lgroup as the
30790Sstevel@tonic-gate 	 * detector, is active, and is not a sibling of the detector.
30800Sstevel@tonic-gate 	 *
30810Sstevel@tonic-gate 	 * If sibptnr is not NULL it is a sibling of the detector, and is
30820Sstevel@tonic-gate 	 * active.
30830Sstevel@tonic-gate 	 *
30840Sstevel@tonic-gate 	 * If we have to resort to using the detector itself we have already
30850Sstevel@tonic-gate 	 * checked that it is active.
30860Sstevel@tonic-gate 	 */
30870Sstevel@tonic-gate 	if (locptnr) {
30880Sstevel@tonic-gate 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = locptnr->cpu_id;
30890Sstevel@tonic-gate 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
30900Sstevel@tonic-gate 		*typep = CE_XDIAG_PTNR_LOCAL;
30910Sstevel@tonic-gate 		return (locptnr);
30920Sstevel@tonic-gate 	} else if (sibptnr && flags & PTNR_SIBLINGOK) {
30930Sstevel@tonic-gate 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = sibptnr->cpu_id;
30940Sstevel@tonic-gate 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
30950Sstevel@tonic-gate 		*typep = CE_XDIAG_PTNR_SIBLING;
30960Sstevel@tonic-gate 		return (sibptnr);
30970Sstevel@tonic-gate 	} else if (flags & PTNR_SELFOK) {
30980Sstevel@tonic-gate 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = dtcr->cpu_id;
30990Sstevel@tonic-gate 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
31000Sstevel@tonic-gate 		*typep = CE_XDIAG_PTNR_SELF;
31010Sstevel@tonic-gate 		return (dtcr);
31020Sstevel@tonic-gate 	}
31030Sstevel@tonic-gate 
31040Sstevel@tonic-gate 	return (NULL);
31050Sstevel@tonic-gate }
31060Sstevel@tonic-gate 
31070Sstevel@tonic-gate /*
31080Sstevel@tonic-gate  * Cross call handler that is requested to run on the designated partner of
31090Sstevel@tonic-gate  * a cpu that experienced a possibly sticky or possibly persistent CE.
31100Sstevel@tonic-gate  */
31110Sstevel@tonic-gate static void
31120Sstevel@tonic-gate ce_ptnrchk_xc(struct async_flt *aflt, uchar_t *dispp)
31130Sstevel@tonic-gate {
31140Sstevel@tonic-gate 	*dispp = cpu_ce_scrub_mem_err_common(aflt, B_FALSE);
31150Sstevel@tonic-gate }
31160Sstevel@tonic-gate 
31170Sstevel@tonic-gate /*
31180Sstevel@tonic-gate  * The associated errorqs are never destroyed so we do not need to deal with
31190Sstevel@tonic-gate  * them disappearing before this timeout fires. If the affected memory
31200Sstevel@tonic-gate  * has been DR'd out since the original event the scrub algorithm will catch
31210Sstevel@tonic-gate  * any errors and return null disposition info. If the original detecting
31220Sstevel@tonic-gate  * cpu has been DR'd out then ereport detector info will not be able to
31230Sstevel@tonic-gate  * look up the CPU type; with a small timeout this is unlikely.
31240Sstevel@tonic-gate  */
31250Sstevel@tonic-gate static void
31260Sstevel@tonic-gate ce_lkychk_cb(ce_lkychk_cb_t *cbarg)
31270Sstevel@tonic-gate {
31280Sstevel@tonic-gate 	struct async_flt *aflt = cbarg->lkycb_aflt;
31290Sstevel@tonic-gate 	uchar_t disp;
31300Sstevel@tonic-gate 	cpu_t *cp;
31310Sstevel@tonic-gate 	int ptnrtype;
31320Sstevel@tonic-gate 
31330Sstevel@tonic-gate 	kpreempt_disable();
31340Sstevel@tonic-gate 	if (cp = ce_ptnr_select(aflt, PTNR_SIBLINGOK | PTNR_SELFOK,
31350Sstevel@tonic-gate 	    &ptnrtype)) {
31360Sstevel@tonic-gate 		xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, (uint64_t)aflt,
31370Sstevel@tonic-gate 		    (uint64_t)&disp);
31380Sstevel@tonic-gate 		CE_XDIAG_SETLKYINFO(aflt->flt_disp, disp);
31390Sstevel@tonic-gate 		CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
31400Sstevel@tonic-gate 		CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
31410Sstevel@tonic-gate 	} else {
31420Sstevel@tonic-gate 		ce_xdiag_lkydrops++;
31430Sstevel@tonic-gate 		if (ncpus > 1)
31440Sstevel@tonic-gate 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
31450Sstevel@tonic-gate 			    CE_XDIAG_SKIP_NOPTNR);
31460Sstevel@tonic-gate 	}
31470Sstevel@tonic-gate 	kpreempt_enable();
31480Sstevel@tonic-gate 
31490Sstevel@tonic-gate 	errorq_commit(cbarg->lkycb_eqp, cbarg->lkycb_eqep, ERRORQ_ASYNC);
31500Sstevel@tonic-gate 	kmem_free(cbarg, sizeof (ce_lkychk_cb_t));
31510Sstevel@tonic-gate }
31520Sstevel@tonic-gate 
31530Sstevel@tonic-gate /*
31540Sstevel@tonic-gate  * Called from errorq drain code when processing a CE error, both from
31550Sstevel@tonic-gate  * CPU and PCI drain functions. Decide what further classification actions,
31560Sstevel@tonic-gate  * if any, we will perform. Perform immediate actions now, and schedule
31570Sstevel@tonic-gate  * delayed actions as required. Note that we are no longer necessarily running
31580Sstevel@tonic-gate  * on the detecting cpu, and that the async_flt structure will not persist on
31590Sstevel@tonic-gate  * return from this function.
31600Sstevel@tonic-gate  *
31610Sstevel@tonic-gate  * Calls to this function should aim to be self-throttling in some way. With
31620Sstevel@tonic-gate  * the delayed re-enable of CEEN the absolute rate of calls should not
31630Sstevel@tonic-gate  * be excessive. Callers should also avoid performing in-depth classification
31640Sstevel@tonic-gate  * for events in pages that are already known to be suspect.
31650Sstevel@tonic-gate  *
31660Sstevel@tonic-gate  * We return nonzero to indicate that the event has been copied and
31670Sstevel@tonic-gate  * recirculated for further testing.
The caller should not log the event 31680Sstevel@tonic-gate * in this case - it will be logged when further test results are available. 31690Sstevel@tonic-gate * 31700Sstevel@tonic-gate * Our possible contexts are that of errorq_drain: below lock level or from 31710Sstevel@tonic-gate * panic context. We can assume that the cpu we are running on is online. 31720Sstevel@tonic-gate */ 31730Sstevel@tonic-gate 31740Sstevel@tonic-gate 31750Sstevel@tonic-gate #ifdef DEBUG 31760Sstevel@tonic-gate static int ce_xdiag_forceaction; 31770Sstevel@tonic-gate #endif 31780Sstevel@tonic-gate 31790Sstevel@tonic-gate int 31800Sstevel@tonic-gate ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp, 31810Sstevel@tonic-gate errorq_elem_t *eqep, size_t afltoffset) 31820Sstevel@tonic-gate { 31830Sstevel@tonic-gate ce_dispact_t dispact, action; 31840Sstevel@tonic-gate cpu_t *cp; 31850Sstevel@tonic-gate uchar_t dtcrinfo, disp; 31860Sstevel@tonic-gate int ptnrtype; 31870Sstevel@tonic-gate 31880Sstevel@tonic-gate if (!ce_disp_inited || panicstr || ce_xdiag_off) { 31890Sstevel@tonic-gate ce_xdiag_drops++; 31900Sstevel@tonic-gate return (0); 31910Sstevel@tonic-gate } else if (!aflt->flt_in_memory) { 31920Sstevel@tonic-gate ce_xdiag_drops++; 31930Sstevel@tonic-gate CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOTMEM); 31940Sstevel@tonic-gate return (0); 31950Sstevel@tonic-gate } 31960Sstevel@tonic-gate 31970Sstevel@tonic-gate dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp); 31980Sstevel@tonic-gate 31990Sstevel@tonic-gate /* 32000Sstevel@tonic-gate * Some correctable events are not scrubbed/classified, such as those 32010Sstevel@tonic-gate * noticed at the tail of cpu_deferred_error. So if there is no 32020Sstevel@tonic-gate * initial detector classification go no further. 
32030Sstevel@tonic-gate */ 32040Sstevel@tonic-gate if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) { 32050Sstevel@tonic-gate ce_xdiag_drops++; 32060Sstevel@tonic-gate CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOSCRUB); 32070Sstevel@tonic-gate return (0); 32080Sstevel@tonic-gate } 32090Sstevel@tonic-gate 32100Sstevel@tonic-gate dispact = CE_DISPACT(ce_disp_table, 32110Sstevel@tonic-gate CE_XDIAG_AFARMATCHED(dtcrinfo), 32120Sstevel@tonic-gate CE_XDIAG_STATE(dtcrinfo), 32130Sstevel@tonic-gate CE_XDIAG_CE1SEEN(dtcrinfo), 32140Sstevel@tonic-gate CE_XDIAG_CE2SEEN(dtcrinfo)); 32150Sstevel@tonic-gate 32160Sstevel@tonic-gate 32170Sstevel@tonic-gate action = CE_ACT(dispact); /* bad lookup caught below */ 32180Sstevel@tonic-gate #ifdef DEBUG 32190Sstevel@tonic-gate if (ce_xdiag_forceaction != 0) 32200Sstevel@tonic-gate action = ce_xdiag_forceaction; 32210Sstevel@tonic-gate #endif 32220Sstevel@tonic-gate 32230Sstevel@tonic-gate switch (action) { 32240Sstevel@tonic-gate case CE_ACT_LKYCHK: { 32250Sstevel@tonic-gate caddr_t ndata; 32260Sstevel@tonic-gate errorq_elem_t *neqep; 32270Sstevel@tonic-gate struct async_flt *ecc; 32280Sstevel@tonic-gate ce_lkychk_cb_t *cbargp; 32290Sstevel@tonic-gate 32300Sstevel@tonic-gate if ((ndata = errorq_elem_dup(eqp, eqep, &neqep)) == NULL) { 32310Sstevel@tonic-gate ce_xdiag_lkydrops++; 32320Sstevel@tonic-gate CE_XDIAG_SETSKIPCODE(aflt->flt_disp, 32330Sstevel@tonic-gate CE_XDIAG_SKIP_DUPFAIL); 32340Sstevel@tonic-gate break; 32350Sstevel@tonic-gate } 32360Sstevel@tonic-gate ecc = (struct async_flt *)(ndata + afltoffset); 32370Sstevel@tonic-gate 32380Sstevel@tonic-gate ASSERT(ecc->flt_class == CPU_FAULT || 32390Sstevel@tonic-gate ecc->flt_class == BUS_FAULT); 32400Sstevel@tonic-gate ecc->flt_class = (ecc->flt_class == CPU_FAULT) ? 
32410Sstevel@tonic-gate RECIRC_CPU_FAULT : RECIRC_BUS_FAULT; 32420Sstevel@tonic-gate 32430Sstevel@tonic-gate cbargp = kmem_alloc(sizeof (ce_lkychk_cb_t), KM_SLEEP); 32440Sstevel@tonic-gate cbargp->lkycb_aflt = ecc; 32450Sstevel@tonic-gate cbargp->lkycb_eqp = eqp; 32460Sstevel@tonic-gate cbargp->lkycb_eqep = neqep; 32470Sstevel@tonic-gate 32480Sstevel@tonic-gate (void) timeout((void (*)(void *))ce_lkychk_cb, 32490Sstevel@tonic-gate (void *)cbargp, drv_usectohz(cpu_ce_lkychk_timeout_usec)); 32500Sstevel@tonic-gate return (1); 32510Sstevel@tonic-gate } 32520Sstevel@tonic-gate 32530Sstevel@tonic-gate case CE_ACT_PTNRCHK: 32540Sstevel@tonic-gate kpreempt_disable(); /* stop cpu list changing */ 32550Sstevel@tonic-gate if ((cp = ce_ptnr_select(aflt, 0, &ptnrtype)) != NULL) { 32560Sstevel@tonic-gate xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, 32570Sstevel@tonic-gate (uint64_t)aflt, (uint64_t)&disp); 32580Sstevel@tonic-gate CE_XDIAG_SETPTNRINFO(aflt->flt_disp, disp); 32590Sstevel@tonic-gate CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id); 32600Sstevel@tonic-gate CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype); 32610Sstevel@tonic-gate } else if (ncpus > 1) { 32620Sstevel@tonic-gate ce_xdiag_ptnrdrops++; 32630Sstevel@tonic-gate CE_XDIAG_SETSKIPCODE(aflt->flt_disp, 32640Sstevel@tonic-gate CE_XDIAG_SKIP_NOPTNR); 32650Sstevel@tonic-gate } else { 32660Sstevel@tonic-gate ce_xdiag_ptnrdrops++; 32670Sstevel@tonic-gate CE_XDIAG_SETSKIPCODE(aflt->flt_disp, 32680Sstevel@tonic-gate CE_XDIAG_SKIP_UNIPROC); 32690Sstevel@tonic-gate } 32700Sstevel@tonic-gate kpreempt_enable(); 32710Sstevel@tonic-gate break; 32720Sstevel@tonic-gate 32730Sstevel@tonic-gate case CE_ACT_DONE: 32740Sstevel@tonic-gate break; 32750Sstevel@tonic-gate 32760Sstevel@tonic-gate case CE_ACT(CE_DISP_BAD): 32770Sstevel@tonic-gate default: 32780Sstevel@tonic-gate #ifdef DEBUG 32790Sstevel@tonic-gate cmn_err(CE_PANIC, "ce_scrub_post: Bad action '%d'", action); 32800Sstevel@tonic-gate #endif 32810Sstevel@tonic-gate ce_xdiag_bad++; 32820Sstevel@tonic-gate CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_ACTBAD); 32830Sstevel@tonic-gate break; 32840Sstevel@tonic-gate } 32850Sstevel@tonic-gate 32860Sstevel@tonic-gate return (0); 32870Sstevel@tonic-gate } 32880Sstevel@tonic-gate 32890Sstevel@tonic-gate /* 32900Sstevel@tonic-gate * We route all errors through a single switch statement. 32910Sstevel@tonic-gate */ 32920Sstevel@tonic-gate void 32930Sstevel@tonic-gate cpu_ue_log_err(struct async_flt *aflt) 32940Sstevel@tonic-gate { 32950Sstevel@tonic-gate switch (aflt->flt_class) { 32960Sstevel@tonic-gate case CPU_FAULT: 32970Sstevel@tonic-gate cpu_ereport_init(aflt); 32980Sstevel@tonic-gate if (cpu_async_log_err(aflt, NULL)) 32990Sstevel@tonic-gate cpu_ereport_post(aflt); 33000Sstevel@tonic-gate break; 33010Sstevel@tonic-gate 33020Sstevel@tonic-gate case BUS_FAULT: 33030Sstevel@tonic-gate bus_async_log_err(aflt); 33040Sstevel@tonic-gate break; 33050Sstevel@tonic-gate 33060Sstevel@tonic-gate default: 33070Sstevel@tonic-gate cmn_err(CE_WARN, "discarding async error %p with invalid " 33080Sstevel@tonic-gate "fault class (0x%x)", (void *)aflt, aflt->flt_class); 33090Sstevel@tonic-gate return; 33100Sstevel@tonic-gate } 33110Sstevel@tonic-gate } 33120Sstevel@tonic-gate 33130Sstevel@tonic-gate /* 33140Sstevel@tonic-gate * Routine for panic hook callback from panic_idle(). 
33150Sstevel@tonic-gate */ 33160Sstevel@tonic-gate void 33170Sstevel@tonic-gate cpu_async_panic_callb(void) 33180Sstevel@tonic-gate { 33190Sstevel@tonic-gate ch_async_flt_t ch_flt; 33200Sstevel@tonic-gate struct async_flt *aflt; 33210Sstevel@tonic-gate ch_cpu_errors_t cpu_error_regs; 33220Sstevel@tonic-gate uint64_t afsr_errs; 33230Sstevel@tonic-gate 33240Sstevel@tonic-gate get_cpu_error_state(&cpu_error_regs); 33250Sstevel@tonic-gate 33260Sstevel@tonic-gate afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) | 33272381Smikechr (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS); 33280Sstevel@tonic-gate 33290Sstevel@tonic-gate if (afsr_errs) { 33300Sstevel@tonic-gate 33310Sstevel@tonic-gate bzero(&ch_flt, sizeof (ch_async_flt_t)); 33320Sstevel@tonic-gate aflt = (struct async_flt *)&ch_flt; 33330Sstevel@tonic-gate aflt->flt_id = gethrtime_waitfree(); 33340Sstevel@tonic-gate aflt->flt_bus_id = getprocessorid(); 33350Sstevel@tonic-gate aflt->flt_inst = CPU->cpu_id; 33360Sstevel@tonic-gate aflt->flt_stat = cpu_error_regs.afsr; 33370Sstevel@tonic-gate aflt->flt_addr = cpu_error_regs.afar; 33380Sstevel@tonic-gate aflt->flt_prot = AFLT_PROT_NONE; 33390Sstevel@tonic-gate aflt->flt_class = CPU_FAULT; 33400Sstevel@tonic-gate aflt->flt_priv = ((cpu_error_regs.afsr & C_AFSR_PRIV) != 0); 33410Sstevel@tonic-gate aflt->flt_panic = 1; 33420Sstevel@tonic-gate ch_flt.afsr_ext = cpu_error_regs.afsr_ext; 33430Sstevel@tonic-gate ch_flt.afsr_errs = afsr_errs; 33440Sstevel@tonic-gate #if defined(SERRANO) 33450Sstevel@tonic-gate ch_flt.afar2 = cpu_error_regs.afar2; 33460Sstevel@tonic-gate #endif /* SERRANO */ 33470Sstevel@tonic-gate (void) cpu_queue_events(&ch_flt, NULL, afsr_errs, NULL); 33480Sstevel@tonic-gate } 33490Sstevel@tonic-gate } 33500Sstevel@tonic-gate 33510Sstevel@tonic-gate /* 33520Sstevel@tonic-gate * Routine to convert a syndrome into a syndrome code. 33530Sstevel@tonic-gate */ 33540Sstevel@tonic-gate static int 33550Sstevel@tonic-gate synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit) 33560Sstevel@tonic-gate { 33570Sstevel@tonic-gate if (synd_status == AFLT_STAT_INVALID) 33580Sstevel@tonic-gate return (-1); 33590Sstevel@tonic-gate 33600Sstevel@tonic-gate /* 33610Sstevel@tonic-gate * Use the syndrome to index the appropriate syndrome table, 33620Sstevel@tonic-gate * to get the code indicating which bit(s) is(are) bad. 
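	 *
	 * For example (a sketch; the table contents live elsewhere in this
	 * module): a memory ECC error with a nonzero syndrome below
	 * ESYND_TBL_SIZE maps via
	 *
	 *	synd_code = ecc_syndrome_tab[synd];
	 *
	 * while a zero or out-of-range syndrome yields -1 (no unum lookup).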
33630Sstevel@tonic-gate */ 33640Sstevel@tonic-gate if (afsr_bit & 33650Sstevel@tonic-gate (C_AFSR_MSYND_ERRS | C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) { 33660Sstevel@tonic-gate if (afsr_bit & C_AFSR_MSYND_ERRS) { 33670Sstevel@tonic-gate #if defined(JALAPENO) || defined(SERRANO) 33680Sstevel@tonic-gate if ((synd == 0) || (synd >= BSYND_TBL_SIZE)) 33690Sstevel@tonic-gate return (-1); 33700Sstevel@tonic-gate else 33710Sstevel@tonic-gate return (BPAR0 + synd); 33720Sstevel@tonic-gate #else /* JALAPENO || SERRANO */ 33730Sstevel@tonic-gate if ((synd == 0) || (synd >= MSYND_TBL_SIZE)) 33740Sstevel@tonic-gate return (-1); 33750Sstevel@tonic-gate else 33760Sstevel@tonic-gate return (mtag_syndrome_tab[synd]); 33770Sstevel@tonic-gate #endif /* JALAPENO || SERRANO */ 33780Sstevel@tonic-gate } else { 33790Sstevel@tonic-gate if ((synd == 0) || (synd >= ESYND_TBL_SIZE)) 33800Sstevel@tonic-gate return (-1); 33810Sstevel@tonic-gate else 33820Sstevel@tonic-gate return (ecc_syndrome_tab[synd]); 33830Sstevel@tonic-gate } 33840Sstevel@tonic-gate } else { 33850Sstevel@tonic-gate return (-1); 33860Sstevel@tonic-gate } 33870Sstevel@tonic-gate } 33880Sstevel@tonic-gate 33891186Sayznaga int 33901186Sayznaga cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp) 33911186Sayznaga { 33921186Sayznaga if (&plat_get_mem_sid) 33931186Sayznaga return (plat_get_mem_sid(unum, buf, buflen, lenp)); 33941186Sayznaga else 33951186Sayznaga return (ENOTSUP); 33961186Sayznaga } 33971186Sayznaga 33981186Sayznaga int 33991186Sayznaga cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp) 34001186Sayznaga { 34011186Sayznaga if (&plat_get_mem_offset) 34021186Sayznaga return (plat_get_mem_offset(flt_addr, offp)); 34031186Sayznaga else 34041186Sayznaga return (ENOTSUP); 34051186Sayznaga } 34061186Sayznaga 34071186Sayznaga int 34081186Sayznaga cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp) 34091186Sayznaga { 34101186Sayznaga if (&plat_get_mem_addr) 34111186Sayznaga return (plat_get_mem_addr(unum, sid, offset, addrp)); 34121186Sayznaga else 34131186Sayznaga return (ENOTSUP); 34141186Sayznaga } 34151186Sayznaga 34160Sstevel@tonic-gate /* 34170Sstevel@tonic-gate * Routine to return a string identifying the physical name 34180Sstevel@tonic-gate * associated with a memory/cache error. 34190Sstevel@tonic-gate */ 34200Sstevel@tonic-gate int 34210Sstevel@tonic-gate cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat, 34220Sstevel@tonic-gate uint64_t flt_addr, int flt_bus_id, int flt_in_memory, 34230Sstevel@tonic-gate ushort_t flt_status, char *buf, int buflen, int *lenp) 34240Sstevel@tonic-gate { 34250Sstevel@tonic-gate int synd_code; 34260Sstevel@tonic-gate int ret; 34270Sstevel@tonic-gate 34280Sstevel@tonic-gate /* 34290Sstevel@tonic-gate * An AFSR of -1 defaults to a memory syndrome. 34300Sstevel@tonic-gate */ 34310Sstevel@tonic-gate if (flt_stat == (uint64_t)-1) 34320Sstevel@tonic-gate flt_stat = C_AFSR_CE; 34330Sstevel@tonic-gate 34340Sstevel@tonic-gate synd_code = synd_to_synd_code(synd_status, flt_synd, flt_stat); 34350Sstevel@tonic-gate 34360Sstevel@tonic-gate /* 34370Sstevel@tonic-gate * Syndrome code must be either a single-bit error code 34380Sstevel@tonic-gate * (0...143) or -1 for unum lookup. 
	 */
	if (synd_code < 0 || synd_code >= M2)
		synd_code = -1;
	if (&plat_get_mem_unum) {
		if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id,
		    flt_in_memory, flt_status, buf, buflen, lenp)) != 0) {
			buf[0] = '\0';
			*lenp = 0;
		}

		return (ret);
	}

	return (ENOTSUP);
}

/*
 * Wrapper for the cpu_get_mem_unum() routine that takes an
 * async_flt struct rather than explicit arguments.
 */
int
cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
    char *buf, int buflen, int *lenp)
{
	/*
	 * If we come through here for an IO bus error, aflt->flt_stat will
	 * not be the CPU AFSR, so we pass a -1 to cpu_get_mem_unum()
	 * so it will interpret this as a memory error.
	 */
	return (cpu_get_mem_unum(synd_status, aflt->flt_synd,
	    (aflt->flt_class == BUS_FAULT) ?
	    (uint64_t)-1 : ((ch_async_flt_t *)aflt)->flt_bit,
	    aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory,
	    aflt->flt_status, buf, buflen, lenp));
}

/*
 * Return the unum string for the given synd_code and async_flt in buf,
 * which must be of size UNUM_NAMLEN.
 */
static int
cpu_get_mem_unum_synd(int synd_code, struct async_flt *aflt, char *buf)
{
	int ret, len;

	/*
	 * Syndrome code must be either a single-bit error code
	 * (0...143) or -1 for unum lookup.
	 */
	if (synd_code < 0 || synd_code >= M2)
		synd_code = -1;
	if (&plat_get_mem_unum) {
		if ((ret = plat_get_mem_unum(synd_code, aflt->flt_addr,
		    aflt->flt_bus_id, aflt->flt_in_memory,
		    aflt->flt_status, buf, UNUM_NAMLEN, &len)) != 0) {
			buf[0] = '\0';
		}
		return (ret);
	}

	buf[0] = '\0';
	return (ENOTSUP);
}
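/*
 * Illustrative sketch (not part of the original file): one plausible way
 * a diagnosis path could drive the unum wrappers above.  The helper name
 * example_log_unum() is hypothetical; the calls and signatures match the
 * routines defined in this file.  Kept under #if 0 so it is not compiled.
 */
#if 0
static void
example_log_unum(struct async_flt *aflt)
{
	char unum[UNUM_NAMLEN];
	int len;

	/*
	 * AFLT_STAT_VALID tells the lookup that aflt->flt_synd holds a
	 * usable syndrome; on failure the buffer comes back empty.
	 */
	if (cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt, unum,
	    UNUM_NAMLEN, &len) == 0)
		cmn_err(CE_NOTE, "!memory error at %s", unum);
}
#endif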
/*
 * This routine is a more generic interface to cpu_get_mem_unum()
 * that may be used by other modules (e.g. the 'mm' driver, through
 * the 'MEM_NAME' ioctl, which is used by fmd to resolve unums
 * for Jalapeno/Serrano FRC/RCE or FRU/RUE paired events).
 */
int
cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
    char *buf, int buflen, int *lenp)
{
	int synd_status, flt_in_memory, ret;
	ushort_t flt_status = 0;
	char unum[UNUM_NAMLEN];
	uint64_t t_afsr_errs;

	/*
	 * Check for an invalid address.
	 */
	if (afar == (uint64_t)-1)
		return (ENXIO);

	if (synd == (uint64_t)-1)
		synd_status = AFLT_STAT_INVALID;
	else
		synd_status = AFLT_STAT_VALID;

	flt_in_memory = (*afsr & C_AFSR_MEMORY) &&
	    pf_is_memory(afar >> MMU_PAGESHIFT);

	/*
	 * Get aggregate AFSR for call to cpu_error_is_ecache_data.
	 */
	if (*afsr == (uint64_t)-1)
		t_afsr_errs = C_AFSR_CE;
	else {
		t_afsr_errs = (*afsr & C_AFSR_ALL_ERRS);
#if defined(CHEETAH_PLUS)
		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
			t_afsr_errs |= (*(afsr + 1) & C_AFSR_EXT_ALL_ERRS);
#endif	/* CHEETAH_PLUS */
	}

	/*
	 * Turn on ECC_ECACHE if error type is E$ Data.
	 */
	if (cpu_error_is_ecache_data(CPU->cpu_id, t_afsr_errs))
		flt_status |= ECC_ECACHE;

	ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, t_afsr_errs, afar,
	    CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp);
	if (ret != 0)
		return (ret);

	if (*lenp >= buflen)
		return (ENAMETOOLONG);

	(void) strncpy(buf, unum, buflen);

	return (0);
}
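/*
 * Illustrative sketch (not part of the original file): the kind of call
 * the MEM_NAME ioctl path described above could make.  The afsr argument
 * is a pointer because Panther callers supply a two-element array
 * (AFSR, AFSR_EXT); the syndrome and AFAR values here are made up.
 * The helper name example_mem_name() is hypothetical.
 */
#if 0
static int
example_mem_name(char *buf, int buflen, int *lenp)
{
	uint64_t afsr[2] = { C_AFSR_CE, 0 };

	return (cpu_get_mem_name(0x3, afsr, 0x12345678ULL, buf,
	    buflen, lenp));
}
#endif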
/*
 * Routine to return memory information associated
 * with a physical address and syndrome.
 */
int
cpu_get_mem_info(uint64_t synd, uint64_t afar,
    uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
    int *segsp, int *banksp, int *mcidp)
{
	int synd_status, synd_code;

	if (afar == (uint64_t)-1)
		return (ENXIO);

	if (synd == (uint64_t)-1)
		synd_status = AFLT_STAT_INVALID;
	else
		synd_status = AFLT_STAT_VALID;

	synd_code = synd_to_synd_code(synd_status, synd, C_AFSR_CE);

	if (p2get_mem_info != NULL)
		return ((p2get_mem_info)(synd_code, afar,
		    mem_sizep, seg_sizep, bank_sizep,
		    segsp, banksp, mcidp));
	else
		return (ENOTSUP);
}

/*
 * Routine to return a string identifying the physical
 * name associated with a cpuid.
 */
int
cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
{
	int ret;
	char unum[UNUM_NAMLEN];

	if (&plat_get_cpu_unum) {
		if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp))
		    != 0)
			return (ret);
	} else {
		return (ENOTSUP);
	}

	if (*lenp >= buflen)
		return (ENAMETOOLONG);

	(void) strncpy(buf, unum, buflen);

	return (0);
}

/*
 * This routine exports the name buffer size.
 */
size_t
cpu_get_name_bufsize()
{
	return (UNUM_NAMLEN);
}

/*
 * Historical function, apparently not used.
 */
/* ARGSUSED */
void
cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
{}

/*
 * Historical function only called for SBus errors in debugging.
 */
/*ARGSUSED*/
void
read_ecc_data(struct async_flt *aflt, short verbose, short ce_err)
{}

/*
 * Clear the AFSR sticky bits.  The routine returns a non-zero value if
 * any of the AFSR's sticky errors are detected.
 * If a non-null pointer to an async fault structure argument is passed
 * in, the captured error state (AFSR, AFAR) info will be returned in
 * the structure.
 */
int
clear_errors(ch_async_flt_t *ch_flt)
{
	struct async_flt *aflt = (struct async_flt *)ch_flt;
	ch_cpu_errors_t cpu_error_regs;

	get_cpu_error_state(&cpu_error_regs);

	if (ch_flt != NULL) {
		aflt->flt_stat = cpu_error_regs.afsr & C_AFSR_MASK;
		aflt->flt_addr = cpu_error_regs.afar;
		ch_flt->afsr_ext = cpu_error_regs.afsr_ext;
		ch_flt->afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
		    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
#if defined(SERRANO)
		ch_flt->afar2 = cpu_error_regs.afar2;
#endif	/* SERRANO */
	}

	set_cpu_error_state(&cpu_error_regs);

	return (((cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS)) != 0);
}

/*
 * Clear any AFSR error bits, and check for persistence.
 *
 * It would be desirable to also insist that the syndrome match.  PCI
 * handling has already filled flt_synd.  For errors trapped by the CPU
 * we only fill flt_synd when we queue the event, so we do not have a
 * valid flt_synd during initial classification (it is valid if we're
 * called as part of subsequent low-pil additional classification
 * attempts).  We could try to determine which syndrome to use: we know
 * we're only called for CE/RCE (Jalapeno & Serrano) and CE/EMC (others),
 * so the syndrome to use would be esynd/none and esynd/msynd,
 * respectively.  If that is implemented, then what do we do in the case
 * that we do experience an error on the same afar but with a different
 * syndrome?  At the very least we should count such occurrences.
 * Anyway, for now, we'll leave it as it has been for ages.
 */
static int
clear_ecc(struct async_flt *aflt)
{
	ch_cpu_errors_t cpu_error_regs;

	/*
	 * Snapshot the AFSR and AFAR and clear any errors
	 */
	get_cpu_error_state(&cpu_error_regs);
	set_cpu_error_state(&cpu_error_regs);

	/*
	 * If any of the same memory access error bits are still on and
	 * the AFAR matches, return that the error is persistent.
	 */
	return ((cpu_error_regs.afsr & (C_AFSR_MEMORY & aflt->flt_stat)) != 0 &&
	    cpu_error_regs.afar == aflt->flt_addr);
}

/*
 * Turn off all cpu error detection, normally only used for panics.
 */
void
cpu_disable_errors(void)
{
	xt_all(set_error_enable_tl1, EN_REG_DISABLE, EER_SET_ABSOLUTE);

	/*
	 * With error detection now turned off, check the other cpus'
	 * logout areas for any unlogged errors.
	 */
	if (enable_check_other_cpus_logout) {
		cpu_check_other_cpus_logout();
		/*
		 * Make a second pass over the logout areas, in case
		 * there is a failing CPU in an error-trap loop which
		 * will write to the logout area once it is emptied.
		 */
		cpu_check_other_cpus_logout();
	}
}

/*
 * Enable errors.
 */
void
cpu_enable_errors(void)
{
	xt_all(set_error_enable_tl1, EN_REG_ENABLE, EER_SET_ABSOLUTE);
}

/*
 * Flush the entire ecache using displacement flush by reading through a
 * physical address range twice as large as the Ecache.
 */
void
cpu_flush_ecache(void)
{
	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size,
	    cpunodes[CPU->cpu_id].ecache_linesize);
}

/*
 * Return CPU E$ set size - E$ size divided by the associativity.
 * We use this function in places where the CPU_PRIVATE ptr may not be
 * initialized yet.  Note that for send_mondo and in the Ecache scrubber,
 * we're guaranteed that CPU_PRIVATE is initialized.  Also, cpunodes is set
 * up before the kernel switches from OBP's to the kernel's trap table, so
 * we don't have to worry about cpunodes being uninitialized.
 */
int
cpu_ecache_set_size(struct cpu *cp)
{
	if (CPU_PRIVATE(cp))
		return (CPU_PRIVATE_VAL(cp, chpr_ec_set_size));

	return (cpunodes[cp->cpu_id].ecache_size / cpu_ecache_nway());
}
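/*
 * Worked example (added for illustration): on a 2-way 8MB Cheetah+
 * E$ the set size is 8MB / 2 = 4MB, while a direct-mapped Cheetah
 * E$ of the same size has a set size equal to the full 8MB.
 */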
/*
 * Flush Ecache line.
 * Uses ASI_EC_DIAG for Cheetah+ and Jalapeno.
 * Uses normal displacement flush for Cheetah.
 */
static void
cpu_flush_ecache_line(ch_async_flt_t *ch_flt)
{
	struct async_flt *aflt = (struct async_flt *)ch_flt;
	int ec_set_size = cpu_ecache_set_size(CPU);

	ecache_flush_line(aflt->flt_addr, ec_set_size);
}

/*
 * Scrub physical address.
 * Scrub code is different depending upon whether this is a Cheetah+
 * with 2-way Ecache or a direct-mapped Ecache.
 */
static void
cpu_scrubphys(struct async_flt *aflt)
{
	int ec_set_size = cpu_ecache_set_size(CPU);

	scrubphys(aflt->flt_addr, ec_set_size);
}

/*
 * Clear physical address.
 * Scrub code is different depending upon whether this is a Cheetah+
 * with 2-way Ecache or a direct-mapped Ecache.
 */
void
cpu_clearphys(struct async_flt *aflt)
{
	int lsize = cpunodes[CPU->cpu_id].ecache_linesize;
	int ec_set_size = cpu_ecache_set_size(CPU);

	clearphys(aflt->flt_addr, ec_set_size, lsize);
}

#if defined(CPU_IMP_ECACHE_ASSOC)
/*
 * Check for a matching valid line in all the sets.
 * If found, return set# + 1. Otherwise return 0.
 */
static int
cpu_ecache_line_valid(ch_async_flt_t *ch_flt)
{
	struct async_flt *aflt = (struct async_flt *)ch_flt;
	int totalsize = cpunodes[CPU->cpu_id].ecache_size;
	int ec_set_size = cpu_ecache_set_size(CPU);
	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
	int nway = cpu_ecache_nway();
	int i;

	for (i = 0; i < nway; i++, ecp++) {
		if (!cpu_ectag_line_invalid(totalsize, ecp->ec_tag) &&
		    (aflt->flt_addr & P2ALIGN(C_AFAR_PA, ec_set_size)) ==
		    cpu_ectag_to_pa(ec_set_size, ecp->ec_tag))
			return (i + 1);
	}
	return (0);
}
#endif /* CPU_IMP_ECACHE_ASSOC */

/*
 * Check whether a line in the given logout info matches the specified
 * fault address.  If reqval is set then the line must not be Invalid.
 * Returns 0 on failure; on success (way + 1) is returned and *level is
 * set to 2 for l2$ or 3 for l3$.
 */
static int
cpu_matching_ecache_line(uint64_t faddr, void *data, int reqval, int *level)
{
	ch_diag_data_t *cdp = data;
	ch_ec_data_t *ecp;
	int totalsize, ec_set_size;
	int i, ways;
	int match = 0;
	int tagvalid;
	uint64_t addr, tagpa;
	int ispanther = IS_PANTHER(cpunodes[CPU->cpu_id].implementation);

	/*
	 * Check the l2$ logout data
	 */
	if (ispanther) {
		ecp = &cdp->chd_l2_data[0];
		ec_set_size = PN_L2_SET_SIZE;
		ways = PN_L2_NWAYS;
	} else {
		ecp = &cdp->chd_ec_data[0];
		ec_set_size = cpu_ecache_set_size(CPU);
		ways = cpu_ecache_nway();
		totalsize = cpunodes[CPU->cpu_id].ecache_size;
	}
	/* remove low order PA bits from fault address not used in PA tag */
	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
	for (i = 0; i < ways; i++, ecp++) {
		if (ispanther) {
			tagpa = PN_L2TAG_TO_PA(ecp->ec_tag);
			tagvalid = !PN_L2_LINE_INVALID(ecp->ec_tag);
		} else {
			tagpa = cpu_ectag_to_pa(ec_set_size, ecp->ec_tag);
			tagvalid = !cpu_ectag_line_invalid(totalsize,
			    ecp->ec_tag);
		}
		if (tagpa == addr && (!reqval || tagvalid)) {
			match = i + 1;
			*level = 2;
			break;
		}
	}

	if (match || !ispanther)
		return (match);

	/* For Panther we also check the l3$ */
	ecp = &cdp->chd_ec_data[0];
	ec_set_size = PN_L3_SET_SIZE;
	ways = PN_L3_NWAYS;
	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);

	for (i = 0; i < ways; i++, ecp++) {
		if (PN_L3TAG_TO_PA(ecp->ec_tag) == addr && (!reqval ||
		    !PN_L3_LINE_INVALID(ecp->ec_tag))) {
			match = i + 1;
			*level = 3;
			break;
		}
	}

	return (match);
}
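/*
 * Worked example (added for illustration): P2ALIGN(C_AFAR_PA,
 * ec_set_size) rounds the AFAR PA field mask down to a set-size
 * boundary.  With a 4MB set size (0x400000) the low 22 bits of the
 * mask are cleared, so the compares above ignore the index and
 * line-offset bits and match on the tag bits only.
 */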
#if defined(CPU_IMP_L1_CACHE_PARITY)
/*
 * Record information related to the source of a Dcache Parity Error.
 */
static void
cpu_dcache_parity_info(ch_async_flt_t *ch_flt)
{
	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
	int index;

	/*
	 * Since instruction decode cannot be done at high PIL,
	 * just examine the entire Dcache to locate the error.
	 */
	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
		ch_flt->parity_data.dpe.cpl_way = -1;
		ch_flt->parity_data.dpe.cpl_off = -1;
	}
	for (index = 0; index < dc_set_size; index += dcache_linesize)
		cpu_dcache_parity_check(ch_flt, index);
}

/*
 * Check all ways of the Dcache at a specified index for good parity.
 */
static void
cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index)
{
	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
	uint64_t parity_bits, pbits, data_word;
	static int parity_bits_popc[] = { 0, 1, 1, 0 };
	int way, word, data_byte;
	ch_dc_data_t *dcp = &ch_flt->parity_data.dpe.cpl_dc[0];
	ch_dc_data_t tmp_dcp;

	for (way = 0; way < CH_DCACHE_NWAY; way++, dcp++) {
		/*
		 * Perform diagnostic read.
		 */
		get_dcache_dtag(index + way * dc_set_size,
		    (uint64_t *)&tmp_dcp);

		/*
		 * Check tag for even parity.
		 * Sum of 1 bits (including parity bit) should be even.
		 */
		if (popc64(tmp_dcp.dc_tag & CHP_DCTAG_PARMASK) & 1) {
			/*
			 * If this is the first error, log detailed
			 * information about it and check the snoop tag.
			 * Otherwise just record the fact that we found
			 * another error.
			 */
			if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
				ch_flt->parity_data.dpe.cpl_way = way;
				ch_flt->parity_data.dpe.cpl_cache =
				    CPU_DC_PARITY;
				ch_flt->parity_data.dpe.cpl_tag |= CHP_DC_TAG;

				if (popc64(tmp_dcp.dc_sntag &
				    CHP_DCSNTAG_PARMASK) & 1) {
					ch_flt->parity_data.dpe.cpl_tag |=
					    CHP_DC_SNTAG;
					ch_flt->parity_data.dpe.cpl_lcnt++;
				}

				bcopy(&tmp_dcp, dcp, sizeof (ch_dc_data_t));
			}

			ch_flt->parity_data.dpe.cpl_lcnt++;
		}

		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
			/*
			 * Panther has more parity bits than the other
			 * processors for covering dcache data and so each
			 * byte of data in each word has its own parity bit.
			 */
			parity_bits = tmp_dcp.dc_pn_data_parity;
			for (word = 0; word < 4; word++) {
				data_word = tmp_dcp.dc_data[word];
				pbits = parity_bits & PN_DC_DATA_PARITY_MASK;
				for (data_byte = 0; data_byte < 8;
				    data_byte++) {
					if (((popc64(data_word &
					    PN_DC_DATA_PARITY_MASK)) & 1) ^
					    (pbits & 1)) {
						cpu_record_dc_data_parity(
						    ch_flt, dcp, &tmp_dcp, way,
						    word);
					}
					pbits >>= 1;
					data_word >>= 8;
				}
				parity_bits >>= 8;
			}
		} else {
			/*
			 * Check data array for even parity.
			 * The 8 parity bits are grouped into 4 pairs each
			 * of which covers a 64-bit word.  The endianness is
			 * reversed -- the low-order parity bits cover the
			 * high-order data words.
			 */
			parity_bits = tmp_dcp.dc_utag >> 8;
			for (word = 0; word < 4; word++) {
				pbits = (parity_bits >> (6 - word * 2)) & 3;
				if ((popc64(tmp_dcp.dc_data[word]) +
				    parity_bits_popc[pbits]) & 1) {
					cpu_record_dc_data_parity(ch_flt, dcp,
					    &tmp_dcp, way, word);
				}
			}
		}
	}
}
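/*
 * Worked example (added for illustration): in the pre-Panther scheme
 * above, suppose the 2-bit pair for a word is 0b01, so
 * parity_bits_popc[1] == 1.  A data word with an even number of set
 * bits then gives (even + 1) & 1 == 1 and the word is flagged, while
 * an odd popcount sums to even and passes, as even parity requires.
 */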
static void
cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
    ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word)
{
	/*
	 * If this is the first error, log detailed information about it.
	 * Otherwise just record the fact that we found another error.
	 */
	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
		ch_flt->parity_data.dpe.cpl_way = way;
		ch_flt->parity_data.dpe.cpl_cache = CPU_DC_PARITY;
		ch_flt->parity_data.dpe.cpl_off = word * 8;
		bcopy(src_dcp, dest_dcp, sizeof (ch_dc_data_t));
	}
	ch_flt->parity_data.dpe.cpl_lcnt++;
}

/*
 * Record information related to the source of an Icache Parity Error.
 *
 * Called with the Icache disabled so any diagnostic accesses are safe.
 */
static void
cpu_icache_parity_info(ch_async_flt_t *ch_flt)
{
	int ic_set_size;
	int ic_linesize;
	int index;

	if (CPU_PRIVATE(CPU)) {
		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
		    CH_ICACHE_NWAY;
		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
	} else {
		ic_set_size = icache_size / CH_ICACHE_NWAY;
		ic_linesize = icache_linesize;
	}

	ch_flt->parity_data.ipe.cpl_way = -1;
	ch_flt->parity_data.ipe.cpl_off = -1;

	for (index = 0; index < ic_set_size; index += ic_linesize)
		cpu_icache_parity_check(ch_flt, index);
}

/*
 * Check all ways of the Icache at a specified index for good parity.
 */
static void
cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index)
{
	uint64_t parmask, pn_inst_parity;
	int ic_set_size;
	int ic_linesize;
	int flt_index, way, instr, num_instr;
	struct async_flt *aflt = (struct async_flt *)ch_flt;
	ch_ic_data_t *icp = &ch_flt->parity_data.ipe.cpl_ic[0];
	ch_ic_data_t tmp_icp;

	if (CPU_PRIVATE(CPU)) {
		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
		    CH_ICACHE_NWAY;
		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
	} else {
		ic_set_size = icache_size / CH_ICACHE_NWAY;
		ic_linesize = icache_linesize;
	}

	/*
	 * Panther has twice as many instructions per icache line and the
	 * instruction parity bit is in a different location.
	 */
	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
		num_instr = PN_IC_DATA_REG_SIZE / sizeof (uint64_t);
		pn_inst_parity = PN_ICDATA_PARITY_BIT_MASK;
	} else {
		num_instr = CH_IC_DATA_REG_SIZE / sizeof (uint64_t);
		pn_inst_parity = 0;
	}

	/*
	 * Index at which we expect to find the parity error.
	 */
	flt_index = P2ALIGN(aflt->flt_addr % ic_set_size, ic_linesize);

	for (way = 0; way < CH_ICACHE_NWAY; way++, icp++) {
		/*
		 * Diagnostic reads expect the address argument in ASI format.
		 */
		get_icache_dtag(2 * (index + way * ic_set_size),
		    (uint64_t *)&tmp_icp);

		/*
		 * If this is the index at which we expect to find the
		 * error, log detailed information about each of the ways.
		 * This information will be displayed later if we can't
		 * determine the exact way in which the error is located.
		 */
		if (flt_index == index)
			bcopy(&tmp_icp, icp, sizeof (ch_ic_data_t));

		/*
		 * Check tag for even parity.
		 * Sum of 1 bits (including parity bit) should be even.
		 */
		if (popc64(tmp_icp.ic_patag & CHP_ICPATAG_PARMASK) & 1) {
			/*
			 * If this way is the one in which we expected
			 * to find the error, record the way and check the
			 * snoop tag.  Otherwise just record the fact we
			 * found another error.
			 */
			if (flt_index == index) {
				ch_flt->parity_data.ipe.cpl_way = way;
				ch_flt->parity_data.ipe.cpl_tag |= CHP_IC_TAG;

				if (popc64(tmp_icp.ic_sntag &
				    CHP_ICSNTAG_PARMASK) & 1) {
					ch_flt->parity_data.ipe.cpl_tag |=
					    CHP_IC_SNTAG;
					ch_flt->parity_data.ipe.cpl_lcnt++;
				}
			}
			ch_flt->parity_data.ipe.cpl_lcnt++;
			continue;
		}

		/*
		 * Check instruction data for even parity.
		 * Bits participating in parity differ for PC-relative
		 * versus non-PC-relative instructions.
		 */
		for (instr = 0; instr < num_instr; instr++) {
			parmask = (tmp_icp.ic_data[instr] &
			    CH_ICDATA_PRED_ISPCREL) ?
			    (CHP_ICDATA_PCREL_PARMASK | pn_inst_parity) :
			    (CHP_ICDATA_NPCREL_PARMASK | pn_inst_parity);
			if (popc64(tmp_icp.ic_data[instr] & parmask) & 1) {
				/*
				 * If this way is the one in which we expected
				 * to find the error, record the way and
				 * offset.  Otherwise just log the fact we
				 * found another error.
				 */
				if (flt_index == index) {
					ch_flt->parity_data.ipe.cpl_way = way;
					ch_flt->parity_data.ipe.cpl_off =
					    instr * 4;
				}
				ch_flt->parity_data.ipe.cpl_lcnt++;
				continue;
			}
		}
	}
}

/*
 * Record information related to the source of a Pcache Parity Error.
 */
static void
cpu_pcache_parity_info(ch_async_flt_t *ch_flt)
{
	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
	int index;

	/*
	 * Since instruction decode cannot be done at high PIL, just
	 * examine the entire Pcache to check for any parity errors.
	 */
	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
		ch_flt->parity_data.dpe.cpl_way = -1;
		ch_flt->parity_data.dpe.cpl_off = -1;
	}
	for (index = 0; index < pc_set_size; index += CH_PCACHE_LSIZE)
		cpu_pcache_parity_check(ch_flt, index);
}

/*
 * Check all ways of the Pcache at a specified index for good parity.
 */
static void
cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index)
{
	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
	int pc_data_words = CH_PC_DATA_REG_SIZE / sizeof (uint64_t);
	int way, word, pbit, parity_bits;
	ch_pc_data_t *pcp = &ch_flt->parity_data.dpe.cpl_pc[0];
	ch_pc_data_t tmp_pcp;

	for (way = 0; way < CH_PCACHE_NWAY; way++, pcp++) {
		/*
		 * Perform diagnostic read.
		 */
		get_pcache_dtag(index + way * pc_set_size,
		    (uint64_t *)&tmp_pcp);
		/*
		 * Check data array for odd parity.
		 * There are 8 parity bits (bits 57:50 of
		 * ASI_PCACHE_STATUS_DATA) and each of those bits covers
		 * exactly 8 bytes of the data array:
		 *
		 *	parity bit	P$ data bytes covered
		 *	----------	---------------------
		 *	50		63:56
		 *	51		55:48
		 *	52		47:40
		 *	53		39:32
		 *	54		31:24
		 *	55		23:16
		 *	56		15:8
		 *	57		7:0
		 */
		parity_bits = PN_PC_PARITY_BITS(tmp_pcp.pc_status);
		for (word = 0; word < pc_data_words; word++) {
			pbit = (parity_bits >> (pc_data_words - word - 1)) & 1;
			if ((popc64(tmp_pcp.pc_data[word]) & 1) ^ pbit) {
				/*
				 * If this is the first error, log detailed
				 * information about it.  Otherwise just record
				 * the fact that we found another error.
				 */
				if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
					ch_flt->parity_data.dpe.cpl_way = way;
					ch_flt->parity_data.dpe.cpl_cache =
					    CPU_PC_PARITY;
					ch_flt->parity_data.dpe.cpl_off =
					    word * sizeof (uint64_t);
					bcopy(&tmp_pcp, pcp,
					    sizeof (ch_pc_data_t));
				}
				ch_flt->parity_data.dpe.cpl_lcnt++;
			}
		}
	}
}
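/*
 * Worked example (added for illustration, assuming PN_PC_PARITY_BITS()
 * places status bit 50 in the low-order bit of the extracted field):
 * word 0 selects bit (pc_data_words - 1) of that field, i.e. status
 * bit 57, which per the table above covers P$ data bytes 7:0.
 */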
/*
 * Add L1 Data cache data to the ereport payload.
 */
static void
cpu_payload_add_dcache(struct async_flt *aflt, nvlist_t *nvl)
{
	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
	ch_dc_data_t *dcp;
	ch_dc_data_t dcdata[CH_DCACHE_NWAY];
	uint_t nelem;
	int i, ways_to_check, ways_logged = 0;

	/*
	 * If this is a D$ fault then there may be multiple
	 * ways captured in the ch_parity_log_t structure.
	 * Otherwise, there will be at most one way captured
	 * in the ch_diag_data_t struct.
	 * Check each way to see if it should be encoded.
	 */
	if (ch_flt->flt_type == CPU_DC_PARITY)
		ways_to_check = CH_DCACHE_NWAY;
	else
		ways_to_check = 1;
	for (i = 0; i < ways_to_check; i++) {
		if (ch_flt->flt_type == CPU_DC_PARITY)
			dcp = &ch_flt->parity_data.dpe.cpl_dc[i];
		else
			dcp = &ch_flt->flt_diag_data.chd_dc_data;
		if (dcp->dc_logflag == DC_LOGFLAG_MAGIC) {
			bcopy(dcp, &dcdata[ways_logged],
			    sizeof (ch_dc_data_t));
			ways_logged++;
		}
	}

	/*
	 * Add the dcache data to the payload.
	 */
	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_WAYS,
	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
	if (ways_logged != 0) {
		nelem = sizeof (ch_dc_data_t) / sizeof (uint64_t) * ways_logged;
		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_DATA,
		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)dcdata, NULL);
	}
}

/*
 * Add L1 Instruction cache data to the ereport payload.
 */
static void
cpu_payload_add_icache(struct async_flt *aflt, nvlist_t *nvl)
{
	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
	ch_ic_data_t *icp;
	ch_ic_data_t icdata[CH_ICACHE_NWAY];
	uint_t nelem;
	int i, ways_to_check, ways_logged = 0;

	/*
	 * If this is an I$ fault then there may be multiple
	 * ways captured in the ch_parity_log_t structure.
	 * Otherwise, there will be at most one way captured
	 * in the ch_diag_data_t struct.
	 * Check each way to see if it should be encoded.
	 */
	if (ch_flt->flt_type == CPU_IC_PARITY)
		ways_to_check = CH_ICACHE_NWAY;
	else
		ways_to_check = 1;
	for (i = 0; i < ways_to_check; i++) {
		if (ch_flt->flt_type == CPU_IC_PARITY)
			icp = &ch_flt->parity_data.ipe.cpl_ic[i];
		else
			icp = &ch_flt->flt_diag_data.chd_ic_data;
		if (icp->ic_logflag == IC_LOGFLAG_MAGIC) {
			bcopy(icp, &icdata[ways_logged],
			    sizeof (ch_ic_data_t));
			ways_logged++;
		}
	}

	/*
	 * Add the icache data to the payload.
	 */
	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_WAYS,
	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
	if (ways_logged != 0) {
		nelem = sizeof (ch_ic_data_t) / sizeof (uint64_t) * ways_logged;
		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_DATA,
		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)icdata, NULL);
	}
}

#endif	/* CPU_IMP_L1_CACHE_PARITY */

/*
 * Add ecache data to payload.
 */
static void
cpu_payload_add_ecache(struct async_flt *aflt, nvlist_t *nvl)
{
	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
	ch_ec_data_t *ecp;
	ch_ec_data_t ecdata[CHD_EC_DATA_SETS];
	uint_t nelem;
	int i, ways_logged = 0;

	/*
	 * Check each way to see if it should be encoded
	 * and concatenate it into a temporary buffer.
	 */
	for (i = 0; i < CHD_EC_DATA_SETS; i++) {
		ecp = &ch_flt->flt_diag_data.chd_ec_data[i];
		if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
			bcopy(ecp, &ecdata[ways_logged],
			    sizeof (ch_ec_data_t));
			ways_logged++;
		}
	}

	/*
	 * Panther CPUs have an additional level of cache and so
	 * what we just collected was the L3 (ecache) and not the
	 * L2 cache.
	 */
	if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
		/*
		 * Add the L3 (ecache) data to the payload.
		 */
		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_WAYS,
		    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
		if (ways_logged != 0) {
			nelem = sizeof (ch_ec_data_t) /
			    sizeof (uint64_t) * ways_logged;
			fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_DATA,
			    DATA_TYPE_UINT64_ARRAY, nelem,
			    (uint64_t *)ecdata, NULL);
		}

		/*
		 * Now collect the L2 cache.
		 */
		ways_logged = 0;
		for (i = 0; i < PN_L2_NWAYS; i++) {
			ecp = &ch_flt->flt_diag_data.chd_l2_data[i];
			if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
				bcopy(ecp, &ecdata[ways_logged],
				    sizeof (ch_ec_data_t));
				ways_logged++;
			}
		}
	}

	/*
	 * Add the L2 cache data to the payload.
	 */
	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_WAYS,
	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
	if (ways_logged != 0) {
		nelem = sizeof (ch_ec_data_t) /
		    sizeof (uint64_t) * ways_logged;
		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_DATA,
		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)ecdata, NULL);
	}
}

/*
 * Initialize cpu scheme for specified cpu.
 */
static void
cpu_fmri_cpu_set(nvlist_t *cpu_fmri, int cpuid)
{
	char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */
	uint8_t mask;

	mask = cpunodes[cpuid].version;
	(void) snprintf(sbuf, sizeof (sbuf), "%llX",
	    (u_longlong_t)cpunodes[cpuid].device_id);
	(void) fm_fmri_cpu_set(cpu_fmri, FM_CPU_SCHEME_VERSION, NULL,
	    cpuid, &mask, (const char *)sbuf);
}

/*
 * Returns ereport resource type.
 */
static int
cpu_error_to_resource_type(struct async_flt *aflt)
{
	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;

	switch (ch_flt->flt_type) {

	case CPU_CE_ECACHE:
	case CPU_UE_ECACHE:
	case CPU_UE_ECACHE_RETIRE:
	case CPU_ORPH:
		/*
		 * If AFSR error bit indicates L2$ Data for Cheetah,
		 * Cheetah+ or Jaguar, or L3$ Data for Panther, return
		 * E$ Data type, otherwise, return CPU type.
		 */
		if (cpu_error_is_ecache_data(aflt->flt_inst,
		    ch_flt->flt_bit))
			return (ERRTYPE_ECACHE_DATA);
		return (ERRTYPE_CPU);

	case CPU_CE:
	case CPU_UE:
	case CPU_EMC:
	case CPU_DUE:
	case CPU_RCE:
	case CPU_RUE:
	case CPU_FRC:
	case CPU_FRU:
		return (ERRTYPE_MEMORY);

	case CPU_IC_PARITY:
	case CPU_DC_PARITY:
	case CPU_FPUERR:
	case CPU_PC_PARITY:
	case CPU_ITLB_PARITY:
	case CPU_DTLB_PARITY:
		return (ERRTYPE_CPU);
	}
	return (ERRTYPE_UNKNOWN);
}
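/*
 * Example (added for illustration): a correctable memory error
 * (flt_type CPU_CE) maps to ERRTYPE_MEMORY and gets a mem-scheme FMRI
 * in cpu_payload_add_aflt() below, while an I$ parity error
 * (CPU_IC_PARITY) maps to ERRTYPE_CPU and gets a cpu-scheme FMRI via
 * cpu_fmri_cpu_set() above.
 */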
/*
 * Encode the data saved in the ch_async_flt_t struct into
 * the FM ereport payload.
 */
static void
cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload,
    nvlist_t *resource, int *afar_status, int *synd_status)
{
	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;

	*synd_status = AFLT_STAT_INVALID;
	*afar_status = AFLT_STAT_INVALID;

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR,
		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
	}

	if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR_EXT) &&
	    IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR_EXT,
		    DATA_TYPE_UINT64, ch_flt->afsr_ext, NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR_STATUS) {
		*afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
		    ch_flt->flt_bit);
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS,
		    DATA_TYPE_UINT8, (uint8_t)*afar_status, NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR,
		    DATA_TYPE_UINT64, aflt->flt_addr, NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC,
		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL,
		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT,
		    DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
		    DATA_TYPE_BOOLEAN_VALUE,
		    (aflt->flt_priv ? B_TRUE : B_FALSE), NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ME) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ME,
		    DATA_TYPE_BOOLEAN_VALUE,
		    (aflt->flt_stat & C_AFSR_ME) ?
		    B_TRUE : B_FALSE, NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND_STATUS) {
		*synd_status = afsr_to_synd_status(aflt->flt_inst,
		    ch_flt->afsr_errs, ch_flt->flt_bit);
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND_STATUS,
		    DATA_TYPE_UINT8, (uint8_t)*synd_status, NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND,
		    DATA_TYPE_UINT16, (uint16_t)aflt->flt_synd, NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_TYPE) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_TYPE,
		    DATA_TYPE_STRING, flt_to_error_type(aflt), NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_DISP) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_DISP,
		    DATA_TYPE_UINT64, aflt->flt_disp, NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L2)
		cpu_payload_add_ecache(aflt, payload);

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_COPYFUNCTION) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_COPYFUNCTION,
		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_status & 0xff, NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_HOWDETECTED) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_HOWDETECTED,
		    DATA_TYPE_UINT8, (uint8_t)(aflt->flt_status >> 8), NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_INSTRBLOCK) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_INSTRBLOCK,
		    DATA_TYPE_UINT32_ARRAY, 16,
		    (uint32_t *)&ch_flt->flt_fpdata, NULL);
	}

#if defined(CPU_IMP_L1_CACHE_PARITY)
	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1D)
		cpu_payload_add_dcache(aflt, payload);
	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1I)
		cpu_payload_add_icache(aflt, payload);
#endif	/* CPU_IMP_L1_CACHE_PARITY */

#if defined(CHEETAH_PLUS)
	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1P)
		cpu_payload_add_pcache(aflt, payload);
	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_TLB)
		cpu_payload_add_tlb(aflt, payload);
#endif	/* CHEETAH_PLUS */

	/*
	 * Create the FMRI that goes into the payload
	 * and contains the unum info if necessary.
46080Sstevel@tonic-gate */ 46092381Smikechr if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_RESOURCE) { 46101186Sayznaga char unum[UNUM_NAMLEN] = ""; 46111186Sayznaga char sid[DIMM_SERIAL_ID_LEN] = ""; 46122436Smb91622 int len, ret, rtype, synd_code; 46132381Smikechr uint64_t offset = (uint64_t)-1; 46142381Smikechr 46152381Smikechr rtype = cpu_error_to_resource_type(aflt); 46162381Smikechr switch (rtype) { 46172381Smikechr 46182381Smikechr case ERRTYPE_MEMORY: 46192381Smikechr case ERRTYPE_ECACHE_DATA: 46202381Smikechr 46212381Smikechr /* 46222381Smikechr * Memory errors, do unum lookup 46232381Smikechr */ 46242381Smikechr if (*afar_status == AFLT_STAT_INVALID) 46252381Smikechr break; 46262381Smikechr 46272381Smikechr if (rtype == ERRTYPE_ECACHE_DATA) 46282381Smikechr aflt->flt_status |= ECC_ECACHE; 46292381Smikechr else 46302381Smikechr aflt->flt_status &= ~ECC_ECACHE; 46312381Smikechr 46322436Smb91622 synd_code = synd_to_synd_code(*synd_status, 46332436Smb91622 aflt->flt_synd, ch_flt->flt_bit); 46342436Smb91622 46352436Smb91622 if (cpu_get_mem_unum_synd(synd_code, aflt, unum) != 0) 46362381Smikechr break; 46371186Sayznaga 46381186Sayznaga ret = cpu_get_mem_sid(unum, sid, DIMM_SERIAL_ID_LEN, 46391186Sayznaga &len); 46401186Sayznaga 46411186Sayznaga if (ret == 0) { 46421186Sayznaga (void) cpu_get_mem_offset(aflt->flt_addr, 46431186Sayznaga &offset); 46441186Sayznaga } 46451186Sayznaga 46460Sstevel@tonic-gate fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION, 46471186Sayznaga NULL, unum, (ret == 0) ? sid : NULL, offset); 46480Sstevel@tonic-gate fm_payload_set(payload, 46490Sstevel@tonic-gate FM_EREPORT_PAYLOAD_NAME_RESOURCE, 46500Sstevel@tonic-gate DATA_TYPE_NVLIST, resource, NULL); 46512381Smikechr break; 46522381Smikechr 46532381Smikechr case ERRTYPE_CPU: 46542381Smikechr /* 46552381Smikechr * On-board processor array error, add cpu resource. 46562381Smikechr */ 46572381Smikechr cpu_fmri_cpu_set(resource, aflt->flt_inst); 46582381Smikechr fm_payload_set(payload, 46592381Smikechr FM_EREPORT_PAYLOAD_NAME_RESOURCE, 46602381Smikechr DATA_TYPE_NVLIST, resource, NULL); 46612381Smikechr break; 46620Sstevel@tonic-gate } 46630Sstevel@tonic-gate } 46640Sstevel@tonic-gate } 46650Sstevel@tonic-gate 46660Sstevel@tonic-gate /* 46670Sstevel@tonic-gate * Initialize the way info if necessary. 46680Sstevel@tonic-gate */ 46690Sstevel@tonic-gate void 46700Sstevel@tonic-gate cpu_ereport_init(struct async_flt *aflt) 46710Sstevel@tonic-gate { 46720Sstevel@tonic-gate ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 46730Sstevel@tonic-gate ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0]; 46740Sstevel@tonic-gate ch_ec_data_t *l2p = &ch_flt->flt_diag_data.chd_l2_data[0]; 46750Sstevel@tonic-gate int i; 46760Sstevel@tonic-gate 46770Sstevel@tonic-gate /* 46780Sstevel@tonic-gate * Initialize the info in the CPU logout structure. 46790Sstevel@tonic-gate * The I$/D$ way information is not initialized here 46800Sstevel@tonic-gate * since it is captured in the logout assembly code. 46810Sstevel@tonic-gate */ 46820Sstevel@tonic-gate for (i = 0; i < CHD_EC_DATA_SETS; i++) 46830Sstevel@tonic-gate (ecp + i)->ec_way = i; 46840Sstevel@tonic-gate 46850Sstevel@tonic-gate for (i = 0; i < PN_L2_NWAYS; i++) 46860Sstevel@tonic-gate (l2p + i)->ec_way = i; 46870Sstevel@tonic-gate } 46880Sstevel@tonic-gate 46890Sstevel@tonic-gate /* 46900Sstevel@tonic-gate * Returns whether fault address is valid for this error bit and 46910Sstevel@tonic-gate * whether the address is "in memory" (i.e. pf_is_memory returns 1). 
46920Sstevel@tonic-gate */ 46930Sstevel@tonic-gate int 46940Sstevel@tonic-gate cpu_flt_in_memory(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit) 46950Sstevel@tonic-gate { 46960Sstevel@tonic-gate struct async_flt *aflt = (struct async_flt *)ch_flt; 46970Sstevel@tonic-gate 46982381Smikechr return ((t_afsr_bit & C_AFSR_MEMORY) && 46990Sstevel@tonic-gate afsr_to_afar_status(ch_flt->afsr_errs, t_afsr_bit) == 47000Sstevel@tonic-gate AFLT_STAT_VALID && 47010Sstevel@tonic-gate pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT)); 47020Sstevel@tonic-gate } 47030Sstevel@tonic-gate 47042436Smb91622 /* 47052436Smb91622 * Returns whether fault address is valid based on the error bit for the 47062436Smb91622 * one event being queued and whether the address is "in memory". 47072436Smb91622 */ 47082436Smb91622 static int 47092436Smb91622 cpu_flt_in_memory_one_event(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit) 47102436Smb91622 { 47112436Smb91622 struct async_flt *aflt = (struct async_flt *)ch_flt; 47122436Smb91622 int afar_status; 47132436Smb91622 uint64_t afsr_errs, afsr_ow, *ow_bits; 47142436Smb91622 47152436Smb91622 if (!(t_afsr_bit & C_AFSR_MEMORY) || 47162436Smb91622 !pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT)) 47172436Smb91622 return (0); 47182436Smb91622 47192436Smb91622 afsr_errs = ch_flt->afsr_errs; 47202436Smb91622 afar_status = afsr_to_afar_status(afsr_errs, t_afsr_bit); 47212436Smb91622 47222436Smb91622 switch (afar_status) { 47232436Smb91622 case AFLT_STAT_VALID: 47242436Smb91622 return (1); 47252436Smb91622 47262436Smb91622 case AFLT_STAT_AMBIGUOUS: 47272436Smb91622 /* 47282436Smb91622 * Status is ambiguous since another error bit (or bits) 47292436Smb91622 * of equal priority to the specified bit is on in the afsr, 47302436Smb91622 * so check those bits. Return 1 only if the bits on in the 47312436Smb91622 * same class as the t_afsr_bit are also C_AFSR_MEMORY bits. 47322436Smb91622 * Otherwise not all the equal priority bits are for memory 47332436Smb91622 * errors, so return 0. 47342436Smb91622 */ 47352436Smb91622 ow_bits = afar_overwrite; 47362436Smb91622 while ((afsr_ow = *ow_bits++) != 0) { 47372436Smb91622 /* 47382436Smb91622 * Get other bits that are on in t_afsr_bit's priority 47392436Smb91622 * class to check for Memory Error bits only. 47402436Smb91622 */ 47412436Smb91622 if (afsr_ow & t_afsr_bit) { 47422436Smb91622 if ((afsr_errs & afsr_ow) & ~C_AFSR_MEMORY) 47432436Smb91622 return (0); 47442436Smb91622 else 47452436Smb91622 return (1); 47462436Smb91622 } 47472436Smb91622 } 47482436Smb91622 /*FALLTHRU*/ 47492436Smb91622 47502436Smb91622 default: 47512436Smb91622 return (0); 47522436Smb91622 } 47532436Smb91622 } 47542436Smb91622 47550Sstevel@tonic-gate static void 47560Sstevel@tonic-gate cpu_log_diag_info(ch_async_flt_t *ch_flt) 47570Sstevel@tonic-gate { 47580Sstevel@tonic-gate struct async_flt *aflt = (struct async_flt *)ch_flt; 47590Sstevel@tonic-gate ch_dc_data_t *dcp = &ch_flt->flt_diag_data.chd_dc_data; 47600Sstevel@tonic-gate ch_ic_data_t *icp = &ch_flt->flt_diag_data.chd_ic_data; 47610Sstevel@tonic-gate ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0]; 47620Sstevel@tonic-gate #if defined(CPU_IMP_ECACHE_ASSOC) 47630Sstevel@tonic-gate int i, nway; 47640Sstevel@tonic-gate #endif /* CPU_IMP_ECACHE_ASSOC */ 47650Sstevel@tonic-gate 47660Sstevel@tonic-gate /* 47670Sstevel@tonic-gate * Check if the captured CPU logout was valid.
47680Sstevel@tonic-gate */ 47690Sstevel@tonic-gate if (ch_flt->flt_diag_data.chd_afar == LOGOUT_INVALID || 47700Sstevel@tonic-gate ch_flt->flt_data_incomplete) 47710Sstevel@tonic-gate return; 47720Sstevel@tonic-gate 47730Sstevel@tonic-gate #if defined(CPU_IMP_ECACHE_ASSOC) 47740Sstevel@tonic-gate nway = cpu_ecache_nway(); 47750Sstevel@tonic-gate i = cpu_ecache_line_valid(ch_flt); 47760Sstevel@tonic-gate if (i == 0 || i > nway) { 47770Sstevel@tonic-gate for (i = 0; i < nway; i++) 47780Sstevel@tonic-gate ecp[i].ec_logflag = EC_LOGFLAG_MAGIC; 47790Sstevel@tonic-gate } else 47800Sstevel@tonic-gate ecp[i - 1].ec_logflag = EC_LOGFLAG_MAGIC; 47810Sstevel@tonic-gate #else /* CPU_IMP_ECACHE_ASSOC */ 47820Sstevel@tonic-gate ecp->ec_logflag = EC_LOGFLAG_MAGIC; 47830Sstevel@tonic-gate #endif /* CPU_IMP_ECACHE_ASSOC */ 47840Sstevel@tonic-gate 47850Sstevel@tonic-gate #if defined(CHEETAH_PLUS) 47860Sstevel@tonic-gate pn_cpu_log_diag_l2_info(ch_flt); 47870Sstevel@tonic-gate #endif /* CHEETAH_PLUS */ 47880Sstevel@tonic-gate 47890Sstevel@tonic-gate if (CH_DCTAG_MATCH(dcp->dc_tag, aflt->flt_addr)) { 47900Sstevel@tonic-gate dcp->dc_way = CH_DCIDX_TO_WAY(dcp->dc_idx); 47910Sstevel@tonic-gate dcp->dc_logflag = DC_LOGFLAG_MAGIC; 47920Sstevel@tonic-gate } 47930Sstevel@tonic-gate 47940Sstevel@tonic-gate if (CH_ICTAG_MATCH(icp, aflt->flt_addr)) { 47950Sstevel@tonic-gate if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) 47960Sstevel@tonic-gate icp->ic_way = PN_ICIDX_TO_WAY(icp->ic_idx); 47970Sstevel@tonic-gate else 47980Sstevel@tonic-gate icp->ic_way = CH_ICIDX_TO_WAY(icp->ic_idx); 47990Sstevel@tonic-gate icp->ic_logflag = IC_LOGFLAG_MAGIC; 48000Sstevel@tonic-gate } 48010Sstevel@tonic-gate } 48020Sstevel@tonic-gate 48030Sstevel@tonic-gate /* 48040Sstevel@tonic-gate * Cheetah ECC calculation. 48050Sstevel@tonic-gate * 48060Sstevel@tonic-gate * We only need to do the calculation on the data bits and can ignore check 48070Sstevel@tonic-gate * bit and Mtag bit terms in the calculation. 48080Sstevel@tonic-gate */ 48090Sstevel@tonic-gate static uint64_t ch_ecc_table[9][2] = { 48100Sstevel@tonic-gate /* 48110Sstevel@tonic-gate * low order 64-bits high-order 64-bits 48120Sstevel@tonic-gate */ 48130Sstevel@tonic-gate { 0x46bffffeccd1177f, 0x488800022100014c }, 48140Sstevel@tonic-gate { 0x42fccc81331ff77f, 0x14424f1010249184 }, 48150Sstevel@tonic-gate { 0x8898827c222f1ffe, 0x22c1222808184aaf }, 48160Sstevel@tonic-gate { 0xf7632203e131ccf1, 0xe1241121848292b8 }, 48170Sstevel@tonic-gate { 0x7f5511421b113809, 0x901c88d84288aafe }, 48180Sstevel@tonic-gate { 0x1d49412184882487, 0x8f338c87c044c6ef }, 48190Sstevel@tonic-gate { 0xf552181014448344, 0x7ff8f4443e411911 }, 48200Sstevel@tonic-gate { 0x2189240808f24228, 0xfeeff8cc81333f42 }, 48210Sstevel@tonic-gate { 0x3280008440001112, 0xfee88b337ffffd62 }, 48220Sstevel@tonic-gate }; 48230Sstevel@tonic-gate 48240Sstevel@tonic-gate /* 48250Sstevel@tonic-gate * 64-bit population count, use well-known popcnt trick. 48260Sstevel@tonic-gate * We could use the UltraSPARC V9 POPC instruction, but some 48270Sstevel@tonic-gate * CPUs including Cheetahplus and Jaguar do not support that 48280Sstevel@tonic-gate * instruction. 
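 *
 * An illustrative trace (input value made up): popc64(0xb0) clears one
 * set bit per iteration of val &= val - 1:
 *
 *	0xb0 & 0xaf = 0xa0
 *	0xa0 & 0x9f = 0x80
 *	0x80 & 0x7f = 0x00
 *
 * so three iterations and a result of 3. The loop cost is proportional
 * to the number of set bits, not to the 64-bit word width.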
48290Sstevel@tonic-gate */ 48300Sstevel@tonic-gate int 48310Sstevel@tonic-gate popc64(uint64_t val) 48320Sstevel@tonic-gate { 48330Sstevel@tonic-gate int cnt; 48340Sstevel@tonic-gate 48350Sstevel@tonic-gate for (cnt = 0; val != 0; val &= val - 1) 48360Sstevel@tonic-gate cnt++; 48370Sstevel@tonic-gate return (cnt); 48380Sstevel@tonic-gate } 48390Sstevel@tonic-gate 48400Sstevel@tonic-gate /* 48410Sstevel@tonic-gate * Generate the 9 ECC bits for the 128-bit chunk based on the table above. 48420Sstevel@tonic-gate * Note that xor'ing an odd number of 1 bits == 1 and xor'ing an even number 48430Sstevel@tonic-gate * of 1 bits == 0, so we can just use the least significant bit of the popcnt 48440Sstevel@tonic-gate * instead of doing all the xor's. 48450Sstevel@tonic-gate */ 48460Sstevel@tonic-gate uint32_t 48470Sstevel@tonic-gate us3_gen_ecc(uint64_t data_low, uint64_t data_high) 48480Sstevel@tonic-gate { 48490Sstevel@tonic-gate int bitno, s; 48500Sstevel@tonic-gate int synd = 0; 48510Sstevel@tonic-gate 48520Sstevel@tonic-gate for (bitno = 0; bitno < 9; bitno++) { 48530Sstevel@tonic-gate s = (popc64(data_low & ch_ecc_table[bitno][0]) + 48540Sstevel@tonic-gate popc64(data_high & ch_ecc_table[bitno][1])) & 1; 48550Sstevel@tonic-gate synd |= (s << bitno); 48560Sstevel@tonic-gate } 48570Sstevel@tonic-gate return (synd); 48580Sstevel@tonic-gate 48590Sstevel@tonic-gate } 48600Sstevel@tonic-gate 48610Sstevel@tonic-gate /* 48620Sstevel@tonic-gate * Queue one event based on ecc_type_to_info entry. If the event has an AFT1 48630Sstevel@tonic-gate * tag associated with it or is a fatal event (aflt_panic set), it is sent to 48640Sstevel@tonic-gate * the UE event queue. Otherwise it is dispatched to the CE event queue. 48650Sstevel@tonic-gate */ 48660Sstevel@tonic-gate static void 48670Sstevel@tonic-gate cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason, 48680Sstevel@tonic-gate ecc_type_to_info_t *eccp, ch_diag_data_t *cdp) 48690Sstevel@tonic-gate { 48700Sstevel@tonic-gate struct async_flt *aflt = (struct async_flt *)ch_flt; 48710Sstevel@tonic-gate 48720Sstevel@tonic-gate if (reason && 48730Sstevel@tonic-gate strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) { 48740Sstevel@tonic-gate (void) strcat(reason, eccp->ec_reason); 48750Sstevel@tonic-gate } 48760Sstevel@tonic-gate 48770Sstevel@tonic-gate ch_flt->flt_bit = eccp->ec_afsr_bit; 48780Sstevel@tonic-gate ch_flt->flt_type = eccp->ec_flt_type; 48790Sstevel@tonic-gate if (cdp != NULL && cdp->chd_afar != LOGOUT_INVALID) 48800Sstevel@tonic-gate ch_flt->flt_diag_data = *cdp; 48810Sstevel@tonic-gate else 48820Sstevel@tonic-gate ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID; 48832436Smb91622 aflt->flt_in_memory = 48842436Smb91622 cpu_flt_in_memory_one_event(ch_flt, ch_flt->flt_bit); 48850Sstevel@tonic-gate 48860Sstevel@tonic-gate if (ch_flt->flt_bit & C_AFSR_MSYND_ERRS) 48870Sstevel@tonic-gate aflt->flt_synd = GET_M_SYND(aflt->flt_stat); 48880Sstevel@tonic-gate else if (ch_flt->flt_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) 48890Sstevel@tonic-gate aflt->flt_synd = GET_E_SYND(aflt->flt_stat); 48900Sstevel@tonic-gate else 48910Sstevel@tonic-gate aflt->flt_synd = 0; 48920Sstevel@tonic-gate 48930Sstevel@tonic-gate aflt->flt_payload = eccp->ec_err_payload; 48940Sstevel@tonic-gate 48950Sstevel@tonic-gate if (aflt->flt_panic || (eccp->ec_afsr_bit & 48960Sstevel@tonic-gate (C_AFSR_LEVEL1 | C_AFSR_EXT_LEVEL1))) 48970Sstevel@tonic-gate cpu_errorq_dispatch(eccp->ec_err_class, 48980Sstevel@tonic-gate (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue, 
48990Sstevel@tonic-gate aflt->flt_panic); 49000Sstevel@tonic-gate else 49010Sstevel@tonic-gate cpu_errorq_dispatch(eccp->ec_err_class, 49020Sstevel@tonic-gate (void *)ch_flt, sizeof (ch_async_flt_t), ce_queue, 49030Sstevel@tonic-gate aflt->flt_panic); 49040Sstevel@tonic-gate } 49050Sstevel@tonic-gate 49060Sstevel@tonic-gate /* 49070Sstevel@tonic-gate * Queue events on async event queue one event per error bit. First we 49080Sstevel@tonic-gate * queue the events that we "expect" for the given trap, then we queue events 49090Sstevel@tonic-gate * that we may not expect. Return number of events queued. 49100Sstevel@tonic-gate */ 49110Sstevel@tonic-gate int 49120Sstevel@tonic-gate cpu_queue_events(ch_async_flt_t *ch_flt, char *reason, uint64_t t_afsr_errs, 49130Sstevel@tonic-gate ch_cpu_logout_t *clop) 49140Sstevel@tonic-gate { 49150Sstevel@tonic-gate struct async_flt *aflt = (struct async_flt *)ch_flt; 49160Sstevel@tonic-gate ecc_type_to_info_t *eccp; 49170Sstevel@tonic-gate int nevents = 0; 49180Sstevel@tonic-gate uint64_t primary_afar = aflt->flt_addr, primary_afsr = aflt->flt_stat; 49190Sstevel@tonic-gate #if defined(CHEETAH_PLUS) 49200Sstevel@tonic-gate uint64_t orig_t_afsr_errs; 49210Sstevel@tonic-gate #endif 49220Sstevel@tonic-gate uint64_t primary_afsr_ext = ch_flt->afsr_ext; 49230Sstevel@tonic-gate uint64_t primary_afsr_errs = ch_flt->afsr_errs; 49240Sstevel@tonic-gate ch_diag_data_t *cdp = NULL; 49250Sstevel@tonic-gate 49260Sstevel@tonic-gate t_afsr_errs &= ((C_AFSR_ALL_ERRS & ~C_AFSR_ME) | C_AFSR_EXT_ALL_ERRS); 49270Sstevel@tonic-gate 49280Sstevel@tonic-gate #if defined(CHEETAH_PLUS) 49290Sstevel@tonic-gate orig_t_afsr_errs = t_afsr_errs; 49300Sstevel@tonic-gate 49310Sstevel@tonic-gate /* 49320Sstevel@tonic-gate * For Cheetah+, log the shadow AFSR/AFAR bits first. 49330Sstevel@tonic-gate */ 49340Sstevel@tonic-gate if (clop != NULL) { 49350Sstevel@tonic-gate /* 49360Sstevel@tonic-gate * Set the AFSR and AFAR fields to the shadow registers. The 49370Sstevel@tonic-gate * flt_addr and flt_stat fields will be reset to the primaries 49380Sstevel@tonic-gate * below, but the sdw_addr and sdw_stat will stay as the 49390Sstevel@tonic-gate * secondaries. 49400Sstevel@tonic-gate */ 49410Sstevel@tonic-gate cdp = &clop->clo_sdw_data; 49420Sstevel@tonic-gate aflt->flt_addr = ch_flt->flt_sdw_afar = cdp->chd_afar; 49430Sstevel@tonic-gate aflt->flt_stat = ch_flt->flt_sdw_afsr = cdp->chd_afsr; 49440Sstevel@tonic-gate ch_flt->afsr_ext = ch_flt->flt_sdw_afsr_ext = cdp->chd_afsr_ext; 49450Sstevel@tonic-gate ch_flt->afsr_errs = (cdp->chd_afsr_ext & C_AFSR_EXT_ALL_ERRS) | 49460Sstevel@tonic-gate (cdp->chd_afsr & C_AFSR_ALL_ERRS); 49470Sstevel@tonic-gate 49480Sstevel@tonic-gate /* 49490Sstevel@tonic-gate * If the primary and shadow AFSR differ, tag the shadow as 49500Sstevel@tonic-gate * the first fault. 49510Sstevel@tonic-gate */ 49520Sstevel@tonic-gate if ((primary_afar != cdp->chd_afar) || 49530Sstevel@tonic-gate (primary_afsr_errs != ch_flt->afsr_errs)) { 49540Sstevel@tonic-gate aflt->flt_stat |= (1ull << C_AFSR_FIRSTFLT_SHIFT); 49550Sstevel@tonic-gate } 49560Sstevel@tonic-gate 49570Sstevel@tonic-gate /* 49580Sstevel@tonic-gate * Check AFSR bits as well as AFSR_EXT bits in order of 49590Sstevel@tonic-gate * the AFAR overwrite priority. Our stored AFSR_EXT value 49600Sstevel@tonic-gate * is expected to be zero for those CPUs which do not have 49610Sstevel@tonic-gate * an AFSR_EXT register. 
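 *
 * For example (an illustrative walk, not a trace of real hardware): if
 * the shadow AFSR has both a UE and a CE bit set, the scan below visits
 * the ecc_type_to_info entries in AFAR overwrite priority order, queues
 * the higher-priority UE first, and strips its bit from t_afsr_errs so
 * the CE is queued afterwards as a separate event.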
49620Sstevel@tonic-gate */ 49630Sstevel@tonic-gate for (eccp = ecc_type_to_info; eccp->ec_desc != NULL; eccp++) { 49640Sstevel@tonic-gate if ((eccp->ec_afsr_bit & 49650Sstevel@tonic-gate (ch_flt->afsr_errs & t_afsr_errs)) && 49660Sstevel@tonic-gate ((eccp->ec_flags & aflt->flt_status) != 0)) { 49670Sstevel@tonic-gate cpu_queue_one_event(ch_flt, reason, eccp, cdp); 49680Sstevel@tonic-gate cdp = NULL; 49690Sstevel@tonic-gate t_afsr_errs &= ~eccp->ec_afsr_bit; 49700Sstevel@tonic-gate nevents++; 49710Sstevel@tonic-gate } 49720Sstevel@tonic-gate } 49730Sstevel@tonic-gate 49740Sstevel@tonic-gate /* 49750Sstevel@tonic-gate * If the ME bit is on in the primary AFSR turn all the 49760Sstevel@tonic-gate * error bits on again that may set the ME bit to make 49770Sstevel@tonic-gate * sure we see the ME AFSR error logs. 49780Sstevel@tonic-gate */ 49790Sstevel@tonic-gate if ((primary_afsr & C_AFSR_ME) != 0) 49800Sstevel@tonic-gate t_afsr_errs = (orig_t_afsr_errs & C_AFSR_ALL_ME_ERRS); 49810Sstevel@tonic-gate } 49820Sstevel@tonic-gate #endif /* CHEETAH_PLUS */ 49830Sstevel@tonic-gate 49840Sstevel@tonic-gate if (clop != NULL) 49850Sstevel@tonic-gate cdp = &clop->clo_data; 49860Sstevel@tonic-gate 49870Sstevel@tonic-gate /* 49880Sstevel@tonic-gate * Queue expected errors, error bit and fault type must match 49890Sstevel@tonic-gate * in the ecc_type_to_info table. 49900Sstevel@tonic-gate */ 49910Sstevel@tonic-gate for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL; 49920Sstevel@tonic-gate eccp++) { 49930Sstevel@tonic-gate if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 && 49940Sstevel@tonic-gate (eccp->ec_flags & aflt->flt_status) != 0) { 49950Sstevel@tonic-gate #if defined(SERRANO) 49960Sstevel@tonic-gate /* 49970Sstevel@tonic-gate * For FRC/FRU errors on Serrano the afar2 captures 49980Sstevel@tonic-gate * the address and the associated data is 49990Sstevel@tonic-gate * in the shadow logout area. 50000Sstevel@tonic-gate */ 50010Sstevel@tonic-gate if (eccp->ec_afsr_bit & (C_AFSR_FRC | C_AFSR_FRU)) { 50020Sstevel@tonic-gate if (clop != NULL) 50030Sstevel@tonic-gate cdp = &clop->clo_sdw_data; 50040Sstevel@tonic-gate aflt->flt_addr = ch_flt->afar2; 50050Sstevel@tonic-gate } else { 50060Sstevel@tonic-gate if (clop != NULL) 50070Sstevel@tonic-gate cdp = &clop->clo_data; 50080Sstevel@tonic-gate aflt->flt_addr = primary_afar; 50090Sstevel@tonic-gate } 50100Sstevel@tonic-gate #else /* SERRANO */ 50110Sstevel@tonic-gate aflt->flt_addr = primary_afar; 50120Sstevel@tonic-gate #endif /* SERRANO */ 50130Sstevel@tonic-gate aflt->flt_stat = primary_afsr; 50140Sstevel@tonic-gate ch_flt->afsr_ext = primary_afsr_ext; 50150Sstevel@tonic-gate ch_flt->afsr_errs = primary_afsr_errs; 50160Sstevel@tonic-gate cpu_queue_one_event(ch_flt, reason, eccp, cdp); 50170Sstevel@tonic-gate cdp = NULL; 50180Sstevel@tonic-gate t_afsr_errs &= ~eccp->ec_afsr_bit; 50190Sstevel@tonic-gate nevents++; 50200Sstevel@tonic-gate } 50210Sstevel@tonic-gate } 50220Sstevel@tonic-gate 50230Sstevel@tonic-gate /* 50240Sstevel@tonic-gate * Queue unexpected errors, error bit only match. 
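 *
 * The distinction from the loop above: an "expected" event must match
 * both the error bit and the trap context (eccp->ec_flags against
 * aflt->flt_status), whereas here any leftover error bit is queued
 * regardless of which trap reported it. For example (illustrative), an
 * error bit flagged for instruction-access traps that is observed while
 * handling a data-access trap falls through to this second pass.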
50250Sstevel@tonic-gate */ 50260Sstevel@tonic-gate for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL; 50270Sstevel@tonic-gate eccp++) { 50280Sstevel@tonic-gate if (eccp->ec_afsr_bit & t_afsr_errs) { 50290Sstevel@tonic-gate #if defined(SERRANO) 50300Sstevel@tonic-gate /* 50310Sstevel@tonic-gate * For FRC/FRU errors on Serrano the afar2 captures 50320Sstevel@tonic-gate * the address and the associated data is 50330Sstevel@tonic-gate * in the shadow logout area. 50340Sstevel@tonic-gate */ 50350Sstevel@tonic-gate if (eccp->ec_afsr_bit & (C_AFSR_FRC | C_AFSR_FRU)) { 50360Sstevel@tonic-gate if (clop != NULL) 50370Sstevel@tonic-gate cdp = &clop->clo_sdw_data; 50380Sstevel@tonic-gate aflt->flt_addr = ch_flt->afar2; 50390Sstevel@tonic-gate } else { 50400Sstevel@tonic-gate if (clop != NULL) 50410Sstevel@tonic-gate cdp = &clop->clo_data; 50420Sstevel@tonic-gate aflt->flt_addr = primary_afar; 50430Sstevel@tonic-gate } 50440Sstevel@tonic-gate #else /* SERRANO */ 50450Sstevel@tonic-gate aflt->flt_addr = primary_afar; 50460Sstevel@tonic-gate #endif /* SERRANO */ 50470Sstevel@tonic-gate aflt->flt_stat = primary_afsr; 50480Sstevel@tonic-gate ch_flt->afsr_ext = primary_afsr_ext; 50490Sstevel@tonic-gate ch_flt->afsr_errs = primary_afsr_errs; 50500Sstevel@tonic-gate cpu_queue_one_event(ch_flt, reason, eccp, cdp); 50510Sstevel@tonic-gate cdp = NULL; 50520Sstevel@tonic-gate t_afsr_errs &= ~eccp->ec_afsr_bit; 50530Sstevel@tonic-gate nevents++; 50540Sstevel@tonic-gate } 50550Sstevel@tonic-gate } 50560Sstevel@tonic-gate return (nevents); 50570Sstevel@tonic-gate } 50580Sstevel@tonic-gate 50590Sstevel@tonic-gate /* 50600Sstevel@tonic-gate * Return trap type number. 50610Sstevel@tonic-gate */ 50620Sstevel@tonic-gate uint8_t 50630Sstevel@tonic-gate flt_to_trap_type(struct async_flt *aflt) 50640Sstevel@tonic-gate { 50650Sstevel@tonic-gate if (aflt->flt_status & ECC_I_TRAP) 50660Sstevel@tonic-gate return (TRAP_TYPE_ECC_I); 50670Sstevel@tonic-gate if (aflt->flt_status & ECC_D_TRAP) 50680Sstevel@tonic-gate return (TRAP_TYPE_ECC_D); 50690Sstevel@tonic-gate if (aflt->flt_status & ECC_F_TRAP) 50700Sstevel@tonic-gate return (TRAP_TYPE_ECC_F); 50710Sstevel@tonic-gate if (aflt->flt_status & ECC_C_TRAP) 50720Sstevel@tonic-gate return (TRAP_TYPE_ECC_C); 50730Sstevel@tonic-gate if (aflt->flt_status & ECC_DP_TRAP) 50740Sstevel@tonic-gate return (TRAP_TYPE_ECC_DP); 50750Sstevel@tonic-gate if (aflt->flt_status & ECC_IP_TRAP) 50760Sstevel@tonic-gate return (TRAP_TYPE_ECC_IP); 50770Sstevel@tonic-gate if (aflt->flt_status & ECC_ITLB_TRAP) 50780Sstevel@tonic-gate return (TRAP_TYPE_ECC_ITLB); 50790Sstevel@tonic-gate if (aflt->flt_status & ECC_DTLB_TRAP) 50800Sstevel@tonic-gate return (TRAP_TYPE_ECC_DTLB); 50810Sstevel@tonic-gate return (TRAP_TYPE_UNKNOWN); 50820Sstevel@tonic-gate } 50830Sstevel@tonic-gate 50840Sstevel@tonic-gate /* 50850Sstevel@tonic-gate * Decide an error type based on detector and leaky/partner tests. 50860Sstevel@tonic-gate * The following array is used for quick translation - it must 50870Sstevel@tonic-gate * stay in sync with ce_dispact_t. 
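 *
 * Order matters: cetypes[] is indexed directly by the ce_dispact_t
 * disposition, so (illustratively) cetypes[CE_DISP_PERS] must be
 * CE_DISP_DESC_P and cetypes[CE_DISP_STICKY] must be CE_DISP_DESC_S.
 * Reordering either side without the other silently mislabels ereports.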
50880Sstevel@tonic-gate */ 50890Sstevel@tonic-gate 50900Sstevel@tonic-gate static char *cetypes[] = { 50910Sstevel@tonic-gate CE_DISP_DESC_U, 50920Sstevel@tonic-gate CE_DISP_DESC_I, 50930Sstevel@tonic-gate CE_DISP_DESC_PP, 50940Sstevel@tonic-gate CE_DISP_DESC_P, 50950Sstevel@tonic-gate CE_DISP_DESC_L, 50960Sstevel@tonic-gate CE_DISP_DESC_PS, 50970Sstevel@tonic-gate CE_DISP_DESC_S 50980Sstevel@tonic-gate }; 50990Sstevel@tonic-gate 51000Sstevel@tonic-gate char * 51010Sstevel@tonic-gate flt_to_error_type(struct async_flt *aflt) 51020Sstevel@tonic-gate { 51030Sstevel@tonic-gate ce_dispact_t dispact, disp; 51040Sstevel@tonic-gate uchar_t dtcrinfo, ptnrinfo, lkyinfo; 51050Sstevel@tonic-gate 51060Sstevel@tonic-gate /* 51070Sstevel@tonic-gate * The memory payload bundle is shared by some events that do 51080Sstevel@tonic-gate * not perform any classification. For those flt_disp will be 51090Sstevel@tonic-gate * 0 and we will return "unknown". 51100Sstevel@tonic-gate */ 51110Sstevel@tonic-gate if (!ce_disp_inited || !aflt->flt_in_memory || aflt->flt_disp == 0) 51120Sstevel@tonic-gate return (cetypes[CE_DISP_UNKNOWN]); 51130Sstevel@tonic-gate 51140Sstevel@tonic-gate dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp); 51150Sstevel@tonic-gate 51160Sstevel@tonic-gate /* 51170Sstevel@tonic-gate * It is also possible that no scrub/classification was performed 51180Sstevel@tonic-gate * by the detector, for instance where a disrupting error logged 51190Sstevel@tonic-gate * in the AFSR while CEEN was off in cpu_deferred_error. 51200Sstevel@tonic-gate */ 51210Sstevel@tonic-gate if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) 51220Sstevel@tonic-gate return (cetypes[CE_DISP_UNKNOWN]); 51230Sstevel@tonic-gate 51240Sstevel@tonic-gate /* 51250Sstevel@tonic-gate * Lookup type in initial classification/action table 51260Sstevel@tonic-gate */ 51270Sstevel@tonic-gate dispact = CE_DISPACT(ce_disp_table, 51280Sstevel@tonic-gate CE_XDIAG_AFARMATCHED(dtcrinfo), 51290Sstevel@tonic-gate CE_XDIAG_STATE(dtcrinfo), 51300Sstevel@tonic-gate CE_XDIAG_CE1SEEN(dtcrinfo), 51310Sstevel@tonic-gate CE_XDIAG_CE2SEEN(dtcrinfo)); 51320Sstevel@tonic-gate 51330Sstevel@tonic-gate /* 51340Sstevel@tonic-gate * A bad lookup is not something to panic production systems for. 51350Sstevel@tonic-gate */ 51360Sstevel@tonic-gate ASSERT(dispact != CE_DISP_BAD); 51370Sstevel@tonic-gate if (dispact == CE_DISP_BAD) 51380Sstevel@tonic-gate return (cetypes[CE_DISP_UNKNOWN]); 51390Sstevel@tonic-gate 51400Sstevel@tonic-gate disp = CE_DISP(dispact); 51410Sstevel@tonic-gate 51420Sstevel@tonic-gate switch (disp) { 51430Sstevel@tonic-gate case CE_DISP_UNKNOWN: 51440Sstevel@tonic-gate case CE_DISP_INTERMITTENT: 51450Sstevel@tonic-gate break; 51460Sstevel@tonic-gate 51470Sstevel@tonic-gate case CE_DISP_POSS_PERS: 51480Sstevel@tonic-gate /* 51490Sstevel@tonic-gate * "Possible persistent" errors to which we have applied a valid 51500Sstevel@tonic-gate * leaky test can be separated into "persistent" or "leaky". 
51510Sstevel@tonic-gate */ 51520Sstevel@tonic-gate lkyinfo = CE_XDIAG_LKYINFO(aflt->flt_disp); 51530Sstevel@tonic-gate if (CE_XDIAG_TESTVALID(lkyinfo)) { 51540Sstevel@tonic-gate if (CE_XDIAG_CE1SEEN(lkyinfo) || 51550Sstevel@tonic-gate CE_XDIAG_CE2SEEN(lkyinfo)) 51560Sstevel@tonic-gate disp = CE_DISP_LEAKY; 51570Sstevel@tonic-gate else 51580Sstevel@tonic-gate disp = CE_DISP_PERS; 51590Sstevel@tonic-gate } 51600Sstevel@tonic-gate break; 51610Sstevel@tonic-gate 51620Sstevel@tonic-gate case CE_DISP_POSS_STICKY: 51630Sstevel@tonic-gate /* 51640Sstevel@tonic-gate * Promote "possible sticky" results that have been 51650Sstevel@tonic-gate * confirmed by a partner test to "sticky". Unconfirmed 51660Sstevel@tonic-gate * "possible sticky" events are left at that status - we do not 51670Sstevel@tonic-gate * guess at any bad reader/writer etc status here. 51680Sstevel@tonic-gate */ 51690Sstevel@tonic-gate ptnrinfo = CE_XDIAG_PTNRINFO(aflt->flt_disp); 51700Sstevel@tonic-gate if (CE_XDIAG_TESTVALID(ptnrinfo) && 51710Sstevel@tonic-gate CE_XDIAG_CE1SEEN(ptnrinfo) && CE_XDIAG_CE2SEEN(ptnrinfo)) 51720Sstevel@tonic-gate disp = CE_DISP_STICKY; 51730Sstevel@tonic-gate 51740Sstevel@tonic-gate /* 51750Sstevel@tonic-gate * Promote "possible sticky" results on a uniprocessor 51760Sstevel@tonic-gate * to "sticky" 51770Sstevel@tonic-gate */ 51780Sstevel@tonic-gate if (disp == CE_DISP_POSS_STICKY && 51790Sstevel@tonic-gate CE_XDIAG_SKIPCODE(disp) == CE_XDIAG_SKIP_UNIPROC) 51800Sstevel@tonic-gate disp = CE_DISP_STICKY; 51810Sstevel@tonic-gate break; 51820Sstevel@tonic-gate 51830Sstevel@tonic-gate default: 51840Sstevel@tonic-gate disp = CE_DISP_UNKNOWN; 51850Sstevel@tonic-gate break; 51860Sstevel@tonic-gate } 51870Sstevel@tonic-gate 51880Sstevel@tonic-gate return (cetypes[disp]); 51890Sstevel@tonic-gate } 51900Sstevel@tonic-gate 51910Sstevel@tonic-gate /* 51920Sstevel@tonic-gate * Given the entire afsr, the specific bit to check and a prioritized list of 51930Sstevel@tonic-gate * error bits, determine the validity of the various overwrite priority 51940Sstevel@tonic-gate * features of the AFSR/AFAR: AFAR, ESYND and MSYND, each of which has 51950Sstevel@tonic-gate * different overwrite priorities. 51960Sstevel@tonic-gate * 51970Sstevel@tonic-gate * Given a specific afsr error bit and the entire afsr, there are three cases: 51980Sstevel@tonic-gate * INVALID: The specified bit is lower overwrite priority than some other 51990Sstevel@tonic-gate * error bit which is on in the afsr (or IVU/IVC). 52000Sstevel@tonic-gate * VALID: The specified bit is higher priority than all other error bits 52010Sstevel@tonic-gate * which are on in the afsr. 52020Sstevel@tonic-gate * AMBIGUOUS: Another error bit (or bits) of equal priority to the specified 52030Sstevel@tonic-gate * bit is on in the afsr. 52040Sstevel@tonic-gate */ 52050Sstevel@tonic-gate int 52060Sstevel@tonic-gate afsr_to_overw_status(uint64_t afsr, uint64_t afsr_bit, uint64_t *ow_bits) 52070Sstevel@tonic-gate { 52080Sstevel@tonic-gate uint64_t afsr_ow; 52090Sstevel@tonic-gate 52100Sstevel@tonic-gate while ((afsr_ow = *ow_bits++) != 0) { 52110Sstevel@tonic-gate /* 52120Sstevel@tonic-gate * If bit is in the priority class, check to see if another 52130Sstevel@tonic-gate * bit in the same class is on => ambiguous. Otherwise, 52140Sstevel@tonic-gate * the value is valid. If the bit is not in this priority 52150Sstevel@tonic-gate * class, but a higher priority bit is on, then the value is 52160Sstevel@tonic-gate * invalid.
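 *
 * A worked example (hypothetical priority list, highest class first):
 * with ow_bits = { UE, CE, 0 } and afsr = UE | CE, asking about CE
 * returns INVALID (UE outranks it), while asking about UE returns
 * VALID. If a class groups two bits, e.g. ow_bits = { UE | EDU, CE, 0 }
 * with afsr = UE | EDU, asking about UE returns AMBIGUOUS because an
 * equal-priority bit is also on.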
52170Sstevel@tonic-gate */ 52180Sstevel@tonic-gate if (afsr_ow & afsr_bit) { 52190Sstevel@tonic-gate /* 52200Sstevel@tonic-gate * If equal pri bit is on, ambiguous. 52210Sstevel@tonic-gate */ 52220Sstevel@tonic-gate if (afsr & (afsr_ow & ~afsr_bit)) 52230Sstevel@tonic-gate return (AFLT_STAT_AMBIGUOUS); 52240Sstevel@tonic-gate return (AFLT_STAT_VALID); 52250Sstevel@tonic-gate } else if (afsr & afsr_ow) 52260Sstevel@tonic-gate break; 52270Sstevel@tonic-gate } 52280Sstevel@tonic-gate 52290Sstevel@tonic-gate /* 52300Sstevel@tonic-gate * We didn't find a match or a higher priority bit was on. Not 52310Sstevel@tonic-gate * finding a match handles the case of invalid AFAR for IVC, IVU. 52320Sstevel@tonic-gate */ 52330Sstevel@tonic-gate return (AFLT_STAT_INVALID); 52340Sstevel@tonic-gate } 52350Sstevel@tonic-gate 52360Sstevel@tonic-gate static int 52370Sstevel@tonic-gate afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit) 52380Sstevel@tonic-gate { 52390Sstevel@tonic-gate #if defined(SERRANO) 52400Sstevel@tonic-gate if (afsr_bit & (C_AFSR_FRC | C_AFSR_FRU)) 52410Sstevel@tonic-gate return (afsr_to_overw_status(afsr, afsr_bit, afar2_overwrite)); 52420Sstevel@tonic-gate else 52430Sstevel@tonic-gate #endif /* SERRANO */ 52440Sstevel@tonic-gate return (afsr_to_overw_status(afsr, afsr_bit, afar_overwrite)); 52450Sstevel@tonic-gate } 52460Sstevel@tonic-gate 52470Sstevel@tonic-gate static int 52480Sstevel@tonic-gate afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit) 52490Sstevel@tonic-gate { 52500Sstevel@tonic-gate return (afsr_to_overw_status(afsr, afsr_bit, esynd_overwrite)); 52510Sstevel@tonic-gate } 52520Sstevel@tonic-gate 52530Sstevel@tonic-gate static int 52540Sstevel@tonic-gate afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit) 52550Sstevel@tonic-gate { 52560Sstevel@tonic-gate return (afsr_to_overw_status(afsr, afsr_bit, msynd_overwrite)); 52570Sstevel@tonic-gate } 52580Sstevel@tonic-gate 52590Sstevel@tonic-gate static int 52600Sstevel@tonic-gate afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit) 52610Sstevel@tonic-gate { 52620Sstevel@tonic-gate #ifdef lint 52630Sstevel@tonic-gate cpuid = cpuid; 52640Sstevel@tonic-gate #endif 52652436Smb91622 #if defined(CHEETAH_PLUS) 52662436Smb91622 /* 52672436Smb91622 * The M_SYND overwrite policy is combined with the E_SYND overwrite 52682436Smb91622 * policy for Cheetah+ and separate for Panther CPUs. 
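 *
 * Summarized (restating the code below, not new policy): on Panther,
 * MSYND errors use the msynd_overwrite list and ESYND errors use the
 * Panther-specific afsr_to_pn_esynd_status() check; on all other
 * Cheetah+ parts both classes fall back to the shared esynd_overwrite
 * list.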
52692436Smb91622 */ 52700Sstevel@tonic-gate if (afsr_bit & C_AFSR_MSYND_ERRS) { 52712436Smb91622 if (IS_PANTHER(cpunodes[cpuid].implementation)) 52722436Smb91622 return (afsr_to_msynd_status(afsr, afsr_bit)); 52732436Smb91622 else 52742436Smb91622 return (afsr_to_esynd_status(afsr, afsr_bit)); 52750Sstevel@tonic-gate } else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) { 52760Sstevel@tonic-gate if (IS_PANTHER(cpunodes[cpuid].implementation)) 52770Sstevel@tonic-gate return (afsr_to_pn_esynd_status(afsr, afsr_bit)); 52780Sstevel@tonic-gate else 52790Sstevel@tonic-gate return (afsr_to_esynd_status(afsr, afsr_bit)); 52800Sstevel@tonic-gate #else /* CHEETAH_PLUS */ 52812436Smb91622 if (afsr_bit & C_AFSR_MSYND_ERRS) { 52822436Smb91622 return (afsr_to_msynd_status(afsr, afsr_bit)); 52832436Smb91622 } else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) { 52840Sstevel@tonic-gate return (afsr_to_esynd_status(afsr, afsr_bit)); 52850Sstevel@tonic-gate #endif /* CHEETAH_PLUS */ 52860Sstevel@tonic-gate } else { 52870Sstevel@tonic-gate return (AFLT_STAT_INVALID); 52880Sstevel@tonic-gate } 52890Sstevel@tonic-gate } 52900Sstevel@tonic-gate 52910Sstevel@tonic-gate /* 52920Sstevel@tonic-gate * Slave CPU stick synchronization. 52930Sstevel@tonic-gate */ 52940Sstevel@tonic-gate void 52950Sstevel@tonic-gate sticksync_slave(void) 52960Sstevel@tonic-gate { 52970Sstevel@tonic-gate int i; 52980Sstevel@tonic-gate int tries = 0; 52990Sstevel@tonic-gate int64_t tskew; 53000Sstevel@tonic-gate int64_t av_tskew; 53010Sstevel@tonic-gate 53020Sstevel@tonic-gate kpreempt_disable(); 53030Sstevel@tonic-gate /* wait for the master side */ 53040Sstevel@tonic-gate while (stick_sync_cmd != SLAVE_START) 53050Sstevel@tonic-gate ; 53060Sstevel@tonic-gate /* 53070Sstevel@tonic-gate * Synchronization should only take a few tries at most. But in the 53080Sstevel@tonic-gate * odd case where the cpu isn't cooperating we'll keep trying. A cpu 53090Sstevel@tonic-gate * without its stick synchronized wouldn't be a good citizen. 53100Sstevel@tonic-gate */ 53110Sstevel@tonic-gate while (slave_done == 0) { 53120Sstevel@tonic-gate /* 53130Sstevel@tonic-gate * Time skew calculation. 53140Sstevel@tonic-gate */ 53150Sstevel@tonic-gate av_tskew = tskew = 0; 53160Sstevel@tonic-gate 53170Sstevel@tonic-gate for (i = 0; i < stick_iter; i++) { 53180Sstevel@tonic-gate /* make location hot */ 53190Sstevel@tonic-gate timestamp[EV_A_START] = 0; 53200Sstevel@tonic-gate stick_timestamp(&timestamp[EV_A_START]); 53210Sstevel@tonic-gate 53220Sstevel@tonic-gate /* tell the master we're ready */ 53230Sstevel@tonic-gate stick_sync_cmd = MASTER_START; 53240Sstevel@tonic-gate 53250Sstevel@tonic-gate /* and wait */ 53260Sstevel@tonic-gate while (stick_sync_cmd != SLAVE_CONT) 53270Sstevel@tonic-gate ; 53280Sstevel@tonic-gate /* Event B end */ 53290Sstevel@tonic-gate stick_timestamp(&timestamp[EV_B_END]); 53300Sstevel@tonic-gate 53310Sstevel@tonic-gate /* calculate time skew */ 53320Sstevel@tonic-gate tskew = ((timestamp[EV_B_END] - timestamp[EV_B_START]) 53335219Skm84432 - (timestamp[EV_A_END] - timestamp[EV_A_START])) 53345219Skm84432 / 2; 53350Sstevel@tonic-gate 53360Sstevel@tonic-gate /* keep running count */ 53370Sstevel@tonic-gate av_tskew += tskew; 53380Sstevel@tonic-gate } /* for */ 53390Sstevel@tonic-gate 53400Sstevel@tonic-gate /* 53410Sstevel@tonic-gate * Adjust stick for time skew if not within the max allowed; 53420Sstevel@tonic-gate * otherwise we're all done.
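 *
 * A worked example with made-up numbers: if the B leg measures
 * EV_B_END - EV_B_START = 260 STICKs and the A leg measures
 * EV_A_END - EV_A_START = 180, then tskew = (260 - 180) / 2 = 40,
 * i.e. the slave's STICK reads 40 ahead of the master's, and the
 * stick_adj(-av_tskew) call below steps it back by 40.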
53430Sstevel@tonic-gate */ 53440Sstevel@tonic-gate if (stick_iter != 0) 53450Sstevel@tonic-gate av_tskew = av_tskew/stick_iter; 53460Sstevel@tonic-gate if (ABS(av_tskew) > stick_tsk) { 53470Sstevel@tonic-gate /* 53480Sstevel@tonic-gate * If the skew is 1 (the slave's STICK register 53490Sstevel@tonic-gate * is 1 STICK ahead of the master's), stick_adj 53500Sstevel@tonic-gate * could fail to adjust the slave's STICK register 53510Sstevel@tonic-gate * if the STICK read on the slave happens to 53520Sstevel@tonic-gate * align with the increment of the STICK. 53530Sstevel@tonic-gate * Therefore, we increment the skew to 2. 53540Sstevel@tonic-gate */ 53550Sstevel@tonic-gate if (av_tskew == 1) 53560Sstevel@tonic-gate av_tskew++; 53570Sstevel@tonic-gate stick_adj(-av_tskew); 53580Sstevel@tonic-gate } else 53590Sstevel@tonic-gate slave_done = 1; 53600Sstevel@tonic-gate #ifdef DEBUG 53610Sstevel@tonic-gate if (tries < DSYNC_ATTEMPTS) 53620Sstevel@tonic-gate stick_sync_stats[CPU->cpu_id].skew_val[tries] = 53635219Skm84432 av_tskew; 53640Sstevel@tonic-gate ++tries; 53650Sstevel@tonic-gate #endif /* DEBUG */ 53660Sstevel@tonic-gate #ifdef lint 53670Sstevel@tonic-gate tries = tries; 53680Sstevel@tonic-gate #endif 53690Sstevel@tonic-gate 53700Sstevel@tonic-gate } /* while */ 53710Sstevel@tonic-gate 53720Sstevel@tonic-gate /* allow the master to finish */ 53730Sstevel@tonic-gate stick_sync_cmd = EVENT_NULL; 53740Sstevel@tonic-gate kpreempt_enable(); 53750Sstevel@tonic-gate } 53760Sstevel@tonic-gate 53770Sstevel@tonic-gate /* 53780Sstevel@tonic-gate * Master CPU side of stick synchronization. 53790Sstevel@tonic-gate * - timestamp end of Event A 53800Sstevel@tonic-gate * - timestamp beginning of Event B 53810Sstevel@tonic-gate */ 53820Sstevel@tonic-gate void 53830Sstevel@tonic-gate sticksync_master(void) 53840Sstevel@tonic-gate { 53850Sstevel@tonic-gate int i; 53860Sstevel@tonic-gate 53870Sstevel@tonic-gate kpreempt_disable(); 53880Sstevel@tonic-gate /* tell the slave we've started */ 53890Sstevel@tonic-gate slave_done = 0; 53900Sstevel@tonic-gate stick_sync_cmd = SLAVE_START; 53910Sstevel@tonic-gate 53920Sstevel@tonic-gate while (slave_done == 0) { 53930Sstevel@tonic-gate for (i = 0; i < stick_iter; i++) { 53940Sstevel@tonic-gate /* wait for the slave */ 53950Sstevel@tonic-gate while (stick_sync_cmd != MASTER_START) 53960Sstevel@tonic-gate ; 53970Sstevel@tonic-gate /* Event A end */ 53980Sstevel@tonic-gate stick_timestamp(&timestamp[EV_A_END]); 53990Sstevel@tonic-gate 54000Sstevel@tonic-gate /* make location hot */ 54010Sstevel@tonic-gate timestamp[EV_B_START] = 0; 54020Sstevel@tonic-gate stick_timestamp(&timestamp[EV_B_START]); 54030Sstevel@tonic-gate 54040Sstevel@tonic-gate /* tell the slave to continue */ 54050Sstevel@tonic-gate stick_sync_cmd = SLAVE_CONT; 54060Sstevel@tonic-gate } /* for */ 54070Sstevel@tonic-gate 54080Sstevel@tonic-gate /* wait while slave calculates time skew */ 54090Sstevel@tonic-gate while (stick_sync_cmd == SLAVE_CONT) 54100Sstevel@tonic-gate ; 54110Sstevel@tonic-gate } /* while */ 54120Sstevel@tonic-gate kpreempt_enable(); 54130Sstevel@tonic-gate } 54140Sstevel@tonic-gate 54150Sstevel@tonic-gate /* 54160Sstevel@tonic-gate * Cheetah/Cheetah+ have a disrupting error for copybacks, so we don't need to 54170Sstevel@tonic-gate * do the Spitfire hack of xcall'ing all the cpus to ask to check for them. Also, 54180Sstevel@tonic-gate * in cpu_async_panic_callb, each cpu checks for CPU events on its way to 54190Sstevel@tonic-gate * panic idle.
54200Sstevel@tonic-gate */ 54210Sstevel@tonic-gate /*ARGSUSED*/ 54220Sstevel@tonic-gate void 54230Sstevel@tonic-gate cpu_check_allcpus(struct async_flt *aflt) 54240Sstevel@tonic-gate {} 54250Sstevel@tonic-gate 54260Sstevel@tonic-gate struct kmem_cache *ch_private_cache; 54270Sstevel@tonic-gate 54280Sstevel@tonic-gate /* 54290Sstevel@tonic-gate * Cpu private uninitialization. Uninitialize the Ecache scrubber and 54300Sstevel@tonic-gate * deallocate the scrubber data structures and cpu_private data structure. 54310Sstevel@tonic-gate */ 54320Sstevel@tonic-gate void 54330Sstevel@tonic-gate cpu_uninit_private(struct cpu *cp) 54340Sstevel@tonic-gate { 54350Sstevel@tonic-gate cheetah_private_t *chprp = CPU_PRIVATE(cp); 54360Sstevel@tonic-gate 54370Sstevel@tonic-gate ASSERT(chprp); 54380Sstevel@tonic-gate cpu_uninit_ecache_scrub_dr(cp); 54390Sstevel@tonic-gate CPU_PRIVATE(cp) = NULL; 54400Sstevel@tonic-gate ch_err_tl1_paddrs[cp->cpu_id] = NULL; 54410Sstevel@tonic-gate kmem_cache_free(ch_private_cache, chprp); 54420Sstevel@tonic-gate cmp_delete_cpu(cp->cpu_id); 54430Sstevel@tonic-gate 54440Sstevel@tonic-gate } 54450Sstevel@tonic-gate 54460Sstevel@tonic-gate /* 54470Sstevel@tonic-gate * Cheetah Cache Scrubbing 54480Sstevel@tonic-gate * 54490Sstevel@tonic-gate * The primary purpose of Cheetah cache scrubbing is to reduce the exposure 54500Sstevel@tonic-gate * of E$ tags, D$ data, and I$ data to cosmic ray events since they are not 54510Sstevel@tonic-gate * protected by either parity or ECC. 54520Sstevel@tonic-gate * 54530Sstevel@tonic-gate * We currently default the E$ and D$ scan rate to 100 (scan 10% of the 54540Sstevel@tonic-gate * cache per second). Due to the specifics of how the I$ control 54550Sstevel@tonic-gate * logic works with respect to the ASI used to scrub I$ lines, the entire 54560Sstevel@tonic-gate * I$ is scanned at once. 54570Sstevel@tonic-gate */ 54580Sstevel@tonic-gate 54590Sstevel@tonic-gate /* 54600Sstevel@tonic-gate * Tuneables to enable and disable the scrubbing of the caches, and to tune 54610Sstevel@tonic-gate * scrubbing behavior. These may be changed via /etc/system or using mdb 54620Sstevel@tonic-gate * on a running system. 54630Sstevel@tonic-gate */ 54640Sstevel@tonic-gate int dcache_scrub_enable = 1; /* D$ scrubbing is on by default */ 54650Sstevel@tonic-gate 54660Sstevel@tonic-gate /* 54670Sstevel@tonic-gate * The following are the PIL levels that the softints/cross traps will fire at. 54680Sstevel@tonic-gate */ 54690Sstevel@tonic-gate uint_t ecache_scrub_pil = PIL_9; /* E$ scrub PIL for cross traps */ 54700Sstevel@tonic-gate uint_t dcache_scrub_pil = PIL_9; /* D$ scrub PIL for cross traps */ 54710Sstevel@tonic-gate uint_t icache_scrub_pil = PIL_9; /* I$ scrub PIL for cross traps */ 54720Sstevel@tonic-gate 54730Sstevel@tonic-gate #if defined(JALAPENO) 54740Sstevel@tonic-gate 54750Sstevel@tonic-gate /* 54760Sstevel@tonic-gate * Due to several errata (82, 85, 86), we don't enable the L2$ scrubber 54770Sstevel@tonic-gate * on Jalapeno.
54780Sstevel@tonic-gate */ 54790Sstevel@tonic-gate int ecache_scrub_enable = 0; 54800Sstevel@tonic-gate 54810Sstevel@tonic-gate #else /* JALAPENO */ 54820Sstevel@tonic-gate 54830Sstevel@tonic-gate /* 54840Sstevel@tonic-gate * With all other cpu types, E$ scrubbing is on by default 54850Sstevel@tonic-gate */ 54860Sstevel@tonic-gate int ecache_scrub_enable = 1; 54870Sstevel@tonic-gate 54880Sstevel@tonic-gate #endif /* JALAPENO */ 54890Sstevel@tonic-gate 54900Sstevel@tonic-gate 54910Sstevel@tonic-gate #if defined(CHEETAH_PLUS) || defined(JALAPENO) || defined(SERRANO) 54920Sstevel@tonic-gate 54930Sstevel@tonic-gate /* 54940Sstevel@tonic-gate * The I$ scrubber tends to cause latency problems for real-time SW, so it 54950Sstevel@tonic-gate * is disabled by default on non-Cheetah systems 54960Sstevel@tonic-gate */ 54970Sstevel@tonic-gate int icache_scrub_enable = 0; 54980Sstevel@tonic-gate 54990Sstevel@tonic-gate /* 55000Sstevel@tonic-gate * Tuneables specifying the scrub calls per second and the scan rate 55010Sstevel@tonic-gate * for each cache 55020Sstevel@tonic-gate * 55030Sstevel@tonic-gate * The cyclic times are set during boot based on the following values. 55040Sstevel@tonic-gate * Changing these values in mdb after this time will have no effect. If 55050Sstevel@tonic-gate * a different value is desired, it must be set in /etc/system before a 55060Sstevel@tonic-gate * reboot. 55070Sstevel@tonic-gate */ 55080Sstevel@tonic-gate int ecache_calls_a_sec = 1; 55090Sstevel@tonic-gate int dcache_calls_a_sec = 2; 55100Sstevel@tonic-gate int icache_calls_a_sec = 2; 55110Sstevel@tonic-gate 55120Sstevel@tonic-gate int ecache_scan_rate_idle = 1; 55130Sstevel@tonic-gate int ecache_scan_rate_busy = 1; 55140Sstevel@tonic-gate int dcache_scan_rate_idle = 1; 55150Sstevel@tonic-gate int dcache_scan_rate_busy = 1; 55160Sstevel@tonic-gate int icache_scan_rate_idle = 1; 55170Sstevel@tonic-gate int icache_scan_rate_busy = 1; 55180Sstevel@tonic-gate 55190Sstevel@tonic-gate #else /* CHEETAH_PLUS || JALAPENO || SERRANO */ 55200Sstevel@tonic-gate 55210Sstevel@tonic-gate int icache_scrub_enable = 1; /* I$ scrubbing is on by default */ 55220Sstevel@tonic-gate 55230Sstevel@tonic-gate int ecache_calls_a_sec = 100; /* E$ scrub calls per seconds */ 55240Sstevel@tonic-gate int dcache_calls_a_sec = 100; /* D$ scrub calls per seconds */ 55250Sstevel@tonic-gate int icache_calls_a_sec = 100; /* I$ scrub calls per seconds */ 55260Sstevel@tonic-gate 55270Sstevel@tonic-gate int ecache_scan_rate_idle = 100; /* E$ scan rate (in tenths of a %) */ 55280Sstevel@tonic-gate int ecache_scan_rate_busy = 100; /* E$ scan rate (in tenths of a %) */ 55290Sstevel@tonic-gate int dcache_scan_rate_idle = 100; /* D$ scan rate (in tenths of a %) */ 55300Sstevel@tonic-gate int dcache_scan_rate_busy = 100; /* D$ scan rate (in tenths of a %) */ 55310Sstevel@tonic-gate int icache_scan_rate_idle = 100; /* I$ scan rate (in tenths of a %) */ 55320Sstevel@tonic-gate int icache_scan_rate_busy = 100; /* I$ scan rate (in tenths of a %) */ 55330Sstevel@tonic-gate 55340Sstevel@tonic-gate #endif /* CHEETAH_PLUS || JALAPENO || SERRANO */ 55350Sstevel@tonic-gate 55360Sstevel@tonic-gate /* 55370Sstevel@tonic-gate * In order to scrub on offline cpus, a cross trap is sent. The handler will 55380Sstevel@tonic-gate * increment the outstanding request counter and schedule a softint to run 55390Sstevel@tonic-gate * the scrubber. 
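 *
 * The intended flow, pieced together from the code below (illustrative):
 * do_scrub_offline() sends cache_scrubreq_tl1 via xt_some(); the trap
 * handler bumps the cpu's chsm_outstanding[] count and posts the softint
 * registered in cpu_init_cache_scrub(); the softint handler (e.g.
 * scrub_dcache_line_intr()) then scrubs and decrements the count until
 * it reaches zero.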
55400Sstevel@tonic-gate */ 55410Sstevel@tonic-gate extern xcfunc_t cache_scrubreq_tl1; 55420Sstevel@tonic-gate 55430Sstevel@tonic-gate /* 55440Sstevel@tonic-gate * These are the softint functions for each cache scrubber 55450Sstevel@tonic-gate */ 55460Sstevel@tonic-gate static uint_t scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2); 55470Sstevel@tonic-gate static uint_t scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2); 55480Sstevel@tonic-gate static uint_t scrub_icache_line_intr(caddr_t arg1, caddr_t arg2); 55490Sstevel@tonic-gate 55500Sstevel@tonic-gate /* 55510Sstevel@tonic-gate * The cache scrub info table contains cache specific information 55520Sstevel@tonic-gate * and allows for some of the scrub code to be table driven, reducing 55530Sstevel@tonic-gate * duplication of cache similar code. 55540Sstevel@tonic-gate * 55550Sstevel@tonic-gate * This table keeps a copy of the value in the calls per second variable 55560Sstevel@tonic-gate * (?cache_calls_a_sec). This makes it much more difficult for someone 55570Sstevel@tonic-gate * to cause us problems (for example, by setting ecache_calls_a_sec to 0 in 55580Sstevel@tonic-gate * mdb in a misguided attempt to disable the scrubber). 55590Sstevel@tonic-gate */ 55600Sstevel@tonic-gate struct scrub_info { 55610Sstevel@tonic-gate int *csi_enable; /* scrubber enable flag */ 55620Sstevel@tonic-gate int csi_freq; /* scrubber calls per second */ 55630Sstevel@tonic-gate int csi_index; /* index to chsm_outstanding[] */ 55642973Sgovinda uint64_t csi_inum; /* scrubber interrupt number */ 55650Sstevel@tonic-gate cyclic_id_t csi_omni_cyc_id; /* omni cyclic ID */ 55660Sstevel@tonic-gate cyclic_id_t csi_offline_cyc_id; /* offline cyclic ID */ 55670Sstevel@tonic-gate char csi_name[3]; /* cache name for this scrub entry */ 55680Sstevel@tonic-gate } cache_scrub_info[] = { 55690Sstevel@tonic-gate { &ecache_scrub_enable, 0, CACHE_SCRUBBER_INFO_E, 0, 0, 0, "E$"}, 55700Sstevel@tonic-gate { &dcache_scrub_enable, 0, CACHE_SCRUBBER_INFO_D, 0, 0, 0, "D$"}, 55710Sstevel@tonic-gate { &icache_scrub_enable, 0, CACHE_SCRUBBER_INFO_I, 0, 0, 0, "I$"} 55720Sstevel@tonic-gate }; 55730Sstevel@tonic-gate 55740Sstevel@tonic-gate /* 55750Sstevel@tonic-gate * If scrubbing is enabled, increment the outstanding request counter. If it 55760Sstevel@tonic-gate * is 1 (meaning there were no previous requests outstanding), call 55770Sstevel@tonic-gate * setsoftint_tl1 through xt_one_unchecked, which eventually ends up doing 55780Sstevel@tonic-gate * a self trap. 55790Sstevel@tonic-gate */ 55800Sstevel@tonic-gate static void 55810Sstevel@tonic-gate do_scrub(struct scrub_info *csi) 55820Sstevel@tonic-gate { 55830Sstevel@tonic-gate ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 55840Sstevel@tonic-gate int index = csi->csi_index; 55850Sstevel@tonic-gate uint32_t *outstanding = &csmp->chsm_outstanding[index]; 55860Sstevel@tonic-gate 55870Sstevel@tonic-gate if (*(csi->csi_enable) && (csmp->chsm_enable[index])) { 55880Sstevel@tonic-gate if (atomic_add_32_nv(outstanding, 1) == 1) { 55890Sstevel@tonic-gate xt_one_unchecked(CPU->cpu_id, setsoftint_tl1, 55900Sstevel@tonic-gate csi->csi_inum, 0); 55910Sstevel@tonic-gate } 55920Sstevel@tonic-gate } 55930Sstevel@tonic-gate } 55940Sstevel@tonic-gate 55950Sstevel@tonic-gate /* 55960Sstevel@tonic-gate * Omni cyclics don't fire on offline cpus, so we use another cyclic to 55970Sstevel@tonic-gate * cross-trap the offline cpus. 
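 *
 * Concretely (illustrative): the omni cyclic added in
 * cpu_init_cache_scrub() gives every online cpu its own do_scrub()
 * callout, while the single offline cyclic runs do_scrub_offline() on
 * whichever cpu it lands on and uses xt_some(cpu_offline_set, ...) to
 * reach the cpus the omni cyclic skips.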
55980Sstevel@tonic-gate */ 55990Sstevel@tonic-gate static void 56000Sstevel@tonic-gate do_scrub_offline(struct scrub_info *csi) 56010Sstevel@tonic-gate { 56020Sstevel@tonic-gate ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 56030Sstevel@tonic-gate 56040Sstevel@tonic-gate if (CPUSET_ISNULL(cpu_offline_set)) { 56050Sstevel@tonic-gate /* 56060Sstevel@tonic-gate * No offline cpus - nothing to do 56070Sstevel@tonic-gate */ 56080Sstevel@tonic-gate return; 56090Sstevel@tonic-gate } 56100Sstevel@tonic-gate 56110Sstevel@tonic-gate if (*(csi->csi_enable) && (csmp->chsm_enable[csi->csi_index])) { 56120Sstevel@tonic-gate xt_some(cpu_offline_set, cache_scrubreq_tl1, csi->csi_inum, 56130Sstevel@tonic-gate csi->csi_index); 56140Sstevel@tonic-gate } 56150Sstevel@tonic-gate } 56160Sstevel@tonic-gate 56170Sstevel@tonic-gate /* 56180Sstevel@tonic-gate * This is the initial setup for the scrubber cyclics - it sets the 56190Sstevel@tonic-gate * interrupt level, frequency, and function to call. 56200Sstevel@tonic-gate */ 56210Sstevel@tonic-gate /*ARGSUSED*/ 56220Sstevel@tonic-gate static void 56230Sstevel@tonic-gate cpu_scrub_cyclic_setup(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, 56240Sstevel@tonic-gate cyc_time_t *when) 56250Sstevel@tonic-gate { 56260Sstevel@tonic-gate struct scrub_info *csi = (struct scrub_info *)arg; 56270Sstevel@tonic-gate 56280Sstevel@tonic-gate ASSERT(csi != NULL); 56290Sstevel@tonic-gate hdlr->cyh_func = (cyc_func_t)do_scrub; 56300Sstevel@tonic-gate hdlr->cyh_level = CY_LOW_LEVEL; 56310Sstevel@tonic-gate hdlr->cyh_arg = arg; 56320Sstevel@tonic-gate 56330Sstevel@tonic-gate when->cyt_when = 0; /* Start immediately */ 56340Sstevel@tonic-gate when->cyt_interval = NANOSEC / csi->csi_freq; 56350Sstevel@tonic-gate } 56360Sstevel@tonic-gate 56370Sstevel@tonic-gate /* 56380Sstevel@tonic-gate * Initialization for cache scrubbing. 56390Sstevel@tonic-gate * This routine is called AFTER all cpus have had cpu_init_private called 56400Sstevel@tonic-gate * to initialize their private data areas. 
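 *
 * The cyclic interval math is simple but worth a worked example
 * (numbers illustrative): with csi_freq = 100 calls/sec,
 * cyt_interval = NANOSEC / 100 = 10,000,000 ns, i.e. one scrub call
 * every 10 ms; the clamp below keeps csi_freq within [1, hz].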
56410Sstevel@tonic-gate */ 56420Sstevel@tonic-gate void 56430Sstevel@tonic-gate cpu_init_cache_scrub(void) 56440Sstevel@tonic-gate { 56450Sstevel@tonic-gate int i; 56460Sstevel@tonic-gate struct scrub_info *csi; 56470Sstevel@tonic-gate cyc_omni_handler_t omni_hdlr; 56480Sstevel@tonic-gate cyc_handler_t offline_hdlr; 56490Sstevel@tonic-gate cyc_time_t when; 56500Sstevel@tonic-gate 56510Sstevel@tonic-gate /* 56520Sstevel@tonic-gate * save away the maximum number of lines for the D$ 56530Sstevel@tonic-gate */ 56540Sstevel@tonic-gate dcache_nlines = dcache_size / dcache_linesize; 56550Sstevel@tonic-gate 56560Sstevel@tonic-gate /* 56570Sstevel@tonic-gate * register the softints for the cache scrubbing 56580Sstevel@tonic-gate */ 56590Sstevel@tonic-gate cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_inum = 56600Sstevel@tonic-gate add_softintr(ecache_scrub_pil, scrub_ecache_line_intr, 56612973Sgovinda (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_E], SOFTINT_MT); 56620Sstevel@tonic-gate cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_freq = ecache_calls_a_sec; 56630Sstevel@tonic-gate 56640Sstevel@tonic-gate cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_inum = 56650Sstevel@tonic-gate add_softintr(dcache_scrub_pil, scrub_dcache_line_intr, 56662973Sgovinda (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_D], SOFTINT_MT); 56670Sstevel@tonic-gate cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_freq = dcache_calls_a_sec; 56680Sstevel@tonic-gate 56690Sstevel@tonic-gate cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_inum = 56700Sstevel@tonic-gate add_softintr(icache_scrub_pil, scrub_icache_line_intr, 56712973Sgovinda (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_I], SOFTINT_MT); 56720Sstevel@tonic-gate cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_freq = icache_calls_a_sec; 56730Sstevel@tonic-gate 56740Sstevel@tonic-gate /* 56750Sstevel@tonic-gate * start the scrubbing for all the caches 56760Sstevel@tonic-gate */ 56770Sstevel@tonic-gate mutex_enter(&cpu_lock); 56780Sstevel@tonic-gate for (i = 0; i < CACHE_SCRUBBER_COUNT; i++) { 56790Sstevel@tonic-gate 56800Sstevel@tonic-gate csi = &cache_scrub_info[i]; 56810Sstevel@tonic-gate 56820Sstevel@tonic-gate if (!(*csi->csi_enable)) 56830Sstevel@tonic-gate continue; 56840Sstevel@tonic-gate 56850Sstevel@tonic-gate /* 56860Sstevel@tonic-gate * force the following to be true: 56870Sstevel@tonic-gate * 1 <= calls_a_sec <= hz 56880Sstevel@tonic-gate */ 56890Sstevel@tonic-gate if (csi->csi_freq > hz) { 56900Sstevel@tonic-gate cmn_err(CE_NOTE, "%s scrub calls_a_sec set too high " 56915219Skm84432 "(%d); resetting to hz (%d)", csi->csi_name, 56925219Skm84432 csi->csi_freq, hz); 56930Sstevel@tonic-gate csi->csi_freq = hz; 56940Sstevel@tonic-gate } else if (csi->csi_freq < 1) { 56950Sstevel@tonic-gate cmn_err(CE_NOTE, "%s scrub calls_a_sec set too low " 56965219Skm84432 "(%d); resetting to 1", csi->csi_name, 56975219Skm84432 csi->csi_freq); 56980Sstevel@tonic-gate csi->csi_freq = 1; 56990Sstevel@tonic-gate } 57000Sstevel@tonic-gate 57010Sstevel@tonic-gate omni_hdlr.cyo_online = cpu_scrub_cyclic_setup; 57020Sstevel@tonic-gate omni_hdlr.cyo_offline = NULL; 57030Sstevel@tonic-gate omni_hdlr.cyo_arg = (void *)csi; 57040Sstevel@tonic-gate 57050Sstevel@tonic-gate offline_hdlr.cyh_func = (cyc_func_t)do_scrub_offline; 57060Sstevel@tonic-gate offline_hdlr.cyh_arg = (void *)csi; 57070Sstevel@tonic-gate offline_hdlr.cyh_level = CY_LOW_LEVEL; 57080Sstevel@tonic-gate 57090Sstevel@tonic-gate when.cyt_when = 0; /* Start immediately */ 57100Sstevel@tonic-gate when.cyt_interval = NANOSEC / csi->csi_freq; 
57110Sstevel@tonic-gate 57120Sstevel@tonic-gate csi->csi_omni_cyc_id = cyclic_add_omni(&omni_hdlr); 57130Sstevel@tonic-gate csi->csi_offline_cyc_id = cyclic_add(&offline_hdlr, &when); 57140Sstevel@tonic-gate } 57150Sstevel@tonic-gate register_cpu_setup_func(cpu_scrub_cpu_setup, NULL); 57160Sstevel@tonic-gate mutex_exit(&cpu_lock); 57170Sstevel@tonic-gate } 57180Sstevel@tonic-gate 57190Sstevel@tonic-gate /* 57200Sstevel@tonic-gate * Indicate that the specified cpu is idle. 57210Sstevel@tonic-gate */ 57220Sstevel@tonic-gate void 57230Sstevel@tonic-gate cpu_idle_ecache_scrub(struct cpu *cp) 57240Sstevel@tonic-gate { 57250Sstevel@tonic-gate if (CPU_PRIVATE(cp) != NULL) { 57260Sstevel@tonic-gate ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc); 57270Sstevel@tonic-gate csmp->chsm_ecache_busy = ECACHE_CPU_IDLE; 57280Sstevel@tonic-gate } 57290Sstevel@tonic-gate } 57300Sstevel@tonic-gate 57310Sstevel@tonic-gate /* 57320Sstevel@tonic-gate * Indicate that the specified cpu is busy. 57330Sstevel@tonic-gate */ 57340Sstevel@tonic-gate void 57350Sstevel@tonic-gate cpu_busy_ecache_scrub(struct cpu *cp) 57360Sstevel@tonic-gate { 57370Sstevel@tonic-gate if (CPU_PRIVATE(cp) != NULL) { 57380Sstevel@tonic-gate ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc); 57390Sstevel@tonic-gate csmp->chsm_ecache_busy = ECACHE_CPU_BUSY; 57400Sstevel@tonic-gate } 57410Sstevel@tonic-gate } 57420Sstevel@tonic-gate 57430Sstevel@tonic-gate /* 57440Sstevel@tonic-gate * Initialization for cache scrubbing for the specified cpu. 57450Sstevel@tonic-gate */ 57460Sstevel@tonic-gate void 57470Sstevel@tonic-gate cpu_init_ecache_scrub_dr(struct cpu *cp) 57480Sstevel@tonic-gate { 57490Sstevel@tonic-gate ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc); 57500Sstevel@tonic-gate int cpuid = cp->cpu_id; 57510Sstevel@tonic-gate 57520Sstevel@tonic-gate /* initialize the number of lines in the caches */ 57530Sstevel@tonic-gate csmp->chsm_ecache_nlines = cpunodes[cpuid].ecache_size / 57540Sstevel@tonic-gate cpunodes[cpuid].ecache_linesize; 57550Sstevel@tonic-gate csmp->chsm_icache_nlines = CPU_PRIVATE_VAL(cp, chpr_icache_size) / 57560Sstevel@tonic-gate CPU_PRIVATE_VAL(cp, chpr_icache_linesize); 57570Sstevel@tonic-gate 57580Sstevel@tonic-gate /* 57590Sstevel@tonic-gate * do_scrub() and do_scrub_offline() check both the global 57600Sstevel@tonic-gate * ?cache_scrub_enable and this per-cpu enable variable. All scrubbers 57610Sstevel@tonic-gate * check this value before scrubbing. Currently, we use it to 57620Sstevel@tonic-gate * disable the E$ scrubber on multi-core cpus or while running at 57630Sstevel@tonic-gate * slowed speed. For now, just turn everything on and allow 57640Sstevel@tonic-gate * cpu_init_private() to change it if necessary. 57650Sstevel@tonic-gate */ 57660Sstevel@tonic-gate csmp->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1; 57670Sstevel@tonic-gate csmp->chsm_enable[CACHE_SCRUBBER_INFO_D] = 1; 57680Sstevel@tonic-gate csmp->chsm_enable[CACHE_SCRUBBER_INFO_I] = 1; 57690Sstevel@tonic-gate 57700Sstevel@tonic-gate cpu_busy_ecache_scrub(cp); 57710Sstevel@tonic-gate } 57720Sstevel@tonic-gate 57730Sstevel@tonic-gate /* 57740Sstevel@tonic-gate * Un-initialization for cache scrubbing for the specified cpu. 
57750Sstevel@tonic-gate */ 57760Sstevel@tonic-gate static void 57770Sstevel@tonic-gate cpu_uninit_ecache_scrub_dr(struct cpu *cp) 57780Sstevel@tonic-gate { 57790Sstevel@tonic-gate ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc); 57800Sstevel@tonic-gate 57810Sstevel@tonic-gate /* 57820Sstevel@tonic-gate * un-initialize bookkeeping for cache scrubbing 57830Sstevel@tonic-gate */ 57840Sstevel@tonic-gate bzero(csmp, sizeof (ch_scrub_misc_t)); 57850Sstevel@tonic-gate 57860Sstevel@tonic-gate cpu_idle_ecache_scrub(cp); 57870Sstevel@tonic-gate } 57880Sstevel@tonic-gate 57890Sstevel@tonic-gate /* 57900Sstevel@tonic-gate * Called periodically on each CPU to scrub the D$. 57910Sstevel@tonic-gate */ 57920Sstevel@tonic-gate static void 57930Sstevel@tonic-gate scrub_dcache(int how_many) 57940Sstevel@tonic-gate { 57950Sstevel@tonic-gate int i; 57960Sstevel@tonic-gate ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 57970Sstevel@tonic-gate int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D]; 57980Sstevel@tonic-gate 57990Sstevel@tonic-gate /* 58000Sstevel@tonic-gate * scrub the desired number of lines 58010Sstevel@tonic-gate */ 58020Sstevel@tonic-gate for (i = 0; i < how_many; i++) { 58030Sstevel@tonic-gate /* 58040Sstevel@tonic-gate * scrub a D$ line 58050Sstevel@tonic-gate */ 58060Sstevel@tonic-gate dcache_inval_line(index); 58070Sstevel@tonic-gate 58080Sstevel@tonic-gate /* 58090Sstevel@tonic-gate * calculate the next D$ line to scrub, assumes 58100Sstevel@tonic-gate * that dcache_nlines is a power of 2 58110Sstevel@tonic-gate */ 58120Sstevel@tonic-gate index = (index + 1) & (dcache_nlines - 1); 58130Sstevel@tonic-gate } 58140Sstevel@tonic-gate 58150Sstevel@tonic-gate /* 58160Sstevel@tonic-gate * set the scrub index for the next visit 58170Sstevel@tonic-gate */ 58180Sstevel@tonic-gate csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D] = index; 58190Sstevel@tonic-gate } 58200Sstevel@tonic-gate 58210Sstevel@tonic-gate /* 58220Sstevel@tonic-gate * Handler for D$ scrub inum softint. Call scrub_dcache until 58230Sstevel@tonic-gate * we decrement the outstanding request count to zero. 58240Sstevel@tonic-gate */ 58250Sstevel@tonic-gate /*ARGSUSED*/ 58260Sstevel@tonic-gate static uint_t 58270Sstevel@tonic-gate scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2) 58280Sstevel@tonic-gate { 58290Sstevel@tonic-gate int i; 58300Sstevel@tonic-gate int how_many; 58310Sstevel@tonic-gate int outstanding; 58320Sstevel@tonic-gate ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 58330Sstevel@tonic-gate uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_D]; 58340Sstevel@tonic-gate struct scrub_info *csi = (struct scrub_info *)arg1; 58350Sstevel@tonic-gate int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ? 58365219Skm84432 dcache_scan_rate_idle : dcache_scan_rate_busy; 58370Sstevel@tonic-gate 58380Sstevel@tonic-gate /* 58390Sstevel@tonic-gate * The scan rates are expressed in units of tenths of a 58400Sstevel@tonic-gate * percent. A scan rate of 1000 (100%) means the whole 58410Sstevel@tonic-gate * cache is scanned every second. 
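 *
 * Worked example (cache geometry is hypothetical): a 64KB D$ with
 * 32-byte lines gives dcache_nlines = 2048; with scan_rate = 1000
 * and csi_freq = 100, how_many = (2048 * 1000) / (1000 * 100) = 20
 * lines per softint, i.e. roughly the whole D$ once per second
 * (integer division drops the remainder).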
58420Sstevel@tonic-gate */ 58430Sstevel@tonic-gate how_many = (dcache_nlines * scan_rate) / (1000 * csi->csi_freq); 58440Sstevel@tonic-gate 58450Sstevel@tonic-gate do { 58460Sstevel@tonic-gate outstanding = *countp; 58470Sstevel@tonic-gate for (i = 0; i < outstanding; i++) { 58480Sstevel@tonic-gate scrub_dcache(how_many); 58490Sstevel@tonic-gate } 58500Sstevel@tonic-gate } while (atomic_add_32_nv(countp, -outstanding)); 58510Sstevel@tonic-gate 58520Sstevel@tonic-gate return (DDI_INTR_CLAIMED); 58530Sstevel@tonic-gate } 58540Sstevel@tonic-gate 58550Sstevel@tonic-gate /* 58560Sstevel@tonic-gate * Called periodically on each CPU to scrub the I$. The I$ is scrubbed 58570Sstevel@tonic-gate * by invalidating lines. Due to the characteristics of the ASI which 58580Sstevel@tonic-gate * is used to invalidate an I$ line, the entire I$ must be invalidated 58590Sstevel@tonic-gate * vs. an individual I$ line. 58600Sstevel@tonic-gate */ 58610Sstevel@tonic-gate static void 58620Sstevel@tonic-gate scrub_icache(int how_many) 58630Sstevel@tonic-gate { 58640Sstevel@tonic-gate int i; 58650Sstevel@tonic-gate ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 58660Sstevel@tonic-gate int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I]; 58670Sstevel@tonic-gate int icache_nlines = csmp->chsm_icache_nlines; 58680Sstevel@tonic-gate 58690Sstevel@tonic-gate /* 58700Sstevel@tonic-gate * scrub the desired number of lines 58710Sstevel@tonic-gate */ 58720Sstevel@tonic-gate for (i = 0; i < how_many; i++) { 58730Sstevel@tonic-gate /* 58740Sstevel@tonic-gate * since the entire I$ must be scrubbed at once, 58750Sstevel@tonic-gate * wait until the index wraps to zero to invalidate 58760Sstevel@tonic-gate * the entire I$ 58770Sstevel@tonic-gate */ 58780Sstevel@tonic-gate if (index == 0) { 58790Sstevel@tonic-gate icache_inval_all(); 58800Sstevel@tonic-gate } 58810Sstevel@tonic-gate 58820Sstevel@tonic-gate /* 58830Sstevel@tonic-gate * calculate the next I$ line to scrub, assumes 58840Sstevel@tonic-gate * that chsm_icache_nlines is a power of 2 58850Sstevel@tonic-gate */ 58860Sstevel@tonic-gate index = (index + 1) & (icache_nlines - 1); 58870Sstevel@tonic-gate } 58880Sstevel@tonic-gate 58890Sstevel@tonic-gate /* 58900Sstevel@tonic-gate * set the scrub index for the next visit 58910Sstevel@tonic-gate */ 58920Sstevel@tonic-gate csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I] = index; 58930Sstevel@tonic-gate } 58940Sstevel@tonic-gate 58950Sstevel@tonic-gate /* 58960Sstevel@tonic-gate * Handler for I$ scrub inum softint. Call scrub_icache until 58970Sstevel@tonic-gate * we decrement the outstanding request count to zero. 58980Sstevel@tonic-gate */ 58990Sstevel@tonic-gate /*ARGSUSED*/ 59000Sstevel@tonic-gate static uint_t 59010Sstevel@tonic-gate scrub_icache_line_intr(caddr_t arg1, caddr_t arg2) 59020Sstevel@tonic-gate { 59030Sstevel@tonic-gate int i; 59040Sstevel@tonic-gate int how_many; 59050Sstevel@tonic-gate int outstanding; 59060Sstevel@tonic-gate ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 59070Sstevel@tonic-gate uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_I]; 59080Sstevel@tonic-gate struct scrub_info *csi = (struct scrub_info *)arg1; 59090Sstevel@tonic-gate int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ? 
59100Sstevel@tonic-gate icache_scan_rate_idle : icache_scan_rate_busy; 59110Sstevel@tonic-gate int icache_nlines = csmp->chsm_icache_nlines; 59120Sstevel@tonic-gate 59130Sstevel@tonic-gate /* 59140Sstevel@tonic-gate * The scan rates are expressed in units of tenths of a 59150Sstevel@tonic-gate * percent. A scan rate of 1000 (100%) means the whole 59160Sstevel@tonic-gate * cache is scanned every second. 59170Sstevel@tonic-gate */ 59180Sstevel@tonic-gate how_many = (icache_nlines * scan_rate) / (1000 * csi->csi_freq); 59190Sstevel@tonic-gate 59200Sstevel@tonic-gate do { 59210Sstevel@tonic-gate outstanding = *countp; 59220Sstevel@tonic-gate for (i = 0; i < outstanding; i++) { 59230Sstevel@tonic-gate scrub_icache(how_many); 59240Sstevel@tonic-gate } 59250Sstevel@tonic-gate } while (atomic_add_32_nv(countp, -outstanding)); 59260Sstevel@tonic-gate 59270Sstevel@tonic-gate return (DDI_INTR_CLAIMED); 59280Sstevel@tonic-gate } 59290Sstevel@tonic-gate 59300Sstevel@tonic-gate /* 59310Sstevel@tonic-gate * Called periodically on each CPU to scrub the E$. 59320Sstevel@tonic-gate */ 59330Sstevel@tonic-gate static void 59340Sstevel@tonic-gate scrub_ecache(int how_many) 59350Sstevel@tonic-gate { 59360Sstevel@tonic-gate ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 59370Sstevel@tonic-gate int i; 59380Sstevel@tonic-gate int cpuid = CPU->cpu_id; 59390Sstevel@tonic-gate int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E]; 59400Sstevel@tonic-gate int nlines = csmp->chsm_ecache_nlines; 59410Sstevel@tonic-gate int linesize = cpunodes[cpuid].ecache_linesize; 59420Sstevel@tonic-gate int ec_set_size = cpu_ecache_set_size(CPU); 59430Sstevel@tonic-gate 59440Sstevel@tonic-gate /* 59450Sstevel@tonic-gate * scrub the desired number of lines 59460Sstevel@tonic-gate */ 59470Sstevel@tonic-gate for (i = 0; i < how_many; i++) { 59480Sstevel@tonic-gate /* 59490Sstevel@tonic-gate * scrub the E$ line 59500Sstevel@tonic-gate */ 59510Sstevel@tonic-gate ecache_flush_line(ecache_flushaddr + (index * linesize), 59520Sstevel@tonic-gate ec_set_size); 59530Sstevel@tonic-gate 59540Sstevel@tonic-gate /* 59550Sstevel@tonic-gate * calculate the next E$ line to scrub based on twice 59560Sstevel@tonic-gate * the number of E$ lines (to displace lines containing 59570Sstevel@tonic-gate * flush area data), assumes that the number of lines 59580Sstevel@tonic-gate * is a power of 2 59590Sstevel@tonic-gate */ 59600Sstevel@tonic-gate index = (index + 1) & ((nlines << 1) - 1); 59610Sstevel@tonic-gate } 59620Sstevel@tonic-gate 59630Sstevel@tonic-gate /* 59640Sstevel@tonic-gate * set the ecache scrub index for the next visit 59650Sstevel@tonic-gate */ 59660Sstevel@tonic-gate csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E] = index; 59670Sstevel@tonic-gate } 59680Sstevel@tonic-gate 59690Sstevel@tonic-gate /* 59700Sstevel@tonic-gate * Handler for E$ scrub inum softint. Call the E$ scrubber until 59710Sstevel@tonic-gate * we decrement the outstanding request count to zero. 5972474Srscott * 5973474Srscott * Due to interactions with cpu_scrub_cpu_setup(), the outstanding count may 5974474Srscott * become negative after the atomic_add_32_nv(). This is not a problem, as 5975474Srscott * the next trip around the loop won't scrub anything, and the next add will 5976474Srscott * reset the count back to zero. 
59770Sstevel@tonic-gate */ 59780Sstevel@tonic-gate /*ARGSUSED*/ 59790Sstevel@tonic-gate static uint_t 59800Sstevel@tonic-gate scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2) 59810Sstevel@tonic-gate { 59820Sstevel@tonic-gate int i; 59830Sstevel@tonic-gate int how_many; 59840Sstevel@tonic-gate int outstanding; 59850Sstevel@tonic-gate ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 59860Sstevel@tonic-gate uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_E]; 59870Sstevel@tonic-gate struct scrub_info *csi = (struct scrub_info *)arg1; 59880Sstevel@tonic-gate int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ? 59895219Skm84432 ecache_scan_rate_idle : ecache_scan_rate_busy; 59900Sstevel@tonic-gate int ecache_nlines = csmp->chsm_ecache_nlines; 59910Sstevel@tonic-gate 59920Sstevel@tonic-gate /* 59930Sstevel@tonic-gate * The scan rates are expressed in units of tenths of a 59940Sstevel@tonic-gate * percent. A scan rate of 1000 (100%) means the whole 59950Sstevel@tonic-gate * cache is scanned every second. 59960Sstevel@tonic-gate */ 59970Sstevel@tonic-gate how_many = (ecache_nlines * scan_rate) / (1000 * csi->csi_freq); 59980Sstevel@tonic-gate 59990Sstevel@tonic-gate do { 60000Sstevel@tonic-gate outstanding = *countp; 60010Sstevel@tonic-gate for (i = 0; i < outstanding; i++) { 60020Sstevel@tonic-gate scrub_ecache(how_many); 60030Sstevel@tonic-gate } 60040Sstevel@tonic-gate } while (atomic_add_32_nv(countp, -outstanding)); 60050Sstevel@tonic-gate 60060Sstevel@tonic-gate return (DDI_INTR_CLAIMED); 60070Sstevel@tonic-gate } 60080Sstevel@tonic-gate 60090Sstevel@tonic-gate /* 60100Sstevel@tonic-gate * Timeout function to reenable CE 60110Sstevel@tonic-gate */ 60120Sstevel@tonic-gate static void 60130Sstevel@tonic-gate cpu_delayed_check_ce_errors(void *arg) 60140Sstevel@tonic-gate { 60150Sstevel@tonic-gate if (!taskq_dispatch(ch_check_ce_tq, cpu_check_ce_errors, arg, 60160Sstevel@tonic-gate TQ_NOSLEEP)) { 60170Sstevel@tonic-gate (void) timeout(cpu_delayed_check_ce_errors, arg, 60180Sstevel@tonic-gate drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC)); 60190Sstevel@tonic-gate } 60200Sstevel@tonic-gate } 60210Sstevel@tonic-gate 60220Sstevel@tonic-gate /* 60230Sstevel@tonic-gate * CE Deferred Re-enable after trap. 60240Sstevel@tonic-gate * 60250Sstevel@tonic-gate * When the CPU gets a disrupting trap for any of the errors 60260Sstevel@tonic-gate * controlled by the CEEN bit, CEEN is disabled in the trap handler 60270Sstevel@tonic-gate * immediately. To eliminate the possibility of multiple CEs causing 60280Sstevel@tonic-gate * recursive stack overflow in the trap handler, we cannot 60290Sstevel@tonic-gate * reenable CEEN while still running in the trap handler. Instead, 60300Sstevel@tonic-gate * after a CE is logged on a CPU, we schedule a timeout function, 60310Sstevel@tonic-gate * cpu_check_ce_errors(), to trigger after cpu_ceen_delay_secs 60320Sstevel@tonic-gate * seconds. This function will check whether any further CEs 60330Sstevel@tonic-gate * have occurred on that CPU, and if none have, will reenable CEEN. 60340Sstevel@tonic-gate * 60350Sstevel@tonic-gate * If further CEs have occurred while CEEN is disabled, another 60360Sstevel@tonic-gate * timeout will be scheduled. This is to ensure that the CPU can 60370Sstevel@tonic-gate * make progress in the face of CE 'storms', and that it does not 60380Sstevel@tonic-gate * spend all its time logging CE errors. 
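 *
 * Illustrative timing (the value of the cpu_ceen_delay_secs tunable
 * shown here is hypothetical): with cpu_ceen_delay_secs = 6, the
 * recheck runs about 6 seconds after the CE was logged; if the taskq
 * dispatch fails, cpu_delayed_check_ce_errors() simply rearms the
 * timeout for another interval.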
60390Sstevel@tonic-gate */ 60400Sstevel@tonic-gate static void 60410Sstevel@tonic-gate cpu_check_ce_errors(void *arg) 60420Sstevel@tonic-gate { 6043946Smathue int cpuid = (int)(uintptr_t)arg; 60440Sstevel@tonic-gate cpu_t *cp; 60450Sstevel@tonic-gate 60460Sstevel@tonic-gate /* 60470Sstevel@tonic-gate * We acquire cpu_lock. 60480Sstevel@tonic-gate */ 60490Sstevel@tonic-gate ASSERT(curthread->t_pil == 0); 60500Sstevel@tonic-gate 60510Sstevel@tonic-gate /* 60520Sstevel@tonic-gate * verify that the cpu is still around, DR 60530Sstevel@tonic-gate * could have got there first ... 60540Sstevel@tonic-gate */ 60550Sstevel@tonic-gate mutex_enter(&cpu_lock); 60560Sstevel@tonic-gate cp = cpu_get(cpuid); 60570Sstevel@tonic-gate if (cp == NULL) { 60580Sstevel@tonic-gate mutex_exit(&cpu_lock); 60590Sstevel@tonic-gate return; 60600Sstevel@tonic-gate } 60610Sstevel@tonic-gate /* 60620Sstevel@tonic-gate * make sure we don't migrate across CPUs 60630Sstevel@tonic-gate * while checking our CE status. 60640Sstevel@tonic-gate */ 60650Sstevel@tonic-gate kpreempt_disable(); 60660Sstevel@tonic-gate 60670Sstevel@tonic-gate /* 60680Sstevel@tonic-gate * If we are running on the CPU that got the 60690Sstevel@tonic-gate * CE, we can do the checks directly. 60700Sstevel@tonic-gate */ 60710Sstevel@tonic-gate if (cp->cpu_id == CPU->cpu_id) { 60720Sstevel@tonic-gate mutex_exit(&cpu_lock); 60730Sstevel@tonic-gate cpu_check_ce(TIMEOUT_CEEN_CHECK, 0, 0, 0); 60740Sstevel@tonic-gate kpreempt_enable(); 60750Sstevel@tonic-gate return; 60760Sstevel@tonic-gate } 60770Sstevel@tonic-gate kpreempt_enable(); 60780Sstevel@tonic-gate 60790Sstevel@tonic-gate /* 60800Sstevel@tonic-gate * send an x-call to get the CPU that originally 60810Sstevel@tonic-gate * got the CE to do the necessary checks. If we can't 60820Sstevel@tonic-gate * send the x-call, reschedule the timeout, otherwise we 60830Sstevel@tonic-gate * lose CEEN forever on that CPU. 60840Sstevel@tonic-gate */ 60850Sstevel@tonic-gate if (CPU_XCALL_READY(cp->cpu_id) && (!(cp->cpu_flags & CPU_QUIESCED))) { 60860Sstevel@tonic-gate xc_one(cp->cpu_id, (xcfunc_t *)cpu_check_ce, 60870Sstevel@tonic-gate TIMEOUT_CEEN_CHECK, 0); 60880Sstevel@tonic-gate mutex_exit(&cpu_lock); 60890Sstevel@tonic-gate } else { 60900Sstevel@tonic-gate /* 60910Sstevel@tonic-gate * When the CPU is not accepting xcalls, or 60920Sstevel@tonic-gate * the processor is offlined, we don't want to 60930Sstevel@tonic-gate * incur the extra overhead of trying to schedule the 60940Sstevel@tonic-gate * CE timeout indefinitely. However, we don't want to lose 60950Sstevel@tonic-gate * CE checking forever. 60960Sstevel@tonic-gate * 60970Sstevel@tonic-gate * Keep rescheduling the timeout, accepting the additional 60980Sstevel@tonic-gate * overhead as the cost of correctness in the case where we get 60990Sstevel@tonic-gate * a CE, disable CEEN, offline the CPU during the 61000Sstevel@tonic-gate * timeout interval, and then online it at some 61010Sstevel@tonic-gate * point in the future. This is unlikely given the short 61020Sstevel@tonic-gate * cpu_ceen_delay_secs.
61030Sstevel@tonic-gate */ 61040Sstevel@tonic-gate mutex_exit(&cpu_lock); 6105946Smathue (void) timeout(cpu_delayed_check_ce_errors, 6106946Smathue (void *)(uintptr_t)cp->cpu_id, 61070Sstevel@tonic-gate drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC)); 61080Sstevel@tonic-gate } 61090Sstevel@tonic-gate } 61100Sstevel@tonic-gate 61110Sstevel@tonic-gate /* 61120Sstevel@tonic-gate * This routine will check whether CEs have occurred while 61130Sstevel@tonic-gate * CEEN is disabled. Any CEs detected will be logged and, if 61140Sstevel@tonic-gate * possible, scrubbed. 61150Sstevel@tonic-gate * 61160Sstevel@tonic-gate * The memscrubber will also use this routine to clear any errors 61170Sstevel@tonic-gate * caused by its scrubbing with CEEN disabled. 61180Sstevel@tonic-gate * 61190Sstevel@tonic-gate * flag == SCRUBBER_CEEN_CHECK 61200Sstevel@tonic-gate * called from memscrubber, just check/scrub, no reset 61210Sstevel@tonic-gate * paddr physical addr. for start of scrub pages 61220Sstevel@tonic-gate * vaddr virtual addr. for scrub area 61230Sstevel@tonic-gate * psz page size of area to be scrubbed 61240Sstevel@tonic-gate * 61250Sstevel@tonic-gate * flag == TIMEOUT_CEEN_CHECK 61260Sstevel@tonic-gate * timeout function has triggered, reset timeout or CEEN 61270Sstevel@tonic-gate * 61280Sstevel@tonic-gate * Note: We must not migrate cpus during this function. This can be 61290Sstevel@tonic-gate * achieved by one of: 61300Sstevel@tonic-gate * - invoking as target of an x-call in which case we're at XCALL_PIL 61310Sstevel@tonic-gate * The flag value must be the first xcall argument. 61320Sstevel@tonic-gate * - disabling kernel preemption. This should be done for very short 61330Sstevel@tonic-gate * periods so is not suitable for SCRUBBER_CEEN_CHECK where we might 61340Sstevel@tonic-gate * scrub an extended area with cpu_check_block. The call for 61350Sstevel@tonic-gate * TIMEOUT_CEEN_CHECK uses this so cpu_check_ce must be kept 61360Sstevel@tonic-gate * brief for this case. 61370Sstevel@tonic-gate * - binding to a cpu, e.g. with thread_affinity_set(). This is used 61380Sstevel@tonic-gate * in the SCRUBBER_CEEN_CHECK case, but is not practical for 61390Sstevel@tonic-gate * the TIMEOUT_CEEN_CHECK because both need cpu_lock. 61400Sstevel@tonic-gate */ 61410Sstevel@tonic-gate void 61420Sstevel@tonic-gate cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz) 61430Sstevel@tonic-gate { 61440Sstevel@tonic-gate ch_cpu_errors_t cpu_error_regs; 61450Sstevel@tonic-gate uint64_t ec_err_enable; 61460Sstevel@tonic-gate uint64_t page_offset; 61470Sstevel@tonic-gate 61480Sstevel@tonic-gate /* Read AFSR */ 61490Sstevel@tonic-gate get_cpu_error_state(&cpu_error_regs); 61500Sstevel@tonic-gate 61510Sstevel@tonic-gate /* 61520Sstevel@tonic-gate * If no CEEN errors have occurred during the timeout 61530Sstevel@tonic-gate * interval, it is safe to re-enable CEEN and exit. 61540Sstevel@tonic-gate */ 61555219Skm84432 if (((cpu_error_regs.afsr & C_AFSR_CECC_ERRS) | 61565219Skm84432 (cpu_error_regs.afsr_ext & C_AFSR_EXT_CECC_ERRS)) == 0) { 61570Sstevel@tonic-gate if (flag == TIMEOUT_CEEN_CHECK && 61580Sstevel@tonic-gate !((ec_err_enable = get_error_enable()) & EN_REG_CEEN)) 61590Sstevel@tonic-gate set_error_enable(ec_err_enable | EN_REG_CEEN); 61600Sstevel@tonic-gate return; 61610Sstevel@tonic-gate } 61620Sstevel@tonic-gate 61630Sstevel@tonic-gate /* 61640Sstevel@tonic-gate * Ensure that CEEN was not reenabled (maybe by DR) before 61650Sstevel@tonic-gate * we log/clear the error.
61660Sstevel@tonic-gate */ 61670Sstevel@tonic-gate if ((ec_err_enable = get_error_enable()) & EN_REG_CEEN) 61685219Skm84432 set_error_enable(ec_err_enable & ~EN_REG_CEEN); 61690Sstevel@tonic-gate 61700Sstevel@tonic-gate /* 61710Sstevel@tonic-gate * log/clear the CE. If CE_CEEN_DEFER is passed, the 61720Sstevel@tonic-gate * timeout will be rescheduled when the error is logged. 61730Sstevel@tonic-gate */ 61745219Skm84432 if (!((cpu_error_regs.afsr & cpu_ce_not_deferred) | 61755219Skm84432 (cpu_error_regs.afsr_ext & cpu_ce_not_deferred_ext))) 61765219Skm84432 cpu_ce_detected(&cpu_error_regs, 61775219Skm84432 CE_CEEN_DEFER | CE_CEEN_TIMEOUT); 61780Sstevel@tonic-gate else 61795219Skm84432 cpu_ce_detected(&cpu_error_regs, CE_CEEN_TIMEOUT); 61800Sstevel@tonic-gate 61810Sstevel@tonic-gate /* 61820Sstevel@tonic-gate * If the memory scrubber runs while CEEN is 61830Sstevel@tonic-gate * disabled, (or if CEEN is disabled during the 61840Sstevel@tonic-gate * scrub as a result of a CE being triggered by 61850Sstevel@tonic-gate * it), the range being scrubbed will not be 61860Sstevel@tonic-gate * completely cleaned. If there are multiple CEs 61870Sstevel@tonic-gate * in the range at most two of these will be dealt 61880Sstevel@tonic-gate * with, (one by the trap handler and one by the 61890Sstevel@tonic-gate * timeout). It is also possible that none are dealt 61900Sstevel@tonic-gate * with, (CEEN disabled and another CE occurs before 61910Sstevel@tonic-gate * the timeout triggers). So to ensure that the 61920Sstevel@tonic-gate * memory is actually scrubbed, we have to access each 61930Sstevel@tonic-gate * memory location in the range and then check whether 61940Sstevel@tonic-gate * that access causes a CE. 61950Sstevel@tonic-gate */ 61960Sstevel@tonic-gate if (flag == SCRUBBER_CEEN_CHECK && va) { 61970Sstevel@tonic-gate if ((cpu_error_regs.afar >= pa) && 61980Sstevel@tonic-gate (cpu_error_regs.afar < (pa + psz))) { 61990Sstevel@tonic-gate /* 62000Sstevel@tonic-gate * Force a load from physical memory for each 62010Sstevel@tonic-gate * 64-byte block, then check AFSR to determine 62020Sstevel@tonic-gate * whether this access caused an error. 62030Sstevel@tonic-gate * 62040Sstevel@tonic-gate * This is a slow way to do a scrub, but as it will 62050Sstevel@tonic-gate * only be invoked when the memory scrubber actually 62060Sstevel@tonic-gate * triggered a CE, it should not happen too 62070Sstevel@tonic-gate * frequently. 62080Sstevel@tonic-gate * 62090Sstevel@tonic-gate * Cut down what we need to check as the scrubber 62100Sstevel@tonic-gate * has verified up to AFAR, so get its offset 62110Sstevel@tonic-gate * into the page and start there. 62120Sstevel@tonic-gate */ 62130Sstevel@tonic-gate page_offset = (uint64_t)(cpu_error_regs.afar & 62140Sstevel@tonic-gate (psz - 1)); 62150Sstevel@tonic-gate va = (caddr_t)(va + (P2ALIGN(page_offset, 64))); 62160Sstevel@tonic-gate psz -= (uint_t)(P2ALIGN(page_offset, 64)); 62170Sstevel@tonic-gate cpu_check_block((caddr_t)(P2ALIGN((uint64_t)va, 64)), 62180Sstevel@tonic-gate psz); 62190Sstevel@tonic-gate } 62200Sstevel@tonic-gate } 62210Sstevel@tonic-gate 62220Sstevel@tonic-gate /* 62230Sstevel@tonic-gate * Reset error enable if this CE is not masked.
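 * If only deferred-class CEs were seen (the CE_CEEN_DEFER path
 * above), CEEN deliberately stays off here; the logging path
 * reschedules the timeout instead, so re-enabling happens on a
 * later pass.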
62240Sstevel@tonic-gate */ 62250Sstevel@tonic-gate if ((flag == TIMEOUT_CEEN_CHECK) && 62260Sstevel@tonic-gate (cpu_error_regs.afsr & cpu_ce_not_deferred)) 62275219Skm84432 set_error_enable(ec_err_enable | EN_REG_CEEN); 62280Sstevel@tonic-gate 62290Sstevel@tonic-gate } 62300Sstevel@tonic-gate 62310Sstevel@tonic-gate /* 62320Sstevel@tonic-gate * Attempt a cpu logout for an error that we did not trap for, such 62330Sstevel@tonic-gate * as a CE noticed with CEEN off. It is assumed that we are still running 62340Sstevel@tonic-gate * on the cpu that took the error and that we cannot migrate. Returns 62350Sstevel@tonic-gate * nonzero on success, otherwise 0. 62360Sstevel@tonic-gate */ 62370Sstevel@tonic-gate static int 62380Sstevel@tonic-gate cpu_ce_delayed_ec_logout(uint64_t afar) 62390Sstevel@tonic-gate { 62400Sstevel@tonic-gate ch_cpu_logout_t *clop; 62410Sstevel@tonic-gate 62420Sstevel@tonic-gate if (CPU_PRIVATE(CPU) == NULL) 62430Sstevel@tonic-gate return (0); 62440Sstevel@tonic-gate 62450Sstevel@tonic-gate clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout); 62460Sstevel@tonic-gate if (cas64(&clop->clo_data.chd_afar, LOGOUT_INVALID, afar) != 62470Sstevel@tonic-gate LOGOUT_INVALID) 62480Sstevel@tonic-gate return (0); 62490Sstevel@tonic-gate 62500Sstevel@tonic-gate cpu_delayed_logout(afar, clop); 62510Sstevel@tonic-gate return (1); 62520Sstevel@tonic-gate } 62530Sstevel@tonic-gate 62540Sstevel@tonic-gate /* 62550Sstevel@tonic-gate * We got an error while CEEN was disabled. We 62560Sstevel@tonic-gate * need to clean up after it and log whatever 62570Sstevel@tonic-gate * information we have on the CE. 62580Sstevel@tonic-gate */ 62590Sstevel@tonic-gate void 62600Sstevel@tonic-gate cpu_ce_detected(ch_cpu_errors_t *cpu_error_regs, int flag) 62610Sstevel@tonic-gate { 62620Sstevel@tonic-gate ch_async_flt_t ch_flt; 62630Sstevel@tonic-gate struct async_flt *aflt; 62640Sstevel@tonic-gate char pr_reason[MAX_REASON_STRING]; 62650Sstevel@tonic-gate 62660Sstevel@tonic-gate bzero(&ch_flt, sizeof (ch_async_flt_t)); 62670Sstevel@tonic-gate ch_flt.flt_trapped_ce = flag; 62680Sstevel@tonic-gate aflt = (struct async_flt *)&ch_flt; 62690Sstevel@tonic-gate aflt->flt_stat = cpu_error_regs->afsr & C_AFSR_MASK; 62700Sstevel@tonic-gate ch_flt.afsr_ext = cpu_error_regs->afsr_ext; 62710Sstevel@tonic-gate ch_flt.afsr_errs = (cpu_error_regs->afsr_ext & C_AFSR_EXT_ALL_ERRS) | 62720Sstevel@tonic-gate (cpu_error_regs->afsr & C_AFSR_ALL_ERRS); 62730Sstevel@tonic-gate aflt->flt_addr = cpu_error_regs->afar; 62740Sstevel@tonic-gate #if defined(SERRANO) 62750Sstevel@tonic-gate ch_flt.afar2 = cpu_error_regs->afar2; 62760Sstevel@tonic-gate #endif /* SERRANO */ 62770Sstevel@tonic-gate aflt->flt_pc = NULL; 62780Sstevel@tonic-gate aflt->flt_priv = ((cpu_error_regs->afsr & C_AFSR_PRIV) != 0); 62790Sstevel@tonic-gate aflt->flt_tl = 0; 62800Sstevel@tonic-gate aflt->flt_panic = 0; 62810Sstevel@tonic-gate cpu_log_and_clear_ce(&ch_flt); 62820Sstevel@tonic-gate 62830Sstevel@tonic-gate /* 62840Sstevel@tonic-gate * check if we caused any errors during cleanup 62850Sstevel@tonic-gate */ 62860Sstevel@tonic-gate if (clear_errors(&ch_flt)) { 62870Sstevel@tonic-gate pr_reason[0] = '\0'; 62880Sstevel@tonic-gate (void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs, 62890Sstevel@tonic-gate NULL); 62900Sstevel@tonic-gate } 62910Sstevel@tonic-gate } 62920Sstevel@tonic-gate 62930Sstevel@tonic-gate /* 62940Sstevel@tonic-gate * Log/clear CEEN-controlled disrupting errors 62950Sstevel@tonic-gate */ 62960Sstevel@tonic-gate static void
62970Sstevel@tonic-gate cpu_log_and_clear_ce(ch_async_flt_t *ch_flt) 62980Sstevel@tonic-gate { 62990Sstevel@tonic-gate struct async_flt *aflt; 63000Sstevel@tonic-gate uint64_t afsr, afsr_errs; 63010Sstevel@tonic-gate ch_cpu_logout_t *clop; 63020Sstevel@tonic-gate char pr_reason[MAX_REASON_STRING]; 63030Sstevel@tonic-gate on_trap_data_t *otp = curthread->t_ontrap; 63040Sstevel@tonic-gate 63050Sstevel@tonic-gate aflt = (struct async_flt *)ch_flt; 63060Sstevel@tonic-gate afsr = aflt->flt_stat; 63070Sstevel@tonic-gate afsr_errs = ch_flt->afsr_errs; 63080Sstevel@tonic-gate aflt->flt_id = gethrtime_waitfree(); 63090Sstevel@tonic-gate aflt->flt_bus_id = getprocessorid(); 63100Sstevel@tonic-gate aflt->flt_inst = CPU->cpu_id; 63110Sstevel@tonic-gate aflt->flt_prot = AFLT_PROT_NONE; 63120Sstevel@tonic-gate aflt->flt_class = CPU_FAULT; 63130Sstevel@tonic-gate aflt->flt_status = ECC_C_TRAP; 63140Sstevel@tonic-gate 63150Sstevel@tonic-gate pr_reason[0] = '\0'; 63160Sstevel@tonic-gate /* 63170Sstevel@tonic-gate * Get the CPU log out info for Disrupting Trap. 63180Sstevel@tonic-gate */ 63190Sstevel@tonic-gate if (CPU_PRIVATE(CPU) == NULL) { 63200Sstevel@tonic-gate clop = NULL; 63210Sstevel@tonic-gate ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID; 63220Sstevel@tonic-gate } else { 63230Sstevel@tonic-gate clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout); 63240Sstevel@tonic-gate } 63250Sstevel@tonic-gate 63260Sstevel@tonic-gate if (clop && ch_flt->flt_trapped_ce & CE_CEEN_TIMEOUT) { 63270Sstevel@tonic-gate ch_cpu_errors_t cpu_error_regs; 63280Sstevel@tonic-gate 63290Sstevel@tonic-gate get_cpu_error_state(&cpu_error_regs); 63300Sstevel@tonic-gate (void) cpu_ce_delayed_ec_logout(cpu_error_regs.afar); 63310Sstevel@tonic-gate clop->clo_data.chd_afsr = cpu_error_regs.afsr; 63320Sstevel@tonic-gate clop->clo_data.chd_afar = cpu_error_regs.afar; 63330Sstevel@tonic-gate clop->clo_data.chd_afsr_ext = cpu_error_regs.afsr_ext; 63340Sstevel@tonic-gate clop->clo_sdw_data.chd_afsr = cpu_error_regs.shadow_afsr; 63350Sstevel@tonic-gate clop->clo_sdw_data.chd_afar = cpu_error_regs.shadow_afar; 63360Sstevel@tonic-gate clop->clo_sdw_data.chd_afsr_ext = 63370Sstevel@tonic-gate cpu_error_regs.shadow_afsr_ext; 63380Sstevel@tonic-gate #if defined(SERRANO) 63390Sstevel@tonic-gate clop->clo_data.chd_afar2 = cpu_error_regs.afar2; 63400Sstevel@tonic-gate #endif /* SERRANO */ 63410Sstevel@tonic-gate ch_flt->flt_data_incomplete = 1; 63420Sstevel@tonic-gate 63430Sstevel@tonic-gate /* 63440Sstevel@tonic-gate * The logging/clear code expects AFSR/AFAR to be cleared. 63450Sstevel@tonic-gate * The trap handler does it for CEEN enabled errors 63460Sstevel@tonic-gate * so we need to do it here. 63470Sstevel@tonic-gate */ 63480Sstevel@tonic-gate set_cpu_error_state(&cpu_error_regs); 63490Sstevel@tonic-gate } 63500Sstevel@tonic-gate 63510Sstevel@tonic-gate #if defined(JALAPENO) || defined(SERRANO) 63520Sstevel@tonic-gate /* 63530Sstevel@tonic-gate * FRC: Can't scrub memory as we don't have AFAR for Jalapeno. 63540Sstevel@tonic-gate * For Serrano, even though we do have the AFAR, we still do the 63550Sstevel@tonic-gate * scrub on the RCE side since that's where the error type can 63560Sstevel@tonic-gate * be properly classified as intermittent, persistent, etc. 63570Sstevel@tonic-gate * 63580Sstevel@tonic-gate * CE/RCE: If error is in memory and AFAR is valid, scrub the memory. 63590Sstevel@tonic-gate * Must scrub memory before cpu_queue_events, as scrubbing memory sets 63600Sstevel@tonic-gate * the flt_status bits.
63610Sstevel@tonic-gate */ 63620Sstevel@tonic-gate if ((afsr & (C_AFSR_CE|C_AFSR_RCE)) && 63630Sstevel@tonic-gate (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) || 63640Sstevel@tonic-gate cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_RCE)))) { 63650Sstevel@tonic-gate cpu_ce_scrub_mem_err(aflt, B_TRUE); 63660Sstevel@tonic-gate } 63670Sstevel@tonic-gate #else /* JALAPENO || SERRANO */ 63680Sstevel@tonic-gate /* 63690Sstevel@tonic-gate * CE/EMC: If error is in memory and AFAR is valid, scrub the memory. 63700Sstevel@tonic-gate * Must scrub memory before cpu_queue_events, as scrubbing memory sets 63710Sstevel@tonic-gate * the flt_status bits. 63720Sstevel@tonic-gate */ 63730Sstevel@tonic-gate if (afsr & (C_AFSR_CE|C_AFSR_EMC)) { 63740Sstevel@tonic-gate if (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) || 63750Sstevel@tonic-gate cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_EMC))) { 63760Sstevel@tonic-gate cpu_ce_scrub_mem_err(aflt, B_TRUE); 63770Sstevel@tonic-gate } 63780Sstevel@tonic-gate } 63790Sstevel@tonic-gate 63800Sstevel@tonic-gate #endif /* JALAPENO || SERRANO */ 63810Sstevel@tonic-gate 63820Sstevel@tonic-gate /* 63830Sstevel@tonic-gate * Update flt_prot if this error occurred under on_trap protection. 63840Sstevel@tonic-gate */ 63850Sstevel@tonic-gate if (otp != NULL && (otp->ot_prot & OT_DATA_EC)) 63860Sstevel@tonic-gate aflt->flt_prot = AFLT_PROT_EC; 63870Sstevel@tonic-gate 63880Sstevel@tonic-gate /* 63890Sstevel@tonic-gate * Queue events on the async event queue, one event per error bit. 63900Sstevel@tonic-gate */ 63910Sstevel@tonic-gate if (cpu_queue_events(ch_flt, pr_reason, afsr_errs, clop) == 0 || 63920Sstevel@tonic-gate (afsr_errs & (C_AFSR_CECC_ERRS | C_AFSR_EXT_CECC_ERRS)) == 0) { 63930Sstevel@tonic-gate ch_flt->flt_type = CPU_INV_AFSR; 63940Sstevel@tonic-gate cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR, 63950Sstevel@tonic-gate (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue, 63960Sstevel@tonic-gate aflt->flt_panic); 63970Sstevel@tonic-gate } 63980Sstevel@tonic-gate 63990Sstevel@tonic-gate /* 64000Sstevel@tonic-gate * Zero out + invalidate CPU logout. 64010Sstevel@tonic-gate */ 64020Sstevel@tonic-gate if (clop) { 64030Sstevel@tonic-gate bzero(clop, sizeof (ch_cpu_logout_t)); 64040Sstevel@tonic-gate clop->clo_data.chd_afar = LOGOUT_INVALID; 64050Sstevel@tonic-gate } 64060Sstevel@tonic-gate 64070Sstevel@tonic-gate /* 64080Sstevel@tonic-gate * If either a CPC, WDC or EDC error has occurred while CEEN 64090Sstevel@tonic-gate * was disabled, we need to flush either the entire 64100Sstevel@tonic-gate * E$ or an E$ line. 64110Sstevel@tonic-gate */ 64120Sstevel@tonic-gate #if defined(JALAPENO) || defined(SERRANO) 64130Sstevel@tonic-gate if (afsr & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_CPU | C_AFSR_WDC)) 64140Sstevel@tonic-gate #else /* JALAPENO || SERRANO */ 64150Sstevel@tonic-gate if (afsr_errs & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_WDC | C_AFSR_L3_EDC | 64160Sstevel@tonic-gate C_AFSR_L3_CPC | C_AFSR_L3_WDC)) 64170Sstevel@tonic-gate #endif /* JALAPENO || SERRANO */ 64180Sstevel@tonic-gate cpu_error_ecache_flush(ch_flt); 64190Sstevel@tonic-gate 64200Sstevel@tonic-gate } 64210Sstevel@tonic-gate 64220Sstevel@tonic-gate /* 64230Sstevel@tonic-gate * depending on the error type, we determine whether we 64240Sstevel@tonic-gate * need to flush the entire ecache or just a line. 
64250Sstevel@tonic-gate */ 64260Sstevel@tonic-gate static int 64270Sstevel@tonic-gate cpu_error_ecache_flush_required(ch_async_flt_t *ch_flt) 64280Sstevel@tonic-gate { 64290Sstevel@tonic-gate struct async_flt *aflt; 64300Sstevel@tonic-gate uint64_t afsr; 64310Sstevel@tonic-gate uint64_t afsr_errs = ch_flt->afsr_errs; 64320Sstevel@tonic-gate 64330Sstevel@tonic-gate aflt = (struct async_flt *)ch_flt; 64340Sstevel@tonic-gate afsr = aflt->flt_stat; 64350Sstevel@tonic-gate 64360Sstevel@tonic-gate /* 64370Sstevel@tonic-gate * If we got multiple errors, no point in trying 64380Sstevel@tonic-gate * the individual cases, just flush the whole cache 64390Sstevel@tonic-gate */ 64400Sstevel@tonic-gate if (afsr & C_AFSR_ME) { 64410Sstevel@tonic-gate return (ECACHE_FLUSH_ALL); 64420Sstevel@tonic-gate } 64430Sstevel@tonic-gate 64440Sstevel@tonic-gate /* 64450Sstevel@tonic-gate * If either a CPC, WDC or EDC error has occurred while CEEN 64460Sstevel@tonic-gate * was disabled, we need to flush entire E$. We can't just 64470Sstevel@tonic-gate * flush the cache line affected as the ME bit 64480Sstevel@tonic-gate * is not set when multiple correctable errors of the same 64490Sstevel@tonic-gate * type occur, so we might have multiple CPC or EDC errors, 64500Sstevel@tonic-gate * with only the first recorded. 64510Sstevel@tonic-gate */ 64520Sstevel@tonic-gate #if defined(JALAPENO) || defined(SERRANO) 64530Sstevel@tonic-gate if (afsr & (C_AFSR_CPC | C_AFSR_CPU | C_AFSR_EDC | C_AFSR_WDC)) { 64540Sstevel@tonic-gate #else /* JALAPENO || SERRANO */ 64550Sstevel@tonic-gate if (afsr_errs & (C_AFSR_CPC | C_AFSR_EDC | C_AFSR_WDC | C_AFSR_L3_CPC | 64560Sstevel@tonic-gate C_AFSR_L3_EDC | C_AFSR_L3_WDC)) { 64570Sstevel@tonic-gate #endif /* JALAPENO || SERRANO */ 64580Sstevel@tonic-gate return (ECACHE_FLUSH_ALL); 64590Sstevel@tonic-gate } 64600Sstevel@tonic-gate 64610Sstevel@tonic-gate #if defined(JALAPENO) || defined(SERRANO) 64620Sstevel@tonic-gate /* 64630Sstevel@tonic-gate * If only UE or RUE is set, flush the Ecache line, otherwise 64640Sstevel@tonic-gate * flush the entire Ecache. 64650Sstevel@tonic-gate */ 64660Sstevel@tonic-gate if (afsr & (C_AFSR_UE|C_AFSR_RUE)) { 64670Sstevel@tonic-gate if ((afsr & C_AFSR_ALL_ERRS) == C_AFSR_UE || 64680Sstevel@tonic-gate (afsr & C_AFSR_ALL_ERRS) == C_AFSR_RUE) { 64690Sstevel@tonic-gate return (ECACHE_FLUSH_LINE); 64700Sstevel@tonic-gate } else { 64710Sstevel@tonic-gate return (ECACHE_FLUSH_ALL); 64720Sstevel@tonic-gate } 64730Sstevel@tonic-gate } 64740Sstevel@tonic-gate #else /* JALAPENO || SERRANO */ 64750Sstevel@tonic-gate /* 64760Sstevel@tonic-gate * If UE only is set, flush the Ecache line, otherwise 64770Sstevel@tonic-gate * flush the entire Ecache. 64780Sstevel@tonic-gate */ 64790Sstevel@tonic-gate if (afsr_errs & C_AFSR_UE) { 64800Sstevel@tonic-gate if ((afsr_errs & (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == 64810Sstevel@tonic-gate C_AFSR_UE) { 64820Sstevel@tonic-gate return (ECACHE_FLUSH_LINE); 64830Sstevel@tonic-gate } else { 64840Sstevel@tonic-gate return (ECACHE_FLUSH_ALL); 64850Sstevel@tonic-gate } 64860Sstevel@tonic-gate } 64870Sstevel@tonic-gate #endif /* JALAPENO || SERRANO */ 64880Sstevel@tonic-gate 64890Sstevel@tonic-gate /* 64900Sstevel@tonic-gate * EDU: If EDU only is set, flush the ecache line, otherwise 64910Sstevel@tonic-gate * flush the entire Ecache. 
64920Sstevel@tonic-gate */ 64930Sstevel@tonic-gate if (afsr_errs & (C_AFSR_EDU | C_AFSR_L3_EDU)) { 64940Sstevel@tonic-gate if (((afsr_errs & ~C_AFSR_EDU) == 0) || 64950Sstevel@tonic-gate ((afsr_errs & ~C_AFSR_L3_EDU) == 0)) { 64960Sstevel@tonic-gate return (ECACHE_FLUSH_LINE); 64970Sstevel@tonic-gate } else { 64980Sstevel@tonic-gate return (ECACHE_FLUSH_ALL); 64990Sstevel@tonic-gate } 65000Sstevel@tonic-gate } 65010Sstevel@tonic-gate 65020Sstevel@tonic-gate /* 65030Sstevel@tonic-gate * BERR: If BERR only is set, flush the Ecache line, otherwise 65040Sstevel@tonic-gate * flush the entire Ecache. 65050Sstevel@tonic-gate */ 65060Sstevel@tonic-gate if (afsr_errs & C_AFSR_BERR) { 65070Sstevel@tonic-gate if ((afsr_errs & ~C_AFSR_BERR) == 0) { 65080Sstevel@tonic-gate return (ECACHE_FLUSH_LINE); 65090Sstevel@tonic-gate } else { 65100Sstevel@tonic-gate return (ECACHE_FLUSH_ALL); 65110Sstevel@tonic-gate } 65120Sstevel@tonic-gate } 65130Sstevel@tonic-gate 65140Sstevel@tonic-gate return (0); 65150Sstevel@tonic-gate } 65160Sstevel@tonic-gate 65170Sstevel@tonic-gate void 65180Sstevel@tonic-gate cpu_error_ecache_flush(ch_async_flt_t *ch_flt) 65190Sstevel@tonic-gate { 65200Sstevel@tonic-gate int ecache_flush_flag = 65210Sstevel@tonic-gate cpu_error_ecache_flush_required(ch_flt); 65220Sstevel@tonic-gate 65230Sstevel@tonic-gate /* 65240Sstevel@tonic-gate * Flush Ecache line or entire Ecache based on above checks. 65250Sstevel@tonic-gate */ 65260Sstevel@tonic-gate if (ecache_flush_flag == ECACHE_FLUSH_ALL) 65270Sstevel@tonic-gate cpu_flush_ecache(); 65280Sstevel@tonic-gate else if (ecache_flush_flag == ECACHE_FLUSH_LINE) { 65290Sstevel@tonic-gate cpu_flush_ecache_line(ch_flt); 65300Sstevel@tonic-gate } 65310Sstevel@tonic-gate 65320Sstevel@tonic-gate } 65330Sstevel@tonic-gate 65340Sstevel@tonic-gate /* 65350Sstevel@tonic-gate * Extract the PA portion from the E$ tag. 65360Sstevel@tonic-gate */ 65370Sstevel@tonic-gate uint64_t 65380Sstevel@tonic-gate cpu_ectag_to_pa(int setsize, uint64_t tag) 65390Sstevel@tonic-gate { 65400Sstevel@tonic-gate if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation)) 65410Sstevel@tonic-gate return (JG_ECTAG_TO_PA(setsize, tag)); 65420Sstevel@tonic-gate else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) 65430Sstevel@tonic-gate return (PN_L3TAG_TO_PA(tag)); 65440Sstevel@tonic-gate else 65450Sstevel@tonic-gate return (CH_ECTAG_TO_PA(setsize, tag)); 65460Sstevel@tonic-gate } 65470Sstevel@tonic-gate 65480Sstevel@tonic-gate /* 65490Sstevel@tonic-gate * Convert the E$ tag PA into an E$ subblock index. 65500Sstevel@tonic-gate */ 65516330Sjc25722 int 65520Sstevel@tonic-gate cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr) 65530Sstevel@tonic-gate { 65540Sstevel@tonic-gate if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation)) 65550Sstevel@tonic-gate return (JG_ECTAG_PA_TO_SUBBLK(cachesize, subaddr)); 65560Sstevel@tonic-gate else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) 65570Sstevel@tonic-gate /* Panther has only one subblock per line */ 65580Sstevel@tonic-gate return (0); 65590Sstevel@tonic-gate else 65600Sstevel@tonic-gate return (CH_ECTAG_PA_TO_SUBBLK(cachesize, subaddr)); 65610Sstevel@tonic-gate } 65620Sstevel@tonic-gate 65630Sstevel@tonic-gate /* 65640Sstevel@tonic-gate * All subblocks in an E$ line must be invalid for 65650Sstevel@tonic-gate * the line to be invalid. 
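 * (Panther is the degenerate case: its L3 line has a single
 * subblock, so the line state alone decides, as in
 * cpu_ectag_pa_to_subblk() above.)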
65660Sstevel@tonic-gate */ 65670Sstevel@tonic-gate int 65680Sstevel@tonic-gate cpu_ectag_line_invalid(int cachesize, uint64_t tag) 65690Sstevel@tonic-gate { 65700Sstevel@tonic-gate if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation)) 65710Sstevel@tonic-gate return (JG_ECTAG_LINE_INVALID(cachesize, tag)); 65720Sstevel@tonic-gate else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) 65730Sstevel@tonic-gate return (PN_L3_LINE_INVALID(tag)); 65740Sstevel@tonic-gate else 65750Sstevel@tonic-gate return (CH_ECTAG_LINE_INVALID(cachesize, tag)); 65760Sstevel@tonic-gate } 65770Sstevel@tonic-gate 65780Sstevel@tonic-gate /* 65790Sstevel@tonic-gate * Extract state bits for a subblock given the tag. Note that for Panther 65800Sstevel@tonic-gate * this works on both l2 and l3 tags. 65810Sstevel@tonic-gate */ 65826330Sjc25722 int 65830Sstevel@tonic-gate cpu_ectag_pa_to_subblk_state(int cachesize, uint64_t subaddr, uint64_t tag) 65840Sstevel@tonic-gate { 65850Sstevel@tonic-gate if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation)) 65860Sstevel@tonic-gate return (JG_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag)); 65870Sstevel@tonic-gate else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) 65880Sstevel@tonic-gate return (tag & CH_ECSTATE_MASK); 65890Sstevel@tonic-gate else 65900Sstevel@tonic-gate return (CH_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag)); 65910Sstevel@tonic-gate } 65920Sstevel@tonic-gate 65930Sstevel@tonic-gate /* 65940Sstevel@tonic-gate * Cpu specific initialization. 65950Sstevel@tonic-gate */ 65960Sstevel@tonic-gate void 65970Sstevel@tonic-gate cpu_mp_init(void) 65980Sstevel@tonic-gate { 65990Sstevel@tonic-gate #ifdef CHEETAHPLUS_ERRATUM_25 66000Sstevel@tonic-gate if (cheetah_sendmondo_recover) { 66010Sstevel@tonic-gate cheetah_nudge_init(); 66020Sstevel@tonic-gate } 66030Sstevel@tonic-gate #endif 66040Sstevel@tonic-gate } 66050Sstevel@tonic-gate 66060Sstevel@tonic-gate void 66070Sstevel@tonic-gate cpu_ereport_post(struct async_flt *aflt) 66080Sstevel@tonic-gate { 66090Sstevel@tonic-gate char *cpu_type, buf[FM_MAX_CLASS]; 66100Sstevel@tonic-gate nv_alloc_t *nva = NULL; 66110Sstevel@tonic-gate nvlist_t *ereport, *detector, *resource; 66120Sstevel@tonic-gate errorq_elem_t *eqep; 66130Sstevel@tonic-gate ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 66140Sstevel@tonic-gate char unum[UNUM_NAMLEN]; 66152436Smb91622 int synd_code; 66162381Smikechr uint8_t msg_type; 66170Sstevel@tonic-gate plat_ecc_ch_async_flt_t plat_ecc_ch_flt; 66180Sstevel@tonic-gate 66190Sstevel@tonic-gate if (aflt->flt_panic || panicstr) { 66200Sstevel@tonic-gate eqep = errorq_reserve(ereport_errorq); 66210Sstevel@tonic-gate if (eqep == NULL) 66220Sstevel@tonic-gate return; 66230Sstevel@tonic-gate ereport = errorq_elem_nvl(ereport_errorq, eqep); 66240Sstevel@tonic-gate nva = errorq_elem_nva(ereport_errorq, eqep); 66250Sstevel@tonic-gate } else { 66260Sstevel@tonic-gate ereport = fm_nvlist_create(nva); 66270Sstevel@tonic-gate } 66280Sstevel@tonic-gate 66290Sstevel@tonic-gate /* 66300Sstevel@tonic-gate * Create the scheme "cpu" FMRI. 
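 * The switch below picks the cpu_type string used in the ereport
 * class assembled later as <FM_ERROR_CPU>.<cpu_type>.<class>, e.g.
 * (illustrative) "cpu.ultraSPARC-III.ce" for a Cheetah CE.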
66310Sstevel@tonic-gate */ 66320Sstevel@tonic-gate detector = fm_nvlist_create(nva); 66330Sstevel@tonic-gate resource = fm_nvlist_create(nva); 66340Sstevel@tonic-gate switch (cpunodes[aflt->flt_inst].implementation) { 66350Sstevel@tonic-gate case CHEETAH_IMPL: 66360Sstevel@tonic-gate cpu_type = FM_EREPORT_CPU_USIII; 66370Sstevel@tonic-gate break; 66380Sstevel@tonic-gate case CHEETAH_PLUS_IMPL: 66390Sstevel@tonic-gate cpu_type = FM_EREPORT_CPU_USIIIplus; 66400Sstevel@tonic-gate break; 66410Sstevel@tonic-gate case JALAPENO_IMPL: 66420Sstevel@tonic-gate cpu_type = FM_EREPORT_CPU_USIIIi; 66430Sstevel@tonic-gate break; 66440Sstevel@tonic-gate case SERRANO_IMPL: 66450Sstevel@tonic-gate cpu_type = FM_EREPORT_CPU_USIIIiplus; 66460Sstevel@tonic-gate break; 66470Sstevel@tonic-gate case JAGUAR_IMPL: 66480Sstevel@tonic-gate cpu_type = FM_EREPORT_CPU_USIV; 66490Sstevel@tonic-gate break; 66500Sstevel@tonic-gate case PANTHER_IMPL: 66510Sstevel@tonic-gate cpu_type = FM_EREPORT_CPU_USIVplus; 66520Sstevel@tonic-gate break; 66530Sstevel@tonic-gate default: 66540Sstevel@tonic-gate cpu_type = FM_EREPORT_CPU_UNSUPPORTED; 66550Sstevel@tonic-gate break; 66560Sstevel@tonic-gate } 66572381Smikechr 66582381Smikechr cpu_fmri_cpu_set(detector, aflt->flt_inst); 66590Sstevel@tonic-gate 66600Sstevel@tonic-gate /* 66610Sstevel@tonic-gate * Encode all the common data into the ereport. 66620Sstevel@tonic-gate */ 66630Sstevel@tonic-gate (void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s", 66645219Skm84432 FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class); 66650Sstevel@tonic-gate 66660Sstevel@tonic-gate fm_ereport_set(ereport, FM_EREPORT_VERSION, buf, 66670Sstevel@tonic-gate fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, FM_ENA_FMT1), 66680Sstevel@tonic-gate detector, NULL); 66690Sstevel@tonic-gate 66700Sstevel@tonic-gate /* 66710Sstevel@tonic-gate * Encode the error specific data that was saved in 66720Sstevel@tonic-gate * the async_flt structure into the ereport. 66730Sstevel@tonic-gate */ 66740Sstevel@tonic-gate cpu_payload_add_aflt(aflt, ereport, resource, 66750Sstevel@tonic-gate &plat_ecc_ch_flt.ecaf_afar_status, 66760Sstevel@tonic-gate &plat_ecc_ch_flt.ecaf_synd_status); 66770Sstevel@tonic-gate 66780Sstevel@tonic-gate if (aflt->flt_panic || panicstr) { 66790Sstevel@tonic-gate errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC); 66800Sstevel@tonic-gate } else { 66810Sstevel@tonic-gate (void) fm_ereport_post(ereport, EVCH_TRYHARD); 66820Sstevel@tonic-gate fm_nvlist_destroy(ereport, FM_NVA_FREE); 66830Sstevel@tonic-gate fm_nvlist_destroy(detector, FM_NVA_FREE); 66840Sstevel@tonic-gate fm_nvlist_destroy(resource, FM_NVA_FREE); 66850Sstevel@tonic-gate } 66860Sstevel@tonic-gate /* 66870Sstevel@tonic-gate * Send the enhanced error information (plat_ecc_error2_data_t) 66880Sstevel@tonic-gate * to the SC only if it can process it. 66890Sstevel@tonic-gate */ 66900Sstevel@tonic-gate 66910Sstevel@tonic-gate if (&plat_ecc_capability_sc_get && 66920Sstevel@tonic-gate plat_ecc_capability_sc_get(PLAT_ECC_ERROR2_MESSAGE)) { 66930Sstevel@tonic-gate msg_type = cpu_flt_bit_to_plat_error(aflt); 66940Sstevel@tonic-gate if (msg_type != PLAT_ECC_ERROR2_NONE) { 66950Sstevel@tonic-gate /* 66960Sstevel@tonic-gate * If afar status is not invalid, do a unum lookup.
66970Sstevel@tonic-gate */ 66980Sstevel@tonic-gate if (plat_ecc_ch_flt.ecaf_afar_status != 66990Sstevel@tonic-gate AFLT_STAT_INVALID) { 67002436Smb91622 synd_code = synd_to_synd_code( 67012436Smb91622 plat_ecc_ch_flt.ecaf_synd_status, 67022436Smb91622 aflt->flt_synd, ch_flt->flt_bit); 67032436Smb91622 (void) cpu_get_mem_unum_synd(synd_code, 67042436Smb91622 aflt, unum); 67050Sstevel@tonic-gate } else { 67060Sstevel@tonic-gate unum[0] = '\0'; 67070Sstevel@tonic-gate } 67080Sstevel@tonic-gate plat_ecc_ch_flt.ecaf_sdw_afar = ch_flt->flt_sdw_afar; 67090Sstevel@tonic-gate plat_ecc_ch_flt.ecaf_sdw_afsr = ch_flt->flt_sdw_afsr; 67100Sstevel@tonic-gate plat_ecc_ch_flt.ecaf_afsr_ext = ch_flt->afsr_ext; 67110Sstevel@tonic-gate plat_ecc_ch_flt.ecaf_sdw_afsr_ext = 67120Sstevel@tonic-gate ch_flt->flt_sdw_afsr_ext; 67130Sstevel@tonic-gate 67140Sstevel@tonic-gate if (&plat_log_fruid_error2) 67150Sstevel@tonic-gate plat_log_fruid_error2(msg_type, unum, aflt, 67160Sstevel@tonic-gate &plat_ecc_ch_flt); 67170Sstevel@tonic-gate } 67180Sstevel@tonic-gate } 67190Sstevel@tonic-gate } 67200Sstevel@tonic-gate 67210Sstevel@tonic-gate void 67220Sstevel@tonic-gate cpu_run_bus_error_handlers(struct async_flt *aflt, int expected) 67230Sstevel@tonic-gate { 67240Sstevel@tonic-gate int status; 67250Sstevel@tonic-gate ddi_fm_error_t de; 67260Sstevel@tonic-gate 67270Sstevel@tonic-gate bzero(&de, sizeof (ddi_fm_error_t)); 67280Sstevel@tonic-gate 67290Sstevel@tonic-gate de.fme_version = DDI_FME_VERSION; 67300Sstevel@tonic-gate de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, 67310Sstevel@tonic-gate FM_ENA_FMT1); 67320Sstevel@tonic-gate de.fme_flag = expected; 67330Sstevel@tonic-gate de.fme_bus_specific = (void *)aflt->flt_addr; 67340Sstevel@tonic-gate status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de); 67350Sstevel@tonic-gate if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL)) 67360Sstevel@tonic-gate aflt->flt_panic = 1; 67370Sstevel@tonic-gate } 67380Sstevel@tonic-gate 67390Sstevel@tonic-gate void 67400Sstevel@tonic-gate cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz, 67410Sstevel@tonic-gate errorq_t *eqp, uint_t flag) 67420Sstevel@tonic-gate { 67430Sstevel@tonic-gate struct async_flt *aflt = (struct async_flt *)payload; 67440Sstevel@tonic-gate 67450Sstevel@tonic-gate aflt->flt_erpt_class = error_class; 67460Sstevel@tonic-gate errorq_dispatch(eqp, payload, payload_sz, flag); 67470Sstevel@tonic-gate } 67480Sstevel@tonic-gate 67490Sstevel@tonic-gate /* 67500Sstevel@tonic-gate * This routine may be called by the IO module, but does not do 67510Sstevel@tonic-gate * anything in this cpu module. The SERD algorithm is handled by 67520Sstevel@tonic-gate * the cpumem-diagnosis engine instead. 67530Sstevel@tonic-gate */ 67540Sstevel@tonic-gate /*ARGSUSED*/ 67550Sstevel@tonic-gate void 67560Sstevel@tonic-gate cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum) 67570Sstevel@tonic-gate {} 67580Sstevel@tonic-gate 67590Sstevel@tonic-gate void 67600Sstevel@tonic-gate adjust_hw_copy_limits(int ecache_size) 67610Sstevel@tonic-gate { 67620Sstevel@tonic-gate /* 67630Sstevel@tonic-gate * Set hw copy limits. 67640Sstevel@tonic-gate * 67650Sstevel@tonic-gate * /etc/system will be parsed later and can override one or more 67660Sstevel@tonic-gate * of these settings. 67670Sstevel@tonic-gate * 67680Sstevel@tonic-gate * At this time, ecache size seems only mildly relevant.
67690Sstevel@tonic-gate * We seem to run into issues with the d-cache and stalls 67700Sstevel@tonic-gate * we see on misses. 67710Sstevel@tonic-gate * 67720Sstevel@tonic-gate * Cycle measurement indicates that 2 byte aligned copies fare 67730Sstevel@tonic-gate * little better than doing things with VIS at around 512 bytes. 67740Sstevel@tonic-gate * 4 byte aligned shows promise until around 1024 bytes. 8 byte 67750Sstevel@tonic-gate * aligned is faster whenever the source and destination data 67760Sstevel@tonic-gate * are in cache and the total size is less than 2 Kbytes. The 2K 67770Sstevel@tonic-gate * limit seems to be driven by the 2K write cache. 67780Sstevel@tonic-gate * When more than 2K of copies are done in non-VIS mode, stores 67790Sstevel@tonic-gate * back up in the write cache. In VIS mode, the write cache is 67800Sstevel@tonic-gate * bypassed, allowing faster cache-line writes aligned on cache 67810Sstevel@tonic-gate * boundaries. 67820Sstevel@tonic-gate * 67830Sstevel@tonic-gate * In addition, in non-VIS mode, there is no prefetching, so 67840Sstevel@tonic-gate * for larger copies, the advantage of prefetching to avoid even 67850Sstevel@tonic-gate * occasional cache misses is enough to justify using the VIS code. 67860Sstevel@tonic-gate * 67870Sstevel@tonic-gate * During testing, it was discovered that netbench ran 3% slower 67880Sstevel@tonic-gate * when hw_copy_limit_8 was 2K or larger. Apparently for server 67890Sstevel@tonic-gate * applications, data is only used once (copied to the output 67900Sstevel@tonic-gate * buffer, then copied by the network device off the system). Using 67910Sstevel@tonic-gate * the VIS copy saves more L2 cache state. Network copies are 67920Sstevel@tonic-gate * around 1.3K to 1.5K in size for historical reasons. 67930Sstevel@tonic-gate * 67940Sstevel@tonic-gate * Therefore, a limit of 1K bytes will be used for the 8 byte 67950Sstevel@tonic-gate * aligned copy even for large caches and 8 MB ecache. The 67960Sstevel@tonic-gate * infrastructure to allow different limits for different sized 67970Sstevel@tonic-gate * caches is kept to allow further tuning in later releases. 67980Sstevel@tonic-gate */ 67990Sstevel@tonic-gate 68000Sstevel@tonic-gate if (min_ecache_size == 0 && use_hw_bcopy) { 68010Sstevel@tonic-gate /* 68020Sstevel@tonic-gate * First time through - should be before /etc/system 68030Sstevel@tonic-gate * is read. 68040Sstevel@tonic-gate * Could skip the checks for zero but this lets us 68050Sstevel@tonic-gate * preserve any debugger rewrites. 68060Sstevel@tonic-gate */ 68070Sstevel@tonic-gate if (hw_copy_limit_1 == 0) { 68080Sstevel@tonic-gate hw_copy_limit_1 = VIS_COPY_THRESHOLD; 68090Sstevel@tonic-gate priv_hcl_1 = hw_copy_limit_1; 68100Sstevel@tonic-gate } 68110Sstevel@tonic-gate if (hw_copy_limit_2 == 0) { 68120Sstevel@tonic-gate hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD; 68130Sstevel@tonic-gate priv_hcl_2 = hw_copy_limit_2; 68140Sstevel@tonic-gate } 68150Sstevel@tonic-gate if (hw_copy_limit_4 == 0) { 68160Sstevel@tonic-gate hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD; 68170Sstevel@tonic-gate priv_hcl_4 = hw_copy_limit_4; 68180Sstevel@tonic-gate } 68190Sstevel@tonic-gate if (hw_copy_limit_8 == 0) { 68200Sstevel@tonic-gate hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD; 68210Sstevel@tonic-gate priv_hcl_8 = hw_copy_limit_8; 68220Sstevel@tonic-gate } 68230Sstevel@tonic-gate min_ecache_size = ecache_size; 68240Sstevel@tonic-gate } else { 68250Sstevel@tonic-gate /* 68260Sstevel@tonic-gate * MP initialization.
Called *after* /etc/system has 68270Sstevel@tonic-gate * been parsed. One CPU has already been initialized. 68280Sstevel@tonic-gate * Need to cater for /etc/system having scragged one 68290Sstevel@tonic-gate * of our values. 68300Sstevel@tonic-gate */ 68310Sstevel@tonic-gate if (ecache_size == min_ecache_size) { 68320Sstevel@tonic-gate /* 68330Sstevel@tonic-gate * Same size ecache. We do nothing unless we 68340Sstevel@tonic-gate * have a pessimistic ecache setting. In that 68350Sstevel@tonic-gate * case we become more optimistic (if the cache is 68360Sstevel@tonic-gate * large enough). 68370Sstevel@tonic-gate */ 68380Sstevel@tonic-gate if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) { 68390Sstevel@tonic-gate /* 68400Sstevel@tonic-gate * Need to adjust hw_copy_limit* from our 68410Sstevel@tonic-gate * pessimistic uniprocessor value to a more 68420Sstevel@tonic-gate * optimistic UP value *iff* it hasn't been 68430Sstevel@tonic-gate * reset. 68440Sstevel@tonic-gate */ 68450Sstevel@tonic-gate if ((ecache_size > 1048576) && 68460Sstevel@tonic-gate (priv_hcl_8 == hw_copy_limit_8)) { 68470Sstevel@tonic-gate if (ecache_size <= 2097152) 68480Sstevel@tonic-gate hw_copy_limit_8 = 4 * 68490Sstevel@tonic-gate VIS_COPY_THRESHOLD; 68500Sstevel@tonic-gate else if (ecache_size <= 4194304) 68510Sstevel@tonic-gate hw_copy_limit_8 = 4 * 68520Sstevel@tonic-gate VIS_COPY_THRESHOLD; 68530Sstevel@tonic-gate else 68540Sstevel@tonic-gate hw_copy_limit_8 = 4 * 68550Sstevel@tonic-gate VIS_COPY_THRESHOLD; 68560Sstevel@tonic-gate priv_hcl_8 = hw_copy_limit_8; 68570Sstevel@tonic-gate } 68580Sstevel@tonic-gate } 68590Sstevel@tonic-gate } else if (ecache_size < min_ecache_size) { 68600Sstevel@tonic-gate /* 68610Sstevel@tonic-gate * A different ecache size. Can this even happen? 68620Sstevel@tonic-gate */ 68630Sstevel@tonic-gate if (priv_hcl_8 == hw_copy_limit_8) { 68640Sstevel@tonic-gate /* 68650Sstevel@tonic-gate * The previous value that we set 68660Sstevel@tonic-gate * is unchanged (i.e., it hasn't been 68670Sstevel@tonic-gate * scragged by /etc/system). Rewrite it. 68680Sstevel@tonic-gate */ 68690Sstevel@tonic-gate if (ecache_size <= 1048576) 68700Sstevel@tonic-gate hw_copy_limit_8 = 8 * 68710Sstevel@tonic-gate VIS_COPY_THRESHOLD; 68720Sstevel@tonic-gate else if (ecache_size <= 2097152) 68730Sstevel@tonic-gate hw_copy_limit_8 = 8 * 68740Sstevel@tonic-gate VIS_COPY_THRESHOLD; 68750Sstevel@tonic-gate else if (ecache_size <= 4194304) 68760Sstevel@tonic-gate hw_copy_limit_8 = 8 * 68770Sstevel@tonic-gate VIS_COPY_THRESHOLD; 68780Sstevel@tonic-gate else 68790Sstevel@tonic-gate hw_copy_limit_8 = 10 * 68800Sstevel@tonic-gate VIS_COPY_THRESHOLD; 68810Sstevel@tonic-gate priv_hcl_8 = hw_copy_limit_8; 68820Sstevel@tonic-gate min_ecache_size = ecache_size; 68830Sstevel@tonic-gate } 68840Sstevel@tonic-gate } 68850Sstevel@tonic-gate } 68860Sstevel@tonic-gate } 68870Sstevel@tonic-gate 68880Sstevel@tonic-gate /* 68890Sstevel@tonic-gate * Called from illegal instruction trap handler to see if we can attribute 68900Sstevel@tonic-gate * the trap to a fpras check. 
68910Sstevel@tonic-gate */ 68920Sstevel@tonic-gate int 68930Sstevel@tonic-gate fpras_chktrap(struct regs *rp) 68940Sstevel@tonic-gate { 68950Sstevel@tonic-gate int op; 68960Sstevel@tonic-gate struct fpras_chkfngrp *cgp; 68970Sstevel@tonic-gate uintptr_t tpc = (uintptr_t)rp->r_pc; 68980Sstevel@tonic-gate 68990Sstevel@tonic-gate if (fpras_chkfngrps == NULL) 69000Sstevel@tonic-gate return (0); 69010Sstevel@tonic-gate 69020Sstevel@tonic-gate cgp = &fpras_chkfngrps[CPU->cpu_id]; 69030Sstevel@tonic-gate for (op = 0; op < FPRAS_NCOPYOPS; ++op) { 69040Sstevel@tonic-gate if (tpc >= (uintptr_t)&cgp->fpras_fn[op].fpras_blk0 && 69050Sstevel@tonic-gate tpc < (uintptr_t)&cgp->fpras_fn[op].fpras_chkresult) 69060Sstevel@tonic-gate break; 69070Sstevel@tonic-gate } 69080Sstevel@tonic-gate if (op == FPRAS_NCOPYOPS) 69090Sstevel@tonic-gate return (0); 69100Sstevel@tonic-gate 69110Sstevel@tonic-gate /* 69120Sstevel@tonic-gate * This is an fpRAS failure caught through an illegal 69130Sstevel@tonic-gate * instruction; resume at the trampoline. 69140Sstevel@tonic-gate */ 69150Sstevel@tonic-gate rp->r_pc = (uintptr_t)&cgp->fpras_fn[op].fpras_trampoline; 69160Sstevel@tonic-gate rp->r_npc = rp->r_pc + 4; 69170Sstevel@tonic-gate return (1); 69180Sstevel@tonic-gate } 69190Sstevel@tonic-gate 69200Sstevel@tonic-gate /* 69210Sstevel@tonic-gate * fpras_failure is called when a fpras check detects a bad calculation 69220Sstevel@tonic-gate * result or an illegal instruction trap is attributed to an fpras 69230Sstevel@tonic-gate * check. In all cases we are still bound to CPU. 69240Sstevel@tonic-gate */ 69250Sstevel@tonic-gate int 69260Sstevel@tonic-gate fpras_failure(int op, int how) 69270Sstevel@tonic-gate { 69280Sstevel@tonic-gate int use_hw_bcopy_orig, use_hw_bzero_orig; 69290Sstevel@tonic-gate uint_t hcl1_orig, hcl2_orig, hcl4_orig, hcl8_orig; 69300Sstevel@tonic-gate ch_async_flt_t ch_flt; 69310Sstevel@tonic-gate struct async_flt *aflt = (struct async_flt *)&ch_flt; 69320Sstevel@tonic-gate struct fpras_chkfn *sfp, *cfp; 69330Sstevel@tonic-gate uint32_t *sip, *cip; 69340Sstevel@tonic-gate int i; 69350Sstevel@tonic-gate 69360Sstevel@tonic-gate /* 69370Sstevel@tonic-gate * We're running on a sick CPU. Avoid further FPU use at least for 69380Sstevel@tonic-gate * the time in which we dispatch an ereport and (if applicable) panic.

/*
 * fpras_failure is called when an fpras check detects a bad calculation
 * result or when an illegal instruction trap is attributed to an fpras
 * check. In all cases we are still bound to the CPU.
 */
int
fpras_failure(int op, int how)
{
    int use_hw_bcopy_orig, use_hw_bzero_orig;
    uint_t hcl1_orig, hcl2_orig, hcl4_orig, hcl8_orig;
    ch_async_flt_t ch_flt;
    struct async_flt *aflt = (struct async_flt *)&ch_flt;
    struct fpras_chkfn *sfp, *cfp;
    uint32_t *sip, *cip;
    int i;

    /*
     * We're running on a sick CPU. Avoid further FPU use at least for
     * the time in which we dispatch an ereport and (if applicable) panic.
     */
    use_hw_bcopy_orig = use_hw_bcopy;
    use_hw_bzero_orig = use_hw_bzero;
    hcl1_orig = hw_copy_limit_1;
    hcl2_orig = hw_copy_limit_2;
    hcl4_orig = hw_copy_limit_4;
    hcl8_orig = hw_copy_limit_8;
    use_hw_bcopy = use_hw_bzero = 0;
    hw_copy_limit_1 = hw_copy_limit_2 = hw_copy_limit_4 =
        hw_copy_limit_8 = 0;

    bzero(&ch_flt, sizeof (ch_async_flt_t));
    aflt->flt_id = gethrtime_waitfree();
    aflt->flt_class = CPU_FAULT;
    aflt->flt_inst = CPU->cpu_id;
    aflt->flt_status = (how << 8) | op;
    aflt->flt_payload = FM_EREPORT_PAYLOAD_FPU_HWCOPY;
    ch_flt.flt_type = CPU_FPUERR;

    /*
     * We must panic if the copy operation had no lofault protection -
     * i.e., don't panic for copyin, copyout, kcopy and bcopy called
     * under on_fault, but do panic for unprotected bcopy and
     * hwblkpagecopy.
     */
    aflt->flt_panic = (curthread->t_lofault == NULL);

    /*
     * XOR the source instruction block with the copied instruction
     * block - this will show us which bit(s) are corrupted.
     */
    sfp = (struct fpras_chkfn *)fpras_chkfn_type1;
    cfp = &fpras_chkfngrps[CPU->cpu_id].fpras_fn[op];
    if (op == FPRAS_BCOPY || op == FPRAS_COPYOUT) {
        sip = &sfp->fpras_blk0[0];
        cip = &cfp->fpras_blk0[0];
    } else {
        sip = &sfp->fpras_blk1[0];
        cip = &cfp->fpras_blk1[0];
    }
    for (i = 0; i < 16; ++i, ++sip, ++cip)
        ch_flt.flt_fpdata[i] = *sip ^ *cip;

    cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_FPU_HWCOPY, (void *)&ch_flt,
        sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);

    if (aflt->flt_panic)
        fm_panic("FPU failure on CPU %d", CPU->cpu_id);

    /*
     * We get here for copyin/copyout and kcopy or bcopy where the
     * caller has used on_fault. We will flag the error so that
     * the process may be killed. The trap_async_hwerr mechanism will
     * take appropriate further action (such as a reboot, contract
     * notification etc.). Since we may be continuing we will
     * restore the global hardware copy acceleration switches.
     *
     * When we return from this function to the copy function we want to
     * avoid potentially bad data being used, i.e., we want the affected
     * copy function to return an error. The caller should therefore
     * invoke its lofault handler (which always exists for these
     * functions) which will return the appropriate error.
     */
    ttolwp(curthread)->lwp_pcb.pcb_flags |= ASYNC_HWERR;
    aston(curthread);

    use_hw_bcopy = use_hw_bcopy_orig;
    use_hw_bzero = use_hw_bzero_orig;
    hw_copy_limit_1 = hcl1_orig;
    hw_copy_limit_2 = hcl2_orig;
    hw_copy_limit_4 = hcl4_orig;
    hw_copy_limit_8 = hcl8_orig;

    return (1);
}
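
/*
 * Illustrative sketch (not part of the build): fpras_failure() packs the
 * detection method and the copy operation into flt_status with
 * "(how << 8) | op", so a consumer of the fault record could unpack it
 * as below. The local names are hypothetical; the shift of 8 matches the
 * packing above.
 *
 *	int how = (aflt->flt_status >> 8) & 0xff;
 *	int op = aflt->flt_status & 0xff;	// FPRAS_BCOPY etc.
 */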

#define	VIS_BLOCKSIZE	64

int
dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
{
    int ret, watched;

    watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
    ret = dtrace_blksuword32(addr, data, 0);
    if (watched)
        watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);

    return (ret);
}

/*
 * Called when a cpu enters the CPU_FAULTED state (by the cpu placing the
 * faulted cpu into that state). Cross-trap to the faulted cpu to clear
 * CEEN from the EER to disable traps for further disrupting error types
 * on that cpu. We could cross-call instead, but that has a larger
 * instruction and data footprint than cross-trapping, and the cpu is known
 * to be faulted.
 */
void
cpu_faulted_enter(struct cpu *cp)
{
    xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_CLRBITS);
}

/*
 * Called when a cpu leaves the CPU_FAULTED state to return to one of
 * offline, spare, or online (by the cpu requesting this state change).
 * First we cross-call to clear the AFSR (and AFSR_EXT on Panther) of
 * disrupting error bits that have accumulated without trapping, then
 * we cross-trap to re-enable CEEN-controlled traps.
 */
void
cpu_faulted_exit(struct cpu *cp)
{
    ch_cpu_errors_t cpu_error_regs;

    cpu_error_regs.afsr = C_AFSR_CECC_ERRS;
    if (IS_PANTHER(cpunodes[cp->cpu_id].implementation))
        cpu_error_regs.afsr_ext &= C_AFSR_EXT_CECC_ERRS;
    xc_one(cp->cpu_id, (xcfunc_t *)set_cpu_error_state,
        (uint64_t)&cpu_error_regs, 0);

    xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_SETBITS);
}
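
/*
 * Illustrative sketch (not part of the build): cpu_faulted_enter() and
 * cpu_faulted_exit() above are intended to bracket the CPU_FAULTED state;
 * a hypothetical state-change path would pair them like this:
 *
 *	cpu_faulted_enter(cp);	// cross-trap: clear CEEN, mask disrupting traps
 *	// ... cp remains in the CPU_FAULTED state ...
 *	cpu_faulted_exit(cp);	// scrub accumulated AFSR bits, set CEEN again
 */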

/*
 * Return 1 if the errors in ch_flt's AFSR are secondary errors caused by
 * the errors in the original AFSR, 0 otherwise.
 *
 * For all procs, if the initial error was a BERR or TO, then it is possible
 * that we may have caused a secondary BERR or TO in the process of logging
 * the initial error via cpu_run_bus_error_handlers(). If this is the case,
 * then if the request was protected a panic is still not necessary, and if
 * it was not protected then aft_panic is already set - so either way there
 * is no need to set aft_panic for the secondary error.
 *
 * For Cheetah and Jalapeno, if the original error was a UE which occurred
 * on a store merge, then the error handling code will call
 * cpu_deferred_error(). When clear_errors() is called, it will determine
 * that secondary errors have occurred - in particular, the store merge
 * also caused an EDU and WDU that weren't discovered until this point.
 *
 * We do three checks to verify that we are in this case. If we pass all
 * three checks, we return 1 to indicate that we should not panic. If any
 * unexpected errors occur, we return 0.
 *
 * For Cheetah+ and derivative procs, the store merge causes a DUE, which
 * is handled in cpu_disrupting_errors(). Since this function is not even
 * called in the case we are interested in, we just return 0 for these
 * processors.
 */
/*ARGSUSED*/
static int
cpu_check_secondary_errors(ch_async_flt_t *ch_flt, uint64_t t_afsr_errs,
    uint64_t t_afar)
{
    /* aflt is used only by the non-CHEETAH_PLUS checks below */
#if defined(CHEETAH_PLUS)
#else /* CHEETAH_PLUS */
    struct async_flt *aflt = (struct async_flt *)ch_flt;
#endif /* CHEETAH_PLUS */

    /*
     * Was the original error a BERR or TO and only a BERR or TO
     * (multiple errors are also OK)?
     */
    if ((t_afsr_errs & ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0) {
        /*
         * Is the new error a BERR or TO and only a BERR or TO
         * (multiple errors are also OK)?
         */
        if ((ch_flt->afsr_errs &
            ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0)
            return (1);
    }

#if defined(CHEETAH_PLUS)
    return (0);
#else /* CHEETAH_PLUS */
    /*
     * Now look for secondary effects of a UE on cheetah/jalapeno.
     *
     * Check that the original error was a UE, and only a UE. Note that
     * the ME bit will cause us to fail this check.
     */
    if (t_afsr_errs != C_AFSR_UE)
        return (0);

    /*
     * Check that the secondary errors were exclusively an EDU and/or WDU.
     */
    if ((ch_flt->afsr_errs & ~(C_AFSR_EDU|C_AFSR_WDU)) != 0)
        return (0);

    /*
     * Check that the AFAR of the original error and the secondary errors
     * match to the 64-byte boundary.
     */
    if (P2ALIGN(aflt->flt_addr, 64) != P2ALIGN(t_afar, 64))
        return (0);

    /*
     * We've passed all the checks, so it's a secondary error!
     */
    return (1);
#endif /* CHEETAH_PLUS */
}
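
/*
 * Illustrative sketch (not part of the build): P2ALIGN(x, align) rounds
 * x down to the given power-of-two boundary by masking the low bits, so
 * the 64-byte comparison above treats addresses in the same 64-byte line
 * as equal. For example:
 *
 *	P2ALIGN(0x1234567f, 64) == 0x12345640
 *	P2ALIGN(0x12345641, 64) == 0x12345640	// same 64-byte line
 *	P2ALIGN(0x12345680, 64) == 0x12345680	// next line; mismatch
 */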

/*
 * Translate the flt_bit or flt_type into an error type. First, flt_bit
 * is checked for any valid errors. If found, the error type is
 * returned. If not found, the flt_type is checked for L1$ parity errors.
 */
/*ARGSUSED*/
static uint8_t
cpu_flt_bit_to_plat_error(struct async_flt *aflt)
{
#if defined(JALAPENO)
    /*
     * Currently, logging errors to the SC is not supported on Jalapeno.
     */
    return (PLAT_ECC_ERROR2_NONE);
#else
    ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;

    switch (ch_flt->flt_bit) {
    case C_AFSR_CE:
        return (PLAT_ECC_ERROR2_CE);
    case C_AFSR_UCC:
    case C_AFSR_EDC:
    case C_AFSR_WDC:
    case C_AFSR_CPC:
        return (PLAT_ECC_ERROR2_L2_CE);
    case C_AFSR_EMC:
        return (PLAT_ECC_ERROR2_EMC);
    case C_AFSR_IVC:
        return (PLAT_ECC_ERROR2_IVC);
    case C_AFSR_UE:
        return (PLAT_ECC_ERROR2_UE);
    case C_AFSR_UCU:
    case C_AFSR_EDU:
    case C_AFSR_WDU:
    case C_AFSR_CPU:
        return (PLAT_ECC_ERROR2_L2_UE);
    case C_AFSR_IVU:
        return (PLAT_ECC_ERROR2_IVU);
    case C_AFSR_TO:
        return (PLAT_ECC_ERROR2_TO);
    case C_AFSR_BERR:
        return (PLAT_ECC_ERROR2_BERR);
#if defined(CHEETAH_PLUS)
    case C_AFSR_L3_EDC:
    case C_AFSR_L3_UCC:
    case C_AFSR_L3_CPC:
    case C_AFSR_L3_WDC:
        return (PLAT_ECC_ERROR2_L3_CE);
    case C_AFSR_IMC:
        return (PLAT_ECC_ERROR2_IMC);
    case C_AFSR_TSCE:
        return (PLAT_ECC_ERROR2_L2_TSCE);
    case C_AFSR_THCE:
        return (PLAT_ECC_ERROR2_L2_THCE);
    case C_AFSR_L3_MECC:
        return (PLAT_ECC_ERROR2_L3_MECC);
    case C_AFSR_L3_THCE:
        return (PLAT_ECC_ERROR2_L3_THCE);
    case C_AFSR_L3_CPU:
    case C_AFSR_L3_EDU:
    case C_AFSR_L3_UCU:
    case C_AFSR_L3_WDU:
        return (PLAT_ECC_ERROR2_L3_UE);
    case C_AFSR_DUE:
        return (PLAT_ECC_ERROR2_DUE);
    case C_AFSR_DTO:
        return (PLAT_ECC_ERROR2_DTO);
    case C_AFSR_DBERR:
        return (PLAT_ECC_ERROR2_DBERR);
#endif /* CHEETAH_PLUS */
    default:
        switch (ch_flt->flt_type) {
#if defined(CPU_IMP_L1_CACHE_PARITY)
        case CPU_IC_PARITY:
            return (PLAT_ECC_ERROR2_IPE);
        case CPU_DC_PARITY:
            if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
                if (ch_flt->parity_data.dpe.cpl_cache ==
                    CPU_PC_PARITY) {
                    return (PLAT_ECC_ERROR2_PCACHE);
                }
            }
            return (PLAT_ECC_ERROR2_DPE);
#endif /* CPU_IMP_L1_CACHE_PARITY */
        case CPU_ITLB_PARITY:
            return (PLAT_ECC_ERROR2_ITLB);
        case CPU_DTLB_PARITY:
            return (PLAT_ECC_ERROR2_DTLB);
        default:
            return (PLAT_ECC_ERROR2_NONE);
        }
    }
#endif /* JALAPENO */
}