/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/machparam.h>
#include <sys/machsystm.h>
#include <sys/cpu.h>
#include <sys/elf_SPARC.h>
#include <vm/hat_sfmmu.h>
#include <vm/page.h>
#include <sys/cpuvar.h>
#include <sys/spitregs.h>
#include <sys/async.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/dditypes.h>
#include <sys/sunddi.h>
#include <sys/cpu_module.h>
#include <sys/prom_debug.h>
#include <sys/vmsystm.h>
#include <sys/prom_plat.h>
#include <sys/sysmacros.h>
#include <sys/intreg.h>
#include <sys/machtrap.h>
#include <sys/ontrap.h>
#include <sys/ivintr.h>
#include <sys/atomic.h>
#include <sys/panic.h>
#include <sys/ndifm.h>
#include <sys/fm/protocol.h>
#include <sys/fm/util.h>
#include <sys/fm/cpu/UltraSPARC-II.h>
#include <sys/ddi.h>
#include <sys/ecc_kstat.h>
#include <sys/watchpoint.h>
#include <sys/dtrace.h>
#include <sys/errclassify.h>

uchar_t *ctx_pgsz_array = NULL;

/*
 * Structure for the 8 byte ecache data dump and the associated
 * AFSR state.  There will be 8 of these structures used to dump an
 * ecache line (64 bytes).
 */
typedef struct sf_ec_data_elm {
	uint64_t ec_d8;
	uint64_t ec_afsr;
} ec_data_t;

/*
 * Define spitfire (Ultra I/II) specific asynchronous error structure
 */
typedef struct spitfire_async_flt {
	struct async_flt cmn_asyncflt;	/* common - see sun4u/sys/async.h */
	ushort_t flt_type;		/* types of faults - cpu specific */
	ec_data_t flt_ec_data[8];	/* for E$ or mem dump/state */
	uint64_t flt_ec_tag;		/* E$ tag info */
	int flt_ec_lcnt;		/* number of bad E$ lines */
	ushort_t flt_sdbh;		/* UDBH reg */
	ushort_t flt_sdbl;		/* UDBL reg */
} spitf_async_flt;

/*
 * Prototypes for support routines in spitfire_asm.s:
 */
extern void flush_ecache(uint64_t physaddr, size_t size, size_t linesize);
extern uint64_t get_lsu(void);
extern void set_lsu(uint64_t ncc);
extern void get_ecache_dtag(uint32_t ecache_idx, uint64_t *data, uint64_t *tag,
    uint64_t *oafsr, uint64_t *acc_afsr);
extern uint64_t check_ecache_line(uint32_t id, uint64_t *acc_afsr);
extern uint64_t get_ecache_tag(uint32_t id, uint64_t *nafsr,
    uint64_t *acc_afsr);
extern uint64_t read_and_clear_afsr();
extern void write_ec_tag_parity(uint32_t id);
extern void write_hb_ec_tag_parity(uint32_t id);

/*
 * Spitfire module routines:
 */
static void cpu_async_log_err(void *flt);
/*PRINTFLIKE6*/
static void cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt,
    uint_t logflags, const char *endstr, const char *fmt, ...);

static void cpu_read_paddr(struct async_flt *aflt, short verbose, short ce_err);
static void cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum);
static void cpu_log_ecmem_info(spitf_async_flt *spf_flt);

static void log_ce_err(struct async_flt *aflt, char *unum);
static void log_ue_err(struct async_flt *aflt, char *unum);
static void check_misc_err(spitf_async_flt *spf_flt);
static ushort_t ecc_gen(uint_t high_bytes, uint_t low_bytes);
static int check_ecc(struct async_flt *aflt);
static uint_t get_cpu_status(uint64_t arg);
static uint64_t clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr);
static void scan_ecache(uint64_t *afar, ec_data_t *data, uint64_t *tag,
    int *m, uint64_t *afsr);
static void ecache_kstat_init(struct cpu *cp);
static void ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag,
    uint64_t paddr, int mpb, uint64_t);
static uint64_t ecache_scrub_misc_err(int, uint64_t);
static void ecache_scrub_tag_err(uint64_t, uchar_t, uint32_t);
static void ecache_page_retire(void *);
static int ecc_kstat_update(kstat_t *ksp, int rw);
static int ce_count_unum(int status, int len, char *unum);
static void add_leaky_bucket_timeout(void);
static int synd_to_synd_code(int synd_status, ushort_t synd);

extern uint_t read_all_memscrub;
extern void memscrub_run(void);

static uchar_t	isus2i;			/* set if sabre */
static uchar_t	isus2e;			/* set if hummingbird */

/*
 * Default ecache mask and shift settings for Spitfire.  If we detect a
 * different CPU implementation, we will modify these values at boot time.
 */
static uint64_t cpu_ec_tag_mask		= S_ECTAG_MASK;
static uint64_t cpu_ec_state_mask	= S_ECSTATE_MASK;
static uint64_t cpu_ec_par_mask		= S_ECPAR_MASK;
static int cpu_ec_par_shift		= S_ECPAR_SHIFT;
static int cpu_ec_tag_shift		= S_ECTAG_SHIFT;
static int cpu_ec_state_shift		= S_ECSTATE_SHIFT;
static uchar_t cpu_ec_state_exl		= S_ECSTATE_EXL;
static uchar_t cpu_ec_state_mod		= S_ECSTATE_MOD;
static uchar_t cpu_ec_state_shr		= S_ECSTATE_SHR;
static uchar_t cpu_ec_state_own		= S_ECSTATE_OWN;

/*
 * Default ecache state bits for Spitfire.  These individual bits indicate if
 * the given line is in any of the valid or modified states, respectively.
 * Again, we modify these at boot if we detect a different CPU.
 */
static uchar_t cpu_ec_state_valid	= S_ECSTATE_VALID;
static uchar_t cpu_ec_state_dirty	= S_ECSTATE_DIRTY;
static uchar_t cpu_ec_parity		= S_EC_PARITY;
static uchar_t cpu_ec_state_parity	= S_ECSTATE_PARITY;

/*
 * This table is used to determine which bit(s) is(are) bad when an ECC
 * error occurs.  The array is indexed by an 8-bit syndrome.  The entries
 * of this array have the following semantics:
 *
 *	00-63	The number of the bad bit, when only one bit is bad.
 *	64	ECC bit C0 is bad.
 *	65	ECC bit C1 is bad.
 *	66	ECC bit C2 is bad.
 *	67	ECC bit C3 is bad.
 *	68	ECC bit C4 is bad.
 *	69	ECC bit C5 is bad.
 *	70	ECC bit C6 is bad.
 *	71	ECC bit C7 is bad.
 *	72	Two bits are bad.
 *	73	Three bits are bad.
 *	74	Four bits are bad.
 *	75	More than Four bits are bad.
 *	76	NO bits are bad.
 * Based on "Galaxy Memory Subsystem SPECIFICATION" rev 0.6, pg. 28.
 */

#define	C0	64
#define	C1	65
#define	C2	66
#define	C3	67
#define	C4	68
#define	C5	69
#define	C6	70
#define	C7	71
#define	M2	72
#define	M3	73
#define	M4	74
#define	MX	75
#define	NA	76

#define	SYND_IS_SINGLE_BIT_DATA(synd_code)	((synd_code >= 0) && \
						    (synd_code < C0))
#define	SYND_IS_SINGLE_BIT_CHK(synd_code)	((synd_code >= C0) && \
						    (synd_code <= C7))

static char ecc_syndrome_tab[] =
{
	NA, C0, C1, M2, C2, M2, M2, M3, C3, M2, M2, M3, M2, M3, M3, M4,
	C4, M2, M2, 32, M2, 57, MX, M2, M2, 37, 49, M2, 40, M2, M2, 44,
	C5, M2, M2, 33, M2, 61,  4, M2, M2, MX, 53, M2, 45, M2, M2, 41,
	M2,  0,  1, M2, 10, M2, M2, MX, 15, M2, M2, MX, M2, M3, M3, M2,
	C6, M2, M2, 42, M2, 59, 39, M2, M2, MX, 51, M2, 34, M2, M2, 46,
	M2, 25, 29, M2, 27, M4, M2, MX, 31, M2, M4, MX, M2, MX, MX, M2,
	M2, MX, 36, M2,  7, M2, M2, 54, MX, M2, M2, 62, M2, 48, 56, M2,
	M3, M2, M2, MX, M2, MX, 22, M2, M2, 18, MX, M2, M3, M2, M2, MX,
	C7, M2, M2, 47, M2, 63, MX, M2, M2,  6, 55, M2, 35, M2, M2, 43,
	M2,  5, MX, M2, MX, M2, M2, 50, 38, M2, M2, 58, M2, 52, 60, M2,
	M2, 17, 21, M2, 19, M4, M2, MX, 23, M2, M4, MX, M2, MX, MX, M2,
	M3, M2, M2, MX, M2, MX, 30, M2, M2, 26, MX, M2, M3, M2, M2, MX,
	M2,  8, 13, M2,  2, M2, M2, M3,  3, M2, M2, M3, M2, MX, MX, M2,
	M3, M2, M2, M3, M2, MX, 16, M2, M2, 20, MX, M2, MX, M2, M2, MX,
	M3, M2, M2, M3, M2, MX, 24, M2, M2, 28, MX, M2, MX, M2, M2, MX,
	M4, 12,  9, M2, 14, M2, M2, MX, 11, M2, M2, MX, M2, MX, MX, M4
};

#define	SYND_TBL_SIZE	256

/*
 * Hack for determining UDBH/UDBL, for later cpu-specific error reporting.
 * Cannot use bit 3 in afar, because it is a valid bit on a Sabre/Hummingbird.
 */
#define	UDBL_REG	0x8000
#define	UDBL(synd)	((synd & UDBL_REG) >> 15)
#define	SYND(synd)	(synd & 0x7FFF)
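
/*
 * Illustrative example (added for clarity, not in the original source):
 * a fault's flt_synd value packs the UDB selector into bit 15 and the raw
 * syndrome into the low 15 bits, so a value of 0x8003 decodes as
 * UDBL(0x8003) == 1 (the error came through UDBL) and SYND(0x8003) == 0x03;
 * looking up ecc_syndrome_tab[0x03] yields M2, i.e. a two-bit error.
 * Likewise ecc_syndrome_tab[0x01] yields C0, meaning ECC check bit C0
 * is bad.
 */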

/*
 * These error types are specific to Spitfire and are used internally for the
 * spitfire fault structure flt_type field.
 */
#define	CPU_UE_ERR		0	/* uncorrectable errors - UEs */
#define	CPU_EDP_LDP_ERR		1	/* LDP or EDP parity error */
#define	CPU_WP_ERR		2	/* WP parity error */
#define	CPU_BTO_BERR_ERR	3	/* bus timeout errors */
#define	CPU_PANIC_CP_ERR	4	/* cp error from panic polling */
#define	CPU_TRAPPING_CP_ERR	5	/* for sabre/hbird only, cp error */
#define	CPU_BADLINE_CI_ERR	6	/* E$ clean_bad line when idle */
#define	CPU_BADLINE_CB_ERR	7	/* E$ clean_bad line when busy */
#define	CPU_BADLINE_DI_ERR	8	/* E$ dirty_bad line when idle */
#define	CPU_BADLINE_DB_ERR	9	/* E$ dirty_bad line when busy */
#define	CPU_ORPHAN_CP_ERR	10	/* Orphan CP error */
#define	CPU_ECACHE_ADDR_PAR_ERR	11	/* Ecache Address parity error */
#define	CPU_ECACHE_STATE_ERR	12	/* Ecache state error */
#define	CPU_ECACHE_ETP_ETS_ERR	13	/* ETP set but ETS is zero */
#define	CPU_ECACHE_TAG_ERR	14	/* Scrub the E$ tag, if state clean */
#define	CPU_ADDITIONAL_ERR	15	/* Additional errors occurred */

/*
 * Macro to access the "Spitfire cpu private" data structure.
 */
#define	CPU_PRIVATE_PTR(cp, x)	(&(((spitfire_private_t *)CPU_PRIVATE(cp))->x))

/*
 * set to 0 to disable automatic retiring of pages on
 * DIMMs that have excessive soft errors
 */
int automatic_page_removal = 1;

/*
 * Heuristic for figuring out which module to replace.
 * Relative likelihood that this P_SYND indicates that this module is bad.
 * We call it a "score", though, not a relative likelihood.
 *
 * Step 1.
 * Assign a score to each byte of P_SYND according to the following rules:
 * If no bits on (0x00) or all bits on (0xFF), then give it a 5.
 * If one bit on, give it a 95.
 * If seven bits on, give it a 10.
 * If two bits on:
 *   in different nybbles, a 90
 *   in same nybble, but unaligned, 85
 *   in same nybble and as an aligned pair, 80
 * If six bits on, look at the bits that are off:
 *   in same nybble and as an aligned pair, 15
 *   in same nybble, but unaligned, 20
 *   in different nybbles, a 25
 * If three bits on:
 *   in different nybbles, no aligned pairs, 75
 *   in different nybbles, one aligned pair, 70
 *   in the same nybble, 65
 * If five bits on, look at the bits that are off:
 *   in the same nybble, 30
 *   in different nybbles, one aligned pair, 35
 *   in different nybbles, no aligned pairs, 40
 * If four bits on:
 *   all in one nybble, 45
 *   as two aligned pairs, 50
 *   one aligned pair, 55
 *   no aligned pairs, 60
 *
 * Step 2:
 * Take the higher of the two scores (one for each byte) as the score
 * for the module.
 *
 * Print the score for each module, and field service should replace the
 * module with the highest score.
 */

/*
 * In the table below, the first row/column comment indicates the
 * number of bits on in that nybble; the second row/column comment is
 * the hex digit.
 */

static int
p_synd_score_table[256] = {
	/* 0	1  1  2  1  2  2  3  1  2  2  3  2  3  3  4 */
	/* 0,	1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F */
/* 0 0 */  5, 95, 95, 80, 95, 85, 85, 65, 95, 85, 85, 65, 80, 65, 65, 45,
/* 1 1 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 1 2 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 2 3 */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
/* 1 4 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 2 5 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 2 6 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 3 7 */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 1 8 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 2 9 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 2 A */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 3 B */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 2 C */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
/* 3 D */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 3 E */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 4 F */ 45, 30, 30, 15, 30, 20, 20, 10, 30, 20, 20, 10, 15, 10, 10, 5,
};

int
ecc_psynd_score(ushort_t p_synd)
{
	int i, j, a, b;

	i = p_synd & 0xFF;
	j = (p_synd >> 8) & 0xFF;

	a = p_synd_score_table[i];
	b = p_synd_score_table[j];

	return (a > b ? a : b);
}
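
/*
 * Worked example (illustrative, not in the original source): for
 * p_synd == 0x0380, the low byte 0x80 has a single bit on and scores
 * p_synd_score_table[0x80] == 95, while the high byte 0x03 has two bits
 * on in the same nybble as an aligned pair and scores
 * p_synd_score_table[0x03] == 80; ecc_psynd_score(0x0380) therefore
 * returns the larger of the two scores, 95.
 */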

/*
 * Async Fault Logging
 *
 * To ease identifying, reading, and filtering async fault log messages, the
 * label [AFT#] is now prepended to each async fault message.  These messages
 * and the logging rules are implemented by cpu_aflt_log(), below.
 *
 * [AFT0] - Tag for log messages that are associated with corrected ECC errors.
 *          This includes both corrected ECC memory and ecache faults.
 *
 * [AFT1] - Tag for log messages that are not ECC corrected (i.e. everything
 *          else except CE errors) with a priority of 1 (highest).  This tag
 *          is also used for panic messages that result from an async fault.
 *
 * [AFT2] - These are lower priority diagnostic messages for uncorrected ECC
 * [AFT3]   or parity errors.  For example, AFT2 is used for the actual dump
 *          of the E-$ data and tags.
 *
 * In a non-DEBUG kernel, AFT > 1 logs will be sent to the system log but not
 * printed on the console.  To send all AFT logs to both the log and the
 * console, set aft_verbose = 1.
 */

#define	CPU_FLTCPU		0x0001	/* print flt_inst as a CPU id */
#define	CPU_SPACE		0x0002	/* print flt_status (data or instr) */
#define	CPU_ERRID		0x0004	/* print flt_id */
#define	CPU_TL			0x0008	/* print flt_tl */
#define	CPU_ERRID_FIRST		0x0010	/* print flt_id first in message */
#define	CPU_AFSR		0x0020	/* print flt_stat as decoded %afsr */
#define	CPU_AFAR		0x0040	/* print flt_addr as %afar */
#define	CPU_AF_PSYND		0x0080	/* print flt_stat %afsr.PSYND */
#define	CPU_AF_ETS		0x0100	/* print flt_stat %afsr.ETS */
#define	CPU_UDBH		0x0200	/* print flt_sdbh and syndrome */
#define	CPU_UDBL		0x0400	/* print flt_sdbl and syndrome */
#define	CPU_FAULTPC		0x0800	/* print flt_pc */
#define	CPU_SYND		0x1000	/* print flt_synd and unum */

#define	CMN_LFLAGS	(CPU_FLTCPU | CPU_SPACE | CPU_ERRID | CPU_TL |	\
				CPU_AFSR | CPU_AFAR | CPU_AF_PSYND |	\
				CPU_AF_ETS | CPU_UDBH | CPU_UDBL |	\
				CPU_FAULTPC)
#define	UE_LFLAGS	(CMN_LFLAGS | CPU_SYND)
#define	CE_LFLAGS	(UE_LFLAGS & ~CPU_UDBH & ~CPU_UDBL & ~CPU_TL & \
				~CPU_SPACE)
#define	PARERR_LFLAGS	(CMN_LFLAGS)
#define	WP_LFLAGS	(CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL)
#define	CP_LFLAGS	(CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL &	\
				~CPU_FLTCPU & ~CPU_FAULTPC)
#define	BERRTO_LFLAGS	(CMN_LFLAGS)
#define	NO_LFLAGS	(0)

#define	AFSR_FMTSTR0	"\020\1ME"
#define	AFSR_FMTSTR1	"\020\040PRIV\037ISAP\036ETP\035IVUE\034TO"	\
			"\033BERR\032LDP\031CP\030WP\027EDP\026UE\025CE"
#define	UDB_FMTSTR	"\020\012UE\011CE"

/*
 * Maximum number of contexts for Spitfire.
 */
#define	MAX_NCTXS	(1 << 13)

/*
 * Save the cache bootup state for use when internal
 * caches are to be re-enabled after an error occurs.
 */
uint64_t cache_boot_state = 0;

/*
 * PA[31:0] represent Displacement in UPA configuration space.
 */
uint_t	root_phys_addr_lo_mask = 0xffffffff;

/*
 * Spitfire legacy globals
 */
int	itlb_entries;
int	dtlb_entries;

void
cpu_setup(void)
{
	extern int page_retire_messages;
	extern int at_flags;
#if defined(SF_ERRATA_57)
	extern caddr_t errata57_limit;
#endif
	extern int disable_text_largepages;
	extern int disable_initdata_largepages;

	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);

	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1;

	/*
	 * Spitfire isn't currently FMA-aware, so we have to enable the
	 * page retirement messages.
	 */
	page_retire_messages = 1;

	/*
	 * save the cache bootup state.
	 */
	cache_boot_state = get_lsu() & (LSU_IC | LSU_DC);

	/*
	 * Use the maximum number of contexts available for Spitfire unless
	 * it has been tuned for debugging.
	 * We are checking against 0 here since this value can be patched
	 * while booting.  It can not be patched via /etc/system since it
	 * will be patched too late and thus cause the system to panic.
	 */
	if (nctxs == 0)
		nctxs = MAX_NCTXS;

	if (use_page_coloring) {
		do_pg_coloring = 1;
		if (use_virtual_coloring)
			do_virtual_coloring = 1;
	}

	/*
	 * Tune pp_slots to use up to 1/8th of the tlb entries.
	 */
	pp_slots = MIN(8, MAXPP_SLOTS);

	/*
	 * Block stores invalidate all pages of the d$ so pagecopy
	 * et al. do not need virtual translations with virtual
	 * coloring taken into consideration.
	 */
	pp_consistent_coloring = 0;

	isa_list =
	    "sparcv9+vis sparcv9 "
	    "sparcv8plus+vis sparcv8plus "
	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";

	cpu_hwcap_flags = AV_SPARC_VIS;

	/*
	 * On Spitfire, there's a hole in the address space
	 * that we must never map (the hardware only supports 44 bits of
	 * virtual address).  Later CPUs are expected to have wider
	 * supported address ranges.
	 *
	 * See address map on p23 of the UltraSPARC 1 user's manual.
	 */
	hole_start = (caddr_t)0x80000000000ull;
	hole_end = (caddr_t)0xfffff80000000000ull;

	/*
	 * A spitfire call bug requires us to be a further 4Gbytes of
	 * firewall from the spec.
	 *
	 * See Spitfire Errata #21
	 */
	hole_start = (caddr_t)((uintptr_t)hole_start - (1ul << 32));
	hole_end = (caddr_t)((uintptr_t)hole_end + (1ul << 32));

	/*
	 * The kpm mapping window.
	 * kpm_size:
	 *	The size of a single kpm range.
	 *	The overall size will be: kpm_size * vac_colors.
	 * kpm_vbase:
	 *	The virtual start address of the kpm range within the kernel
	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
	 */
	kpm_size = (size_t)(2ull * 1024 * 1024 * 1024 * 1024); /* 2TB */
	kpm_size_shift = 41;
	kpm_vbase = (caddr_t)0xfffffa0000000000ull; /* 16EB - 6TB */

#if defined(SF_ERRATA_57)
	errata57_limit = (caddr_t)0x80000000ul;
#endif

	/*
	 * Allow only 8K, 64K and 4M pages for text by default.
	 * Allow only 8K and 64K pages for initialized data segments by
	 * default.
	 */
	disable_text_largepages = (1 << TTE512K) | (1 << TTE32M) |
	    (1 << TTE256M);
	disable_initdata_largepages = (1 << TTE512K) | (1 << TTE4M) |
	    (1 << TTE32M) | (1 << TTE256M);
}

static int
getintprop(dnode_t node, char *name, int deflt)
{
	int	value;

	switch (prom_getproplen(node, name)) {
	case 0:
		value = 1;	/* boolean properties */
		break;

	case sizeof (int):
		(void) prom_getprop(node, name, (caddr_t)&value);
		break;

	default:
		value = deflt;
		break;
	}

	return (value);
}
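
/*
 * Usage sketch (illustrative, not in the original source): a zero-length
 * PROM property is treated as a boolean that is present, so
 * getintprop(node, "some-flag", 0) returns 1 when the property exists
 * with no value, the stored integer when the property is int-sized, and
 * the supplied default (here 0) in every other case, including when the
 * property is absent.
 */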

/*
 * Set the magic constants of the implementation.
 */
void
cpu_fiximp(dnode_t dnode)
{
	extern int vac_size, vac_shift;
	extern uint_t vac_mask;
	extern int dcache_line_mask;
	int i, a;
	static struct {
		char	*name;
		int	*var;
	} prop[] = {
		"dcache-size",		&dcache_size,
		"dcache-line-size",	&dcache_linesize,
		"icache-size",		&icache_size,
		"icache-line-size",	&icache_linesize,
		"ecache-size",		&ecache_size,
		"ecache-line-size",	&ecache_alignsize,
		"ecache-associativity",	&ecache_associativity,
		"#itlb-entries",	&itlb_entries,
		"#dtlb-entries",	&dtlb_entries,
	};

	for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++) {
		if ((a = getintprop(dnode, prop[i].name, -1)) != -1) {
			*prop[i].var = a;
		}
	}

	ecache_setsize = ecache_size / ecache_associativity;

	vac_size = S_VAC_SIZE;
	vac_mask = MMU_PAGEMASK & (vac_size - 1);
	i = 0; a = vac_size;
	while (a >>= 1)
		++i;		/* i ends up as log2(vac_size) */
	vac_shift = i;
	shm_alignment = vac_size;
	vac = 1;

	dcache_line_mask = (dcache_size - 1) & ~(dcache_linesize - 1);

	/*
	 * UltraSPARC I & II have ecache sizes running
	 * as follows: .25 MB, .5 MB, 1 MB, 2 MB, 4 MB
	 * and 8 MB. Adjust the copyin/copyout limits
	 * according to the cache size.  The magic number
	 * of VIS_COPY_THRESHOLD comes from the copyin/copyout code
	 * and its floor of VIS_COPY_THRESHOLD bytes before it will use
	 * VIS instructions.
	 *
	 * We assume that all CPUs on the system have the same size
	 * ecache.  We're also called very early in the game.
	 * /etc/system will be parsed *after* we're called so
	 * these values can be overwritten.
	 */

	hw_copy_limit_1 = VIS_COPY_THRESHOLD;
	if (ecache_size <= 524288) {
		hw_copy_limit_2 = VIS_COPY_THRESHOLD;
		hw_copy_limit_4 = VIS_COPY_THRESHOLD;
		hw_copy_limit_8 = VIS_COPY_THRESHOLD;
	} else if (ecache_size == 1048576) {
		hw_copy_limit_2 = 1024;
		hw_copy_limit_4 = 1280;
		hw_copy_limit_8 = 1536;
	} else if (ecache_size == 2097152) {
		hw_copy_limit_2 = 1536;
		hw_copy_limit_4 = 2048;
		hw_copy_limit_8 = 2560;
	} else if (ecache_size == 4194304) {
		hw_copy_limit_2 = 2048;
		hw_copy_limit_4 = 2560;
		hw_copy_limit_8 = 3072;
	} else {
		hw_copy_limit_2 = 2560;
		hw_copy_limit_4 = 3072;
		hw_copy_limit_8 = 3584;
	}
}

/*
 * Called by setcpudelay
 */
void
cpu_init_tick_freq(void)
{
	/*
	 * Determine the cpu frequency by calling
	 * tod_get_cpufrequency.  Use an approximate frequency
	 * value computed by the prom if the tod module
	 * is not initialized and loaded yet.
	 */
	if (tod_ops.tod_get_cpufrequency != NULL) {
		mutex_enter(&tod_lock);
		sys_tick_freq = tod_ops.tod_get_cpufrequency();
		mutex_exit(&tod_lock);
	} else {
#if defined(HUMMINGBIRD)
		/*
		 * the hummingbird version of %stick is used as the basis for
		 * low level timing; this provides an independent constant-rate
		 * clock for general system use, and frees power mgmt to set
		 * various cpu clock speeds.
		 */
		if (system_clock_freq == 0)
			cmn_err(CE_PANIC, "invalid system_clock_freq 0x%lx",
			    system_clock_freq);
		sys_tick_freq = system_clock_freq;
#else /* SPITFIRE */
		sys_tick_freq = cpunodes[CPU->cpu_id].clock_freq;
#endif
	}
}


void shipit(int upaid);
extern uint64_t xc_tick_limit;
extern uint64_t xc_tick_jump_limit;

#ifdef SEND_MONDO_STATS
uint64_t x_early[NCPU][64];
#endif

/*
 * Note: A version of this function is used by the debugger via the KDI,
 * and must be kept in sync with this version.
 * Any changes made to this function to support new chips or to
 * accommodate errata must also be included in the KDI-specific version.
 * See spitfire_kdi.c.
 */
void
send_one_mondo(int cpuid)
{
	uint64_t idsr, starttick, endtick;
	int upaid, busy, nack;
	uint64_t tick, tick_prev;
	ulong_t ticks;

	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
	upaid = CPUID_TO_UPAID(cpuid);
	tick = starttick = gettick();
	shipit(upaid);
	endtick = starttick + xc_tick_limit;
	busy = nack = 0;
	for (;;) {
		idsr = getidsr();
		if (idsr == 0)
			break;
		/*
		 * When we detect an irregular tick jump, we adjust
		 * the timer window to the current tick value.
		 */
		tick_prev = tick;
		tick = gettick();
		ticks = tick - tick_prev;
		if (ticks > xc_tick_jump_limit) {
			endtick = tick + xc_tick_limit;
		} else if (tick > endtick) {
			if (panic_quiesce)
				return;
			cmn_err(CE_PANIC,
			"send mondo timeout (target 0x%x) [%d NACK %d BUSY]",
			    upaid, nack, busy);
		}
		if (idsr & IDSR_BUSY) {
			busy++;
			continue;
		}
		drv_usecwait(1);
		shipit(upaid);
		nack++;
		busy = 0;
	}
#ifdef SEND_MONDO_STATS
	x_early[getprocessorid()][highbit(gettick() - starttick) - 1]++;
#endif
}

void
send_mondo_set(cpuset_t set)
{
	int i;

	for (i = 0; i < NCPU; i++)
		if (CPU_IN_SET(set, i)) {
			send_one_mondo(i);
			CPUSET_DEL(set, i);
			if (CPUSET_ISNULL(set))
				break;
		}
}
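
/*
 * Illustrative caller sketch (not in the original source; assumes the
 * usual cpuset_t macros from <sys/cpuvar.h>): a cross-call sender
 * typically builds a cpuset_t and hands it to send_mondo_set(), which
 * delivers one mondo per member and stops early once the set has been
 * emptied, e.g.:
 *
 *	cpuset_t set;
 *	CPUSET_ZERO(set);
 *	CPUSET_ADD(set, some_cpu_id);
 *	send_mondo_set(set);
 */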
We can't do this in cpu_setup() as it is 772*0Sstevel@tonic-gate * too early in the boot flow and the cpunodes are not initialized. 773*0Sstevel@tonic-gate * This routine will be called once after cpunodes[] is ready, so do 774*0Sstevel@tonic-gate * it here. 775*0Sstevel@tonic-gate */ 776*0Sstevel@tonic-gate if (cpunodes[CPU->cpu_id].implementation == SABRE_IMPL) { 777*0Sstevel@tonic-gate isus2i = 1; 778*0Sstevel@tonic-gate cpu_ec_tag_mask = SB_ECTAG_MASK; 779*0Sstevel@tonic-gate cpu_ec_state_mask = SB_ECSTATE_MASK; 780*0Sstevel@tonic-gate cpu_ec_par_mask = SB_ECPAR_MASK; 781*0Sstevel@tonic-gate cpu_ec_par_shift = SB_ECPAR_SHIFT; 782*0Sstevel@tonic-gate cpu_ec_tag_shift = SB_ECTAG_SHIFT; 783*0Sstevel@tonic-gate cpu_ec_state_shift = SB_ECSTATE_SHIFT; 784*0Sstevel@tonic-gate cpu_ec_state_exl = SB_ECSTATE_EXL; 785*0Sstevel@tonic-gate cpu_ec_state_mod = SB_ECSTATE_MOD; 786*0Sstevel@tonic-gate 787*0Sstevel@tonic-gate /* These states do not exist in sabre - set to 0xFF */ 788*0Sstevel@tonic-gate cpu_ec_state_shr = 0xFF; 789*0Sstevel@tonic-gate cpu_ec_state_own = 0xFF; 790*0Sstevel@tonic-gate 791*0Sstevel@tonic-gate cpu_ec_state_valid = SB_ECSTATE_VALID; 792*0Sstevel@tonic-gate cpu_ec_state_dirty = SB_ECSTATE_DIRTY; 793*0Sstevel@tonic-gate cpu_ec_state_parity = SB_ECSTATE_PARITY; 794*0Sstevel@tonic-gate cpu_ec_parity = SB_EC_PARITY; 795*0Sstevel@tonic-gate } else if (cpunodes[CPU->cpu_id].implementation == HUMMBRD_IMPL) { 796*0Sstevel@tonic-gate isus2e = 1; 797*0Sstevel@tonic-gate cpu_ec_tag_mask = HB_ECTAG_MASK; 798*0Sstevel@tonic-gate cpu_ec_state_mask = HB_ECSTATE_MASK; 799*0Sstevel@tonic-gate cpu_ec_par_mask = HB_ECPAR_MASK; 800*0Sstevel@tonic-gate cpu_ec_par_shift = HB_ECPAR_SHIFT; 801*0Sstevel@tonic-gate cpu_ec_tag_shift = HB_ECTAG_SHIFT; 802*0Sstevel@tonic-gate cpu_ec_state_shift = HB_ECSTATE_SHIFT; 803*0Sstevel@tonic-gate cpu_ec_state_exl = HB_ECSTATE_EXL; 804*0Sstevel@tonic-gate cpu_ec_state_mod = HB_ECSTATE_MOD; 805*0Sstevel@tonic-gate 806*0Sstevel@tonic-gate /* These states do not exist in hummingbird - set to 0xFF */ 807*0Sstevel@tonic-gate cpu_ec_state_shr = 0xFF; 808*0Sstevel@tonic-gate cpu_ec_state_own = 0xFF; 809*0Sstevel@tonic-gate 810*0Sstevel@tonic-gate cpu_ec_state_valid = HB_ECSTATE_VALID; 811*0Sstevel@tonic-gate cpu_ec_state_dirty = HB_ECSTATE_DIRTY; 812*0Sstevel@tonic-gate cpu_ec_state_parity = HB_ECSTATE_PARITY; 813*0Sstevel@tonic-gate cpu_ec_parity = HB_EC_PARITY; 814*0Sstevel@tonic-gate } 815*0Sstevel@tonic-gate 816*0Sstevel@tonic-gate return (sizeof (spitf_async_flt)); 817*0Sstevel@tonic-gate } 818*0Sstevel@tonic-gate 819*0Sstevel@tonic-gate 820*0Sstevel@tonic-gate /* 821*0Sstevel@tonic-gate * Correctable ecc error trap handler 822*0Sstevel@tonic-gate */ 823*0Sstevel@tonic-gate /*ARGSUSED*/ 824*0Sstevel@tonic-gate void 825*0Sstevel@tonic-gate cpu_ce_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr, 826*0Sstevel@tonic-gate uint_t p_afsr_high, uint_t p_afar_high) 827*0Sstevel@tonic-gate { 828*0Sstevel@tonic-gate ushort_t sdbh, sdbl; 829*0Sstevel@tonic-gate ushort_t e_syndh, e_syndl; 830*0Sstevel@tonic-gate spitf_async_flt spf_flt; 831*0Sstevel@tonic-gate struct async_flt *ecc; 832*0Sstevel@tonic-gate int queue = 1; 833*0Sstevel@tonic-gate 834*0Sstevel@tonic-gate uint64_t t_afar = p_afar; 835*0Sstevel@tonic-gate uint64_t t_afsr = p_afsr; 836*0Sstevel@tonic-gate 837*0Sstevel@tonic-gate /* 838*0Sstevel@tonic-gate * Note: the Spitfire data buffer error registers 839*0Sstevel@tonic-gate * (upper and lower halves) are or'ed into the upper 840*0Sstevel@tonic-gate * word of 
	 * word of the afsr by ce_err().
	 */
	sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF);
	sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF);

	e_syndh = (uchar_t)(sdbh & (uint_t)P_DER_E_SYND);
	e_syndl = (uchar_t)(sdbl & (uint_t)P_DER_E_SYND);

	t_afsr &= S_AFSR_MASK;
	t_afar &= SABRE_AFAR_PA;	/* must use Sabre AFAR mask */

	/* Setup the async fault structure */
	bzero(&spf_flt, sizeof (spitf_async_flt));
	ecc = (struct async_flt *)&spf_flt;
	ecc->flt_id = gethrtime_waitfree();
	ecc->flt_stat = t_afsr;
	ecc->flt_addr = t_afar;
	ecc->flt_status = ECC_C_TRAP;
	ecc->flt_bus_id = getprocessorid();
	ecc->flt_inst = CPU->cpu_id;
	ecc->flt_pc = (caddr_t)rp->r_pc;
	ecc->flt_func = log_ce_err;
	ecc->flt_in_memory =
	    (pf_is_memory(ecc->flt_addr >> MMU_PAGESHIFT)) ? 1 : 0;
	spf_flt.flt_sdbh = sdbh;
	spf_flt.flt_sdbl = sdbl;

	/*
	 * Check for fatal conditions.
	 */
	check_misc_err(&spf_flt);

	/*
	 * Paranoid checks for valid AFSR and UDBs
	 */
	if ((t_afsr & P_AFSR_CE) == 0) {
		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
		    "** Panic due to CE bit not set in the AFSR",
		    " Corrected Memory Error on");
	}

	/*
	 * We want to skip logging only if ALL the following
	 * conditions are true:
	 *
	 *	1. There is only one error
	 *	2. That error is a correctable memory error
	 *	3. The error is caused by the memory scrubber (in which case
	 *	    the error will have occurred under on_trap protection)
	 *	4. The error is on a retired page
	 *
	 * Note: OT_DATA_EC is used places other than the memory scrubber.
	 * However, none of those errors should occur on a retired page.
	 */
	if ((ecc->flt_stat & (S_AFSR_ALL_ERRS & ~P_AFSR_ME)) == P_AFSR_CE &&
	    curthread->t_ontrap != NULL) {

		if (curthread->t_ontrap->ot_prot & OT_DATA_EC) {
			page_t *pp = page_numtopp_nolock((pfn_t)
			    (ecc->flt_addr >> MMU_PAGESHIFT));

			if (pp != NULL && page_isretired(pp)) {
				queue = 0;
			}
		}
	}

	if (((sdbh & P_DER_CE) == 0) && ((sdbl & P_DER_CE) == 0)) {
		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
		    "** Panic due to CE bits not set in the UDBs",
		    " Corrected Memory Error on");
	}

	if ((sdbh >> 8) & 1) {
		ecc->flt_synd = e_syndh;
		ce_scrub(ecc);
		if (queue) {
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
			    sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
		}
	}

	if ((sdbl >> 8) & 1) {
		ecc->flt_addr = t_afar | 0x8;	/* Sabres do not have a UDBL */
		ecc->flt_synd = e_syndl | UDBL_REG;
		ce_scrub(ecc);
		if (queue) {
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
			    sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
		}
	}

	/*
	 * Re-enable all error trapping (CEEN currently cleared).
	 */
	clr_datapath();
	set_asyncflt(P_AFSR_CE);
	set_error_enable(EER_ENABLE);
}

/*
 * Cpu specific CE logging routine
 */
static void
log_ce_err(struct async_flt *aflt, char *unum)
{
	spitf_async_flt spf_flt;

	if ((aflt->flt_stat & P_AFSR_CE) && (ce_verbose_memory == 0)) {
		return;
	}

	spf_flt.cmn_asyncflt = *aflt;
	cpu_aflt_log(CE_CONT, 0, &spf_flt, CE_LFLAGS, unum,
	    " Corrected Memory Error detected by");
}

/*
 * Spitfire does not perform any further CE classification refinement
 */
/*ARGSUSED*/
int
ce_scrub_xdiag_recirc(struct async_flt *ecc, errorq_t *eqp, errorq_elem_t *eqep,
    size_t afltoffset)
{
	return (0);
}

char *
flt_to_error_type(struct async_flt *aflt)
{
	if (aflt->flt_status & ECC_INTERMITTENT)
		return (ERR_TYPE_DESC_INTERMITTENT);
	if (aflt->flt_status & ECC_PERSISTENT)
		return (ERR_TYPE_DESC_PERSISTENT);
	if (aflt->flt_status & ECC_STICKY)
		return (ERR_TYPE_DESC_STICKY);
	return (ERR_TYPE_DESC_UNKNOWN);
}
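
/*
 * Note (added for clarity, not in the original source): the checks above
 * are ordered intermittent, then persistent, then sticky, so if more than
 * one classification flag were ever set in flt_status the first match
 * would win; cpu_ce_scrub_mem_err() below only ever leaves one of the
 * three flags set, so in practice the order does not matter.
 */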

/*
 * Called by correctable ecc error logging code to print out
 * the sticky/persistent/intermittent status of the error.
 */
static void
cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum)
{
	ushort_t status;
	char *status1_str = "Memory";
	char *status2_str = "Intermittent";
	struct async_flt *aflt = (struct async_flt *)spf_flt;

	status = aflt->flt_status;

	if (status & ECC_ECACHE)
		status1_str = "Ecache";

	if (status & ECC_STICKY)
		status2_str = "Sticky";
	else if (status & ECC_PERSISTENT)
		status2_str = "Persistent";

	cpu_aflt_log(CE_CONT, 0, spf_flt, CPU_ERRID_FIRST,
	    NULL, " Corrected %s Error on %s is %s",
	    status1_str, unum, status2_str);
}
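
/*
 * Example output (illustrative; the unum string "U0501" is hypothetical):
 * a persistent memory CE on that DIMM is logged as
 * " Corrected Memory Error on U0501 is Persistent", while an E$ error
 * with ECC_ECACHE set would read "Corrected Ecache Error ..." instead.
 */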

/*
 * check for a valid ce syndrome, then call the
 * displacement flush scrubbing code, and then check the afsr to see if
 * the error was persistent or intermittent.  Reread the afar/afsr to see
 * if the error was not scrubbed successfully, and is therefore sticky.
 */
/*ARGSUSED1*/
void
cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t triedcpulogout)
{
	uint64_t eer, afsr;
	ushort_t status;

	ASSERT(getpil() > LOCK_LEVEL);

	/*
	 * It is possible that the flt_addr is not a valid
	 * physical address. To deal with this, we disable
	 * NCEEN while we scrub that address. If this causes
	 * a TIMEOUT/BERR, we know this is an invalid
	 * memory location.
	 */
	kpreempt_disable();
	eer = get_error_enable();
	if (eer & (EER_CEEN | EER_NCEEN))
		set_error_enable(eer & ~(EER_CEEN | EER_NCEEN));

	/*
	 * Check whether the error detected by IO is persistent, sticky or
	 * intermittent.
	 */
	if (ecc->flt_status & ECC_IOBUS) {
		ecc->flt_stat = P_AFSR_CE;
	}

	scrubphys(P2ALIGN(ecc->flt_addr, 64),
	    cpunodes[CPU->cpu_id].ecache_size);

	get_asyncflt(&afsr);
	if (afsr & (P_AFSR_TO | P_AFSR_BERR)) {
		/*
		 * Must ensure that we don't get the TIMEOUT/BERR
		 * when we reenable NCEEN, so we clear the AFSR.
		 */
		set_asyncflt(afsr & (P_AFSR_TO | P_AFSR_BERR));
		if (eer & (EER_CEEN | EER_NCEEN))
			set_error_enable(eer);
		kpreempt_enable();
		return;
	}

	if (eer & EER_NCEEN)
		set_error_enable(eer & ~EER_CEEN);

	/*
	 * Check and clear any ECC errors from the scrub.  If the scrub did
	 * not trip over the error, mark it intermittent.  If the scrub did
	 * trip the error again and it did not scrub away, mark it sticky.
	 * Otherwise mark it persistent.
	 */
	if (check_ecc(ecc) != 0) {
		cpu_read_paddr(ecc, 0, 1);

		if (check_ecc(ecc) != 0)
			status = ECC_STICKY;
		else
			status = ECC_PERSISTENT;
	} else
		status = ECC_INTERMITTENT;

	if (eer & (EER_CEEN | EER_NCEEN))
		set_error_enable(eer);
	kpreempt_enable();

	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
	ecc->flt_status |= status;
}

/*
 * get the syndrome and unum, and then call the routines
 * to check the other cpus and iobuses, and then do the error logging.
 */
/*ARGSUSED1*/
void
cpu_ce_log_err(struct async_flt *ecc, errorq_elem_t *eqep)
{
	char unum[UNUM_NAMLEN];
	int len = 0;
	int ce_verbose = 0;

	ASSERT(ecc->flt_func != NULL);

	/* Get the unum string for logging purposes */
	(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, ecc, unum,
	    UNUM_NAMLEN, &len);

	/* Call specific error logging routine */
	(void) (*ecc->flt_func)(ecc, unum);

	/*
	 * Count errors per unum.
	 * Non-memory errors are all counted via a special unum string.
1109*0Sstevel@tonic-gate */ 1110*0Sstevel@tonic-gate if (ce_count_unum(ecc->flt_status, len, unum) == PAGE_IS_FAILING && 1111*0Sstevel@tonic-gate automatic_page_removal) { 1112*0Sstevel@tonic-gate page_t *pp = page_numtopp_nolock((pfn_t) 1113*0Sstevel@tonic-gate (ecc->flt_addr >> MMU_PAGESHIFT)); 1114*0Sstevel@tonic-gate 1115*0Sstevel@tonic-gate if (pp) { 1116*0Sstevel@tonic-gate page_settoxic(pp, PAGE_IS_FAULTY); 1117*0Sstevel@tonic-gate (void) page_retire(pp, PAGE_IS_FAILING); 1118*0Sstevel@tonic-gate } 1119*0Sstevel@tonic-gate } 1120*0Sstevel@tonic-gate 1121*0Sstevel@tonic-gate if (ecc->flt_panic) { 1122*0Sstevel@tonic-gate ce_verbose = 1; 1123*0Sstevel@tonic-gate } else if ((ecc->flt_class == BUS_FAULT) || 1124*0Sstevel@tonic-gate (ecc->flt_stat & P_AFSR_CE)) { 1125*0Sstevel@tonic-gate ce_verbose = (ce_verbose_memory > 0); 1126*0Sstevel@tonic-gate } else { 1127*0Sstevel@tonic-gate ce_verbose = 1; 1128*0Sstevel@tonic-gate } 1129*0Sstevel@tonic-gate 1130*0Sstevel@tonic-gate if (ce_verbose) { 1131*0Sstevel@tonic-gate spitf_async_flt sflt; 1132*0Sstevel@tonic-gate int synd_code; 1133*0Sstevel@tonic-gate 1134*0Sstevel@tonic-gate sflt.cmn_asyncflt = *ecc; /* for cpu_aflt_log() */ 1135*0Sstevel@tonic-gate 1136*0Sstevel@tonic-gate cpu_ce_log_status(&sflt, unum); 1137*0Sstevel@tonic-gate 1138*0Sstevel@tonic-gate synd_code = synd_to_synd_code(AFLT_STAT_VALID, 1139*0Sstevel@tonic-gate SYND(ecc->flt_synd)); 1140*0Sstevel@tonic-gate 1141*0Sstevel@tonic-gate if (SYND_IS_SINGLE_BIT_DATA(synd_code)) { 1142*0Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST, 1143*0Sstevel@tonic-gate NULL, " ECC Data Bit %2d was in error " 1144*0Sstevel@tonic-gate "and corrected", synd_code); 1145*0Sstevel@tonic-gate } else if (SYND_IS_SINGLE_BIT_CHK(synd_code)) { 1146*0Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST, 1147*0Sstevel@tonic-gate NULL, " ECC Check Bit %2d was in error " 1148*0Sstevel@tonic-gate "and corrected", synd_code - C0); 1149*0Sstevel@tonic-gate } else { 1150*0Sstevel@tonic-gate /* 1151*0Sstevel@tonic-gate * These are UE errors - we shouldn't be getting CE 1152*0Sstevel@tonic-gate * traps for these; handle them in case of bad h/w. 
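 *
 * A worked example of the decode above (table entry values assumed
 * for illustration): an ecc_syndrome_tab entry of 5 logs "ECC Data
 * Bit  5 was in error and corrected"; an entry of C0 + 2 logs "ECC
 * Check Bit  2"; entries of M2, M3, M4 or MX fall into the
 * multi-bit cases handled below.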
1153*0Sstevel@tonic-gate */ 1154*0Sstevel@tonic-gate switch (synd_code) { 1155*0Sstevel@tonic-gate case M2: 1156*0Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 0, &sflt, 1157*0Sstevel@tonic-gate CPU_ERRID_FIRST, NULL, 1158*0Sstevel@tonic-gate " Two ECC Bits were in error"); 1159*0Sstevel@tonic-gate break; 1160*0Sstevel@tonic-gate case M3: 1161*0Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 0, &sflt, 1162*0Sstevel@tonic-gate CPU_ERRID_FIRST, NULL, 1163*0Sstevel@tonic-gate " Three ECC Bits were in error"); 1164*0Sstevel@tonic-gate break; 1165*0Sstevel@tonic-gate case M4: 1166*0Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 0, &sflt, 1167*0Sstevel@tonic-gate CPU_ERRID_FIRST, NULL, 1168*0Sstevel@tonic-gate " Four ECC Bits were in error"); 1169*0Sstevel@tonic-gate break; 1170*0Sstevel@tonic-gate case MX: 1171*0Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 0, &sflt, 1172*0Sstevel@tonic-gate CPU_ERRID_FIRST, NULL, 1173*0Sstevel@tonic-gate " More than Four ECC bits were " 1174*0Sstevel@tonic-gate "in error"); 1175*0Sstevel@tonic-gate break; 1176*0Sstevel@tonic-gate default: 1177*0Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 0, &sflt, 1178*0Sstevel@tonic-gate CPU_ERRID_FIRST, NULL, 1179*0Sstevel@tonic-gate " Unknown fault syndrome %d", 1180*0Sstevel@tonic-gate synd_code); 1181*0Sstevel@tonic-gate break; 1182*0Sstevel@tonic-gate } 1183*0Sstevel@tonic-gate } 1184*0Sstevel@tonic-gate } 1185*0Sstevel@tonic-gate 1186*0Sstevel@tonic-gate /* Display entire cache line, if valid address */ 1187*0Sstevel@tonic-gate if (ce_show_data && ecc->flt_addr != AFLT_INV_ADDR) 1188*0Sstevel@tonic-gate read_ecc_data(ecc, 1, 1); 1189*0Sstevel@tonic-gate } 1190*0Sstevel@tonic-gate 1191*0Sstevel@tonic-gate /* 1192*0Sstevel@tonic-gate * We route all errors through a single switch statement. 1193*0Sstevel@tonic-gate */ 1194*0Sstevel@tonic-gate void 1195*0Sstevel@tonic-gate cpu_ue_log_err(struct async_flt *aflt) 1196*0Sstevel@tonic-gate { 1197*0Sstevel@tonic-gate 1198*0Sstevel@tonic-gate switch (aflt->flt_class) { 1199*0Sstevel@tonic-gate case CPU_FAULT: 1200*0Sstevel@tonic-gate cpu_async_log_err(aflt); 1201*0Sstevel@tonic-gate break; 1202*0Sstevel@tonic-gate 1203*0Sstevel@tonic-gate case BUS_FAULT: 1204*0Sstevel@tonic-gate bus_async_log_err(aflt); 1205*0Sstevel@tonic-gate break; 1206*0Sstevel@tonic-gate 1207*0Sstevel@tonic-gate default: 1208*0Sstevel@tonic-gate cmn_err(CE_WARN, "discarding async error 0x%p with invalid " 1209*0Sstevel@tonic-gate "fault class (0x%x)", (void *)aflt, aflt->flt_class); 1210*0Sstevel@tonic-gate break; 1211*0Sstevel@tonic-gate } 1212*0Sstevel@tonic-gate } 1213*0Sstevel@tonic-gate 1214*0Sstevel@tonic-gate /* Values for action variable in cpu_async_error() */ 1215*0Sstevel@tonic-gate #define ACTION_NONE 0 1216*0Sstevel@tonic-gate #define ACTION_TRAMPOLINE 1 1217*0Sstevel@tonic-gate #define ACTION_AST_FLAGS 2 1218*0Sstevel@tonic-gate 1219*0Sstevel@tonic-gate /* 1220*0Sstevel@tonic-gate * Access error trap handler for asynchronous cpu errors. This routine is 1221*0Sstevel@tonic-gate * called to handle a data or instruction access error. All fatal errors are 1222*0Sstevel@tonic-gate * completely handled by this routine (by panicking). Non fatal error logging 1223*0Sstevel@tonic-gate * is queued for later processing either via AST or softint at a lower PIL. 1224*0Sstevel@tonic-gate * In case of panic, the error log queue will also be processed as part of the 1225*0Sstevel@tonic-gate * panic flow to ensure all errors are logged. This routine is called with all 1226*0Sstevel@tonic-gate * errors disabled at PIL15. 
The AFSR bits are cleared and the UDBL and UDBH 1227*0Sstevel@tonic-gate * error bits are also cleared. The hardware has also disabled the I and 1228*0Sstevel@tonic-gate * D-caches for us, so we must re-enable them before returning. 1229*0Sstevel@tonic-gate * 1230*0Sstevel@tonic-gate * A summary of the handling of tl=0 UE/LDP/EDP/TO/BERR/WP/CP: 1231*0Sstevel@tonic-gate * 1232*0Sstevel@tonic-gate * _______________________________________________________________ 1233*0Sstevel@tonic-gate * | Privileged tl0 | Unprivileged | 1234*0Sstevel@tonic-gate * | Protected | Unprotected | Protected | Unprotected | 1235*0Sstevel@tonic-gate * |on_trap|lofault| | | | 1236*0Sstevel@tonic-gate * -------------|-------|-------+---------------+---------------+-------------| 1237*0Sstevel@tonic-gate * | | | | | | 1238*0Sstevel@tonic-gate * UE/LDP/EDP | L,T,p | L,R,p | L,P | n/a | L,R,p | 1239*0Sstevel@tonic-gate * | | | | | | 1240*0Sstevel@tonic-gate * TO/BERR | T | S | L,P | n/a | S | 1241*0Sstevel@tonic-gate * | | | | | | 1242*0Sstevel@tonic-gate * WP | L,M,p | L,M,p | L,M,p | n/a | L,M,p | 1243*0Sstevel@tonic-gate * | | | | | | 1244*0Sstevel@tonic-gate * CP (IIi/IIe) | L,P | L,P | L,P | n/a | L,P | 1245*0Sstevel@tonic-gate * ____________________________________________________________________________ 1246*0Sstevel@tonic-gate * 1247*0Sstevel@tonic-gate * 1248*0Sstevel@tonic-gate * Action codes: 1249*0Sstevel@tonic-gate * 1250*0Sstevel@tonic-gate * L - log 1251*0Sstevel@tonic-gate * M - kick off memscrubber if flt_in_memory 1252*0Sstevel@tonic-gate * P - panic 1253*0Sstevel@tonic-gate * p - panic if US-IIi or US-IIe (Sabre); overrides R and M 1254*0Sstevel@tonic-gate * R - i) if aft_panic is set, panic 1255*0Sstevel@tonic-gate * ii) otherwise, send hwerr event to contract and SIGKILL to process 1256*0Sstevel@tonic-gate * S - send SIGBUS to process 1257*0Sstevel@tonic-gate * T - trampoline 1258*0Sstevel@tonic-gate * 1259*0Sstevel@tonic-gate * Special cases: 1260*0Sstevel@tonic-gate * 1261*0Sstevel@tonic-gate * 1) if aft_testfatal is set, all faults result in a panic regardless 1262*0Sstevel@tonic-gate * of type (even WP), protection (even on_trap), or privilege. 1263*0Sstevel@tonic-gate */ 1264*0Sstevel@tonic-gate /*ARGSUSED*/ 1265*0Sstevel@tonic-gate void 1266*0Sstevel@tonic-gate cpu_async_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr, 1267*0Sstevel@tonic-gate uint_t p_afsr_high, uint_t p_afar_high) 1268*0Sstevel@tonic-gate { 1269*0Sstevel@tonic-gate ushort_t sdbh, sdbl, ttype, tl; 1270*0Sstevel@tonic-gate spitf_async_flt spf_flt; 1271*0Sstevel@tonic-gate struct async_flt *aflt; 1272*0Sstevel@tonic-gate char pr_reason[28]; 1273*0Sstevel@tonic-gate uint64_t oafsr; 1274*0Sstevel@tonic-gate uint64_t acc_afsr = 0; /* accumulated afsr */ 1275*0Sstevel@tonic-gate int action = ACTION_NONE; 1276*0Sstevel@tonic-gate uint64_t t_afar = p_afar; 1277*0Sstevel@tonic-gate uint64_t t_afsr = p_afsr; 1278*0Sstevel@tonic-gate int expected = DDI_FM_ERR_UNEXPECTED; 1279*0Sstevel@tonic-gate ddi_acc_hdl_t *hp; 1280*0Sstevel@tonic-gate 1281*0Sstevel@tonic-gate /* 1282*0Sstevel@tonic-gate * We need to look at p_flag to determine if the thread detected an 1283*0Sstevel@tonic-gate * error while dumping core. We can't grab p_lock here, but it's ok 1284*0Sstevel@tonic-gate * because we just need a consistent snapshot and we know that everyone 1285*0Sstevel@tonic-gate * else will store a consistent set of bits while holding p_lock. 
We 1286*0Sstevel@tonic-gate * don't have to worry about a race because SDOCORE is set once prior 1287*0Sstevel@tonic-gate * to doing i/o from the process's address space and is never cleared. 1288*0Sstevel@tonic-gate */ 1289*0Sstevel@tonic-gate uint_t pflag = ttoproc(curthread)->p_flag; 1290*0Sstevel@tonic-gate 1291*0Sstevel@tonic-gate pr_reason[0] = '\0'; 1292*0Sstevel@tonic-gate 1293*0Sstevel@tonic-gate /* 1294*0Sstevel@tonic-gate * Note: the Spitfire data buffer error registers 1295*0Sstevel@tonic-gate * (upper and lower halves) are or'ed into the upper 1296*0Sstevel@tonic-gate * word of the afsr by async_err() if P_AFSR_UE is set. 1297*0Sstevel@tonic-gate */ 1298*0Sstevel@tonic-gate sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF); 1299*0Sstevel@tonic-gate sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF); 1300*0Sstevel@tonic-gate 1301*0Sstevel@tonic-gate /* 1302*0Sstevel@tonic-gate * Grab the ttype encoded in <63:53> of the saved 1303*0Sstevel@tonic-gate * afsr passed from async_err() 1304*0Sstevel@tonic-gate */ 1305*0Sstevel@tonic-gate ttype = (ushort_t)((t_afsr >> 53) & 0x1FF); 1306*0Sstevel@tonic-gate tl = (ushort_t)(t_afsr >> 62); 1307*0Sstevel@tonic-gate 1308*0Sstevel@tonic-gate t_afsr &= S_AFSR_MASK; 1309*0Sstevel@tonic-gate t_afar &= SABRE_AFAR_PA; /* must use Sabre AFAR mask */ 1310*0Sstevel@tonic-gate 1311*0Sstevel@tonic-gate /* 1312*0Sstevel@tonic-gate * Initialize most of the common and CPU-specific structure. We derive 1313*0Sstevel@tonic-gate * aflt->flt_priv from %tstate, instead of from the AFSR.PRIV bit. The 1314*0Sstevel@tonic-gate * initial setting of aflt->flt_panic is based on TL: we must panic if 1315*0Sstevel@tonic-gate * the error occurred at TL > 0. We also set flt_panic if the test/demo 1316*0Sstevel@tonic-gate * tuneable aft_testfatal is set (not the default). 1317*0Sstevel@tonic-gate */ 1318*0Sstevel@tonic-gate bzero(&spf_flt, sizeof (spitf_async_flt)); 1319*0Sstevel@tonic-gate aflt = (struct async_flt *)&spf_flt; 1320*0Sstevel@tonic-gate aflt->flt_id = gethrtime_waitfree(); 1321*0Sstevel@tonic-gate aflt->flt_stat = t_afsr; 1322*0Sstevel@tonic-gate aflt->flt_addr = t_afar; 1323*0Sstevel@tonic-gate aflt->flt_bus_id = getprocessorid(); 1324*0Sstevel@tonic-gate aflt->flt_inst = CPU->cpu_id; 1325*0Sstevel@tonic-gate aflt->flt_pc = (caddr_t)rp->r_pc; 1326*0Sstevel@tonic-gate aflt->flt_prot = AFLT_PROT_NONE; 1327*0Sstevel@tonic-gate aflt->flt_class = CPU_FAULT; 1328*0Sstevel@tonic-gate aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0; 1329*0Sstevel@tonic-gate aflt->flt_tl = (uchar_t)tl; 1330*0Sstevel@tonic-gate aflt->flt_panic = (tl != 0 || aft_testfatal != 0); 1331*0Sstevel@tonic-gate aflt->flt_core = (pflag & SDOCORE) ? 1 : 0; 1332*0Sstevel@tonic-gate 1333*0Sstevel@tonic-gate /* 1334*0Sstevel@tonic-gate * Set flt_status based on the trap type. If we end up here as the 1335*0Sstevel@tonic-gate * result of a UE detected by the CE handling code, leave status 0. 1336*0Sstevel@tonic-gate */ 1337*0Sstevel@tonic-gate switch (ttype) { 1338*0Sstevel@tonic-gate case T_DATA_ERROR: 1339*0Sstevel@tonic-gate aflt->flt_status = ECC_D_TRAP; 1340*0Sstevel@tonic-gate break; 1341*0Sstevel@tonic-gate case T_INSTR_ERROR: 1342*0Sstevel@tonic-gate aflt->flt_status = ECC_I_TRAP; 1343*0Sstevel@tonic-gate break; 1344*0Sstevel@tonic-gate } 1345*0Sstevel@tonic-gate 1346*0Sstevel@tonic-gate spf_flt.flt_sdbh = sdbh; 1347*0Sstevel@tonic-gate spf_flt.flt_sdbl = sdbl; 1348*0Sstevel@tonic-gate 1349*0Sstevel@tonic-gate /* 1350*0Sstevel@tonic-gate * Check for fatal async errors. 
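 * (ISAP, ETP and IVUE; any of these panics via check_misc_err(),
 * defined later in this file.)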
1351*0Sstevel@tonic-gate */ 1352*0Sstevel@tonic-gate check_misc_err(&spf_flt); 1353*0Sstevel@tonic-gate 1354*0Sstevel@tonic-gate /* 1355*0Sstevel@tonic-gate * If the trap occurred in privileged mode at TL=0, we need to check to 1356*0Sstevel@tonic-gate * see if we were executing in the kernel under on_trap() or t_lofault 1357*0Sstevel@tonic-gate * protection. If so, modify the saved registers so that we return 1358*0Sstevel@tonic-gate * from the trap to the appropriate trampoline routine. 1359*0Sstevel@tonic-gate */ 1360*0Sstevel@tonic-gate if (aflt->flt_priv && tl == 0) { 1361*0Sstevel@tonic-gate if (curthread->t_ontrap != NULL) { 1362*0Sstevel@tonic-gate on_trap_data_t *otp = curthread->t_ontrap; 1363*0Sstevel@tonic-gate 1364*0Sstevel@tonic-gate if (otp->ot_prot & OT_DATA_EC) { 1365*0Sstevel@tonic-gate aflt->flt_prot = AFLT_PROT_EC; 1366*0Sstevel@tonic-gate otp->ot_trap |= OT_DATA_EC; 1367*0Sstevel@tonic-gate rp->r_pc = otp->ot_trampoline; 1368*0Sstevel@tonic-gate rp->r_npc = rp->r_pc + 4; 1369*0Sstevel@tonic-gate action = ACTION_TRAMPOLINE; 1370*0Sstevel@tonic-gate } 1371*0Sstevel@tonic-gate 1372*0Sstevel@tonic-gate if ((t_afsr & (P_AFSR_TO | P_AFSR_BERR)) && 1373*0Sstevel@tonic-gate (otp->ot_prot & OT_DATA_ACCESS)) { 1374*0Sstevel@tonic-gate aflt->flt_prot = AFLT_PROT_ACCESS; 1375*0Sstevel@tonic-gate otp->ot_trap |= OT_DATA_ACCESS; 1376*0Sstevel@tonic-gate rp->r_pc = otp->ot_trampoline; 1377*0Sstevel@tonic-gate rp->r_npc = rp->r_pc + 4; 1378*0Sstevel@tonic-gate action = ACTION_TRAMPOLINE; 1379*0Sstevel@tonic-gate /* 1380*0Sstevel@tonic-gate * For peeks and caut_gets, errors are expected 1381*0Sstevel@tonic-gate */ 1382*0Sstevel@tonic-gate hp = (ddi_acc_hdl_t *)otp->ot_handle; 1383*0Sstevel@tonic-gate if (!hp) 1384*0Sstevel@tonic-gate expected = DDI_FM_ERR_PEEK; 1385*0Sstevel@tonic-gate else if (hp->ah_acc.devacc_attr_access == 1386*0Sstevel@tonic-gate DDI_CAUTIOUS_ACC) 1387*0Sstevel@tonic-gate expected = DDI_FM_ERR_EXPECTED; 1388*0Sstevel@tonic-gate } 1389*0Sstevel@tonic-gate 1390*0Sstevel@tonic-gate } else if (curthread->t_lofault) { 1391*0Sstevel@tonic-gate aflt->flt_prot = AFLT_PROT_COPY; 1392*0Sstevel@tonic-gate rp->r_g1 = EFAULT; 1393*0Sstevel@tonic-gate rp->r_pc = curthread->t_lofault; 1394*0Sstevel@tonic-gate rp->r_npc = rp->r_pc + 4; 1395*0Sstevel@tonic-gate action = ACTION_TRAMPOLINE; 1396*0Sstevel@tonic-gate } 1397*0Sstevel@tonic-gate } 1398*0Sstevel@tonic-gate 1399*0Sstevel@tonic-gate /* 1400*0Sstevel@tonic-gate * Determine if this error needs to be treated as fatal. Note that 1401*0Sstevel@tonic-gate * multiple errors detected upon entry to this trap handler do not 1402*0Sstevel@tonic-gate * necessarily warrant a panic. We only want to panic if the trap 1403*0Sstevel@tonic-gate * happened in privileged mode and not under t_ontrap or t_lofault 1404*0Sstevel@tonic-gate * protection. The exception is WP: if we *only* get WP, it is not 1405*0Sstevel@tonic-gate * fatal even if the trap occurred in privileged mode, except on Sabre. 1406*0Sstevel@tonic-gate * 1407*0Sstevel@tonic-gate * aft_panic, if set, effectively makes us treat usermode 1408*0Sstevel@tonic-gate * UE/EDP/LDP faults as if they were privileged - so we will 1409*0Sstevel@tonic-gate * panic instead of sending a contract event. A lofault-protected 1410*0Sstevel@tonic-gate * fault will normally follow the contract event; if aft_panic is 1411*0Sstevel@tonic-gate * set this will be changed to a panic.
1412*0Sstevel@tonic-gate * 1413*0Sstevel@tonic-gate * For usermode BERR/BTO errors, e.g., from processes performing device 1414*0Sstevel@tonic-gate * control through mapped device memory, we need only deliver 1415*0Sstevel@tonic-gate * a SIGBUS to the offending process. 1416*0Sstevel@tonic-gate * 1417*0Sstevel@tonic-gate * Some additional flt_panic reasons (e.g., WP on Sabre) will be 1418*0Sstevel@tonic-gate * checked later; for now we implement the common reasons. 1419*0Sstevel@tonic-gate */ 1420*0Sstevel@tonic-gate if (aflt->flt_prot == AFLT_PROT_NONE) { 1421*0Sstevel@tonic-gate /* 1422*0Sstevel@tonic-gate * Beware - multiple bits may be set in AFSR 1423*0Sstevel@tonic-gate */ 1424*0Sstevel@tonic-gate if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP)) { 1425*0Sstevel@tonic-gate if (aflt->flt_priv || aft_panic) 1426*0Sstevel@tonic-gate aflt->flt_panic = 1; 1427*0Sstevel@tonic-gate } 1428*0Sstevel@tonic-gate 1429*0Sstevel@tonic-gate if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) { 1430*0Sstevel@tonic-gate if (aflt->flt_priv) 1431*0Sstevel@tonic-gate aflt->flt_panic = 1; 1432*0Sstevel@tonic-gate } 1433*0Sstevel@tonic-gate } else if (aflt->flt_prot == AFLT_PROT_COPY && aft_panic) { 1434*0Sstevel@tonic-gate aflt->flt_panic = 1; 1435*0Sstevel@tonic-gate } 1436*0Sstevel@tonic-gate 1437*0Sstevel@tonic-gate /* 1438*0Sstevel@tonic-gate * UE/BERR/TO: Call our bus nexus friends to check for 1439*0Sstevel@tonic-gate * IO errors that may have resulted in this trap. 1440*0Sstevel@tonic-gate */ 1441*0Sstevel@tonic-gate if (t_afsr & (P_AFSR_TO | P_AFSR_BERR | P_AFSR_UE)) { 1442*0Sstevel@tonic-gate cpu_run_bus_error_handlers(aflt, expected); 1443*0Sstevel@tonic-gate } 1444*0Sstevel@tonic-gate 1445*0Sstevel@tonic-gate /* 1446*0Sstevel@tonic-gate * Handle UE: If the UE is in memory, we need to flush the bad line from 1447*0Sstevel@tonic-gate * the E-cache. We also need to query the bus nexus for fatal errors. 1448*0Sstevel@tonic-gate * For Sabre, we will panic on UEs. Attempts to do diagnostic reads on 1449*0Sstevel@tonic-gate * caches may introduce more parity errors (especially when the module 1450*0Sstevel@tonic-gate * is bad) and on Sabre there is no guarantee that such errors 1451*0Sstevel@tonic-gate * (if introduced) are written back as poisoned data. 1452*0Sstevel@tonic-gate */ 1453*0Sstevel@tonic-gate if (t_afsr & P_AFSR_UE) { 1454*0Sstevel@tonic-gate int i; 1455*0Sstevel@tonic-gate 1456*0Sstevel@tonic-gate (void) strcat(pr_reason, "UE "); 1457*0Sstevel@tonic-gate 1458*0Sstevel@tonic-gate spf_flt.flt_type = CPU_UE_ERR; 1459*0Sstevel@tonic-gate aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >> 1460*0Sstevel@tonic-gate MMU_PAGESHIFT)) ? 1 : 0; 1461*0Sstevel@tonic-gate 1462*0Sstevel@tonic-gate /* 1463*0Sstevel@tonic-gate * With UE, we have the PA of the fault. 1464*0Sstevel@tonic-gate * Let's do a diagnostic read to get the ecache 1465*0Sstevel@tonic-gate * data and tag info of the bad line for logging.
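 *
 * For example, with a hypothetical 1MB e$ organized as two 512KB
 * ways (ec_set_size == 0x80000), a fault PA of 0x12345678 gives
 *
 *	aflt->flt_addr % ec_set_size == 0x45678
 *
 * so the loop below probes index 0x45678 in way 0 and index 0xc5678
 * in way 1, each aligned down to a 64-byte line boundary.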
1466*0Sstevel@tonic-gate */ 1467*0Sstevel@tonic-gate if (aflt->flt_in_memory) { 1468*0Sstevel@tonic-gate uint32_t ec_set_size; 1469*0Sstevel@tonic-gate uchar_t state; 1470*0Sstevel@tonic-gate uint32_t ecache_idx; 1471*0Sstevel@tonic-gate uint64_t faultpa = P2ALIGN(aflt->flt_addr, 64); 1472*0Sstevel@tonic-gate 1473*0Sstevel@tonic-gate /* touch the line to put it in ecache */ 1474*0Sstevel@tonic-gate acc_afsr |= read_and_clear_afsr(); 1475*0Sstevel@tonic-gate (void) lddphys(faultpa); 1476*0Sstevel@tonic-gate acc_afsr |= (read_and_clear_afsr() & 1477*0Sstevel@tonic-gate ~(P_AFSR_EDP | P_AFSR_UE)); 1478*0Sstevel@tonic-gate 1479*0Sstevel@tonic-gate ec_set_size = cpunodes[CPU->cpu_id].ecache_size / 1480*0Sstevel@tonic-gate ecache_associativity; 1481*0Sstevel@tonic-gate 1482*0Sstevel@tonic-gate for (i = 0; i < ecache_associativity; i++) { 1483*0Sstevel@tonic-gate ecache_idx = i * ec_set_size + 1484*0Sstevel@tonic-gate (aflt->flt_addr % ec_set_size); 1485*0Sstevel@tonic-gate get_ecache_dtag(P2ALIGN(ecache_idx, 64), 1486*0Sstevel@tonic-gate (uint64_t *)&spf_flt.flt_ec_data[0], 1487*0Sstevel@tonic-gate &spf_flt.flt_ec_tag, &oafsr, &acc_afsr); 1488*0Sstevel@tonic-gate acc_afsr |= oafsr; 1489*0Sstevel@tonic-gate 1490*0Sstevel@tonic-gate state = (uchar_t)((spf_flt.flt_ec_tag & 1491*0Sstevel@tonic-gate cpu_ec_state_mask) >> cpu_ec_state_shift); 1492*0Sstevel@tonic-gate 1493*0Sstevel@tonic-gate if ((state & cpu_ec_state_valid) && 1494*0Sstevel@tonic-gate ((spf_flt.flt_ec_tag & cpu_ec_tag_mask) == 1495*0Sstevel@tonic-gate ((uint64_t)aflt->flt_addr >> 1496*0Sstevel@tonic-gate cpu_ec_tag_shift))) 1497*0Sstevel@tonic-gate break; 1498*0Sstevel@tonic-gate } 1499*0Sstevel@tonic-gate 1500*0Sstevel@tonic-gate /* 1501*0Sstevel@tonic-gate * Check to see if the ecache tag is valid for the 1502*0Sstevel@tonic-gate * fault PA. In the very unlikely event where the 1503*0Sstevel@tonic-gate * line could be victimized, no ecache info will be 1504*0Sstevel@tonic-gate * available. If this is the case, capture the line 1505*0Sstevel@tonic-gate * from memory instead. 1506*0Sstevel@tonic-gate */ 1507*0Sstevel@tonic-gate if ((state & cpu_ec_state_valid) == 0 || 1508*0Sstevel@tonic-gate (spf_flt.flt_ec_tag & cpu_ec_tag_mask) != 1509*0Sstevel@tonic-gate ((uint64_t)aflt->flt_addr >> cpu_ec_tag_shift)) { 1510*0Sstevel@tonic-gate for (i = 0; i < 8; i++, faultpa += 8) { 1511*0Sstevel@tonic-gate ec_data_t *ecdptr; 1512*0Sstevel@tonic-gate 1513*0Sstevel@tonic-gate ecdptr = &spf_flt.flt_ec_data[i]; 1514*0Sstevel@tonic-gate acc_afsr |= read_and_clear_afsr(); 1515*0Sstevel@tonic-gate ecdptr->ec_d8 = lddphys(faultpa); 1516*0Sstevel@tonic-gate acc_afsr |= (read_and_clear_afsr() & 1517*0Sstevel@tonic-gate ~(P_AFSR_EDP | P_AFSR_UE)); 1518*0Sstevel@tonic-gate ecdptr->ec_afsr = 0; 1519*0Sstevel@tonic-gate /* null afsr value */ 1520*0Sstevel@tonic-gate } 1521*0Sstevel@tonic-gate 1522*0Sstevel@tonic-gate /* 1523*0Sstevel@tonic-gate * Mark tag invalid to indicate mem dump 1524*0Sstevel@tonic-gate * when we print out the info. 
1525*0Sstevel@tonic-gate */ 1526*0Sstevel@tonic-gate spf_flt.flt_ec_tag = AFLT_INV_ADDR; 1527*0Sstevel@tonic-gate } 1528*0Sstevel@tonic-gate spf_flt.flt_ec_lcnt = 1; 1529*0Sstevel@tonic-gate 1530*0Sstevel@tonic-gate /* 1531*0Sstevel@tonic-gate * Flush out the bad line 1532*0Sstevel@tonic-gate */ 1533*0Sstevel@tonic-gate flushecacheline(P2ALIGN(aflt->flt_addr, 64), 1534*0Sstevel@tonic-gate cpunodes[CPU->cpu_id].ecache_size); 1535*0Sstevel@tonic-gate 1536*0Sstevel@tonic-gate acc_afsr |= clear_errors(NULL, NULL); 1537*0Sstevel@tonic-gate } 1538*0Sstevel@tonic-gate 1539*0Sstevel@tonic-gate /* 1540*0Sstevel@tonic-gate * Ask our bus nexus friends if they have any fatal errors. If 1541*0Sstevel@tonic-gate * so, they will log appropriate error messages and panic as a 1542*0Sstevel@tonic-gate * result. We then queue an event for each UDB that reports a 1543*0Sstevel@tonic-gate * UE. Each UE reported in a UDB will have its own log message. 1544*0Sstevel@tonic-gate * 1545*0Sstevel@tonic-gate * Note from kbn: In the case where there are multiple UEs 1546*0Sstevel@tonic-gate * (ME bit is set) - the AFAR address is only accurate to 1547*0Sstevel@tonic-gate * the 16-byte granularity. One cannot tell whether the AFAR 1548*0Sstevel@tonic-gate * belongs to the UDBH or UDBL syndromes. In this case, we 1549*0Sstevel@tonic-gate * always report the AFAR address to be 16-byte aligned. 1550*0Sstevel@tonic-gate * 1551*0Sstevel@tonic-gate * If we're on a Sabre, there is no SDBL, but it will always 1552*0Sstevel@tonic-gate * read as zero, so the sdbl test below will safely fail. 1553*0Sstevel@tonic-gate */ 1554*0Sstevel@tonic-gate if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL || isus2i || isus2e) 1555*0Sstevel@tonic-gate aflt->flt_panic = 1; 1556*0Sstevel@tonic-gate 1557*0Sstevel@tonic-gate if (sdbh & P_DER_UE) { 1558*0Sstevel@tonic-gate aflt->flt_synd = sdbh & P_DER_E_SYND; 1559*0Sstevel@tonic-gate cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE, 1560*0Sstevel@tonic-gate (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1561*0Sstevel@tonic-gate aflt->flt_panic); 1562*0Sstevel@tonic-gate } 1563*0Sstevel@tonic-gate if (sdbl & P_DER_UE) { 1564*0Sstevel@tonic-gate aflt->flt_synd = sdbl & P_DER_E_SYND; 1565*0Sstevel@tonic-gate aflt->flt_synd |= UDBL_REG; /* indicates UDBL */ 1566*0Sstevel@tonic-gate if (!(aflt->flt_stat & P_AFSR_ME)) 1567*0Sstevel@tonic-gate aflt->flt_addr |= 0x8; 1568*0Sstevel@tonic-gate cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE, 1569*0Sstevel@tonic-gate (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1570*0Sstevel@tonic-gate aflt->flt_panic); 1571*0Sstevel@tonic-gate } 1572*0Sstevel@tonic-gate 1573*0Sstevel@tonic-gate /* 1574*0Sstevel@tonic-gate * We got a UE and are panicking, save the fault PA in a known 1575*0Sstevel@tonic-gate * location so that the platform specific panic code can check 1576*0Sstevel@tonic-gate * for copyback errors. 1577*0Sstevel@tonic-gate */ 1578*0Sstevel@tonic-gate if (aflt->flt_panic && aflt->flt_in_memory) { 1579*0Sstevel@tonic-gate panic_aflt = *aflt; 1580*0Sstevel@tonic-gate } 1581*0Sstevel@tonic-gate } 1582*0Sstevel@tonic-gate 1583*0Sstevel@tonic-gate /* 1584*0Sstevel@tonic-gate * Handle EDP and LDP: Locate the line with bad parity and enqueue an 1585*0Sstevel@tonic-gate * async error for logging. For Sabre, we panic on EDP or LDP. 
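 *
 * An illustrative sketch of that scan (the real scan_ecache() is
 * assembly in spitfire_asm.s):
 *
 *	for (index = 0; index < ecache_size; index += linesize) {
 *		read the line's tag and data via the diagnostic ASI;
 *		if (the AFSR now shows EDP or LDP)
 *			record the PA reconstructed from the tag;
 *	}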
1586*0Sstevel@tonic-gate */ 1587*0Sstevel@tonic-gate if (t_afsr & (P_AFSR_EDP | P_AFSR_LDP)) { 1588*0Sstevel@tonic-gate spf_flt.flt_type = CPU_EDP_LDP_ERR; 1589*0Sstevel@tonic-gate 1590*0Sstevel@tonic-gate if (t_afsr & P_AFSR_EDP) 1591*0Sstevel@tonic-gate (void) strcat(pr_reason, "EDP "); 1592*0Sstevel@tonic-gate 1593*0Sstevel@tonic-gate if (t_afsr & P_AFSR_LDP) 1594*0Sstevel@tonic-gate (void) strcat(pr_reason, "LDP "); 1595*0Sstevel@tonic-gate 1596*0Sstevel@tonic-gate /* 1597*0Sstevel@tonic-gate * Here we have no PA to work with. 1598*0Sstevel@tonic-gate * Scan each line in the ecache to look for 1599*0Sstevel@tonic-gate * the one with bad parity. 1600*0Sstevel@tonic-gate */ 1601*0Sstevel@tonic-gate aflt->flt_addr = AFLT_INV_ADDR; 1602*0Sstevel@tonic-gate scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0], 1603*0Sstevel@tonic-gate &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr); 1604*0Sstevel@tonic-gate acc_afsr |= (oafsr & ~P_AFSR_WP); 1605*0Sstevel@tonic-gate 1606*0Sstevel@tonic-gate /* 1607*0Sstevel@tonic-gate * If we found a bad PA, update the state to indicate if it is 1608*0Sstevel@tonic-gate * memory or I/O space. This code will be important if we ever 1609*0Sstevel@tonic-gate * support cacheable frame buffers. 1610*0Sstevel@tonic-gate */ 1611*0Sstevel@tonic-gate if (aflt->flt_addr != AFLT_INV_ADDR) { 1612*0Sstevel@tonic-gate aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >> 1613*0Sstevel@tonic-gate MMU_PAGESHIFT)) ? 1 : 0; 1614*0Sstevel@tonic-gate } 1615*0Sstevel@tonic-gate 1616*0Sstevel@tonic-gate if (isus2i || isus2e) 1617*0Sstevel@tonic-gate aflt->flt_panic = 1; 1618*0Sstevel@tonic-gate 1619*0Sstevel@tonic-gate cpu_errorq_dispatch((t_afsr & P_AFSR_EDP) ? 1620*0Sstevel@tonic-gate FM_EREPORT_CPU_USII_EDP : FM_EREPORT_CPU_USII_LDP, 1621*0Sstevel@tonic-gate (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1622*0Sstevel@tonic-gate aflt->flt_panic); 1623*0Sstevel@tonic-gate } 1624*0Sstevel@tonic-gate 1625*0Sstevel@tonic-gate /* 1626*0Sstevel@tonic-gate * Timeout and bus error handling. There are two cases to consider: 1627*0Sstevel@tonic-gate * 1628*0Sstevel@tonic-gate * (1) If we are in the kernel protected by ddi_peek or ddi_poke, we 1629*0Sstevel@tonic-gate * have already modified the saved registers so that we will return 1630*0Sstevel@tonic-gate * from the trap to the appropriate trampoline routine; otherwise panic. 1631*0Sstevel@tonic-gate * 1632*0Sstevel@tonic-gate * (2) In user mode, we can simply use our AST mechanism to deliver 1633*0Sstevel@tonic-gate * a SIGBUS. We do not log the occurrence - processes performing 1634*0Sstevel@tonic-gate * device control would generate lots of uninteresting messages. 1635*0Sstevel@tonic-gate */ 1636*0Sstevel@tonic-gate if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) { 1637*0Sstevel@tonic-gate if (t_afsr & P_AFSR_TO) 1638*0Sstevel@tonic-gate (void) strcat(pr_reason, "BTO "); 1639*0Sstevel@tonic-gate 1640*0Sstevel@tonic-gate if (t_afsr & P_AFSR_BERR) 1641*0Sstevel@tonic-gate (void) strcat(pr_reason, "BERR "); 1642*0Sstevel@tonic-gate 1643*0Sstevel@tonic-gate spf_flt.flt_type = CPU_BTO_BERR_ERR; 1644*0Sstevel@tonic-gate if (aflt->flt_priv && aflt->flt_prot == AFLT_PROT_NONE) { 1645*0Sstevel@tonic-gate cpu_errorq_dispatch((t_afsr & P_AFSR_TO) ?
1646*0Sstevel@tonic-gate FM_EREPORT_CPU_USII_TO : FM_EREPORT_CPU_USII_BERR, 1647*0Sstevel@tonic-gate (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1648*0Sstevel@tonic-gate aflt->flt_panic); 1649*0Sstevel@tonic-gate } 1650*0Sstevel@tonic-gate } 1651*0Sstevel@tonic-gate 1652*0Sstevel@tonic-gate /* 1653*0Sstevel@tonic-gate * Handle WP: WP happens when the ecache is victimized and a parity 1654*0Sstevel@tonic-gate * error was detected on a writeback. The data in question will be 1655*0Sstevel@tonic-gate * poisoned as a UE will be written back. The PA is not logged and 1656*0Sstevel@tonic-gate * it is possible that it doesn't belong to the trapped thread. The 1657*0Sstevel@tonic-gate * WP trap is not fatal, but it could be fatal to someone who 1658*0Sstevel@tonic-gate * subsequently accesses the toxic page. We set read_all_memscrub 1659*0Sstevel@tonic-gate * to force the memscrubber to read all of memory when it awakens. 1660*0Sstevel@tonic-gate * For Sabre/Hummingbird, WP is fatal because the HW doesn't write a 1661*0Sstevel@tonic-gate * UE back to poison the data. 1662*0Sstevel@tonic-gate */ 1663*0Sstevel@tonic-gate if (t_afsr & P_AFSR_WP) { 1664*0Sstevel@tonic-gate (void) strcat(pr_reason, "WP "); 1665*0Sstevel@tonic-gate if (isus2i || isus2e) { 1666*0Sstevel@tonic-gate aflt->flt_panic = 1; 1667*0Sstevel@tonic-gate } else { 1668*0Sstevel@tonic-gate read_all_memscrub = 1; 1669*0Sstevel@tonic-gate } 1670*0Sstevel@tonic-gate spf_flt.flt_type = CPU_WP_ERR; 1671*0Sstevel@tonic-gate cpu_errorq_dispatch(FM_EREPORT_CPU_USII_WP, 1672*0Sstevel@tonic-gate (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1673*0Sstevel@tonic-gate aflt->flt_panic); 1674*0Sstevel@tonic-gate } 1675*0Sstevel@tonic-gate 1676*0Sstevel@tonic-gate /* 1677*0Sstevel@tonic-gate * Handle trapping CP error: In Sabre/Hummingbird, a parity error in 1678*0Sstevel@tonic-gate * the ecache on a copyout due to a PCI DMA read is signaled as a CP. 1679*0Sstevel@tonic-gate * This is fatal. 1680*0Sstevel@tonic-gate */ 1681*0Sstevel@tonic-gate 1682*0Sstevel@tonic-gate if (t_afsr & P_AFSR_CP) { 1683*0Sstevel@tonic-gate if (isus2i || isus2e) { 1684*0Sstevel@tonic-gate (void) strcat(pr_reason, "CP "); 1685*0Sstevel@tonic-gate aflt->flt_panic = 1; 1686*0Sstevel@tonic-gate spf_flt.flt_type = CPU_TRAPPING_CP_ERR; 1687*0Sstevel@tonic-gate cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP, 1688*0Sstevel@tonic-gate (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1689*0Sstevel@tonic-gate aflt->flt_panic); 1690*0Sstevel@tonic-gate } else { 1691*0Sstevel@tonic-gate /* 1692*0Sstevel@tonic-gate * Orphan CP: happens due to a signal integrity problem 1693*0Sstevel@tonic-gate * on a CPU, where a CP is reported without its 1694*0Sstevel@tonic-gate * associated UE. This is handled by locating the 1695*0Sstevel@tonic-gate * line with bad parity and kicking off the memscrubber 1696*0Sstevel@tonic-gate * to find the UE, whether in memory or in another CPU's cache. 1697*0Sstevel@tonic-gate */ 1698*0Sstevel@tonic-gate spf_flt.flt_type = CPU_ORPHAN_CP_ERR; 1699*0Sstevel@tonic-gate (void) strcat(pr_reason, "ORPHAN_CP "); 1700*0Sstevel@tonic-gate 1701*0Sstevel@tonic-gate /* 1702*0Sstevel@tonic-gate * Here we have no PA to work with. 1703*0Sstevel@tonic-gate * Scan each line in the ecache to look for 1704*0Sstevel@tonic-gate * the one with bad parity.
1705*0Sstevel@tonic-gate */ 1706*0Sstevel@tonic-gate aflt->flt_addr = AFLT_INV_ADDR; 1707*0Sstevel@tonic-gate scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0], 1708*0Sstevel@tonic-gate &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, 1709*0Sstevel@tonic-gate &oafsr); 1710*0Sstevel@tonic-gate acc_afsr |= oafsr; 1711*0Sstevel@tonic-gate 1712*0Sstevel@tonic-gate /* 1713*0Sstevel@tonic-gate * If we found a bad PA, update the state to indicate 1714*0Sstevel@tonic-gate * if it is memory or I/O space. 1715*0Sstevel@tonic-gate */ 1716*0Sstevel@tonic-gate if (aflt->flt_addr != AFLT_INV_ADDR) { 1717*0Sstevel@tonic-gate aflt->flt_in_memory = 1718*0Sstevel@tonic-gate (pf_is_memory(aflt->flt_addr >> 1719*0Sstevel@tonic-gate MMU_PAGESHIFT)) ? 1 : 0; 1720*0Sstevel@tonic-gate } 1721*0Sstevel@tonic-gate read_all_memscrub = 1; 1722*0Sstevel@tonic-gate cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP, 1723*0Sstevel@tonic-gate (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1724*0Sstevel@tonic-gate aflt->flt_panic); 1725*0Sstevel@tonic-gate 1726*0Sstevel@tonic-gate } 1727*0Sstevel@tonic-gate } 1728*0Sstevel@tonic-gate 1729*0Sstevel@tonic-gate /* 1730*0Sstevel@tonic-gate * If we queued an error other than WP or CP and we are going to return 1731*0Sstevel@tonic-gate * from the trap and the error was in user mode or inside of a 1732*0Sstevel@tonic-gate * copy routine, set AST flag so the queue will be drained before 1733*0Sstevel@tonic-gate * returning to user mode. 1734*0Sstevel@tonic-gate * 1735*0Sstevel@tonic-gate * For UE/LDP/EDP, the AST processing will SIGKILL the process 1736*0Sstevel@tonic-gate * and send an event to its process contract. 1737*0Sstevel@tonic-gate * 1738*0Sstevel@tonic-gate * For BERR/BTO, the AST processing will SIGBUS the process. There 1739*0Sstevel@tonic-gate * will have been no error queued in this case. 1740*0Sstevel@tonic-gate */ 1741*0Sstevel@tonic-gate if ((t_afsr & 1742*0Sstevel@tonic-gate (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP | P_AFSR_BERR | P_AFSR_TO)) && 1743*0Sstevel@tonic-gate (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY)) { 1744*0Sstevel@tonic-gate int pcb_flag = 0; 1745*0Sstevel@tonic-gate 1746*0Sstevel@tonic-gate if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP)) 1747*0Sstevel@tonic-gate pcb_flag |= ASYNC_HWERR; 1748*0Sstevel@tonic-gate 1749*0Sstevel@tonic-gate if (t_afsr & P_AFSR_BERR) 1750*0Sstevel@tonic-gate pcb_flag |= ASYNC_BERR; 1751*0Sstevel@tonic-gate 1752*0Sstevel@tonic-gate if (t_afsr & P_AFSR_TO) 1753*0Sstevel@tonic-gate pcb_flag |= ASYNC_BTO; 1754*0Sstevel@tonic-gate 1755*0Sstevel@tonic-gate ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag; 1756*0Sstevel@tonic-gate aston(curthread); 1757*0Sstevel@tonic-gate action = ACTION_AST_FLAGS; 1758*0Sstevel@tonic-gate } 1759*0Sstevel@tonic-gate 1760*0Sstevel@tonic-gate /* 1761*0Sstevel@tonic-gate * In response to a deferred error, we must do one of three things: 1762*0Sstevel@tonic-gate * (1) set the AST flags, (2) trampoline, or (3) panic. action is 1763*0Sstevel@tonic-gate * set in cases (1) and (2) - check that either action is set or 1764*0Sstevel@tonic-gate * (3) is true. 1765*0Sstevel@tonic-gate * 1766*0Sstevel@tonic-gate * On II, the WP writes poisoned data back to memory, which will 1767*0Sstevel@tonic-gate * cause a UE and a panic or reboot when read. In this case, we 1768*0Sstevel@tonic-gate * don't need to panic at this time. On IIi and IIe, 1769*0Sstevel@tonic-gate * aflt->flt_panic is already set above. 
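 *
 * Stated as the expression the ASSERT below checks:
 *
 *	aflt->flt_panic || action != ACTION_NONE || (t_afsr & P_AFSR_WP)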
1770*0Sstevel@tonic-gate */ 1771*0Sstevel@tonic-gate ASSERT((aflt->flt_panic != 0) || (action != ACTION_NONE) || 1772*0Sstevel@tonic-gate (t_afsr & P_AFSR_WP)); 1773*0Sstevel@tonic-gate 1774*0Sstevel@tonic-gate /* 1775*0Sstevel@tonic-gate * Make a final sanity check to make sure we did not get any more async 1776*0Sstevel@tonic-gate * errors and accumulate the afsr. 1777*0Sstevel@tonic-gate */ 1778*0Sstevel@tonic-gate flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2, 1779*0Sstevel@tonic-gate cpunodes[CPU->cpu_id].ecache_linesize); 1780*0Sstevel@tonic-gate (void) clear_errors(&spf_flt, NULL); 1781*0Sstevel@tonic-gate 1782*0Sstevel@tonic-gate /* 1783*0Sstevel@tonic-gate * Take care of a special case: If there is a UE in the ecache flush 1784*0Sstevel@tonic-gate * area, we'll see it in flush_ecache(). This will trigger the 1785*0Sstevel@tonic-gate * CPU_ADDITIONAL_ERRORS case below. 1786*0Sstevel@tonic-gate * 1787*0Sstevel@tonic-gate * This could occur if the original error was a UE in the flush area, 1788*0Sstevel@tonic-gate * or if the original error was an E$ error that was flushed out of 1789*0Sstevel@tonic-gate * the E$ in scan_ecache(). 1790*0Sstevel@tonic-gate * 1791*0Sstevel@tonic-gate * If it's at the same address that we're already logging, then it's 1792*0Sstevel@tonic-gate * probably one of these cases. Clear the bit so we don't trip over 1793*0Sstevel@tonic-gate * it on the additional errors case, which could cause an unnecessary 1794*0Sstevel@tonic-gate * panic. 1795*0Sstevel@tonic-gate */ 1796*0Sstevel@tonic-gate if ((aflt->flt_stat & P_AFSR_UE) && aflt->flt_addr == t_afar) 1797*0Sstevel@tonic-gate acc_afsr |= aflt->flt_stat & ~P_AFSR_UE; 1798*0Sstevel@tonic-gate else 1799*0Sstevel@tonic-gate acc_afsr |= aflt->flt_stat; 1800*0Sstevel@tonic-gate 1801*0Sstevel@tonic-gate /* 1802*0Sstevel@tonic-gate * Check the accumulated afsr for the important bits. 1803*0Sstevel@tonic-gate * Make sure the spf_flt.flt_type value is set, and 1804*0Sstevel@tonic-gate * enqueue an error. 1805*0Sstevel@tonic-gate */ 1806*0Sstevel@tonic-gate if (acc_afsr & 1807*0Sstevel@tonic-gate (P_AFSR_LEVEL1 | P_AFSR_IVUE | P_AFSR_ETP | P_AFSR_ISAP)) { 1808*0Sstevel@tonic-gate if (acc_afsr & (P_AFSR_UE | P_AFSR_EDP | P_AFSR_LDP | 1809*0Sstevel@tonic-gate P_AFSR_BERR | P_AFSR_TO | P_AFSR_IVUE | P_AFSR_ETP | 1810*0Sstevel@tonic-gate P_AFSR_ISAP)) 1811*0Sstevel@tonic-gate aflt->flt_panic = 1; 1812*0Sstevel@tonic-gate 1813*0Sstevel@tonic-gate spf_flt.flt_type = CPU_ADDITIONAL_ERR; 1814*0Sstevel@tonic-gate aflt->flt_stat = acc_afsr; 1815*0Sstevel@tonic-gate cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UNKNOWN, 1816*0Sstevel@tonic-gate (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1817*0Sstevel@tonic-gate aflt->flt_panic); 1818*0Sstevel@tonic-gate } 1819*0Sstevel@tonic-gate 1820*0Sstevel@tonic-gate /* 1821*0Sstevel@tonic-gate * If aflt->flt_panic is set at this point, we need to panic as the 1822*0Sstevel@tonic-gate * result of a trap at TL > 0, or an error we determined to be fatal. 1823*0Sstevel@tonic-gate * We've already enqueued the error in one of the if-clauses above, 1824*0Sstevel@tonic-gate * and it will be dequeued and logged as part of the panic flow.
1825*0Sstevel@tonic-gate */ 1826*0Sstevel@tonic-gate if (aflt->flt_panic) { 1827*0Sstevel@tonic-gate cpu_aflt_log(CE_PANIC, 1, &spf_flt, CPU_ERRID_FIRST, 1828*0Sstevel@tonic-gate "See previous message(s) for details", " %sError(s)", 1829*0Sstevel@tonic-gate pr_reason); 1830*0Sstevel@tonic-gate } 1831*0Sstevel@tonic-gate 1832*0Sstevel@tonic-gate /* 1833*0Sstevel@tonic-gate * Before returning, we must re-enable errors, and 1834*0Sstevel@tonic-gate * reset the caches to their boot-up state. 1835*0Sstevel@tonic-gate */ 1836*0Sstevel@tonic-gate set_lsu(get_lsu() | cache_boot_state); 1837*0Sstevel@tonic-gate set_error_enable(EER_ENABLE); 1838*0Sstevel@tonic-gate } 1839*0Sstevel@tonic-gate 1840*0Sstevel@tonic-gate /* 1841*0Sstevel@tonic-gate * Check for miscellaneous fatal errors and call CE_PANIC if any are seen. 1842*0Sstevel@tonic-gate * This routine is shared by the CE and UE handling code. 1843*0Sstevel@tonic-gate */ 1844*0Sstevel@tonic-gate static void 1845*0Sstevel@tonic-gate check_misc_err(spitf_async_flt *spf_flt) 1846*0Sstevel@tonic-gate { 1847*0Sstevel@tonic-gate struct async_flt *aflt = (struct async_flt *)spf_flt; 1848*0Sstevel@tonic-gate char *fatal_str = NULL; 1849*0Sstevel@tonic-gate 1850*0Sstevel@tonic-gate /* 1851*0Sstevel@tonic-gate * The ISAP and ETP errors are supposed to cause a POR 1852*0Sstevel@tonic-gate * from the system, so in theory we never, ever see these messages. 1853*0Sstevel@tonic-gate * ISAP, ETP and IVUE are considered to be fatal. 1854*0Sstevel@tonic-gate */ 1855*0Sstevel@tonic-gate if (aflt->flt_stat & P_AFSR_ISAP) 1856*0Sstevel@tonic-gate fatal_str = " System Address Parity Error on"; 1857*0Sstevel@tonic-gate else if (aflt->flt_stat & P_AFSR_ETP) 1858*0Sstevel@tonic-gate fatal_str = " Ecache Tag Parity Error on"; 1859*0Sstevel@tonic-gate else if (aflt->flt_stat & P_AFSR_IVUE) 1860*0Sstevel@tonic-gate fatal_str = " Interrupt Vector Uncorrectable Error on"; 1861*0Sstevel@tonic-gate if (fatal_str != NULL) { 1862*0Sstevel@tonic-gate cpu_aflt_log(CE_PANIC, 1, spf_flt, CMN_LFLAGS, 1863*0Sstevel@tonic-gate NULL, fatal_str); 1864*0Sstevel@tonic-gate } 1865*0Sstevel@tonic-gate } 1866*0Sstevel@tonic-gate 1867*0Sstevel@tonic-gate /* 1868*0Sstevel@tonic-gate * Routine to convert a syndrome into a syndrome code. 1869*0Sstevel@tonic-gate */ 1870*0Sstevel@tonic-gate static int 1871*0Sstevel@tonic-gate synd_to_synd_code(int synd_status, ushort_t synd) 1872*0Sstevel@tonic-gate { 1873*0Sstevel@tonic-gate if (synd_status != AFLT_STAT_VALID) 1874*0Sstevel@tonic-gate return (-1); 1875*0Sstevel@tonic-gate 1876*0Sstevel@tonic-gate /* 1877*0Sstevel@tonic-gate * Use the 8-bit syndrome to index the ecc_syndrome_tab 1878*0Sstevel@tonic-gate * to get the code indicating which bit(s) is(are) bad. 1879*0Sstevel@tonic-gate */ 1880*0Sstevel@tonic-gate if ((synd == 0) || (synd >= SYND_TBL_SIZE)) 1881*0Sstevel@tonic-gate return (-1); 1882*0Sstevel@tonic-gate else 1883*0Sstevel@tonic-gate return (ecc_syndrome_tab[synd]); 1884*0Sstevel@tonic-gate } 1885*0Sstevel@tonic-gate 1886*0Sstevel@tonic-gate /* 1887*0Sstevel@tonic-gate * Routine to return a string identifying the physical name 1888*0Sstevel@tonic-gate * associated with a memory/cache error. 
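 *
 * A minimal, hypothetical usage sketch (synd, afsr and afar are
 * caller-supplied values):
 *
 *	char unum[UNUM_NAMLEN];
 *	int len;
 *
 *	if (cpu_get_mem_unum(AFLT_STAT_VALID, synd, afsr, afar,
 *	    CPU->cpu_id, 1, 0, unum, UNUM_NAMLEN, &len) == 0)
 *		cmn_err(CE_NOTE, "failing FRU: %s", unum);
 *
 * Callers that already hold an async_flt normally use the
 * cpu_get_mem_unum_aflt() wrapper below instead.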
1889*0Sstevel@tonic-gate */ 1890*0Sstevel@tonic-gate /* ARGSUSED */ 1891*0Sstevel@tonic-gate int 1892*0Sstevel@tonic-gate cpu_get_mem_unum(int synd_status, ushort_t synd, uint64_t afsr, 1893*0Sstevel@tonic-gate uint64_t afar, int cpuid, int flt_in_memory, ushort_t flt_status, 1894*0Sstevel@tonic-gate char *buf, int buflen, int *lenp) 1895*0Sstevel@tonic-gate { 1896*0Sstevel@tonic-gate short synd_code; 1897*0Sstevel@tonic-gate int ret; 1898*0Sstevel@tonic-gate 1899*0Sstevel@tonic-gate if (flt_in_memory) { 1900*0Sstevel@tonic-gate synd_code = synd_to_synd_code(synd_status, synd); 1901*0Sstevel@tonic-gate if (synd_code == -1) { 1902*0Sstevel@tonic-gate ret = EINVAL; 1903*0Sstevel@tonic-gate } else if (prom_get_unum(synd_code, P2ALIGN(afar, 8), 1904*0Sstevel@tonic-gate buf, buflen, lenp) != 0) { 1905*0Sstevel@tonic-gate ret = EIO; 1906*0Sstevel@tonic-gate } else if (*lenp <= 1) { 1907*0Sstevel@tonic-gate ret = EINVAL; 1908*0Sstevel@tonic-gate } else { 1909*0Sstevel@tonic-gate ret = 0; 1910*0Sstevel@tonic-gate } 1911*0Sstevel@tonic-gate } else { 1912*0Sstevel@tonic-gate ret = ENOTSUP; 1913*0Sstevel@tonic-gate } 1914*0Sstevel@tonic-gate 1915*0Sstevel@tonic-gate if (ret != 0) { 1916*0Sstevel@tonic-gate buf[0] = '\0'; 1917*0Sstevel@tonic-gate *lenp = 0; 1918*0Sstevel@tonic-gate } 1919*0Sstevel@tonic-gate 1920*0Sstevel@tonic-gate return (ret); 1921*0Sstevel@tonic-gate } 1922*0Sstevel@tonic-gate 1923*0Sstevel@tonic-gate /* 1924*0Sstevel@tonic-gate * Wrapper for cpu_get_mem_unum() routine that takes an 1925*0Sstevel@tonic-gate * async_flt struct rather than explicit arguments. 1926*0Sstevel@tonic-gate */ 1927*0Sstevel@tonic-gate int 1928*0Sstevel@tonic-gate cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt, 1929*0Sstevel@tonic-gate char *buf, int buflen, int *lenp) 1930*0Sstevel@tonic-gate { 1931*0Sstevel@tonic-gate return (cpu_get_mem_unum(synd_status, SYND(aflt->flt_synd), 1932*0Sstevel@tonic-gate aflt->flt_stat, aflt->flt_addr, aflt->flt_bus_id, 1933*0Sstevel@tonic-gate aflt->flt_in_memory, aflt->flt_status, buf, buflen, lenp)); 1934*0Sstevel@tonic-gate } 1935*0Sstevel@tonic-gate 1936*0Sstevel@tonic-gate /* 1937*0Sstevel@tonic-gate * This routine is a more generic interface to cpu_get_mem_unum(), 1938*0Sstevel@tonic-gate * that may be used by other modules (e.g. mm). 1939*0Sstevel@tonic-gate */ 1940*0Sstevel@tonic-gate int 1941*0Sstevel@tonic-gate cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar, 1942*0Sstevel@tonic-gate char *buf, int buflen, int *lenp) 1943*0Sstevel@tonic-gate { 1944*0Sstevel@tonic-gate int synd_status, flt_in_memory, ret; 1945*0Sstevel@tonic-gate char unum[UNUM_NAMLEN]; 1946*0Sstevel@tonic-gate 1947*0Sstevel@tonic-gate /* 1948*0Sstevel@tonic-gate * Check for an invalid address. 1949*0Sstevel@tonic-gate */ 1950*0Sstevel@tonic-gate if (afar == (uint64_t)-1) 1951*0Sstevel@tonic-gate return (ENXIO); 1952*0Sstevel@tonic-gate 1953*0Sstevel@tonic-gate if (synd == (uint64_t)-1) 1954*0Sstevel@tonic-gate synd_status = AFLT_STAT_INVALID; 1955*0Sstevel@tonic-gate else 1956*0Sstevel@tonic-gate synd_status = AFLT_STAT_VALID; 1957*0Sstevel@tonic-gate 1958*0Sstevel@tonic-gate flt_in_memory = (pf_is_memory(afar >> MMU_PAGESHIFT)) ? 
1 : 0; 1959*0Sstevel@tonic-gate 1960*0Sstevel@tonic-gate if ((ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar, 1961*0Sstevel@tonic-gate CPU->cpu_id, flt_in_memory, 0, unum, UNUM_NAMLEN, lenp)) 1962*0Sstevel@tonic-gate != 0) 1963*0Sstevel@tonic-gate return (ret); 1964*0Sstevel@tonic-gate 1965*0Sstevel@tonic-gate if (*lenp >= buflen) 1966*0Sstevel@tonic-gate return (ENAMETOOLONG); 1967*0Sstevel@tonic-gate 1968*0Sstevel@tonic-gate (void) strncpy(buf, unum, buflen); 1969*0Sstevel@tonic-gate 1970*0Sstevel@tonic-gate return (0); 1971*0Sstevel@tonic-gate } 1972*0Sstevel@tonic-gate 1973*0Sstevel@tonic-gate /* 1974*0Sstevel@tonic-gate * Routine to return memory information associated 1975*0Sstevel@tonic-gate * with a physical address and syndrome. 1976*0Sstevel@tonic-gate */ 1977*0Sstevel@tonic-gate /* ARGSUSED */ 1978*0Sstevel@tonic-gate int 1979*0Sstevel@tonic-gate cpu_get_mem_info(uint64_t synd, uint64_t afar, 1980*0Sstevel@tonic-gate uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep, 1981*0Sstevel@tonic-gate int *segsp, int *banksp, int *mcidp) 1982*0Sstevel@tonic-gate { 1983*0Sstevel@tonic-gate return (ENOTSUP); 1984*0Sstevel@tonic-gate } 1985*0Sstevel@tonic-gate 1986*0Sstevel@tonic-gate /* 1987*0Sstevel@tonic-gate * Routine to return a string identifying the physical 1988*0Sstevel@tonic-gate * name associated with a cpuid. 1989*0Sstevel@tonic-gate */ 1990*0Sstevel@tonic-gate /* ARGSUSED */ 1991*0Sstevel@tonic-gate int 1992*0Sstevel@tonic-gate cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp) 1993*0Sstevel@tonic-gate { 1994*0Sstevel@tonic-gate return (ENOTSUP); 1995*0Sstevel@tonic-gate } 1996*0Sstevel@tonic-gate 1997*0Sstevel@tonic-gate /* 1998*0Sstevel@tonic-gate * This routine returns the size of the kernel's FRU name buffer. 1999*0Sstevel@tonic-gate */ 2000*0Sstevel@tonic-gate size_t 2001*0Sstevel@tonic-gate cpu_get_name_bufsize() 2002*0Sstevel@tonic-gate { 2003*0Sstevel@tonic-gate return (UNUM_NAMLEN); 2004*0Sstevel@tonic-gate } 2005*0Sstevel@tonic-gate 2006*0Sstevel@tonic-gate /* 2007*0Sstevel@tonic-gate * Cpu specific log func for UEs. 2008*0Sstevel@tonic-gate */ 2009*0Sstevel@tonic-gate static void 2010*0Sstevel@tonic-gate log_ue_err(struct async_flt *aflt, char *unum) 2011*0Sstevel@tonic-gate { 2012*0Sstevel@tonic-gate spitf_async_flt *spf_flt = (spitf_async_flt *)aflt; 2013*0Sstevel@tonic-gate int len = 0; 2014*0Sstevel@tonic-gate 2015*0Sstevel@tonic-gate #ifdef DEBUG 2016*0Sstevel@tonic-gate int afsr_priv = (aflt->flt_stat & P_AFSR_PRIV) ? 1 : 0; 2017*0Sstevel@tonic-gate 2018*0Sstevel@tonic-gate /* 2019*0Sstevel@tonic-gate * Paranoid Check for priv mismatch 2020*0Sstevel@tonic-gate * Only applicable for UEs 2021*0Sstevel@tonic-gate */ 2022*0Sstevel@tonic-gate if (afsr_priv != aflt->flt_priv) { 2023*0Sstevel@tonic-gate /* 2024*0Sstevel@tonic-gate * The priv bits in %tstate and %afsr did not match; we expect 2025*0Sstevel@tonic-gate * this to be very rare, so flag it with a message. 2026*0Sstevel@tonic-gate */ 2027*0Sstevel@tonic-gate cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST, NULL, 2028*0Sstevel@tonic-gate ": PRIV bit in TSTATE and AFSR mismatched; " 2029*0Sstevel@tonic-gate "TSTATE.PRIV=%d used", (aflt->flt_priv) ? 
1 : 0); 2030*0Sstevel@tonic-gate 2031*0Sstevel@tonic-gate /* update saved afsr to reflect the correct priv */ 2032*0Sstevel@tonic-gate aflt->flt_stat &= ~P_AFSR_PRIV; 2033*0Sstevel@tonic-gate if (aflt->flt_priv) 2034*0Sstevel@tonic-gate aflt->flt_stat |= P_AFSR_PRIV; 2035*0Sstevel@tonic-gate } 2036*0Sstevel@tonic-gate #endif /* DEBUG */ 2037*0Sstevel@tonic-gate 2038*0Sstevel@tonic-gate (void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt, unum, 2039*0Sstevel@tonic-gate UNUM_NAMLEN, &len); 2040*0Sstevel@tonic-gate 2041*0Sstevel@tonic-gate cpu_aflt_log(CE_WARN, 1, spf_flt, UE_LFLAGS, unum, 2042*0Sstevel@tonic-gate " Uncorrectable Memory Error on"); 2043*0Sstevel@tonic-gate 2044*0Sstevel@tonic-gate if (SYND(aflt->flt_synd) == 0x3) { 2045*0Sstevel@tonic-gate cpu_aflt_log(CE_WARN, 1, spf_flt, CPU_ERRID_FIRST, NULL, 2046*0Sstevel@tonic-gate " Syndrome 0x3 indicates that this may not be a " 2047*0Sstevel@tonic-gate "memory module problem"); 2048*0Sstevel@tonic-gate } 2049*0Sstevel@tonic-gate 2050*0Sstevel@tonic-gate if (aflt->flt_in_memory) 2051*0Sstevel@tonic-gate cpu_log_ecmem_info(spf_flt); 2052*0Sstevel@tonic-gate } 2053*0Sstevel@tonic-gate 2054*0Sstevel@tonic-gate 2055*0Sstevel@tonic-gate /* 2056*0Sstevel@tonic-gate * The cpu_async_log_err() function is called via the ue_drain() function to 2057*0Sstevel@tonic-gate * handle logging for CPU events that are dequeued. As such, it can be invoked 2058*0Sstevel@tonic-gate * from softint context, from AST processing in the trap() flow, or from the 2059*0Sstevel@tonic-gate * panic flow. We decode the CPU-specific data, and log appropriate messages. 2060*0Sstevel@tonic-gate */ 2061*0Sstevel@tonic-gate static void 2062*0Sstevel@tonic-gate cpu_async_log_err(void *flt) 2063*0Sstevel@tonic-gate { 2064*0Sstevel@tonic-gate spitf_async_flt *spf_flt = (spitf_async_flt *)flt; 2065*0Sstevel@tonic-gate struct async_flt *aflt = (struct async_flt *)flt; 2066*0Sstevel@tonic-gate char unum[UNUM_NAMLEN]; 2067*0Sstevel@tonic-gate char *space; 2068*0Sstevel@tonic-gate char *ecache_scrub_logstr = NULL; 2069*0Sstevel@tonic-gate 2070*0Sstevel@tonic-gate switch (spf_flt->flt_type) { 2071*0Sstevel@tonic-gate case CPU_UE_ERR: 2072*0Sstevel@tonic-gate /* 2073*0Sstevel@tonic-gate * We want to skip logging only if ALL the following 2074*0Sstevel@tonic-gate * conditions are true: 2075*0Sstevel@tonic-gate * 2076*0Sstevel@tonic-gate * 1. We are not panicking 2077*0Sstevel@tonic-gate * 2. There is only one error 2078*0Sstevel@tonic-gate * 3. That error is a memory error 2079*0Sstevel@tonic-gate * 4. The error is caused by the memory scrubber (in 2080*0Sstevel@tonic-gate * which case the error will have occurred under 2081*0Sstevel@tonic-gate * on_trap protection) 2082*0Sstevel@tonic-gate * 5. The error is on a retired page 2083*0Sstevel@tonic-gate * 2084*0Sstevel@tonic-gate * Note 1: AFLT_PROT_EC is used in places other than the memory 2085*0Sstevel@tonic-gate * scrubber. However, none of those errors should occur 2086*0Sstevel@tonic-gate * on a retired page. 2087*0Sstevel@tonic-gate * 2088*0Sstevel@tonic-gate * Note 2: In the CE case, these errors are discarded before 2089*0Sstevel@tonic-gate * the errorq. In the UE case, we must wait until now -- 2090*0Sstevel@tonic-gate * softcall() grabs a mutex, which we can't do at a high PIL.
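 *
 * In short, logging is skipped only when:
 *
 *	!panicstr &&
 *	(aflt->flt_stat & S_AFSR_ALL_ERRS) == P_AFSR_UE &&
 *	aflt->flt_prot == AFLT_PROT_EC &&
 *	the faulting page has already been retired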
2091*0Sstevel@tonic-gate */ 2092*0Sstevel@tonic-gate if (!panicstr && 2093*0Sstevel@tonic-gate (aflt->flt_stat & S_AFSR_ALL_ERRS) == P_AFSR_UE && 2094*0Sstevel@tonic-gate aflt->flt_prot == AFLT_PROT_EC) { 2095*0Sstevel@tonic-gate page_t *pp = page_numtopp_nolock((pfn_t) 2096*0Sstevel@tonic-gate (aflt->flt_addr >> MMU_PAGESHIFT)); 2097*0Sstevel@tonic-gate 2098*0Sstevel@tonic-gate if (pp != NULL && page_isretired(pp)) { 2099*0Sstevel@tonic-gate 2100*0Sstevel@tonic-gate /* Zero the address to clear the error */ 2101*0Sstevel@tonic-gate softcall(ecc_page_zero, (void *)aflt->flt_addr); 2102*0Sstevel@tonic-gate return; 2103*0Sstevel@tonic-gate } 2104*0Sstevel@tonic-gate } 2105*0Sstevel@tonic-gate 2106*0Sstevel@tonic-gate /* 2107*0Sstevel@tonic-gate * Log the UE and check for causes of this UE error that 2108*0Sstevel@tonic-gate * don't cause a trap (Copyback error). cpu_async_error() 2109*0Sstevel@tonic-gate * has already checked the i/o buses for us. 2110*0Sstevel@tonic-gate */ 2111*0Sstevel@tonic-gate log_ue_err(aflt, unum); 2112*0Sstevel@tonic-gate if (aflt->flt_in_memory) 2113*0Sstevel@tonic-gate cpu_check_allcpus(aflt); 2114*0Sstevel@tonic-gate break; 2115*0Sstevel@tonic-gate 2116*0Sstevel@tonic-gate case CPU_EDP_LDP_ERR: 2117*0Sstevel@tonic-gate if (aflt->flt_stat & P_AFSR_EDP) 2118*0Sstevel@tonic-gate cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, 2119*0Sstevel@tonic-gate NULL, " EDP event on"); 2120*0Sstevel@tonic-gate 2121*0Sstevel@tonic-gate if (aflt->flt_stat & P_AFSR_LDP) 2122*0Sstevel@tonic-gate cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, 2123*0Sstevel@tonic-gate NULL, " LDP event on"); 2124*0Sstevel@tonic-gate 2125*0Sstevel@tonic-gate /* Log ecache info if it exists */ 2126*0Sstevel@tonic-gate if (spf_flt->flt_ec_lcnt > 0) { 2127*0Sstevel@tonic-gate cpu_log_ecmem_info(spf_flt); 2128*0Sstevel@tonic-gate 2129*0Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, 2130*0Sstevel@tonic-gate NULL, " AFAR was derived from E$Tag"); 2131*0Sstevel@tonic-gate } else { 2132*0Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, 2133*0Sstevel@tonic-gate NULL, " No error found in ecache (No fault " 2134*0Sstevel@tonic-gate "PA available)"); 2135*0Sstevel@tonic-gate } 2136*0Sstevel@tonic-gate break; 2137*0Sstevel@tonic-gate 2138*0Sstevel@tonic-gate case CPU_WP_ERR: 2139*0Sstevel@tonic-gate /* 2140*0Sstevel@tonic-gate * If the memscrub thread hasn't yet read 2141*0Sstevel@tonic-gate * all of memory, as we requested in the 2142*0Sstevel@tonic-gate * trap handler, then give it a kick to 2143*0Sstevel@tonic-gate * make sure it does. 2144*0Sstevel@tonic-gate */ 2145*0Sstevel@tonic-gate if (!isus2i && !isus2e && read_all_memscrub) 2146*0Sstevel@tonic-gate memscrub_run(); 2147*0Sstevel@tonic-gate 2148*0Sstevel@tonic-gate cpu_aflt_log(CE_WARN, 1, spf_flt, WP_LFLAGS, NULL, 2149*0Sstevel@tonic-gate " WP event on"); 2150*0Sstevel@tonic-gate return; 2151*0Sstevel@tonic-gate 2152*0Sstevel@tonic-gate case CPU_BTO_BERR_ERR: 2153*0Sstevel@tonic-gate /* 2154*0Sstevel@tonic-gate * A bus timeout or error occurred in user mode, or in the 2155*0Sstevel@tonic-gate * kernel outside any protected code region. 2156*0Sstevel@tonic-gate */ 2157*0Sstevel@tonic-gate if (aflt->flt_stat & P_AFSR_BERR) { 2158*0Sstevel@tonic-gate cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2, 2159*0Sstevel@tonic-gate spf_flt, BERRTO_LFLAGS, NULL, 2160*0Sstevel@tonic-gate " Bus Error on System Bus in %s mode from", 2161*0Sstevel@tonic-gate aflt->flt_priv ?
"privileged" : "user"); 2162*0Sstevel@tonic-gate } 2163*0Sstevel@tonic-gate 2164*0Sstevel@tonic-gate if (aflt->flt_stat & P_AFSR_TO) { 2165*0Sstevel@tonic-gate cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2, 2166*0Sstevel@tonic-gate spf_flt, BERRTO_LFLAGS, NULL, 2167*0Sstevel@tonic-gate " Timeout on System Bus in %s mode from", 2168*0Sstevel@tonic-gate aflt->flt_priv ? "privileged" : "user"); 2169*0Sstevel@tonic-gate } 2170*0Sstevel@tonic-gate 2171*0Sstevel@tonic-gate return; 2172*0Sstevel@tonic-gate 2173*0Sstevel@tonic-gate case CPU_PANIC_CP_ERR: 2174*0Sstevel@tonic-gate /* 2175*0Sstevel@tonic-gate * Process the Copyback (CP) error info (if any) obtained from 2176*0Sstevel@tonic-gate * polling all the cpus in the panic flow. This case is only 2177*0Sstevel@tonic-gate * entered if we are panicking. 2178*0Sstevel@tonic-gate */ 2179*0Sstevel@tonic-gate ASSERT(panicstr != NULL); 2180*0Sstevel@tonic-gate ASSERT(aflt->flt_id == panic_aflt.flt_id); 2181*0Sstevel@tonic-gate 2182*0Sstevel@tonic-gate /* See which space - this info may not exist */ 2183*0Sstevel@tonic-gate if (panic_aflt.flt_status & ECC_D_TRAP) 2184*0Sstevel@tonic-gate space = "Data "; 2185*0Sstevel@tonic-gate else if (panic_aflt.flt_status & ECC_I_TRAP) 2186*0Sstevel@tonic-gate space = "Instruction "; 2187*0Sstevel@tonic-gate else 2188*0Sstevel@tonic-gate space = ""; 2189*0Sstevel@tonic-gate 2190*0Sstevel@tonic-gate cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL, 2191*0Sstevel@tonic-gate " AFAR was derived from UE report," 2192*0Sstevel@tonic-gate " CP event on CPU%d (caused %saccess error on %s%d)", 2193*0Sstevel@tonic-gate aflt->flt_inst, space, (panic_aflt.flt_status & ECC_IOBUS) ? 2194*0Sstevel@tonic-gate "IOBUS" : "CPU", panic_aflt.flt_bus_id); 2195*0Sstevel@tonic-gate 2196*0Sstevel@tonic-gate if (spf_flt->flt_ec_lcnt > 0) 2197*0Sstevel@tonic-gate cpu_log_ecmem_info(spf_flt); 2198*0Sstevel@tonic-gate else 2199*0Sstevel@tonic-gate cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST, 2200*0Sstevel@tonic-gate NULL, " No cache dump available"); 2201*0Sstevel@tonic-gate 2202*0Sstevel@tonic-gate return; 2203*0Sstevel@tonic-gate 2204*0Sstevel@tonic-gate case CPU_TRAPPING_CP_ERR: 2205*0Sstevel@tonic-gate /* 2206*0Sstevel@tonic-gate * For sabre only. This is a copyback ecache parity error due 2207*0Sstevel@tonic-gate * to a PCI DMA read. We should be panicking if we get here. 2208*0Sstevel@tonic-gate */ 2209*0Sstevel@tonic-gate ASSERT(panicstr != NULL); 2210*0Sstevel@tonic-gate cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL, 2211*0Sstevel@tonic-gate " AFAR was derived from UE report," 2212*0Sstevel@tonic-gate " CP event on CPU%d (caused Data access error " 2213*0Sstevel@tonic-gate "on PCIBus)", aflt->flt_inst); 2214*0Sstevel@tonic-gate return; 2215*0Sstevel@tonic-gate 2216*0Sstevel@tonic-gate /* 2217*0Sstevel@tonic-gate * We log the ecache lines of the following states, 2218*0Sstevel@tonic-gate * clean_bad_idle, clean_bad_busy, dirty_bad_idle and 2219*0Sstevel@tonic-gate * dirty_bad_busy if ecache_scrub_verbose is set and panic 2220*0Sstevel@tonic-gate * in addition to logging if ecache_scrub_panic is set. 
	case CPU_BADLINE_CI_ERR:
		ecache_scrub_logstr = "CBI";
		/* FALLTHRU */

	case CPU_BADLINE_CB_ERR:
		if (ecache_scrub_logstr == NULL)
			ecache_scrub_logstr = "CBB";
		/* FALLTHRU */

	case CPU_BADLINE_DI_ERR:
		if (ecache_scrub_logstr == NULL)
			ecache_scrub_logstr = "DBI";
		/* FALLTHRU */

	case CPU_BADLINE_DB_ERR:
		if (ecache_scrub_logstr == NULL)
			ecache_scrub_logstr = "DBB";

		cpu_aflt_log(CE_NOTE, 2, spf_flt,
		    (CPU_ERRID_FIRST | CPU_FLTCPU), NULL,
		    " %s event on", ecache_scrub_logstr);
		cpu_log_ecmem_info(spf_flt);

		return;

	case CPU_ORPHAN_CP_ERR:
		/*
		 * Orphan CPs: the CP bit is set, but no CPU reports a
		 * corresponding UE.
		 */
		if (read_all_memscrub)
			memscrub_run();

		cpu_aflt_log(CE_NOTE, 2, spf_flt, (CP_LFLAGS | CPU_FLTCPU),
		    NULL, " Orphan CP event on");

		/* Log ecache info if it exists */
		if (spf_flt->flt_ec_lcnt > 0)
			cpu_log_ecmem_info(spf_flt);
		else
			cpu_aflt_log(CE_NOTE, 2, spf_flt,
			    (CP_LFLAGS | CPU_FLTCPU), NULL,
			    " No error found in ecache (No fault "
			    "PA available)");
		return;

	case CPU_ECACHE_ADDR_PAR_ERR:
		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
		    " E$ Tag Address Parity error on");
		cpu_log_ecmem_info(spf_flt);
		return;

	case CPU_ECACHE_STATE_ERR:
		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
		    " E$ Tag State Parity error on");
		cpu_log_ecmem_info(spf_flt);
		return;

	case CPU_ECACHE_TAG_ERR:
		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
		    " E$ Tag scrub event on");
		cpu_log_ecmem_info(spf_flt);
		return;

	case CPU_ECACHE_ETP_ETS_ERR:
		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
		    " AFSR.ETP is set and AFSR.ETS is zero on");
		cpu_log_ecmem_info(spf_flt);
		return;

	case CPU_ADDITIONAL_ERR:
		cpu_aflt_log(CE_WARN, 1, spf_flt, CMN_LFLAGS & ~CPU_SPACE,
		    NULL,
		    " Additional errors detected during error processing on");
		return;

	default:
		cmn_err(CE_WARN, "cpu_async_log_err: fault %p has unknown "
		    "fault type %x", (void *)spf_flt, spf_flt->flt_type);
		return;
	}

	/* ... fall through from the UE, EDP, or LDP cases */

	if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
		if (!panicstr) {
			/*
			 * Retire the bad page that caused the error.
			 */
			page_t *pp = page_numtopp_nolock((pfn_t)
			    (aflt->flt_addr >> MMU_PAGESHIFT));

			if (pp != NULL) {
				page_settoxic(pp, PAGE_IS_FAULTY);
				(void) page_retire(pp, PAGE_IS_TOXIC);
			} else {
				uint64_t pa =
				    P2ALIGN(aflt->flt_addr, MMU_PAGESIZE);

				cpu_aflt_log(CE_CONT, 3, spf_flt,
				    CPU_ERRID_FIRST, NULL,
				    ": cannot schedule clearing of error on "
				    "page 0x%08x.%08x; page not in VM system",
				    (uint32_t)(pa >> 32), (uint32_t)pa);
			}
		} else {
			/*
			 * Clear UEs on panic so that we don't
			 * get haunted by them during panic or
			 * after reboot.
			 */
			clearphys(P2ALIGN(aflt->flt_addr, 64),
			    cpunodes[CPU->cpu_id].ecache_size,
			    cpunodes[CPU->cpu_id].ecache_linesize);

			(void) clear_errors(NULL, NULL);
		}
	}

	/*
	 * Log the final recovery message.
	 */
	if (!panicstr) {
		if (!aflt->flt_priv) {
			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
			    NULL, " Above Error is in User Mode"
			    "\n    and is fatal: "
			    "will SIGKILL process and notify contract");
		} else if (aflt->flt_prot == AFLT_PROT_COPY && aflt->flt_core) {
			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
			    NULL, " Above Error detected while dumping core;"
			    "\n    core file will be truncated");
		} else if (aflt->flt_prot == AFLT_PROT_COPY) {
			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
			    NULL, " Above Error is due to Kernel access"
			    "\n    to User space and is fatal: "
			    "will SIGKILL process and notify contract");
		} else if (aflt->flt_prot == AFLT_PROT_EC) {
			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, NULL,
			    " Above Error detected by protected Kernel code"
			    "\n    that will try to clear error from system");
		}
	}
}
"\n that will try to clear error from system"); 2363*0Sstevel@tonic-gate } 2364*0Sstevel@tonic-gate } 2365*0Sstevel@tonic-gate } 2366*0Sstevel@tonic-gate 2367*0Sstevel@tonic-gate 2368*0Sstevel@tonic-gate /* 2369*0Sstevel@tonic-gate * Check all cpus for non-trapping UE-causing errors 2370*0Sstevel@tonic-gate * In Ultra I/II, we look for copyback errors (CPs) 2371*0Sstevel@tonic-gate */ 2372*0Sstevel@tonic-gate void 2373*0Sstevel@tonic-gate cpu_check_allcpus(struct async_flt *aflt) 2374*0Sstevel@tonic-gate { 2375*0Sstevel@tonic-gate spitf_async_flt cp; 2376*0Sstevel@tonic-gate spitf_async_flt *spf_cpflt = &cp; 2377*0Sstevel@tonic-gate struct async_flt *cpflt = (struct async_flt *)&cp; 2378*0Sstevel@tonic-gate int pix; 2379*0Sstevel@tonic-gate 2380*0Sstevel@tonic-gate cpflt->flt_id = aflt->flt_id; 2381*0Sstevel@tonic-gate cpflt->flt_addr = aflt->flt_addr; 2382*0Sstevel@tonic-gate 2383*0Sstevel@tonic-gate for (pix = 0; pix < NCPU; pix++) { 2384*0Sstevel@tonic-gate if (CPU_XCALL_READY(pix)) { 2385*0Sstevel@tonic-gate xc_one(pix, (xcfunc_t *)get_cpu_status, 2386*0Sstevel@tonic-gate (uint64_t)cpflt, 0); 2387*0Sstevel@tonic-gate 2388*0Sstevel@tonic-gate if (cpflt->flt_stat & P_AFSR_CP) { 2389*0Sstevel@tonic-gate char *space; 2390*0Sstevel@tonic-gate 2391*0Sstevel@tonic-gate /* See which space - this info may not exist */ 2392*0Sstevel@tonic-gate if (aflt->flt_status & ECC_D_TRAP) 2393*0Sstevel@tonic-gate space = "Data "; 2394*0Sstevel@tonic-gate else if (aflt->flt_status & ECC_I_TRAP) 2395*0Sstevel@tonic-gate space = "Instruction "; 2396*0Sstevel@tonic-gate else 2397*0Sstevel@tonic-gate space = ""; 2398*0Sstevel@tonic-gate 2399*0Sstevel@tonic-gate cpu_aflt_log(CE_WARN, 1, spf_cpflt, CP_LFLAGS, 2400*0Sstevel@tonic-gate NULL, " AFAR was derived from UE report," 2401*0Sstevel@tonic-gate " CP event on CPU%d (caused %saccess " 2402*0Sstevel@tonic-gate "error on %s%d)", pix, space, 2403*0Sstevel@tonic-gate (aflt->flt_status & ECC_IOBUS) ? 2404*0Sstevel@tonic-gate "IOBUS" : "CPU", aflt->flt_bus_id); 2405*0Sstevel@tonic-gate 2406*0Sstevel@tonic-gate if (spf_cpflt->flt_ec_lcnt > 0) 2407*0Sstevel@tonic-gate cpu_log_ecmem_info(spf_cpflt); 2408*0Sstevel@tonic-gate else 2409*0Sstevel@tonic-gate cpu_aflt_log(CE_WARN, 2, spf_cpflt, 2410*0Sstevel@tonic-gate CPU_ERRID_FIRST, NULL, 2411*0Sstevel@tonic-gate " No cache dump available"); 2412*0Sstevel@tonic-gate } 2413*0Sstevel@tonic-gate } 2414*0Sstevel@tonic-gate } 2415*0Sstevel@tonic-gate } 2416*0Sstevel@tonic-gate 2417*0Sstevel@tonic-gate #ifdef DEBUG 2418*0Sstevel@tonic-gate int test_mp_cp = 0; 2419*0Sstevel@tonic-gate #endif 2420*0Sstevel@tonic-gate 2421*0Sstevel@tonic-gate /* 2422*0Sstevel@tonic-gate * Cross-call callback routine to tell a CPU to read its own %afsr to check 2423*0Sstevel@tonic-gate * for copyback errors and capture relevant information. 

/*
 * Cross-call callback routine that tells a CPU to read its own %afsr to
 * check for copyback errors and capture the relevant information.
 */
static uint_t
get_cpu_status(uint64_t arg)
{
	struct async_flt *aflt = (struct async_flt *)arg;
	spitf_async_flt *spf_flt = (spitf_async_flt *)arg;
	uint64_t afsr;
	uint32_t ec_idx;
	uint64_t sdbh, sdbl;
	int i;
	uint32_t ec_set_size;
	uchar_t valid;
	ec_data_t ec_data[8];
	uint64_t ec_tag, flt_addr_tag, oafsr;
	uint64_t *acc_afsr = NULL;

	get_asyncflt(&afsr);
	if (CPU_PRIVATE(CPU) != NULL) {
		acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
		afsr |= *acc_afsr;
		*acc_afsr = 0;
	}

#ifdef DEBUG
	if (test_mp_cp)
		afsr |= P_AFSR_CP;
#endif
	aflt->flt_stat = afsr;

	if (afsr & P_AFSR_CP) {
		/*
		 * Capture the UDBs
		 */
		get_udb_errors(&sdbh, &sdbl);
		spf_flt->flt_sdbh = (ushort_t)(sdbh & 0x3FF);
		spf_flt->flt_sdbl = (ushort_t)(sdbl & 0x3FF);

		/*
		 * Clear the CP bit before capturing ecache data
		 * and AFSR info.
		 */
		set_asyncflt(P_AFSR_CP);

		/*
		 * See if we can capture the ecache line for the
		 * fault PA.
		 *
		 * Return a valid matching ecache line, if any.
		 * Otherwise, return the first matching ecache
		 * line marked invalid.
		 */
		flt_addr_tag = aflt->flt_addr >> cpu_ec_tag_shift;
		ec_set_size = cpunodes[CPU->cpu_id].ecache_size /
		    ecache_associativity;
		spf_flt->flt_ec_lcnt = 0;

		for (i = 0, ec_idx = (aflt->flt_addr % ec_set_size);
		    i < ecache_associativity; i++, ec_idx += ec_set_size) {
			get_ecache_dtag(P2ALIGN(ec_idx, 64),
			    (uint64_t *)&ec_data[0], &ec_tag, &oafsr,
			    acc_afsr);

			if ((ec_tag & cpu_ec_tag_mask) != flt_addr_tag)
				continue;

			valid = cpu_ec_state_valid &
			    (uchar_t)((ec_tag & cpu_ec_state_mask) >>
			    cpu_ec_state_shift);

			if (valid || spf_flt->flt_ec_lcnt == 0) {
				spf_flt->flt_ec_tag = ec_tag;
				bcopy(&ec_data, &spf_flt->flt_ec_data,
				    sizeof (ec_data));
				spf_flt->flt_ec_lcnt = 1;

				if (valid)
					break;
			}
		}
	}
	return (0);
}
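
/*
 * Illustrative sketch (not part of the original file, never compiled): how
 * the set-index and tag arithmetic above walks one line per way.  All the
 * numbers are hypothetical -- a 1MB direct-mapped e$ (associativity 1),
 * 64-byte lines, and a tag shift of 20 -- chosen only to make the math
 * concrete; the real values come from cpunodes[] and the cpu_ec_* globals.
 */
#ifdef NOTDEF
static void
ec_lookup_example(void)
{
	uint64_t flt_addr = 0x123456780ULL;	/* hypothetical fault PA */
	uint32_t ec_size = 0x100000;		/* assume a 1MB e$ */
	uint32_t assoc = 1;			/* assume direct-mapped */
	uint32_t set_size = ec_size / assoc;	/* bytes per way */
	uint32_t idx = flt_addr % set_size;	/* byte index within a way */
	uint64_t tag = flt_addr >> 20;		/* assume tag shift of 20 */

	/*
	 * The loop in get_cpu_status() starts at 'idx' and advances by
	 * 'set_size' once per way; with assoc == 1 it inspects exactly one
	 * line, whose 64-byte-aligned index is P2ALIGN(idx, 64).
	 */
	cmn_err(CE_CONT, "line index 0x%x, expected tag 0x%llx\n",
	    (uint32_t)P2ALIGN(idx, 64), (u_longlong_t)tag);
}
#endif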

/*
 * CPU-module callback for the non-panicking CPUs.  This routine is invoked
 * from panic_idle() as part of the other CPUs stopping themselves when a
 * panic occurs.  We need to be VERY careful what we do here, since panicstr
 * is NOT set yet and we cannot blow through locks.  If panic_aflt is set
 * (panic_aflt.flt_id is non-zero), we need to read our %afsr to look for
 * CP error information.
 */
void
cpu_async_panic_callb(void)
{
	spitf_async_flt cp;
	struct async_flt *aflt = (struct async_flt *)&cp;
	uint64_t *scrub_afsr;

	if (panic_aflt.flt_id != 0) {
		aflt->flt_addr = panic_aflt.flt_addr;
		(void) get_cpu_status((uint64_t)aflt);

		if (CPU_PRIVATE(CPU) != NULL) {
			scrub_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
			if (*scrub_afsr & P_AFSR_CP) {
				aflt->flt_stat |= *scrub_afsr;
				*scrub_afsr = 0;
			}
		}
		if (aflt->flt_stat & P_AFSR_CP) {
			aflt->flt_id = panic_aflt.flt_id;
			aflt->flt_panic = 1;
			aflt->flt_inst = CPU->cpu_id;
			aflt->flt_class = CPU_FAULT;
			cp.flt_type = CPU_PANIC_CP_ERR;
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
			    (void *)&cp, sizeof (cp), ue_queue,
			    aflt->flt_panic);
		}
	}
}

/*
 * Turn off all cpu error detection; normally only used for panics.
 */
void
cpu_disable_errors(void)
{
	xt_all(set_error_enable_tl1, EER_DISABLE, EER_SET_ABSOLUTE);
}

/*
 * Turn cpu error detection back on.
 */
void
cpu_enable_errors(void)
{
	xt_all(set_error_enable_tl1, EER_ENABLE, EER_SET_ABSOLUTE);
}

static void
cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
{
	uint64_t aligned_addr = P2ALIGN(ecc->flt_addr, 8);
	int i, loop = 1;
	ushort_t ecc_0;
	uint64_t paddr;
	uint64_t data;

	if (verbose)
		loop = 8;
	for (i = 0; i < loop; i++) {
		paddr = aligned_addr + (i * 8);
		data = lddphys(paddr);
		if (verbose) {
			if (ce_err) {
				ecc_0 = ecc_gen((uint32_t)(data >> 32),
				    (uint32_t)data);
				cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS,
				    NULL, " Paddr 0x%" PRIx64 ", "
				    "Data 0x%08x.%08x, ECC 0x%x", paddr,
				    (uint32_t)(data >> 32), (uint32_t)data,
				    ecc_0);
			} else {
				cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS,
				    NULL, " Paddr 0x%" PRIx64 ", "
				    "Data 0x%08x.%08x", paddr,
				    (uint32_t)(data >> 32), (uint32_t)data);
			}
		}
	}
}

static struct {	/* sec-ded-s4ed ecc code */
	uint_t hi, lo;
} ecc_code[8] = {
	{ 0xee55de23U, 0x16161161U },
	{ 0x55eede93U, 0x61612212U },
	{ 0xbb557b8cU, 0x49494494U },
	{ 0x55bb7b6cU, 0x94948848U },
	{ 0x16161161U, 0xee55de23U },
	{ 0x61612212U, 0x55eede93U },
	{ 0x49494494U, 0xbb557b8cU },
	{ 0x94948848U, 0x55bb7b6cU }
};

static ushort_t
ecc_gen(uint_t high_bytes, uint_t low_bytes)
{
	int i, j;
	uchar_t checker, bit_mask;
	struct {
		uint_t hi, lo;
	} hex_data, masked_data[8];

	hex_data.hi = high_bytes;
	hex_data.lo = low_bytes;

	/* mask out bits according to sec-ded-s4ed ecc code */
	for (i = 0; i < 8; i++) {
		masked_data[i].hi = hex_data.hi & ecc_code[i].hi;
		masked_data[i].lo = hex_data.lo & ecc_code[i].lo;
	}

	/*
	 * xor all bits in masked_data[i] to get bit_i of checker,
	 * where i = 0 to 7
	 */
	checker = 0;
	for (i = 0; i < 8; i++) {
		bit_mask = 1 << i;
		for (j = 0; j < 32; j++) {
			if (masked_data[i].lo & 1)
				checker ^= bit_mask;
			if (masked_data[i].hi & 1)
				checker ^= bit_mask;
			masked_data[i].hi >>= 1;
			masked_data[i].lo >>= 1;
		}
	}
	return (checker);
}
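
/*
 * Illustrative sketch (not part of the original file, never compiled): each
 * check bit produced by ecc_gen() is the parity of the data bits selected
 * by one row of ecc_code[], so flipping any single data bit changes the
 * check byte.  The diff below mirrors how a recomputed check byte can be
 * compared against a stored one to get a syndrome; the data values are
 * made up for the example.
 */
#ifdef NOTDEF
static void
ecc_gen_example(void)
{
	uint64_t good = 0x0123456789abcdefULL;
	uint64_t bad = good ^ (1ULL << 17);	/* inject a single-bit error */
	ushort_t ecc_good, ecc_bad, synd;

	ecc_good = ecc_gen((uint_t)(good >> 32), (uint_t)good);
	ecc_bad = ecc_gen((uint_t)(bad >> 32), (uint_t)bad);

	/* a nonzero syndrome pinpoints the flipped bit */
	synd = ecc_good ^ ecc_bad;
	cmn_err(CE_CONT, "syndrome 0x%x\n", synd);
}
#endif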

/*
 * Flush the entire ecache using a displacement flush: read through a
 * physical address range (twice the E$ size here) large enough to displace
 * every line.
 */
void
cpu_flush_ecache(void)
{
	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
	    cpunodes[CPU->cpu_id].ecache_linesize);
}
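
/*
 * Illustrative sketch (not part of the original file, never compiled): the
 * idea behind a displacement flush, in plain C.  Touching one byte per line
 * across a flush region that aliases every cache index forces the old
 * contents out.  The real work is done in assembly (flush_ecache() in
 * spitfire_asm.s); the buffer and sizes here are hypothetical.
 */
#ifdef NOTDEF
static void
displacement_flush_example(volatile char *flush_buf, size_t ec_size,
    size_t linesize)
{
	size_t off;
	volatile char sink;

	/* read one byte per line over twice the E$ size */
	for (off = 0; off < ec_size * 2; off += linesize)
		sink = flush_buf[off];
}
#endif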

/*
 * Read and display the data in the cache line where the
 * original ce error occurred.
 * This routine is mainly used for debugging new hardware.
 */
void
read_ecc_data(struct async_flt *ecc, short verbose, short ce_err)
{
	kpreempt_disable();
	/* disable ECC error traps */
	set_error_enable(EER_ECC_DISABLE);

	/*
	 * flush the ecache
	 * read the data
	 * check to see if an ECC error occurred
	 */
	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
	    cpunodes[CPU->cpu_id].ecache_linesize);
	set_lsu(get_lsu() | cache_boot_state);
	cpu_read_paddr(ecc, verbose, ce_err);
	(void) check_ecc(ecc);

	/* enable ECC error traps */
	set_error_enable(EER_ENABLE);
	kpreempt_enable();
}

/*
 * Check the AFSR bits for UE/CE persistence.
 * If UE or CE errors are detected, the routine clears all the AFSR sticky
 * bits (except CP for spitfire/blackbird) and the UDBs.
 * If ce_debug or ue_debug is set, log any ue/ce errors detected.
 */
static int
check_ecc(struct async_flt *ecc)
{
	uint64_t t_afsr;
	uint64_t t_afar;
	uint64_t udbh;
	uint64_t udbl;
	ushort_t udb;
	int persistent = 0;

	/*
	 * Capture the AFSR, AFAR and UDBs info
	 */
	get_asyncflt(&t_afsr);
	get_asyncaddr(&t_afar);
	t_afar &= SABRE_AFAR_PA;
	get_udb_errors(&udbh, &udbl);

	if ((t_afsr & P_AFSR_UE) || (t_afsr & P_AFSR_CE)) {
		/*
		 * Clear the errors
		 */
		clr_datapath();

		if (isus2i || isus2e)
			set_asyncflt(t_afsr);
		else
			set_asyncflt(t_afsr & ~P_AFSR_CP);

		/*
		 * determine whether to check UDBH or UDBL for persistence
		 */
		if (ecc->flt_synd & UDBL_REG) {
			udb = (ushort_t)udbl;
			t_afar |= 0x8;
		} else {
			udb = (ushort_t)udbh;
		}

		if (ce_debug || ue_debug) {
			spitf_async_flt spf_flt;	/* for logging */
			struct async_flt *aflt =
			    (struct async_flt *)&spf_flt;

			/* Package the info nicely in the spf_flt struct */
			bzero(&spf_flt, sizeof (spitf_async_flt));
			aflt->flt_stat = t_afsr;
			aflt->flt_addr = t_afar;
			spf_flt.flt_sdbh = (ushort_t)(udbh & 0x3FF);
			spf_flt.flt_sdbl = (ushort_t)(udbl & 0x3FF);

			cpu_aflt_log(CE_CONT, 0, &spf_flt, (CPU_AFSR |
			    CPU_AFAR | CPU_UDBH | CPU_UDBL), NULL,
			    " check_ecc: Dumping captured error states ...");
		}

		/*
		 * If the fault addresses don't match, the error is not
		 * persistent.
		 */
		if (t_afar != ecc->flt_addr) {
			return (persistent);
		}

		/*
		 * Check for UE persistence.  Since all DIMMs in the bank
		 * are identified for a UE, there's no reason to check the
		 * syndrome.
		 */
		if ((ecc->flt_stat & P_AFSR_UE) && (t_afsr & P_AFSR_UE)) {
			persistent = 1;
		}

		/*
		 * check for CE persistence
		 */
		if ((ecc->flt_stat & P_AFSR_CE) && (t_afsr & P_AFSR_CE)) {
			if ((udb & P_DER_E_SYND) ==
			    (ecc->flt_synd & P_DER_E_SYND)) {
				persistent = 1;
			}
		}
	}
	return (persistent);
}
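
/*
 * Illustrative sketch (not part of the original file, never compiled): how
 * a caller might use check_ecc()'s return value.  The original fault's
 * AFSR/AFAR/syndrome are assumed to be already filled in; the
 * classification wording is hypothetical.
 */
#ifdef NOTDEF
static void
persistence_example(struct async_flt *orig_flt)
{
	/*
	 * check_ecc() re-reads the AFSR/AFAR: the error is persistent only
	 * if the same kind of error (UE, or CE with a matching syndrome)
	 * is still latched at the same fault address.
	 */
	if (check_ecc(orig_flt))
		cmn_err(CE_NOTE, "error at 0x%lx is persistent (likely "
		    "a stuck bit)", orig_flt->flt_addr);
	else
		cmn_err(CE_NOTE, "error at 0x%lx did not recur (likely "
		    "transient)", orig_flt->flt_addr);
}
#endif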

#ifdef HUMMINGBIRD
#define	HB_FULL_DIV		1
#define	HB_HALF_DIV		2
#define	HB_LOWEST_DIV		8
#define	HB_ECLK_INVALID		0xdeadbad
static uint64_t hb_eclk[HB_LOWEST_DIV + 1] = {
	HB_ECLK_INVALID, HB_ECLK_1, HB_ECLK_2, HB_ECLK_INVALID,
	HB_ECLK_4, HB_ECLK_INVALID, HB_ECLK_6, HB_ECLK_INVALID,
	HB_ECLK_8 };

#define	HB_SLOW_DOWN		0
#define	HB_SPEED_UP		1

#define	SET_ESTAR_MODE(mode)					\
	stdphysio(HB_ESTAR_MODE, (mode));			\
	/*							\
	 * PLL logic requires a minimum of 16 clock		\
	 * cycles to lock to the new clock speed.		\
	 * Wait 1 usec to satisfy this requirement.		\
	 */							\
	drv_usecwait(1);

#define	CHANGE_REFRESH_COUNT(direction, cur_div, new_div)	\
{								\
	volatile uint64_t data;					\
	uint64_t count, new_count;				\
	clock_t delay;						\
	data = lddphysio(HB_MEM_CNTRL0);			\
	count = (data & HB_REFRESH_COUNT_MASK) >>		\
	    HB_REFRESH_COUNT_SHIFT;				\
	new_count = (HB_REFRESH_INTERVAL *			\
	    cpunodes[CPU->cpu_id].clock_freq) /			\
	    (HB_REFRESH_CLOCKS_PER_COUNT * (new_div) * NANOSEC);\
	data = (data & ~HB_REFRESH_COUNT_MASK) |		\
	    (new_count << HB_REFRESH_COUNT_SHIFT);		\
	stdphysio(HB_MEM_CNTRL0, data);				\
	data = lddphysio(HB_MEM_CNTRL0);			\
	/*							\
	 * If we are slowing down the cpu and Memory		\
	 * Self Refresh is not enabled, we must wait for	\
	 * the old refresh count to count down and for the	\
	 * new refresh count to go into effect (let the new	\
	 * value count down once).				\
	 */							\
	if ((direction) == HB_SLOW_DOWN &&			\
	    (data & HB_SELF_REFRESH_MASK) == 0) {		\
		/*						\
		 * Each count takes 64 cpu clock cycles		\
		 * to decrement.  Wait for the current refresh	\
		 * count plus the new refresh count at the	\
		 * current cpu speed to count down to zero.	\
		 * Round up the delay time.			\
		 */						\
		delay = ((HB_REFRESH_CLOCKS_PER_COUNT *		\
		    (count + new_count) * MICROSEC * (cur_div)) /\
		    cpunodes[CPU->cpu_id].clock_freq) + 1;	\
		drv_usecwait(delay);				\
	}							\
}

#define	SET_SELF_REFRESH(bit)					\
{								\
	volatile uint64_t data;					\
	data = lddphysio(HB_MEM_CNTRL0);			\
	data = (data & ~HB_SELF_REFRESH_MASK) |			\
	    ((bit) << HB_SELF_REFRESH_SHIFT);			\
	stdphysio(HB_MEM_CNTRL0, data);				\
	data = lddphysio(HB_MEM_CNTRL0);			\
}
#endif	/* HUMMINGBIRD */
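
/*
 * Illustrative sketch (not part of the original file, never compiled): the
 * refresh-count arithmetic from CHANGE_REFRESH_COUNT() with made-up
 * numbers.  Assuming a hypothetical 440MHz clock, a 7800ns refresh
 * interval and 64 clocks per count: at full speed (divisor 1),
 * 7800ns * 440MHz / (64 * 1) is about 53 counts; at divisor 2 the
 * effective clock halves, giving about 26 counts.
 */
#ifdef NOTDEF
static uint64_t
refresh_count_example(uint64_t clock_freq_hz, uint64_t new_div)
{
	/* hypothetical stand-ins for the HB_* constants */
	const uint64_t refresh_interval_ns = 7800;
	const uint64_t clocks_per_count = 64;

	return ((refresh_interval_ns * clock_freq_hz) /
	    (clocks_per_count * new_div * NANOSEC));
}
#endif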

/*ARGSUSED*/
void
cpu_change_speed(uint64_t new_divisor, uint64_t arg2)
{
#ifdef HUMMINGBIRD
	uint64_t cur_mask, cur_divisor = 0;
	volatile uint64_t reg;
	int index;

	if ((new_divisor < HB_FULL_DIV || new_divisor > HB_LOWEST_DIV) ||
	    (hb_eclk[new_divisor] == HB_ECLK_INVALID)) {
		cmn_err(CE_WARN, "cpu_change_speed: bad divisor 0x%lx",
		    new_divisor);
		return;
	}

	reg = lddphysio(HB_ESTAR_MODE);
	cur_mask = reg & HB_ECLK_MASK;
	for (index = HB_FULL_DIV; index <= HB_LOWEST_DIV; index++) {
		if (hb_eclk[index] == cur_mask) {
			cur_divisor = index;
			break;
		}
	}

	if (cur_divisor == 0)
		cmn_err(CE_PANIC, "cpu_change_speed: current divisor "
		    "can't be determined!");

	/*
	 * If we are already at the requested divisor speed, just
	 * return.
	 */
	if (cur_divisor == new_divisor)
		return;

	if (cur_divisor == HB_FULL_DIV && new_divisor == HB_HALF_DIV) {
		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE);

	} else if (cur_divisor == HB_HALF_DIV && new_divisor == HB_FULL_DIV) {
		SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor);

	} else if (cur_divisor == HB_FULL_DIV && new_divisor > HB_HALF_DIV) {
		/*
		 * Transition to 1/2 speed first, then to
		 * the lower speed.
		 */
		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, HB_HALF_DIV);
		SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]);
		SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE);

		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, HB_HALF_DIV, new_divisor);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);

	} else if (cur_divisor > HB_HALF_DIV && new_divisor == HB_FULL_DIV) {
		/*
		 * Transition to 1/2 speed first, then to
		 * full speed.
		 */
		SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, HB_HALF_DIV);

		SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, HB_HALF_DIV, new_divisor);

	} else if (cur_divisor < new_divisor) {
		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);

	} else if (cur_divisor > new_divisor) {
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor);
	}
	CPU->cpu_m.divisor = (uchar_t)new_divisor;
#endif	/* HUMMINGBIRD */
}

/*
 * Clear the AFSR sticky bits and the UDBs.  For Sabre/Spitfire/Blackbird,
 * we clear all the sticky bits.  If a non-null pointer to an async fault
 * structure is passed in, the captured error state (AFSR, AFAR, UDBs) info
 * will be returned in the structure.  If a non-null pointer to a uint64_t
 * is passed in, it will be updated if the CP bit is set in the AFSR.  The
 * afsr is returned.
 */
static uint64_t
clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr)
{
	struct async_flt *aflt = (struct async_flt *)spf_flt;
	uint64_t afsr;
	uint64_t udbh, udbl;

	get_asyncflt(&afsr);

	if ((acc_afsr != NULL) && (afsr & P_AFSR_CP))
		*acc_afsr |= afsr;

	if (spf_flt != NULL) {
		aflt->flt_stat = afsr;
		get_asyncaddr(&aflt->flt_addr);
		aflt->flt_addr &= SABRE_AFAR_PA;

		get_udb_errors(&udbh, &udbl);
		spf_flt->flt_sdbh = (ushort_t)(udbh & 0x3FF);
		spf_flt->flt_sdbl = (ushort_t)(udbl & 0x3FF);
	}

	set_asyncflt(afsr);		/* clear afsr */
	clr_datapath();			/* clear udbs */
	return (afsr);
}

/*
 * Scan the ecache to look for bad lines.  If any are found, the afsr, afar,
 * e$ data and tag of the first bad line are returned.  We also return the
 * old afsr (before clearing the sticky bits).  The linecnt data is updated
 * to indicate the number of bad lines detected.
 */
static void
scan_ecache(uint64_t *t_afar, ec_data_t *ecache_data,
    uint64_t *ecache_tag, int *linecnt, uint64_t *t_afsr)
{
	ec_data_t t_ecdata[8];
	uint64_t t_etag, oafsr;
	uint64_t pa = AFLT_INV_ADDR;
	uint32_t i, j, ecache_sz;
	uint64_t acc_afsr = 0;
	uint64_t *cpu_afsr = NULL;

	if (CPU_PRIVATE(CPU) != NULL)
		cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);

	*linecnt = 0;
	ecache_sz = cpunodes[CPU->cpu_id].ecache_size;

	for (i = 0; i < ecache_sz; i += 64) {
		get_ecache_dtag(i, (uint64_t *)&t_ecdata[0], &t_etag, &oafsr,
		    cpu_afsr);
		acc_afsr |= oafsr;

		/*
		 * Scan through the whole 64-byte line in 8 8-byte chunks
		 * looking for the first occurrence of an EDP error.  The AFSR
		 * info is captured for each 8-byte chunk.  Note that for
		 * Spitfire/Blackbird, the AFSR.PSYND is captured by h/w in
		 * 16-byte chunk granularity (i.e. the AFSR will be the same
		 * for the high and low 8-byte words within the 16-byte chunk).
		 * For Sabre/Hummingbird, the AFSR.PSYND is captured in 8-byte
		 * granularity and only PSYND bits [7:0] are used.
		 */
		for (j = 0; j < 8; j++) {
			ec_data_t *ecdptr = &t_ecdata[j];

			if (ecdptr->ec_afsr & P_AFSR_EDP) {
				uint64_t errpa;
				ushort_t psynd;
				uint32_t ec_set_size = ecache_sz /
				    ecache_associativity;

				/*
				 * For Spitfire/Blackbird, we need to look at
				 * the PSYND to make sure that this 8-byte
				 * chunk is the right one.  PSYND bits [15:8]
				 * belong to the upper 8-byte (even) chunk.
				 * Bits [7:0] belong to the lower 8-byte
				 * chunk (odd).
				 */
				psynd = ecdptr->ec_afsr & P_AFSR_P_SYND;
				if (!isus2i && !isus2e) {
					if (j & 0x1)
						psynd = psynd & 0xFF;
					else
						psynd = psynd >> 8;

					if (!psynd)
						continue; /* wrong chunk */
				}

				/* Construct the PA */
				errpa = ((t_etag & cpu_ec_tag_mask) <<
				    cpu_ec_tag_shift) | ((i | (j << 3)) %
				    ec_set_size);

				/* clean up the cache line */
				flushecacheline(P2ALIGN(errpa, 64),
				    cpunodes[CPU->cpu_id].ecache_size);

				oafsr = clear_errors(NULL, cpu_afsr);
				acc_afsr |= oafsr;

				(*linecnt)++;

				/*
				 * Capture the PA for the first bad line found.
				 * Return the ecache dump and tag info.
				 */
				if (pa == AFLT_INV_ADDR) {
					int k;

					pa = errpa;
					for (k = 0; k < 8; k++)
						ecache_data[k] = t_ecdata[k];
					*ecache_tag = t_etag;
				}
				break;
			}
		}
	}
	*t_afar = pa;
	*t_afsr = acc_afsr;
}
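
/*
 * Illustrative sketch (not part of the original file, never compiled): the
 * PA reconstruction above, with made-up numbers.  Assume a tag shift of 20
 * and a 1MB way (ec_set_size 0x100000).  For line offset i = 0x40040 and
 * chunk j = 2, the byte index is (i | (j << 3)) % 0x100000 = 0x40050, and
 * the PA is (tag << 20) | 0x40050.
 */
#ifdef NOTDEF
static uint64_t
errpa_example(uint64_t tag, uint32_t i, uint32_t j)
{
	const uint32_t ec_set_size = 0x100000;	/* hypothetical 1MB way */
	const int tag_shift = 20;		/* hypothetical tag shift */

	return ((tag << tag_shift) | ((i | (j << 3)) % ec_set_size));
}
#endif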

static void
cpu_log_ecmem_info(spitf_async_flt *spf_flt)
{
	struct async_flt *aflt = (struct async_flt *)spf_flt;
	uint64_t ecache_tag = spf_flt->flt_ec_tag;
	char linestr[30];
	char *state_str;
	int i;

	/*
	 * Check the ecache tag to make sure it
	 * is valid.  If invalid, a memory dump was
	 * captured instead of an ecache dump.
	 */
	if (spf_flt->flt_ec_tag != AFLT_INV_ADDR) {
		uchar_t eparity = (uchar_t)
		    ((ecache_tag & cpu_ec_par_mask) >> cpu_ec_par_shift);

		uchar_t estate = (uchar_t)
		    ((ecache_tag & cpu_ec_state_mask) >> cpu_ec_state_shift);

		if (estate == cpu_ec_state_shr)
			state_str = "Shared";
		else if (estate == cpu_ec_state_exl)
			state_str = "Exclusive";
		else if (estate == cpu_ec_state_own)
			state_str = "Owner";
		else if (estate == cpu_ec_state_mod)
			state_str = "Modified";
		else
			state_str = "Invalid";

		if (spf_flt->flt_ec_lcnt > 1) {
			(void) snprintf(linestr, sizeof (linestr),
			    "Badlines found=%d", spf_flt->flt_ec_lcnt);
		} else {
			linestr[0] = '\0';
		}

		cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL,
		    " PA=0x%08x.%08x\n    E$tag 0x%08x.%08x E$State: %s "
		    "E$parity 0x%02x %s", (uint32_t)(aflt->flt_addr >> 32),
		    (uint32_t)aflt->flt_addr, (uint32_t)(ecache_tag >> 32),
		    (uint32_t)ecache_tag, state_str,
		    (uint32_t)eparity, linestr);
	} else {
		cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL,
		    " E$tag != PA from AFAR; E$line was victimized"
		    "\n    dumping memory from PA 0x%08x.%08x instead",
		    (uint32_t)(P2ALIGN(aflt->flt_addr, 64) >> 32),
		    (uint32_t)P2ALIGN(aflt->flt_addr, 64));
	}

	/*
	 * Dump out all 8 of the 8-byte ecache data captured.
	 * For each 8-byte datum captured, we check the
	 * captured afsr's parity syndrome to find out
	 * which 8-byte chunk is bad.  For a memory dump, the
	 * AFSR values were initialized to 0.
	 */
	for (i = 0; i < 8; i++) {
		ec_data_t *ecdptr;
		uint_t offset;
		ushort_t psynd;
		ushort_t bad;
		uint64_t edp;

		offset = i << 3;	/* multiply by 8 */
		ecdptr = &spf_flt->flt_ec_data[i];
		psynd = ecdptr->ec_afsr & P_AFSR_P_SYND;
		edp = ecdptr->ec_afsr & P_AFSR_EDP;

		/*
		 * For Sabre/Hummingbird, the parity syndrome is captured
		 * only in [7:0] of AFSR.PSYND for each 8-byte chunk.
		 * For Spitfire/Blackbird, AFSR.PSYND is captured
		 * in 16-byte granularity: [15:8] represent
		 * the upper 8 bytes and [7:0] the lower 8 bytes.
		 */
		if (isus2i || isus2e || (i & 0x1))
			bad = (psynd & 0xFF);		/* check bits [7:0] */
		else
			bad = (psynd & 0xFF00);		/* check bits [15:8] */

		if (bad && edp) {
			cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL,
			    " E$Data (0x%02x): 0x%08x.%08x "
			    "*Bad* PSYND=0x%04x", offset,
			    (uint32_t)(ecdptr->ec_d8 >> 32),
			    (uint32_t)ecdptr->ec_d8, psynd);
		} else {
			cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL,
			    " E$Data (0x%02x): 0x%08x.%08x", offset,
			    (uint32_t)(ecdptr->ec_d8 >> 32),
			    (uint32_t)ecdptr->ec_d8);
		}
	}
}
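
/*
 * Illustrative sketch (not part of the original file, never compiled): the
 * PSYND chunk selection above, as a standalone helper.  On
 * Spitfire/Blackbird the syndrome covers a 16-byte pair, so even chunks
 * use bits [15:8] and odd chunks bits [7:0]; on Sabre/Hummingbird every
 * chunk uses [7:0].  The boolean argument stands in for the isus2i/isus2e
 * globals.
 */
#ifdef NOTDEF
static ushort_t
psynd_for_chunk_example(ushort_t psynd, int chunk, int is_sabre_or_hb)
{
	if (is_sabre_or_hb || (chunk & 0x1))
		return (psynd & 0xFF);		/* lower 8-byte word */
	return (psynd & 0xFF00);		/* upper 8-byte word */
}
#endif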

/*
 * Common logging function for all cpu async errors.  This function allows
 * the caller to generate a single cmn_err() call that logs the appropriate
 * items from the fault structure, and implements our rules for AFT logging
 * levels.
 *
 *	ce_code: cmn_err() code (e.g. CE_PANIC, CE_WARN, CE_CONT)
 *	tagnum: 0, 1, 2, .. generate the [AFT#] tag
 *	spflt: pointer to spitfire async fault structure
 *	logflags: bitflags indicating what to output
 *	endstr: an end string to appear at the end of this log
 *	fmt: a format string to appear at the beginning of the log
 *
 * The logflags allow the construction of predetermined output from the
 * spflt structure.  The individual data items always appear in a consistent
 * order.  Note that either or both of the spflt structure pointer and
 * logflags may be NULL or zero respectively, indicating that the
 * predetermined output substrings are not requested in this log.  The
 * output looks like this:
 *
 *	[AFT#] <CPU_ERRID_FIRST><fmt string><CPU_FLTCPU>
 *	<CPU_SPACE><CPU_ERRID>
 *	newline+4spaces<CPU_AFSR><CPU_AFAR>
 *	newline+4spaces<CPU_AF_PSYND><CPU_AF_ETS><CPU_FAULTPC>
 *	newline+4spaces<CPU_UDBH><CPU_UDBL>
 *	newline+4spaces<CPU_SYND>
 *	newline+4spaces<endstr>
 *
 * Note that <endstr> may not start on a newline if we are logging
 * <CPU_PSYND>; it is assumed that <endstr> will be the unum string in this
 * case.  The size of our intermediate formatting buf[] is based on the
 * worst case of all flags being enabled.  We pass the caller's varargs
 * directly to vcmn_err() for formatting so we don't need additional stack
 * space to format them here.
 */
/*PRINTFLIKE6*/
static void
cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt, uint_t logflags,
    const char *endstr, const char *fmt, ...)
{
	struct async_flt *aflt = (struct async_flt *)spflt;
	char buf[400], *p, *q;	/* see comments about buf[] size above */
	va_list ap;
	int console_log_flag;

	if ((aflt == NULL) || ((aflt->flt_class == CPU_FAULT) &&
	    (aflt->flt_stat & P_AFSR_LEVEL1)) ||
	    (aflt->flt_panic)) {
		console_log_flag = (tagnum < 2) || aft_verbose;
	} else {
		int verbose = ((aflt->flt_class == BUS_FAULT) ||
		    (aflt->flt_stat & P_AFSR_CE)) ?
		    ce_verbose_memory : ce_verbose_other;

		if (!verbose)
			return;

		console_log_flag = (verbose > 1);
	}

	if (console_log_flag)
		(void) sprintf(buf, "[AFT%d]", tagnum);
	else
		(void) sprintf(buf, "![AFT%d]", tagnum);

	p = buf + strlen(buf);		/* current buffer position */
	q = buf + sizeof (buf);		/* pointer past end of buffer */

	if (spflt != NULL && (logflags & CPU_ERRID_FIRST)) {
		(void) snprintf(p, (size_t)(q - p), " errID 0x%08x.%08x",
		    (uint32_t)(aflt->flt_id >> 32), (uint32_t)aflt->flt_id);
		p += strlen(p);
	}

	/*
	 * Copy the caller's format string verbatim into buf[].  It will be
	 * formatted by the call to vcmn_err() at the end of this function.
	 */
	if (fmt != NULL && p < q) {
		(void) strncpy(p, fmt, (size_t)(q - p - 1));
		buf[sizeof (buf) - 1] = '\0';
		p += strlen(p);
	}
">0" : "=0"); 3266*0Sstevel@tonic-gate p += strlen(p); 3267*0Sstevel@tonic-gate } 3268*0Sstevel@tonic-gate 3269*0Sstevel@tonic-gate if (logflags & CPU_ERRID) { 3270*0Sstevel@tonic-gate (void) snprintf(p, (size_t)(q - p), 3271*0Sstevel@tonic-gate ", errID 0x%08x.%08x", 3272*0Sstevel@tonic-gate (uint32_t)(aflt->flt_id >> 32), 3273*0Sstevel@tonic-gate (uint32_t)aflt->flt_id); 3274*0Sstevel@tonic-gate p += strlen(p); 3275*0Sstevel@tonic-gate } 3276*0Sstevel@tonic-gate 3277*0Sstevel@tonic-gate if (logflags & CPU_AFSR) { 3278*0Sstevel@tonic-gate (void) snprintf(p, (size_t)(q - p), 3279*0Sstevel@tonic-gate "\n AFSR 0x%08b.%08b", 3280*0Sstevel@tonic-gate (uint32_t)(aflt->flt_stat >> 32), AFSR_FMTSTR0, 3281*0Sstevel@tonic-gate (uint32_t)aflt->flt_stat, AFSR_FMTSTR1); 3282*0Sstevel@tonic-gate p += strlen(p); 3283*0Sstevel@tonic-gate } 3284*0Sstevel@tonic-gate 3285*0Sstevel@tonic-gate if (logflags & CPU_AFAR) { 3286*0Sstevel@tonic-gate (void) snprintf(p, (size_t)(q - p), " AFAR 0x%08x.%08x", 3287*0Sstevel@tonic-gate (uint32_t)(aflt->flt_addr >> 32), 3288*0Sstevel@tonic-gate (uint32_t)aflt->flt_addr); 3289*0Sstevel@tonic-gate p += strlen(p); 3290*0Sstevel@tonic-gate } 3291*0Sstevel@tonic-gate 3292*0Sstevel@tonic-gate if (logflags & CPU_AF_PSYND) { 3293*0Sstevel@tonic-gate ushort_t psynd = (ushort_t) 3294*0Sstevel@tonic-gate (aflt->flt_stat & P_AFSR_P_SYND); 3295*0Sstevel@tonic-gate 3296*0Sstevel@tonic-gate (void) snprintf(p, (size_t)(q - p), 3297*0Sstevel@tonic-gate "\n AFSR.PSYND 0x%04x(Score %02d)", 3298*0Sstevel@tonic-gate psynd, ecc_psynd_score(psynd)); 3299*0Sstevel@tonic-gate p += strlen(p); 3300*0Sstevel@tonic-gate } 3301*0Sstevel@tonic-gate 3302*0Sstevel@tonic-gate if (logflags & CPU_AF_ETS) { 3303*0Sstevel@tonic-gate (void) snprintf(p, (size_t)(q - p), " AFSR.ETS 0x%02x", 3304*0Sstevel@tonic-gate (uchar_t)((aflt->flt_stat & P_AFSR_ETS) >> 16)); 3305*0Sstevel@tonic-gate p += strlen(p); 3306*0Sstevel@tonic-gate } 3307*0Sstevel@tonic-gate 3308*0Sstevel@tonic-gate if (logflags & CPU_FAULTPC) { 3309*0Sstevel@tonic-gate (void) snprintf(p, (size_t)(q - p), " Fault_PC 0x%p", 3310*0Sstevel@tonic-gate (void *)aflt->flt_pc); 3311*0Sstevel@tonic-gate p += strlen(p); 3312*0Sstevel@tonic-gate } 3313*0Sstevel@tonic-gate 3314*0Sstevel@tonic-gate if (logflags & CPU_UDBH) { 3315*0Sstevel@tonic-gate (void) snprintf(p, (size_t)(q - p), 3316*0Sstevel@tonic-gate "\n UDBH 0x%04b UDBH.ESYND 0x%02x", 3317*0Sstevel@tonic-gate spflt->flt_sdbh, UDB_FMTSTR, 3318*0Sstevel@tonic-gate spflt->flt_sdbh & 0xFF); 3319*0Sstevel@tonic-gate p += strlen(p); 3320*0Sstevel@tonic-gate } 3321*0Sstevel@tonic-gate 3322*0Sstevel@tonic-gate if (logflags & CPU_UDBL) { 3323*0Sstevel@tonic-gate (void) snprintf(p, (size_t)(q - p), 3324*0Sstevel@tonic-gate " UDBL 0x%04b UDBL.ESYND 0x%02x", 3325*0Sstevel@tonic-gate spflt->flt_sdbl, UDB_FMTSTR, 3326*0Sstevel@tonic-gate spflt->flt_sdbl & 0xFF); 3327*0Sstevel@tonic-gate p += strlen(p); 3328*0Sstevel@tonic-gate } 3329*0Sstevel@tonic-gate 3330*0Sstevel@tonic-gate if (logflags & CPU_SYND) { 3331*0Sstevel@tonic-gate ushort_t synd = SYND(aflt->flt_synd); 3332*0Sstevel@tonic-gate 3333*0Sstevel@tonic-gate (void) snprintf(p, (size_t)(q - p), 3334*0Sstevel@tonic-gate "\n %s Syndrome 0x%x Memory Module ", 3335*0Sstevel@tonic-gate UDBL(aflt->flt_synd) ? 
"UDBL" : "UDBH", synd); 3336*0Sstevel@tonic-gate p += strlen(p); 3337*0Sstevel@tonic-gate } 3338*0Sstevel@tonic-gate } 3339*0Sstevel@tonic-gate 3340*0Sstevel@tonic-gate if (endstr != NULL) { 3341*0Sstevel@tonic-gate if (!(logflags & CPU_SYND)) 3342*0Sstevel@tonic-gate (void) snprintf(p, (size_t)(q - p), "\n %s", endstr); 3343*0Sstevel@tonic-gate else 3344*0Sstevel@tonic-gate (void) snprintf(p, (size_t)(q - p), "%s", endstr); 3345*0Sstevel@tonic-gate p += strlen(p); 3346*0Sstevel@tonic-gate } 3347*0Sstevel@tonic-gate 3348*0Sstevel@tonic-gate if (ce_code == CE_CONT && (p < q - 1)) 3349*0Sstevel@tonic-gate (void) strcpy(p, "\n"); /* add final \n if needed */ 3350*0Sstevel@tonic-gate 3351*0Sstevel@tonic-gate va_start(ap, fmt); 3352*0Sstevel@tonic-gate vcmn_err(ce_code, buf, ap); 3353*0Sstevel@tonic-gate va_end(ap); 3354*0Sstevel@tonic-gate } 3355*0Sstevel@tonic-gate 3356*0Sstevel@tonic-gate /* 3357*0Sstevel@tonic-gate * Ecache Scrubbing 3358*0Sstevel@tonic-gate * 3359*0Sstevel@tonic-gate * The basic idea is to prevent lines from sitting in the ecache long enough 3360*0Sstevel@tonic-gate * to build up soft errors which can lead to ecache parity errors. 3361*0Sstevel@tonic-gate * 3362*0Sstevel@tonic-gate * The following rules are observed when flushing the ecache: 3363*0Sstevel@tonic-gate * 3364*0Sstevel@tonic-gate * 1. When the system is busy, flush bad clean lines 3365*0Sstevel@tonic-gate * 2. When the system is idle, flush all clean lines 3366*0Sstevel@tonic-gate * 3. When the system is idle, flush good dirty lines 3367*0Sstevel@tonic-gate * 4. Never flush bad dirty lines. 3368*0Sstevel@tonic-gate * 3369*0Sstevel@tonic-gate * modify parity busy idle 3370*0Sstevel@tonic-gate * ---------------------------- 3371*0Sstevel@tonic-gate * clean good X 3372*0Sstevel@tonic-gate * clean bad X X 3373*0Sstevel@tonic-gate * dirty good X 3374*0Sstevel@tonic-gate * dirty bad 3375*0Sstevel@tonic-gate * 3376*0Sstevel@tonic-gate * Bad or good refers to whether a line has an E$ parity error or not. 3377*0Sstevel@tonic-gate * Clean or dirty refers to the state of the modified bit. We currently 3378*0Sstevel@tonic-gate * default the scan rate to 100 (scan 10% of the cache per second). 3379*0Sstevel@tonic-gate * 3380*0Sstevel@tonic-gate * The following are E$ states and actions. 3381*0Sstevel@tonic-gate * 3382*0Sstevel@tonic-gate * We encode our state as a 3-bit number, consisting of: 3383*0Sstevel@tonic-gate * ECACHE_STATE_MODIFIED (0=clean, 1=dirty) 3384*0Sstevel@tonic-gate * ECACHE_STATE_PARITY (0=good, 1=bad) 3385*0Sstevel@tonic-gate * ECACHE_STATE_BUSY (0=idle, 1=busy) 3386*0Sstevel@tonic-gate * 3387*0Sstevel@tonic-gate * We associate a flushing and a logging action with each state. 3388*0Sstevel@tonic-gate * 3389*0Sstevel@tonic-gate * E$ actions are different for Spitfire and Sabre/Hummingbird modules. 3390*0Sstevel@tonic-gate * MIRROR_FLUSH indicates that an E$ line will be flushed for the mirrored 3391*0Sstevel@tonic-gate * E$ only, in addition to value being set by ec_flush. 
3392*0Sstevel@tonic-gate */
3393*0Sstevel@tonic-gate 
3394*0Sstevel@tonic-gate #define ALWAYS_FLUSH 0x1 /* flush E$ line on all E$ types */
3395*0Sstevel@tonic-gate #define NEVER_FLUSH 0x0 /* never flush the E$ line */
3396*0Sstevel@tonic-gate #define MIRROR_FLUSH 0xF /* flush E$ line on mirrored E$ only */
3397*0Sstevel@tonic-gate 
3398*0Sstevel@tonic-gate struct {
3399*0Sstevel@tonic-gate char ec_flush; /* whether to flush or not */
3400*0Sstevel@tonic-gate char ec_log; /* ecache logging */
3401*0Sstevel@tonic-gate char ec_log_type; /* log type info */
3402*0Sstevel@tonic-gate } ec_action[] = { /* states of the E$ line in M P B */
3403*0Sstevel@tonic-gate { ALWAYS_FLUSH, 0, 0 }, /* 0 0 0 clean_good_idle */
3404*0Sstevel@tonic-gate { MIRROR_FLUSH, 0, 0 }, /* 0 0 1 clean_good_busy */
3405*0Sstevel@tonic-gate { ALWAYS_FLUSH, 1, CPU_BADLINE_CI_ERR }, /* 0 1 0 clean_bad_idle */
3406*0Sstevel@tonic-gate { ALWAYS_FLUSH, 1, CPU_BADLINE_CB_ERR }, /* 0 1 1 clean_bad_busy */
3407*0Sstevel@tonic-gate { ALWAYS_FLUSH, 0, 0 }, /* 1 0 0 dirty_good_idle */
3408*0Sstevel@tonic-gate { MIRROR_FLUSH, 0, 0 }, /* 1 0 1 dirty_good_busy */
3409*0Sstevel@tonic-gate { NEVER_FLUSH, 1, CPU_BADLINE_DI_ERR }, /* 1 1 0 dirty_bad_idle */
3410*0Sstevel@tonic-gate { NEVER_FLUSH, 1, CPU_BADLINE_DB_ERR } /* 1 1 1 dirty_bad_busy */
3411*0Sstevel@tonic-gate };
3412*0Sstevel@tonic-gate 
3413*0Sstevel@tonic-gate /*
3414*0Sstevel@tonic-gate * Offsets into ec_action[] that select the clean_good_busy and
3415*0Sstevel@tonic-gate * dirty_good_busy entries.
3416*0Sstevel@tonic-gate */
3417*0Sstevel@tonic-gate #define ECACHE_CGB_LINE 1 /* E$ clean_good_busy line */
3418*0Sstevel@tonic-gate #define ECACHE_DGB_LINE 5 /* E$ dirty_good_busy line */
3419*0Sstevel@tonic-gate 
3420*0Sstevel@tonic-gate /*
3421*0Sstevel@tonic-gate * We conditionally flush Clean_Good_Busy and Dirty_Good_Busy lines,
3422*0Sstevel@tonic-gate * but only for non-mirrored E$.
3423*0Sstevel@tonic-gate */
3424*0Sstevel@tonic-gate #define CGB(x, m) (((x) == ECACHE_CGB_LINE) && (m != ECACHE_CPU_MIRROR))
3425*0Sstevel@tonic-gate #define DGB(x, m) (((x) == ECACHE_DGB_LINE) && (m != ECACHE_CPU_MIRROR))
3426*0Sstevel@tonic-gate 
3427*0Sstevel@tonic-gate #define ECACHE_STATE_MODIFIED 0x4
3428*0Sstevel@tonic-gate #define ECACHE_STATE_PARITY 0x2
3429*0Sstevel@tonic-gate #define ECACHE_STATE_BUSY 0x1
3430*0Sstevel@tonic-gate 
3431*0Sstevel@tonic-gate /*
3432*0Sstevel@tonic-gate * If the ecache is mirrored, ecache_calls_a_sec and ecache_scan_rate are reduced.
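 *
 * For example (illustrative, derived from the defaults below): a mirrored
 * E$ is scanned ecache_lines_per_call_mirrored (1) line per call, one call
 * per second, while a non-mirrored E$ is scanned ecache_calls_a_sec (100)
 * times per second at ecache_scan_rate (100 tenths of a percent, i.e. 10%
 * of the cache per second).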
3433*0Sstevel@tonic-gate */ 3434*0Sstevel@tonic-gate int ecache_calls_a_sec_mirrored = 1; 3435*0Sstevel@tonic-gate int ecache_lines_per_call_mirrored = 1; 3436*0Sstevel@tonic-gate 3437*0Sstevel@tonic-gate int ecache_scrub_enable = 1; /* ecache scrubbing is on by default */ 3438*0Sstevel@tonic-gate int ecache_scrub_verbose = 1; /* prints clean and dirty lines */ 3439*0Sstevel@tonic-gate int ecache_scrub_panic = 0; /* panics on a clean and dirty line */ 3440*0Sstevel@tonic-gate int ecache_calls_a_sec = 100; /* scrubber calls per sec */ 3441*0Sstevel@tonic-gate int ecache_scan_rate = 100; /* scan rate (in tenths of a percent) */ 3442*0Sstevel@tonic-gate int ecache_idle_factor = 1; /* increase the scan rate when idle */ 3443*0Sstevel@tonic-gate int ecache_flush_clean_good_busy = 50; /* flush rate (in percent) */ 3444*0Sstevel@tonic-gate int ecache_flush_dirty_good_busy = 100; /* flush rate (in percent) */ 3445*0Sstevel@tonic-gate 3446*0Sstevel@tonic-gate volatile int ec_timeout_calls = 1; /* timeout calls */ 3447*0Sstevel@tonic-gate 3448*0Sstevel@tonic-gate /* 3449*0Sstevel@tonic-gate * Interrupt number and pil for ecache scrubber cross-trap calls. 3450*0Sstevel@tonic-gate */ 3451*0Sstevel@tonic-gate static uint_t ecache_scrub_inum; 3452*0Sstevel@tonic-gate uint_t ecache_scrub_pil = PIL_9; 3453*0Sstevel@tonic-gate 3454*0Sstevel@tonic-gate /* 3455*0Sstevel@tonic-gate * Kstats for the E$ scrubber. 3456*0Sstevel@tonic-gate */ 3457*0Sstevel@tonic-gate typedef struct ecache_kstat { 3458*0Sstevel@tonic-gate kstat_named_t clean_good_idle; /* # of lines scrubbed */ 3459*0Sstevel@tonic-gate kstat_named_t clean_good_busy; /* # of lines skipped */ 3460*0Sstevel@tonic-gate kstat_named_t clean_bad_idle; /* # of lines scrubbed */ 3461*0Sstevel@tonic-gate kstat_named_t clean_bad_busy; /* # of lines scrubbed */ 3462*0Sstevel@tonic-gate kstat_named_t dirty_good_idle; /* # of lines scrubbed */ 3463*0Sstevel@tonic-gate kstat_named_t dirty_good_busy; /* # of lines skipped */ 3464*0Sstevel@tonic-gate kstat_named_t dirty_bad_idle; /* # of lines skipped */ 3465*0Sstevel@tonic-gate kstat_named_t dirty_bad_busy; /* # of lines skipped */ 3466*0Sstevel@tonic-gate kstat_named_t invalid_lines; /* # of invalid lines */ 3467*0Sstevel@tonic-gate kstat_named_t clean_good_busy_flush; /* # of lines scrubbed */ 3468*0Sstevel@tonic-gate kstat_named_t dirty_good_busy_flush; /* # of lines scrubbed */ 3469*0Sstevel@tonic-gate kstat_named_t tags_cleared; /* # of E$ tags cleared */ 3470*0Sstevel@tonic-gate } ecache_kstat_t; 3471*0Sstevel@tonic-gate 3472*0Sstevel@tonic-gate static ecache_kstat_t ec_kstat_template = { 3473*0Sstevel@tonic-gate { "clean_good_idle", KSTAT_DATA_ULONG }, 3474*0Sstevel@tonic-gate { "clean_good_busy", KSTAT_DATA_ULONG }, 3475*0Sstevel@tonic-gate { "clean_bad_idle", KSTAT_DATA_ULONG }, 3476*0Sstevel@tonic-gate { "clean_bad_busy", KSTAT_DATA_ULONG }, 3477*0Sstevel@tonic-gate { "dirty_good_idle", KSTAT_DATA_ULONG }, 3478*0Sstevel@tonic-gate { "dirty_good_busy", KSTAT_DATA_ULONG }, 3479*0Sstevel@tonic-gate { "dirty_bad_idle", KSTAT_DATA_ULONG }, 3480*0Sstevel@tonic-gate { "dirty_bad_busy", KSTAT_DATA_ULONG }, 3481*0Sstevel@tonic-gate { "invalid_lines", KSTAT_DATA_ULONG }, 3482*0Sstevel@tonic-gate { "clean_good_busy_flush", KSTAT_DATA_ULONG }, 3483*0Sstevel@tonic-gate { "dirty_good_busy_flush", KSTAT_DATA_ULONG }, 3484*0Sstevel@tonic-gate { "ecache_tags_cleared", KSTAT_DATA_ULONG } 3485*0Sstevel@tonic-gate }; 3486*0Sstevel@tonic-gate 3487*0Sstevel@tonic-gate struct kmem_cache *sf_private_cache; 
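
/*
 * Illustrative example (not part of the original source): how the M/P/B
 * bits select an ec_action[] entry in scrub_ecache_line() below. A dirty
 * line with an E$ data parity error, seen while the cpu is busy, encodes
 * as
 *
 *	mpb = ECACHE_STATE_MODIFIED | ECACHE_STATE_PARITY | ECACHE_STATE_BUSY
 *	    = 0x4 | 0x2 | 0x1 = 7
 *
 * so ec_action[7] = { NEVER_FLUSH, 1, CPU_BADLINE_DB_ERR }: the line is
 * never flushed (rule 4 above), the containing page is scheduled for
 * retirement via ecache_page_retire(), and a dirty_bad_busy event is
 * logged.
 */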
3488*0Sstevel@tonic-gate 
3489*0Sstevel@tonic-gate /*
3490*0Sstevel@tonic-gate * Called periodically on each CPU to scan the ecache once a second,
3491*0Sstevel@tonic-gate * adjusting the ecache line index appropriately.
3492*0Sstevel@tonic-gate */
3493*0Sstevel@tonic-gate void
3494*0Sstevel@tonic-gate scrub_ecache_line()
3495*0Sstevel@tonic-gate {
3496*0Sstevel@tonic-gate spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
3497*0Sstevel@tonic-gate int cpuid = CPU->cpu_id;
3498*0Sstevel@tonic-gate uint32_t index = ssmp->ecache_flush_index;
3499*0Sstevel@tonic-gate uint64_t ec_size = cpunodes[cpuid].ecache_size;
3500*0Sstevel@tonic-gate size_t ec_linesize = cpunodes[cpuid].ecache_linesize;
3501*0Sstevel@tonic-gate int nlines = ssmp->ecache_nlines;
3502*0Sstevel@tonic-gate uint32_t ec_set_size = ec_size / ecache_associativity;
3503*0Sstevel@tonic-gate int ec_mirror = ssmp->ecache_mirror;
3504*0Sstevel@tonic-gate ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data;
3505*0Sstevel@tonic-gate 
3506*0Sstevel@tonic-gate int line, scan_lines, flush_clean_busy = 0, flush_dirty_busy = 0;
3507*0Sstevel@tonic-gate int mpb; /* encode Modified, Parity, Busy for action */
3508*0Sstevel@tonic-gate uchar_t state;
3509*0Sstevel@tonic-gate uint64_t ec_tag, paddr, oafsr, tafsr, nafsr;
3510*0Sstevel@tonic-gate uint64_t *acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
3511*0Sstevel@tonic-gate ec_data_t ec_data[8];
3512*0Sstevel@tonic-gate kstat_named_t *ec_knp;
3513*0Sstevel@tonic-gate 
3514*0Sstevel@tonic-gate switch (ec_mirror) {
3515*0Sstevel@tonic-gate default:
3516*0Sstevel@tonic-gate case ECACHE_CPU_NON_MIRROR:
3517*0Sstevel@tonic-gate /*
3518*0Sstevel@tonic-gate * The E$ scan rate is expressed in units of tenths of
3519*0Sstevel@tonic-gate * a percent. ecache_scan_rate = 1000 (100%) means the
3520*0Sstevel@tonic-gate * whole cache is scanned every second.
3521*0Sstevel@tonic-gate */
3522*0Sstevel@tonic-gate scan_lines = (nlines * ecache_scan_rate) /
3523*0Sstevel@tonic-gate (1000 * ecache_calls_a_sec);
3524*0Sstevel@tonic-gate if (!(ssmp->ecache_busy)) {
3525*0Sstevel@tonic-gate if (ecache_idle_factor > 0) {
3526*0Sstevel@tonic-gate scan_lines *= ecache_idle_factor;
3527*0Sstevel@tonic-gate }
3528*0Sstevel@tonic-gate } else {
3529*0Sstevel@tonic-gate flush_clean_busy = (scan_lines *
3530*0Sstevel@tonic-gate ecache_flush_clean_good_busy) / 100;
3531*0Sstevel@tonic-gate flush_dirty_busy = (scan_lines *
3532*0Sstevel@tonic-gate ecache_flush_dirty_good_busy) / 100;
3533*0Sstevel@tonic-gate }
3534*0Sstevel@tonic-gate 
3535*0Sstevel@tonic-gate ec_timeout_calls = (ecache_calls_a_sec ?
3536*0Sstevel@tonic-gate ecache_calls_a_sec : 1);
3537*0Sstevel@tonic-gate break;
3538*0Sstevel@tonic-gate 
3539*0Sstevel@tonic-gate case ECACHE_CPU_MIRROR:
3540*0Sstevel@tonic-gate scan_lines = ecache_lines_per_call_mirrored;
3541*0Sstevel@tonic-gate ec_timeout_calls = (ecache_calls_a_sec_mirrored ?
3542*0Sstevel@tonic-gate ecache_calls_a_sec_mirrored : 1);
3543*0Sstevel@tonic-gate break;
3544*0Sstevel@tonic-gate }
3545*0Sstevel@tonic-gate 
3546*0Sstevel@tonic-gate /*
3547*0Sstevel@tonic-gate * The ecache scrubber algorithm operates by reading and
3548*0Sstevel@tonic-gate * decoding the E$ tag to determine whether the corresponding E$ line
3549*0Sstevel@tonic-gate * can be scrubbed. There is an implicit assumption in the scrubber
3550*0Sstevel@tonic-gate * logic that the E$ tag is valid. Unfortunately, this assumption is
3551*0Sstevel@tonic-gate * flawed, since the E$ tag may itself be corrupted and have parity errors.
3552*0Sstevel@tonic-gate * The scrubber logic is therefore enhanced to check the validity of the
3553*0Sstevel@tonic-gate * E$ tag before scrubbing. When a parity error is detected in the E$ tag,
3554*0Sstevel@tonic-gate * it is possible to recover and scrub the tag under certain conditions,
3555*0Sstevel@tonic-gate * so that an ETP error condition can be avoided.
3556*0Sstevel@tonic-gate */
3557*0Sstevel@tonic-gate 
3558*0Sstevel@tonic-gate for (mpb = line = 0; line < scan_lines; line++, mpb = 0) {
3559*0Sstevel@tonic-gate /*
3560*0Sstevel@tonic-gate * We get the old AFSR before clearing the AFSR sticky bits
3561*0Sstevel@tonic-gate * in {get_ecache_tag, check_ecache_line, get_ecache_dtag}.
3562*0Sstevel@tonic-gate * If the CP bit is set in the old AFSR, we log an Orphan CP event.
3563*0Sstevel@tonic-gate */
3564*0Sstevel@tonic-gate ec_tag = get_ecache_tag(index, &nafsr, acc_afsr);
3565*0Sstevel@tonic-gate state = (uchar_t)((ec_tag & cpu_ec_state_mask) >>
3566*0Sstevel@tonic-gate cpu_ec_state_shift);
3567*0Sstevel@tonic-gate 
3568*0Sstevel@tonic-gate /*
3569*0Sstevel@tonic-gate * If ETP is set, try to scrub the ecache tag.
3570*0Sstevel@tonic-gate */
3571*0Sstevel@tonic-gate if (nafsr & P_AFSR_ETP) {
3572*0Sstevel@tonic-gate ecache_scrub_tag_err(nafsr, state, index);
3573*0Sstevel@tonic-gate } else if (state & cpu_ec_state_valid) {
3574*0Sstevel@tonic-gate /*
3575*0Sstevel@tonic-gate * ETP is not set and the E$ tag is valid.
3576*0Sstevel@tonic-gate * Proceed with the E$ scrubbing.
3577*0Sstevel@tonic-gate */
3578*0Sstevel@tonic-gate if (state & cpu_ec_state_dirty)
3579*0Sstevel@tonic-gate mpb |= ECACHE_STATE_MODIFIED;
3580*0Sstevel@tonic-gate 
3581*0Sstevel@tonic-gate tafsr = check_ecache_line(index, acc_afsr);
3582*0Sstevel@tonic-gate 
3583*0Sstevel@tonic-gate if (tafsr & P_AFSR_EDP) {
3584*0Sstevel@tonic-gate mpb |= ECACHE_STATE_PARITY;
3585*0Sstevel@tonic-gate 
3586*0Sstevel@tonic-gate if (ecache_scrub_verbose ||
3587*0Sstevel@tonic-gate ecache_scrub_panic) {
3588*0Sstevel@tonic-gate get_ecache_dtag(P2ALIGN(index, 64),
3589*0Sstevel@tonic-gate (uint64_t *)&ec_data[0],
3590*0Sstevel@tonic-gate &ec_tag, &oafsr, acc_afsr);
3591*0Sstevel@tonic-gate }
3592*0Sstevel@tonic-gate }
3593*0Sstevel@tonic-gate 
3594*0Sstevel@tonic-gate if (ssmp->ecache_busy)
3595*0Sstevel@tonic-gate mpb |= ECACHE_STATE_BUSY;
3596*0Sstevel@tonic-gate 
3597*0Sstevel@tonic-gate ec_knp = (kstat_named_t *)ec_ksp + mpb;
3598*0Sstevel@tonic-gate ec_knp->value.ul++;
3599*0Sstevel@tonic-gate 
3600*0Sstevel@tonic-gate paddr = ((ec_tag & cpu_ec_tag_mask) <<
3601*0Sstevel@tonic-gate cpu_ec_tag_shift) | (index % ec_set_size);
3602*0Sstevel@tonic-gate 
3603*0Sstevel@tonic-gate /*
3604*0Sstevel@tonic-gate * We flush the E$ lines as directed by ec_flush; in
3605*0Sstevel@tonic-gate * addition, clean_good_busy and dirty_good_busy lines
3606*0Sstevel@tonic-gate * are flushed for mirrored E$.
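 *
 * Illustrative note (derived from the ec_action[] table above): a
 * clean_good_busy line (mpb == ECACHE_STATE_BUSY == 1) has ec_flush
 * set to MIRROR_FLUSH, so it is flushed unconditionally here only on
 * mirrored E$; on non-mirrored E$ it is instead rate-limited by the
 * CGB() check further below.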
3607*0Sstevel@tonic-gate */ 3608*0Sstevel@tonic-gate if (ec_action[mpb].ec_flush == ALWAYS_FLUSH) { 3609*0Sstevel@tonic-gate flushecacheline(paddr, ec_size); 3610*0Sstevel@tonic-gate } else if ((ec_mirror == ECACHE_CPU_MIRROR) && 3611*0Sstevel@tonic-gate (ec_action[mpb].ec_flush == MIRROR_FLUSH)) { 3612*0Sstevel@tonic-gate flushecacheline(paddr, ec_size); 3613*0Sstevel@tonic-gate } else if (ec_action[mpb].ec_flush == NEVER_FLUSH) { 3614*0Sstevel@tonic-gate softcall(ecache_page_retire, (void *)paddr); 3615*0Sstevel@tonic-gate } 3616*0Sstevel@tonic-gate 3617*0Sstevel@tonic-gate /* 3618*0Sstevel@tonic-gate * Conditionally flush both the clean_good and 3619*0Sstevel@tonic-gate * dirty_good lines when busy. 3620*0Sstevel@tonic-gate */ 3621*0Sstevel@tonic-gate if (CGB(mpb, ec_mirror) && (flush_clean_busy > 0)) { 3622*0Sstevel@tonic-gate flush_clean_busy--; 3623*0Sstevel@tonic-gate flushecacheline(paddr, ec_size); 3624*0Sstevel@tonic-gate ec_ksp->clean_good_busy_flush.value.ul++; 3625*0Sstevel@tonic-gate } else if (DGB(mpb, ec_mirror) && 3626*0Sstevel@tonic-gate (flush_dirty_busy > 0)) { 3627*0Sstevel@tonic-gate flush_dirty_busy--; 3628*0Sstevel@tonic-gate flushecacheline(paddr, ec_size); 3629*0Sstevel@tonic-gate ec_ksp->dirty_good_busy_flush.value.ul++; 3630*0Sstevel@tonic-gate } 3631*0Sstevel@tonic-gate 3632*0Sstevel@tonic-gate if (ec_action[mpb].ec_log && (ecache_scrub_verbose || 3633*0Sstevel@tonic-gate ecache_scrub_panic)) { 3634*0Sstevel@tonic-gate ecache_scrub_log(ec_data, ec_tag, paddr, mpb, 3635*0Sstevel@tonic-gate tafsr); 3636*0Sstevel@tonic-gate } 3637*0Sstevel@tonic-gate 3638*0Sstevel@tonic-gate } else { 3639*0Sstevel@tonic-gate ec_ksp->invalid_lines.value.ul++; 3640*0Sstevel@tonic-gate } 3641*0Sstevel@tonic-gate 3642*0Sstevel@tonic-gate if ((index += ec_linesize) >= ec_size) 3643*0Sstevel@tonic-gate index = 0; 3644*0Sstevel@tonic-gate 3645*0Sstevel@tonic-gate } 3646*0Sstevel@tonic-gate 3647*0Sstevel@tonic-gate /* 3648*0Sstevel@tonic-gate * set the ecache scrub index for the next time around 3649*0Sstevel@tonic-gate */ 3650*0Sstevel@tonic-gate ssmp->ecache_flush_index = index; 3651*0Sstevel@tonic-gate 3652*0Sstevel@tonic-gate if (*acc_afsr & P_AFSR_CP) { 3653*0Sstevel@tonic-gate uint64_t ret_afsr; 3654*0Sstevel@tonic-gate 3655*0Sstevel@tonic-gate ret_afsr = ecache_scrub_misc_err(CPU_ORPHAN_CP_ERR, *acc_afsr); 3656*0Sstevel@tonic-gate if ((ret_afsr & P_AFSR_CP) == 0) 3657*0Sstevel@tonic-gate *acc_afsr = 0; 3658*0Sstevel@tonic-gate } 3659*0Sstevel@tonic-gate } 3660*0Sstevel@tonic-gate 3661*0Sstevel@tonic-gate /* 3662*0Sstevel@tonic-gate * Handler for ecache_scrub_inum softint. Call scrub_ecache_line until 3663*0Sstevel@tonic-gate * we decrement the outstanding request count to zero. 
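 *
 * Illustrative sketch of the flow: ec_scrub_outstanding is bumped on the
 * cross-trap side (ecache_scrubreq_tl1) before this softint runs; if
 * another request arrives while we are scanning, atomic_add_32_nv()
 * returns a non-zero remainder and the do-while loop makes another pass,
 * so no request is lost.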
3664*0Sstevel@tonic-gate */ 3665*0Sstevel@tonic-gate 3666*0Sstevel@tonic-gate /*ARGSUSED*/ 3667*0Sstevel@tonic-gate uint_t 3668*0Sstevel@tonic-gate scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2) 3669*0Sstevel@tonic-gate { 3670*0Sstevel@tonic-gate int i; 3671*0Sstevel@tonic-gate int outstanding; 3672*0Sstevel@tonic-gate spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc); 3673*0Sstevel@tonic-gate uint32_t *countp = &ssmp->ec_scrub_outstanding; 3674*0Sstevel@tonic-gate 3675*0Sstevel@tonic-gate do { 3676*0Sstevel@tonic-gate outstanding = *countp; 3677*0Sstevel@tonic-gate ASSERT(outstanding > 0); 3678*0Sstevel@tonic-gate for (i = 0; i < outstanding; i++) 3679*0Sstevel@tonic-gate scrub_ecache_line(); 3680*0Sstevel@tonic-gate } while (atomic_add_32_nv(countp, -outstanding)); 3681*0Sstevel@tonic-gate 3682*0Sstevel@tonic-gate return (DDI_INTR_CLAIMED); 3683*0Sstevel@tonic-gate } 3684*0Sstevel@tonic-gate 3685*0Sstevel@tonic-gate /* 3686*0Sstevel@tonic-gate * force each cpu to perform an ecache scrub, called from a timeout 3687*0Sstevel@tonic-gate */ 3688*0Sstevel@tonic-gate extern xcfunc_t ecache_scrubreq_tl1; 3689*0Sstevel@tonic-gate 3690*0Sstevel@tonic-gate void 3691*0Sstevel@tonic-gate do_scrub_ecache_line(void) 3692*0Sstevel@tonic-gate { 3693*0Sstevel@tonic-gate long delta; 3694*0Sstevel@tonic-gate 3695*0Sstevel@tonic-gate if (ecache_calls_a_sec > hz) 3696*0Sstevel@tonic-gate ecache_calls_a_sec = hz; 3697*0Sstevel@tonic-gate else if (ecache_calls_a_sec <= 0) 3698*0Sstevel@tonic-gate ecache_calls_a_sec = 1; 3699*0Sstevel@tonic-gate 3700*0Sstevel@tonic-gate if (ecache_calls_a_sec_mirrored > hz) 3701*0Sstevel@tonic-gate ecache_calls_a_sec_mirrored = hz; 3702*0Sstevel@tonic-gate else if (ecache_calls_a_sec_mirrored <= 0) 3703*0Sstevel@tonic-gate ecache_calls_a_sec_mirrored = 1; 3704*0Sstevel@tonic-gate 3705*0Sstevel@tonic-gate if (ecache_scrub_enable) { 3706*0Sstevel@tonic-gate xt_all(ecache_scrubreq_tl1, ecache_scrub_inum, 0); 3707*0Sstevel@tonic-gate delta = hz / ec_timeout_calls; 3708*0Sstevel@tonic-gate } else { 3709*0Sstevel@tonic-gate delta = hz; 3710*0Sstevel@tonic-gate } 3711*0Sstevel@tonic-gate 3712*0Sstevel@tonic-gate (void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0, 3713*0Sstevel@tonic-gate delta); 3714*0Sstevel@tonic-gate } 3715*0Sstevel@tonic-gate 3716*0Sstevel@tonic-gate /* 3717*0Sstevel@tonic-gate * initialization for ecache scrubbing 3718*0Sstevel@tonic-gate * This routine is called AFTER all cpus have had cpu_init_private called 3719*0Sstevel@tonic-gate * to initialize their private data areas. 3720*0Sstevel@tonic-gate */ 3721*0Sstevel@tonic-gate void 3722*0Sstevel@tonic-gate cpu_init_cache_scrub(void) 3723*0Sstevel@tonic-gate { 3724*0Sstevel@tonic-gate if (ecache_calls_a_sec > hz) { 3725*0Sstevel@tonic-gate cmn_err(CE_NOTE, "ecache_calls_a_sec set too high (%d); " 3726*0Sstevel@tonic-gate "resetting to hz (%d)", ecache_calls_a_sec, hz); 3727*0Sstevel@tonic-gate ecache_calls_a_sec = hz; 3728*0Sstevel@tonic-gate } 3729*0Sstevel@tonic-gate 3730*0Sstevel@tonic-gate /* 3731*0Sstevel@tonic-gate * Register softint for ecache scrubbing. 
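 *
 * Timing note (illustrative): the realtime timeout kicked off below first
 * fires after hz / ecache_calls_a_sec ticks, and do_scrub_ecache_line()
 * then re-arms itself every hz / ec_timeout_calls ticks, e.g. every tick
 * when ecache_calls_a_sec is 100 on a hz == 100 system.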
3732*0Sstevel@tonic-gate */
3733*0Sstevel@tonic-gate ecache_scrub_inum = add_softintr(ecache_scrub_pil,
3734*0Sstevel@tonic-gate scrub_ecache_line_intr, NULL);
3735*0Sstevel@tonic-gate 
3736*0Sstevel@tonic-gate /*
3737*0Sstevel@tonic-gate * kick off the scrubbing using realtime timeout
3738*0Sstevel@tonic-gate */
3739*0Sstevel@tonic-gate (void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0,
3740*0Sstevel@tonic-gate hz / ecache_calls_a_sec);
3741*0Sstevel@tonic-gate }
3742*0Sstevel@tonic-gate 
3743*0Sstevel@tonic-gate /*
3744*0Sstevel@tonic-gate * Unset the busy flag for this cpu.
3745*0Sstevel@tonic-gate */
3746*0Sstevel@tonic-gate void
3747*0Sstevel@tonic-gate cpu_idle_ecache_scrub(struct cpu *cp)
3748*0Sstevel@tonic-gate {
3749*0Sstevel@tonic-gate if (CPU_PRIVATE(cp) != NULL) {
3750*0Sstevel@tonic-gate spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp,
3751*0Sstevel@tonic-gate sfpr_scrub_misc);
3752*0Sstevel@tonic-gate ssmp->ecache_busy = ECACHE_CPU_IDLE;
3753*0Sstevel@tonic-gate }
3754*0Sstevel@tonic-gate }
3755*0Sstevel@tonic-gate 
3756*0Sstevel@tonic-gate /*
3757*0Sstevel@tonic-gate * Set the busy flag for this cpu.
3758*0Sstevel@tonic-gate */
3759*0Sstevel@tonic-gate void
3760*0Sstevel@tonic-gate cpu_busy_ecache_scrub(struct cpu *cp)
3761*0Sstevel@tonic-gate {
3762*0Sstevel@tonic-gate if (CPU_PRIVATE(cp) != NULL) {
3763*0Sstevel@tonic-gate spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp,
3764*0Sstevel@tonic-gate sfpr_scrub_misc);
3765*0Sstevel@tonic-gate ssmp->ecache_busy = ECACHE_CPU_BUSY;
3766*0Sstevel@tonic-gate }
3767*0Sstevel@tonic-gate }
3768*0Sstevel@tonic-gate 
3769*0Sstevel@tonic-gate /*
3770*0Sstevel@tonic-gate * initialize the ecache scrubber data structures
3771*0Sstevel@tonic-gate * The global entry point cpu_init_private replaces this entry point.
3772*0Sstevel@tonic-gate *
3773*0Sstevel@tonic-gate */
3774*0Sstevel@tonic-gate static void
3775*0Sstevel@tonic-gate cpu_init_ecache_scrub_dr(struct cpu *cp)
3776*0Sstevel@tonic-gate {
3777*0Sstevel@tonic-gate spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
3778*0Sstevel@tonic-gate int cpuid = cp->cpu_id;
3779*0Sstevel@tonic-gate 
3780*0Sstevel@tonic-gate /*
3781*0Sstevel@tonic-gate * initialize bookkeeping for cache scrubbing
3782*0Sstevel@tonic-gate */
3783*0Sstevel@tonic-gate bzero(ssmp, sizeof (spitfire_scrub_misc_t));
3784*0Sstevel@tonic-gate 
3785*0Sstevel@tonic-gate ssmp->ecache_flush_index = 0;
3786*0Sstevel@tonic-gate 
3787*0Sstevel@tonic-gate ssmp->ecache_nlines =
3788*0Sstevel@tonic-gate cpunodes[cpuid].ecache_size / cpunodes[cpuid].ecache_linesize;
3789*0Sstevel@tonic-gate 
3790*0Sstevel@tonic-gate /*
3791*0Sstevel@tonic-gate * Determine whether we are running on mirrored SRAM
3792*0Sstevel@tonic-gate */
3793*0Sstevel@tonic-gate 
3794*0Sstevel@tonic-gate if (cpunodes[cpuid].msram == ECACHE_CPU_MIRROR)
3795*0Sstevel@tonic-gate ssmp->ecache_mirror = ECACHE_CPU_MIRROR;
3796*0Sstevel@tonic-gate else
3797*0Sstevel@tonic-gate ssmp->ecache_mirror = ECACHE_CPU_NON_MIRROR;
3798*0Sstevel@tonic-gate 
3799*0Sstevel@tonic-gate cpu_busy_ecache_scrub(cp);
3800*0Sstevel@tonic-gate 
3801*0Sstevel@tonic-gate /*
3802*0Sstevel@tonic-gate * initialize the kstats
3803*0Sstevel@tonic-gate */
3804*0Sstevel@tonic-gate ecache_kstat_init(cp);
3805*0Sstevel@tonic-gate }
3806*0Sstevel@tonic-gate 
3807*0Sstevel@tonic-gate /*
3808*0Sstevel@tonic-gate * uninitialize the ecache scrubber data structures
3809*0Sstevel@tonic-gate * The global entry point cpu_uninit_private replaces this entry point.
3810*0Sstevel@tonic-gate */
3811*0Sstevel@tonic-gate static void
3812*0Sstevel@tonic-gate cpu_uninit_ecache_scrub_dr(struct cpu *cp)
3813*0Sstevel@tonic-gate {
3814*0Sstevel@tonic-gate spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
3815*0Sstevel@tonic-gate 
3816*0Sstevel@tonic-gate if (ssmp->ecache_ksp != NULL) {
3817*0Sstevel@tonic-gate kstat_delete(ssmp->ecache_ksp);
3818*0Sstevel@tonic-gate ssmp->ecache_ksp = NULL;
3819*0Sstevel@tonic-gate }
3820*0Sstevel@tonic-gate 
3821*0Sstevel@tonic-gate /*
3822*0Sstevel@tonic-gate * un-initialize bookkeeping for cache scrubbing
3823*0Sstevel@tonic-gate */
3824*0Sstevel@tonic-gate bzero(ssmp, sizeof (spitfire_scrub_misc_t));
3825*0Sstevel@tonic-gate 
3826*0Sstevel@tonic-gate cpu_idle_ecache_scrub(cp);
3827*0Sstevel@tonic-gate }
3828*0Sstevel@tonic-gate 
3829*0Sstevel@tonic-gate struct kmem_cache *sf_private_cache;
3830*0Sstevel@tonic-gate 
3831*0Sstevel@tonic-gate /*
3832*0Sstevel@tonic-gate * Cpu private initialization. This includes allocating the cpu_private
3833*0Sstevel@tonic-gate * data structure, initializing it, and initializing the scrubber for this
3834*0Sstevel@tonic-gate * cpu. This is called once for EVERY cpu, including CPU 0. This function
3835*0Sstevel@tonic-gate * calls cpu_init_ecache_scrub_dr to init the scrubber.
3836*0Sstevel@tonic-gate * We use kmem_cache_create for the spitfire private data structure because it
3837*0Sstevel@tonic-gate * needs to be allocated on a S_ECACHE_MAX_LSIZE (64) byte boundary.
3838*0Sstevel@tonic-gate */
3839*0Sstevel@tonic-gate void
3840*0Sstevel@tonic-gate cpu_init_private(struct cpu *cp)
3841*0Sstevel@tonic-gate {
3842*0Sstevel@tonic-gate spitfire_private_t *sfprp;
3843*0Sstevel@tonic-gate 
3844*0Sstevel@tonic-gate ASSERT(CPU_PRIVATE(cp) == NULL);
3845*0Sstevel@tonic-gate 
3846*0Sstevel@tonic-gate /*
3847*0Sstevel@tonic-gate * If the sf_private_cache has not been created, create it.
3848*0Sstevel@tonic-gate */
3849*0Sstevel@tonic-gate if (sf_private_cache == NULL) {
3850*0Sstevel@tonic-gate sf_private_cache = kmem_cache_create("sf_private_cache",
3851*0Sstevel@tonic-gate sizeof (spitfire_private_t), S_ECACHE_MAX_LSIZE, NULL,
3852*0Sstevel@tonic-gate NULL, NULL, NULL, NULL, 0);
3853*0Sstevel@tonic-gate ASSERT(sf_private_cache);
3854*0Sstevel@tonic-gate }
3855*0Sstevel@tonic-gate 
3856*0Sstevel@tonic-gate sfprp = CPU_PRIVATE(cp) = kmem_cache_alloc(sf_private_cache, KM_SLEEP);
3857*0Sstevel@tonic-gate 
3858*0Sstevel@tonic-gate bzero(sfprp, sizeof (spitfire_private_t));
3859*0Sstevel@tonic-gate 
3860*0Sstevel@tonic-gate cpu_init_ecache_scrub_dr(cp);
3861*0Sstevel@tonic-gate }
3862*0Sstevel@tonic-gate 
3863*0Sstevel@tonic-gate /*
3864*0Sstevel@tonic-gate * Cpu private uninitialization. Uninitialize the Ecache scrubber and
3865*0Sstevel@tonic-gate * deallocate the scrubber data structures and cpu_private data structure.
3866*0Sstevel@tonic-gate * For now, this function just calls cpu_uninit_ecache_scrub_dr to uninit
3867*0Sstevel@tonic-gate * the scrubber for the specified cpu.
3868*0Sstevel@tonic-gate */
3869*0Sstevel@tonic-gate void
3870*0Sstevel@tonic-gate cpu_uninit_private(struct cpu *cp)
3871*0Sstevel@tonic-gate {
3872*0Sstevel@tonic-gate ASSERT(CPU_PRIVATE(cp));
3873*0Sstevel@tonic-gate 
3874*0Sstevel@tonic-gate cpu_uninit_ecache_scrub_dr(cp);
3875*0Sstevel@tonic-gate kmem_cache_free(sf_private_cache, CPU_PRIVATE(cp));
3876*0Sstevel@tonic-gate CPU_PRIVATE(cp) = NULL;
3877*0Sstevel@tonic-gate }
3878*0Sstevel@tonic-gate 
3879*0Sstevel@tonic-gate /*
3880*0Sstevel@tonic-gate * initialize the ecache kstats for each cpu
3881*0Sstevel@tonic-gate */
3882*0Sstevel@tonic-gate static void
3883*0Sstevel@tonic-gate ecache_kstat_init(struct cpu *cp)
3884*0Sstevel@tonic-gate {
3885*0Sstevel@tonic-gate struct kstat *ksp;
3886*0Sstevel@tonic-gate spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
3887*0Sstevel@tonic-gate 
3888*0Sstevel@tonic-gate ASSERT(ssmp != NULL);
3889*0Sstevel@tonic-gate 
3890*0Sstevel@tonic-gate if ((ksp = kstat_create("unix", cp->cpu_id, "ecache_kstat", "misc",
3891*0Sstevel@tonic-gate KSTAT_TYPE_NAMED,
3892*0Sstevel@tonic-gate sizeof (ecache_kstat_t) / sizeof (kstat_named_t),
3893*0Sstevel@tonic-gate KSTAT_FLAG_WRITABLE)) == NULL) {
3894*0Sstevel@tonic-gate ssmp->ecache_ksp = NULL;
3895*0Sstevel@tonic-gate cmn_err(CE_NOTE, "!ecache_kstat_init(%d) failed\n", cp->cpu_id);
3896*0Sstevel@tonic-gate return;
3897*0Sstevel@tonic-gate }
3898*0Sstevel@tonic-gate 
3899*0Sstevel@tonic-gate ssmp->ecache_ksp = ksp;
3900*0Sstevel@tonic-gate bcopy(&ec_kstat_template, ksp->ks_data, sizeof (ecache_kstat_t));
3901*0Sstevel@tonic-gate kstat_install(ksp);
3902*0Sstevel@tonic-gate }
3903*0Sstevel@tonic-gate 
3904*0Sstevel@tonic-gate /*
3905*0Sstevel@tonic-gate * log the bad ecache information
3906*0Sstevel@tonic-gate */
3907*0Sstevel@tonic-gate static void
3908*0Sstevel@tonic-gate ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag, uint64_t paddr, int mpb,
3909*0Sstevel@tonic-gate uint64_t afsr)
3910*0Sstevel@tonic-gate {
3911*0Sstevel@tonic-gate spitf_async_flt spf_flt;
3912*0Sstevel@tonic-gate struct async_flt *aflt;
3913*0Sstevel@tonic-gate int i;
3914*0Sstevel@tonic-gate char *class;
3915*0Sstevel@tonic-gate 
3916*0Sstevel@tonic-gate bzero(&spf_flt, sizeof (spitf_async_flt));
3917*0Sstevel@tonic-gate aflt = &spf_flt.cmn_asyncflt;
3918*0Sstevel@tonic-gate 
3919*0Sstevel@tonic-gate for (i = 0; i < 8; i++) {
3920*0Sstevel@tonic-gate spf_flt.flt_ec_data[i] = ec_data[i];
3921*0Sstevel@tonic-gate }
3922*0Sstevel@tonic-gate 
3923*0Sstevel@tonic-gate spf_flt.flt_ec_tag = ec_tag;
3924*0Sstevel@tonic-gate 
3925*0Sstevel@tonic-gate if (mpb < (sizeof (ec_action) / sizeof (ec_action[0]))) {
3926*0Sstevel@tonic-gate spf_flt.flt_type = ec_action[mpb].ec_log_type;
3927*0Sstevel@tonic-gate } else spf_flt.flt_type = (ushort_t)mpb;
3928*0Sstevel@tonic-gate 
3929*0Sstevel@tonic-gate aflt->flt_inst = CPU->cpu_id;
3930*0Sstevel@tonic-gate aflt->flt_class = CPU_FAULT;
3931*0Sstevel@tonic-gate aflt->flt_id = gethrtime_waitfree();
3932*0Sstevel@tonic-gate aflt->flt_addr = paddr;
3933*0Sstevel@tonic-gate aflt->flt_stat = afsr;
3934*0Sstevel@tonic-gate aflt->flt_panic = (uchar_t)ecache_scrub_panic;
3935*0Sstevel@tonic-gate 
3936*0Sstevel@tonic-gate switch (mpb) {
3937*0Sstevel@tonic-gate case CPU_ECACHE_TAG_ERR:
3938*0Sstevel@tonic-gate case CPU_ECACHE_ADDR_PAR_ERR:
3939*0Sstevel@tonic-gate case CPU_ECACHE_ETP_ETS_ERR:
3940*0Sstevel@tonic-gate case CPU_ECACHE_STATE_ERR:
3941*0Sstevel@tonic-gate class = FM_EREPORT_CPU_USII_ESCRUB_TAG;
3942*0Sstevel@tonic-gate break;
3943*0Sstevel@tonic-gate default:
3944*0Sstevel@tonic-gate class = FM_EREPORT_CPU_USII_ESCRUB_DATA;
3945*0Sstevel@tonic-gate break;
3946*0Sstevel@tonic-gate }
3947*0Sstevel@tonic-gate 
3948*0Sstevel@tonic-gate cpu_errorq_dispatch(class, (void *)&spf_flt, sizeof (spf_flt),
3949*0Sstevel@tonic-gate ue_queue, aflt->flt_panic);
3950*0Sstevel@tonic-gate 
3951*0Sstevel@tonic-gate if (aflt->flt_panic)
3952*0Sstevel@tonic-gate cmn_err(CE_PANIC, "ecache_scrub_panic set and bad E$ "
3953*0Sstevel@tonic-gate "line detected");
3954*0Sstevel@tonic-gate }
3955*0Sstevel@tonic-gate 
3956*0Sstevel@tonic-gate /*
3957*0Sstevel@tonic-gate * Process an ecache error that occurred during the E$ scrubbing.
3958*0Sstevel@tonic-gate * We scan the ecache to find the bad line, flush the bad line,
3959*0Sstevel@tonic-gate * and start the memscrubber to find any UE (in memory or in another cache).
3960*0Sstevel@tonic-gate */
3961*0Sstevel@tonic-gate static uint64_t
3962*0Sstevel@tonic-gate ecache_scrub_misc_err(int type, uint64_t afsr)
3963*0Sstevel@tonic-gate {
3964*0Sstevel@tonic-gate spitf_async_flt spf_flt;
3965*0Sstevel@tonic-gate struct async_flt *aflt;
3966*0Sstevel@tonic-gate uint64_t oafsr;
3967*0Sstevel@tonic-gate 
3968*0Sstevel@tonic-gate bzero(&spf_flt, sizeof (spitf_async_flt));
3969*0Sstevel@tonic-gate aflt = &spf_flt.cmn_asyncflt;
3970*0Sstevel@tonic-gate 
3971*0Sstevel@tonic-gate /*
3972*0Sstevel@tonic-gate * Scan each line in the cache to look for the one
3973*0Sstevel@tonic-gate * with bad parity.
3974*0Sstevel@tonic-gate */
3975*0Sstevel@tonic-gate aflt->flt_addr = AFLT_INV_ADDR;
3976*0Sstevel@tonic-gate scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
3977*0Sstevel@tonic-gate &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr);
3978*0Sstevel@tonic-gate 
3979*0Sstevel@tonic-gate if (oafsr & P_AFSR_CP) {
3980*0Sstevel@tonic-gate uint64_t *cp_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
3981*0Sstevel@tonic-gate *cp_afsr |= oafsr;
3982*0Sstevel@tonic-gate }
3983*0Sstevel@tonic-gate 
3984*0Sstevel@tonic-gate /*
3985*0Sstevel@tonic-gate * If we found a bad PA, update the state to indicate if it is
3986*0Sstevel@tonic-gate * memory or I/O space.
3987*0Sstevel@tonic-gate */
3988*0Sstevel@tonic-gate if (aflt->flt_addr != AFLT_INV_ADDR) {
3989*0Sstevel@tonic-gate aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
3990*0Sstevel@tonic-gate MMU_PAGESHIFT)) ? 1 : 0;
3991*0Sstevel@tonic-gate }
3992*0Sstevel@tonic-gate 
3993*0Sstevel@tonic-gate spf_flt.flt_type = (ushort_t)type;
3994*0Sstevel@tonic-gate 
3995*0Sstevel@tonic-gate aflt->flt_inst = CPU->cpu_id;
3996*0Sstevel@tonic-gate aflt->flt_class = CPU_FAULT;
3997*0Sstevel@tonic-gate aflt->flt_id = gethrtime_waitfree();
3998*0Sstevel@tonic-gate aflt->flt_status = afsr;
3999*0Sstevel@tonic-gate aflt->flt_panic = (uchar_t)ecache_scrub_panic;
4000*0Sstevel@tonic-gate 
4001*0Sstevel@tonic-gate /*
4002*0Sstevel@tonic-gate * If we have the bad line, flush that line and start
4003*0Sstevel@tonic-gate * the memscrubber.
4004*0Sstevel@tonic-gate */
4005*0Sstevel@tonic-gate if (spf_flt.flt_ec_lcnt > 0) {
4006*0Sstevel@tonic-gate flushecacheline(P2ALIGN(aflt->flt_addr, 64),
4007*0Sstevel@tonic-gate cpunodes[CPU->cpu_id].ecache_size);
4008*0Sstevel@tonic-gate read_all_memscrub = 1;
4009*0Sstevel@tonic-gate memscrub_run();
4010*0Sstevel@tonic-gate }
4011*0Sstevel@tonic-gate 
4012*0Sstevel@tonic-gate cpu_errorq_dispatch((type == CPU_ORPHAN_CP_ERR) ?
4013*0Sstevel@tonic-gate FM_EREPORT_CPU_USII_CP : FM_EREPORT_CPU_USII_UNKNOWN,
4014*0Sstevel@tonic-gate (void *)&spf_flt, sizeof (spf_flt), ue_queue, aflt->flt_panic);
4015*0Sstevel@tonic-gate 
4016*0Sstevel@tonic-gate return (oafsr);
4017*0Sstevel@tonic-gate }
4018*0Sstevel@tonic-gate 
4019*0Sstevel@tonic-gate static void
4020*0Sstevel@tonic-gate ecache_scrub_tag_err(uint64_t afsr, uchar_t state, uint32_t index)
4021*0Sstevel@tonic-gate {
4022*0Sstevel@tonic-gate ushort_t afsr_ets = (afsr & P_AFSR_ETS) >> P_AFSR_ETS_SHIFT;
4023*0Sstevel@tonic-gate spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
4024*0Sstevel@tonic-gate ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data;
4025*0Sstevel@tonic-gate uint64_t ec_tag, paddr, oafsr;
4026*0Sstevel@tonic-gate ec_data_t ec_data[8];
4027*0Sstevel@tonic-gate int cpuid = CPU->cpu_id;
4028*0Sstevel@tonic-gate uint32_t ec_set_size = cpunodes[cpuid].ecache_size /
4029*0Sstevel@tonic-gate ecache_associativity;
4030*0Sstevel@tonic-gate uint64_t *cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
4031*0Sstevel@tonic-gate 
4032*0Sstevel@tonic-gate get_ecache_dtag(P2ALIGN(index, 64), (uint64_t *)&ec_data[0], &ec_tag,
4033*0Sstevel@tonic-gate &oafsr, cpu_afsr);
4034*0Sstevel@tonic-gate paddr = ((ec_tag & cpu_ec_tag_mask) << cpu_ec_tag_shift) |
4035*0Sstevel@tonic-gate (index % ec_set_size);
4036*0Sstevel@tonic-gate 
4037*0Sstevel@tonic-gate /*
4038*0Sstevel@tonic-gate * E$ tag state has good parity.
4039*0Sstevel@tonic-gate */
4040*0Sstevel@tonic-gate if ((afsr_ets & cpu_ec_state_parity) == 0) {
4041*0Sstevel@tonic-gate if (afsr_ets & cpu_ec_parity) {
4042*0Sstevel@tonic-gate /*
4043*0Sstevel@tonic-gate * If the E$ tag state bits indicate the line is clean,
4044*0Sstevel@tonic-gate * invalidate the E$ tag and continue.
4045*0Sstevel@tonic-gate */
4046*0Sstevel@tonic-gate if (!(state & cpu_ec_state_dirty)) {
4047*0Sstevel@tonic-gate /*
4048*0Sstevel@tonic-gate * Zero the tag and mark the state invalid
4049*0Sstevel@tonic-gate * with good parity for the tag.
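 *
 * Note (illustrative): the rewrite is done with write_hb_ec_tag_parity()
 * on Sabre/Hummingbird (isus2i/isus2e) and with write_ec_tag_parity()
 * on other UltraSPARC-II modules, as selected just below.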
4050*0Sstevel@tonic-gate */
4051*0Sstevel@tonic-gate if (isus2i || isus2e)
4052*0Sstevel@tonic-gate write_hb_ec_tag_parity(index);
4053*0Sstevel@tonic-gate else
4054*0Sstevel@tonic-gate write_ec_tag_parity(index);
4055*0Sstevel@tonic-gate 
4056*0Sstevel@tonic-gate /* Sync with the dual tag */
4057*0Sstevel@tonic-gate flushecacheline(0,
4058*0Sstevel@tonic-gate cpunodes[CPU->cpu_id].ecache_size);
4059*0Sstevel@tonic-gate ec_ksp->tags_cleared.value.ul++;
4060*0Sstevel@tonic-gate ecache_scrub_log(ec_data, ec_tag, paddr,
4061*0Sstevel@tonic-gate CPU_ECACHE_TAG_ERR, afsr);
4062*0Sstevel@tonic-gate return;
4063*0Sstevel@tonic-gate } else {
4064*0Sstevel@tonic-gate ecache_scrub_log(ec_data, ec_tag, paddr,
4065*0Sstevel@tonic-gate CPU_ECACHE_ADDR_PAR_ERR, afsr);
4066*0Sstevel@tonic-gate cmn_err(CE_PANIC, "E$ tag address has bad"
4067*0Sstevel@tonic-gate " parity");
4068*0Sstevel@tonic-gate }
4069*0Sstevel@tonic-gate } else if ((afsr_ets & cpu_ec_parity) == 0) {
4070*0Sstevel@tonic-gate /*
4071*0Sstevel@tonic-gate * ETS is zero but ETP is set.
4072*0Sstevel@tonic-gate */
4073*0Sstevel@tonic-gate ecache_scrub_log(ec_data, ec_tag, paddr,
4074*0Sstevel@tonic-gate CPU_ECACHE_ETP_ETS_ERR, afsr);
4075*0Sstevel@tonic-gate cmn_err(CE_PANIC, "AFSR.ETP is set and"
4076*0Sstevel@tonic-gate " AFSR.ETS is zero");
4077*0Sstevel@tonic-gate }
4078*0Sstevel@tonic-gate } else {
4079*0Sstevel@tonic-gate /*
4080*0Sstevel@tonic-gate * E$ tag state bit has bad parity.
4081*0Sstevel@tonic-gate */
4082*0Sstevel@tonic-gate ecache_scrub_log(ec_data, ec_tag, paddr,
4083*0Sstevel@tonic-gate CPU_ECACHE_STATE_ERR, afsr);
4084*0Sstevel@tonic-gate cmn_err(CE_PANIC, "E$ tag state has bad parity");
4085*0Sstevel@tonic-gate }
4086*0Sstevel@tonic-gate }
4087*0Sstevel@tonic-gate 
4088*0Sstevel@tonic-gate static void
4089*0Sstevel@tonic-gate ecache_page_retire(void *arg)
4090*0Sstevel@tonic-gate {
4091*0Sstevel@tonic-gate uint64_t paddr = (uint64_t)arg;
4092*0Sstevel@tonic-gate page_t *pp = page_numtopp_nolock((pfn_t)(paddr >> MMU_PAGESHIFT));
4093*0Sstevel@tonic-gate 
4094*0Sstevel@tonic-gate if (pp) {
4095*0Sstevel@tonic-gate page_settoxic(pp, PAGE_IS_FAULTY);
4096*0Sstevel@tonic-gate (void) page_retire(pp, PAGE_IS_TOXIC);
4097*0Sstevel@tonic-gate }
4098*0Sstevel@tonic-gate }
4099*0Sstevel@tonic-gate 
4100*0Sstevel@tonic-gate void
4101*0Sstevel@tonic-gate sticksync_slave(void)
4102*0Sstevel@tonic-gate {}
4103*0Sstevel@tonic-gate 
4104*0Sstevel@tonic-gate void
4105*0Sstevel@tonic-gate sticksync_master(void)
4106*0Sstevel@tonic-gate {}
4107*0Sstevel@tonic-gate 
4108*0Sstevel@tonic-gate /*ARGSUSED*/
4109*0Sstevel@tonic-gate void
4110*0Sstevel@tonic-gate cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t bpp)
4111*0Sstevel@tonic-gate {}
4112*0Sstevel@tonic-gate 
4113*0Sstevel@tonic-gate void
4114*0Sstevel@tonic-gate cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
4115*0Sstevel@tonic-gate {
4116*0Sstevel@tonic-gate int status;
4117*0Sstevel@tonic-gate ddi_fm_error_t de;
4118*0Sstevel@tonic-gate 
4119*0Sstevel@tonic-gate bzero(&de, sizeof (ddi_fm_error_t));
4120*0Sstevel@tonic-gate 
4121*0Sstevel@tonic-gate de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
4122*0Sstevel@tonic-gate FM_ENA_FMT1);
4123*0Sstevel@tonic-gate de.fme_flag = expected;
4124*0Sstevel@tonic-gate de.fme_bus_specific = (void *)aflt->flt_addr;
4125*0Sstevel@tonic-gate status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
4126*0Sstevel@tonic-gate 
4127*0Sstevel@tonic-gate if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
4128*0Sstevel@tonic-gate aflt->flt_panic = 1; 4129*0Sstevel@tonic-gate } 4130*0Sstevel@tonic-gate 4131*0Sstevel@tonic-gate /*ARGSUSED*/ 4132*0Sstevel@tonic-gate void 4133*0Sstevel@tonic-gate cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz, 4134*0Sstevel@tonic-gate errorq_t *eqp, uint_t flag) 4135*0Sstevel@tonic-gate { 4136*0Sstevel@tonic-gate struct async_flt *aflt = (struct async_flt *)payload; 4137*0Sstevel@tonic-gate 4138*0Sstevel@tonic-gate aflt->flt_erpt_class = error_class; 4139*0Sstevel@tonic-gate errorq_dispatch(eqp, payload, payload_sz, flag); 4140*0Sstevel@tonic-gate } 4141*0Sstevel@tonic-gate 4142*0Sstevel@tonic-gate #define MAX_SIMM 8 4143*0Sstevel@tonic-gate 4144*0Sstevel@tonic-gate struct ce_info { 4145*0Sstevel@tonic-gate char name[UNUM_NAMLEN]; 4146*0Sstevel@tonic-gate uint64_t intermittent_total; 4147*0Sstevel@tonic-gate uint64_t persistent_total; 4148*0Sstevel@tonic-gate uint64_t sticky_total; 4149*0Sstevel@tonic-gate unsigned short leaky_bucket_cnt; 4150*0Sstevel@tonic-gate }; 4151*0Sstevel@tonic-gate 4152*0Sstevel@tonic-gate /* 4153*0Sstevel@tonic-gate * Separately-defined structure for use in reporting the ce_info 4154*0Sstevel@tonic-gate * to SunVTS without exposing the internal layout and implementation 4155*0Sstevel@tonic-gate * of struct ce_info. 4156*0Sstevel@tonic-gate */ 4157*0Sstevel@tonic-gate static struct ecc_error_info ecc_error_info_data = { 4158*0Sstevel@tonic-gate { "version", KSTAT_DATA_UINT32 }, 4159*0Sstevel@tonic-gate { "maxcount", KSTAT_DATA_UINT32 }, 4160*0Sstevel@tonic-gate { "count", KSTAT_DATA_UINT32 } 4161*0Sstevel@tonic-gate }; 4162*0Sstevel@tonic-gate static const size_t ecc_error_info_ndata = sizeof (ecc_error_info_data) / 4163*0Sstevel@tonic-gate sizeof (struct kstat_named); 4164*0Sstevel@tonic-gate 4165*0Sstevel@tonic-gate #if KSTAT_CE_UNUM_NAMLEN < UNUM_NAMLEN 4166*0Sstevel@tonic-gate #error "Need to rev ecc_error_info version and update KSTAT_CE_UNUM_NAMLEN" 4167*0Sstevel@tonic-gate #endif 4168*0Sstevel@tonic-gate 4169*0Sstevel@tonic-gate struct ce_info *mem_ce_simm = NULL; 4170*0Sstevel@tonic-gate size_t mem_ce_simm_size = 0; 4171*0Sstevel@tonic-gate 4172*0Sstevel@tonic-gate /* 4173*0Sstevel@tonic-gate * Default values for the number of CE's allowed per interval. 
4174*0Sstevel@tonic-gate * Interval is defined in minutes.
4175*0Sstevel@tonic-gate * SOFTERR_MIN_TIMEOUT is defined in microseconds.
4176*0Sstevel@tonic-gate */
4177*0Sstevel@tonic-gate #define SOFTERR_LIMIT_DEFAULT 2
4178*0Sstevel@tonic-gate #define SOFTERR_INTERVAL_DEFAULT 1440 /* This is 24 hours */
4179*0Sstevel@tonic-gate #define SOFTERR_MIN_TIMEOUT (60 * MICROSEC) /* This is 1 minute */
4180*0Sstevel@tonic-gate #define TIMEOUT_NONE ((timeout_id_t)0)
4181*0Sstevel@tonic-gate #define TIMEOUT_SET ((timeout_id_t)1)
4182*0Sstevel@tonic-gate 
4183*0Sstevel@tonic-gate /*
4184*0Sstevel@tonic-gate * timeout identifier for leaky_bucket
4185*0Sstevel@tonic-gate */
4186*0Sstevel@tonic-gate static timeout_id_t leaky_bucket_timeout_id = TIMEOUT_NONE;
4187*0Sstevel@tonic-gate 
4188*0Sstevel@tonic-gate /*
4189*0Sstevel@tonic-gate * Tunables for the maximum number of allowed CE's in a given time
4190*0Sstevel@tonic-gate */
4191*0Sstevel@tonic-gate int ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT;
4192*0Sstevel@tonic-gate int ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT;
4193*0Sstevel@tonic-gate 
4194*0Sstevel@tonic-gate void
4195*0Sstevel@tonic-gate cpu_mp_init(void)
4196*0Sstevel@tonic-gate {
4197*0Sstevel@tonic-gate size_t size = cpu_aflt_size();
4198*0Sstevel@tonic-gate size_t i;
4199*0Sstevel@tonic-gate kstat_t *ksp;
4200*0Sstevel@tonic-gate 
4201*0Sstevel@tonic-gate /*
4202*0Sstevel@tonic-gate * Initialize the CE error handling buffers.
4203*0Sstevel@tonic-gate */
4204*0Sstevel@tonic-gate mem_ce_simm_size = MAX_SIMM * max_ncpus;
4205*0Sstevel@tonic-gate size = sizeof (struct ce_info) * mem_ce_simm_size;
4206*0Sstevel@tonic-gate mem_ce_simm = kmem_zalloc(size, KM_SLEEP);
4207*0Sstevel@tonic-gate 
4208*0Sstevel@tonic-gate ksp = kstat_create("unix", 0, "ecc-info", "misc",
4209*0Sstevel@tonic-gate KSTAT_TYPE_NAMED, ecc_error_info_ndata, KSTAT_FLAG_VIRTUAL);
4210*0Sstevel@tonic-gate if (ksp != NULL) {
4211*0Sstevel@tonic-gate ksp->ks_data = (struct kstat_named *)&ecc_error_info_data;
4212*0Sstevel@tonic-gate ecc_error_info_data.version.value.ui32 = KSTAT_CE_INFO_VER;
4213*0Sstevel@tonic-gate ecc_error_info_data.maxcount.value.ui32 = mem_ce_simm_size;
4214*0Sstevel@tonic-gate ecc_error_info_data.count.value.ui32 = 0;
4215*0Sstevel@tonic-gate kstat_install(ksp);
4216*0Sstevel@tonic-gate }
4217*0Sstevel@tonic-gate 
4218*0Sstevel@tonic-gate for (i = 0; i < mem_ce_simm_size; i++) {
4219*0Sstevel@tonic-gate struct kstat_ecc_mm_info *kceip;
4220*0Sstevel@tonic-gate 
4221*0Sstevel@tonic-gate kceip = kmem_zalloc(sizeof (struct kstat_ecc_mm_info),
4222*0Sstevel@tonic-gate KM_SLEEP);
4223*0Sstevel@tonic-gate ksp = kstat_create("mm", i, "ecc-info", "misc",
4224*0Sstevel@tonic-gate KSTAT_TYPE_NAMED,
4225*0Sstevel@tonic-gate sizeof (struct kstat_ecc_mm_info) / sizeof (kstat_named_t),
4226*0Sstevel@tonic-gate KSTAT_FLAG_VIRTUAL);
4227*0Sstevel@tonic-gate if (ksp != NULL) {
4228*0Sstevel@tonic-gate /*
4229*0Sstevel@tonic-gate * Re-declare ks_data_size to include room for the
4230*0Sstevel@tonic-gate * UNUM name since we don't have KSTAT_FLAG_VAR_SIZE
4231*0Sstevel@tonic-gate * set.
4232*0Sstevel@tonic-gate */ 4233*0Sstevel@tonic-gate ksp->ks_data_size = sizeof (struct kstat_ecc_mm_info) + 4234*0Sstevel@tonic-gate KSTAT_CE_UNUM_NAMLEN; 4235*0Sstevel@tonic-gate ksp->ks_data = kceip; 4236*0Sstevel@tonic-gate kstat_named_init(&kceip->name, 4237*0Sstevel@tonic-gate "name", KSTAT_DATA_STRING); 4238*0Sstevel@tonic-gate kstat_named_init(&kceip->intermittent_total, 4239*0Sstevel@tonic-gate "intermittent_total", KSTAT_DATA_UINT64); 4240*0Sstevel@tonic-gate kstat_named_init(&kceip->persistent_total, 4241*0Sstevel@tonic-gate "persistent_total", KSTAT_DATA_UINT64); 4242*0Sstevel@tonic-gate kstat_named_init(&kceip->sticky_total, 4243*0Sstevel@tonic-gate "sticky_total", KSTAT_DATA_UINT64); 4244*0Sstevel@tonic-gate /* 4245*0Sstevel@tonic-gate * Use the default snapshot routine as it knows how to 4246*0Sstevel@tonic-gate * deal with named kstats with long strings. 4247*0Sstevel@tonic-gate */ 4248*0Sstevel@tonic-gate ksp->ks_update = ecc_kstat_update; 4249*0Sstevel@tonic-gate kstat_install(ksp); 4250*0Sstevel@tonic-gate } else { 4251*0Sstevel@tonic-gate kmem_free(kceip, sizeof (struct kstat_ecc_mm_info)); 4252*0Sstevel@tonic-gate } 4253*0Sstevel@tonic-gate } 4254*0Sstevel@tonic-gate } 4255*0Sstevel@tonic-gate 4256*0Sstevel@tonic-gate /*ARGSUSED*/ 4257*0Sstevel@tonic-gate static void 4258*0Sstevel@tonic-gate leaky_bucket_timeout(void *arg) 4259*0Sstevel@tonic-gate { 4260*0Sstevel@tonic-gate int i; 4261*0Sstevel@tonic-gate struct ce_info *psimm = mem_ce_simm; 4262*0Sstevel@tonic-gate 4263*0Sstevel@tonic-gate for (i = 0; i < mem_ce_simm_size; i++) { 4264*0Sstevel@tonic-gate if (psimm[i].leaky_bucket_cnt > 0) 4265*0Sstevel@tonic-gate atomic_add_16(&psimm[i].leaky_bucket_cnt, -1); 4266*0Sstevel@tonic-gate } 4267*0Sstevel@tonic-gate add_leaky_bucket_timeout(); 4268*0Sstevel@tonic-gate } 4269*0Sstevel@tonic-gate 4270*0Sstevel@tonic-gate static void 4271*0Sstevel@tonic-gate add_leaky_bucket_timeout(void) 4272*0Sstevel@tonic-gate { 4273*0Sstevel@tonic-gate long timeout_in_microsecs; 4274*0Sstevel@tonic-gate 4275*0Sstevel@tonic-gate /* 4276*0Sstevel@tonic-gate * create timeout for next leak. 4277*0Sstevel@tonic-gate * 4278*0Sstevel@tonic-gate * The timeout interval is calculated as follows 4279*0Sstevel@tonic-gate * 4280*0Sstevel@tonic-gate * (ecc_softerr_interval * 60 * MICROSEC) / ecc_softerr_limit 4281*0Sstevel@tonic-gate * 4282*0Sstevel@tonic-gate * ecc_softerr_interval is in minutes, so multiply this by 60 (seconds 4283*0Sstevel@tonic-gate * in a minute), then multiply this by MICROSEC to get the interval 4284*0Sstevel@tonic-gate * in microseconds. Divide this total by ecc_softerr_limit so that 4285*0Sstevel@tonic-gate * the timeout interval is accurate to within a few microseconds. 
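 *
 * Worked example (with the defaults): ecc_softerr_interval = 1440 and
 * ecc_softerr_limit = 2 give (1440 * 60 * MICROSEC) / 2 = 43,200,000,000
 * microseconds, i.e. one leak every 12 hours.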
4286*0Sstevel@tonic-gate */
4287*0Sstevel@tonic-gate 
4288*0Sstevel@tonic-gate if (ecc_softerr_limit <= 0)
4289*0Sstevel@tonic-gate ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT;
4290*0Sstevel@tonic-gate if (ecc_softerr_interval <= 0)
4291*0Sstevel@tonic-gate ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT;
4292*0Sstevel@tonic-gate 
4293*0Sstevel@tonic-gate timeout_in_microsecs = ((int64_t)ecc_softerr_interval * 60 * MICROSEC) /
4294*0Sstevel@tonic-gate ecc_softerr_limit;
4295*0Sstevel@tonic-gate 
4296*0Sstevel@tonic-gate if (timeout_in_microsecs < SOFTERR_MIN_TIMEOUT)
4297*0Sstevel@tonic-gate timeout_in_microsecs = SOFTERR_MIN_TIMEOUT;
4298*0Sstevel@tonic-gate 
4299*0Sstevel@tonic-gate leaky_bucket_timeout_id = timeout(leaky_bucket_timeout,
4300*0Sstevel@tonic-gate (void *)NULL, drv_usectohz((clock_t)timeout_in_microsecs));
4301*0Sstevel@tonic-gate }
4302*0Sstevel@tonic-gate 
4303*0Sstevel@tonic-gate /*
4304*0Sstevel@tonic-gate * Legacy Correctable ECC Error Hash
4305*0Sstevel@tonic-gate *
4306*0Sstevel@tonic-gate * All of the code below this comment is used to implement a legacy array
4307*0Sstevel@tonic-gate * which counted intermittent, persistent, and sticky CE errors by unum,
4308*0Sstevel@tonic-gate * and was later extended to publish the data as a kstat for SunVTS.
4309*0Sstevel@tonic-gate * All of this code is replaced by FMA, and remains here until such time
4310*0Sstevel@tonic-gate * as the UltraSPARC-I/II CPU code is converted to FMA, or is EOLed.
4311*0Sstevel@tonic-gate *
4312*0Sstevel@tonic-gate * Errors are saved in three buckets per-unum:
4313*0Sstevel@tonic-gate * (1) sticky - the scrub was unsuccessful; the error cannot be scrubbed.
4314*0Sstevel@tonic-gate * This could represent a problem, and is immediately printed out.
4315*0Sstevel@tonic-gate * (2) persistent - the error was successfully scrubbed.
4316*0Sstevel@tonic-gate * These errors use the leaky bucket algorithm to determine
4317*0Sstevel@tonic-gate * if there is a serious problem.
4318*0Sstevel@tonic-gate * (3) intermittent - may have originated from the cpu or upa/safari bus
4319*0Sstevel@tonic-gate * and does not necessarily indicate any problem with the dimm itself,
4320*0Sstevel@tonic-gate * but is critical information for debugging new hardware.
4321*0Sstevel@tonic-gate * Because we do not know if it came from the dimm, it would be
4322*0Sstevel@tonic-gate * inappropriate to include these in the leaky bucket counts.
4323*0Sstevel@tonic-gate *
4324*0Sstevel@tonic-gate * If the E$ line was modified before the scrub operation began, then the
4325*0Sstevel@tonic-gate * displacement flush at the beginning of scrubphys() will cause the modified
4326*0Sstevel@tonic-gate * line to be written out, which will clean up the CE. Then, any subsequent
4327*0Sstevel@tonic-gate * read will not cause an error, which will cause persistent errors to be
4328*0Sstevel@tonic-gate * identified as intermittent.
4329*0Sstevel@tonic-gate *
4330*0Sstevel@tonic-gate * If a DIMM is going bad, it will produce true persistents as well as
4331*0Sstevel@tonic-gate * false intermittents, so these intermittents can be safely ignored.
4332*0Sstevel@tonic-gate *
4333*0Sstevel@tonic-gate * If the error count is excessive for a DIMM, ce_count_unum() will return
4334*0Sstevel@tonic-gate * PAGE_IS_FAILING, and the CPU module may then decide to remove that page
4335*0Sstevel@tonic-gate * from use.
4336*0Sstevel@tonic-gate */ 4337*0Sstevel@tonic-gate static int 4338*0Sstevel@tonic-gate ce_count_unum(int status, int len, char *unum) 4339*0Sstevel@tonic-gate { 4340*0Sstevel@tonic-gate int i; 4341*0Sstevel@tonic-gate struct ce_info *psimm = mem_ce_simm; 4342*0Sstevel@tonic-gate int page_status = PAGE_IS_OK; 4343*0Sstevel@tonic-gate 4344*0Sstevel@tonic-gate ASSERT(psimm != NULL); 4345*0Sstevel@tonic-gate 4346*0Sstevel@tonic-gate if (len <= 0 || 4347*0Sstevel@tonic-gate (status & (ECC_STICKY | ECC_PERSISTENT | ECC_INTERMITTENT)) == 0) 4348*0Sstevel@tonic-gate return (page_status); 4349*0Sstevel@tonic-gate 4350*0Sstevel@tonic-gate /* 4351*0Sstevel@tonic-gate * Initialize the leaky_bucket timeout 4352*0Sstevel@tonic-gate */ 4353*0Sstevel@tonic-gate if (casptr(&leaky_bucket_timeout_id, 4354*0Sstevel@tonic-gate TIMEOUT_NONE, TIMEOUT_SET) == TIMEOUT_NONE) 4355*0Sstevel@tonic-gate add_leaky_bucket_timeout(); 4356*0Sstevel@tonic-gate 4357*0Sstevel@tonic-gate for (i = 0; i < mem_ce_simm_size; i++) { 4358*0Sstevel@tonic-gate if (psimm[i].name[0] == '\0') { 4359*0Sstevel@tonic-gate /* 4360*0Sstevel@tonic-gate * Hit the end of the valid entries, add 4361*0Sstevel@tonic-gate * a new one. 4362*0Sstevel@tonic-gate */ 4363*0Sstevel@tonic-gate (void) strncpy(psimm[i].name, unum, len); 4364*0Sstevel@tonic-gate if (status & ECC_STICKY) { 4365*0Sstevel@tonic-gate /* 4366*0Sstevel@tonic-gate * Sticky - the leaky bucket is used to track 4367*0Sstevel@tonic-gate * soft errors. Since a sticky error is a 4368*0Sstevel@tonic-gate * hard error and likely to be retired soon, 4369*0Sstevel@tonic-gate * we do not count it in the leaky bucket. 4370*0Sstevel@tonic-gate */ 4371*0Sstevel@tonic-gate psimm[i].leaky_bucket_cnt = 0; 4372*0Sstevel@tonic-gate psimm[i].intermittent_total = 0; 4373*0Sstevel@tonic-gate psimm[i].persistent_total = 0; 4374*0Sstevel@tonic-gate psimm[i].sticky_total = 1; 4375*0Sstevel@tonic-gate cmn_err(CE_WARN, 4376*0Sstevel@tonic-gate "[AFT0] Sticky Softerror encountered " 4377*0Sstevel@tonic-gate "on Memory Module %s\n", unum); 4378*0Sstevel@tonic-gate page_status = PAGE_IS_FAILING; 4379*0Sstevel@tonic-gate } else if (status & ECC_PERSISTENT) { 4380*0Sstevel@tonic-gate psimm[i].leaky_bucket_cnt = 1; 4381*0Sstevel@tonic-gate psimm[i].intermittent_total = 0; 4382*0Sstevel@tonic-gate psimm[i].persistent_total = 1; 4383*0Sstevel@tonic-gate psimm[i].sticky_total = 0; 4384*0Sstevel@tonic-gate } else { 4385*0Sstevel@tonic-gate /* 4386*0Sstevel@tonic-gate * Intermittent - Because the scrub operation 4387*0Sstevel@tonic-gate * cannot find the error in the DIMM, we will 4388*0Sstevel@tonic-gate * not count these in the leaky bucket 4389*0Sstevel@tonic-gate */ 4390*0Sstevel@tonic-gate psimm[i].leaky_bucket_cnt = 0; 4391*0Sstevel@tonic-gate psimm[i].intermittent_total = 1; 4392*0Sstevel@tonic-gate psimm[i].persistent_total = 0; 4393*0Sstevel@tonic-gate psimm[i].sticky_total = 0; 4394*0Sstevel@tonic-gate } 4395*0Sstevel@tonic-gate ecc_error_info_data.count.value.ui32++; 4396*0Sstevel@tonic-gate break; 4397*0Sstevel@tonic-gate } else if (strncmp(unum, psimm[i].name, len) == 0) { 4398*0Sstevel@tonic-gate /* 4399*0Sstevel@tonic-gate * Found an existing entry for the current 4400*0Sstevel@tonic-gate * memory module, adjust the counts. 
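 *
 * Illustrative example: with the default ecc_softerr_limit of 2, a third
 * persistent CE on the same unum before the leaky bucket has drained
 * pushes leaky_bucket_cnt to 3 (> limit), so the page is marked
 * PAGE_IS_FAILING and the count is backed off by one.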
4401*0Sstevel@tonic-gate */ 4402*0Sstevel@tonic-gate if (status & ECC_STICKY) { 4403*0Sstevel@tonic-gate psimm[i].sticky_total++; 4404*0Sstevel@tonic-gate cmn_err(CE_WARN, 4405*0Sstevel@tonic-gate "[AFT0] Sticky Softerror encountered " 4406*0Sstevel@tonic-gate "on Memory Module %s\n", unum); 4407*0Sstevel@tonic-gate page_status = PAGE_IS_FAILING; 4408*0Sstevel@tonic-gate } else if (status & ECC_PERSISTENT) { 4409*0Sstevel@tonic-gate int new_value; 4410*0Sstevel@tonic-gate 4411*0Sstevel@tonic-gate new_value = atomic_add_16_nv( 4412*0Sstevel@tonic-gate &psimm[i].leaky_bucket_cnt, 1); 4413*0Sstevel@tonic-gate psimm[i].persistent_total++; 4414*0Sstevel@tonic-gate if (new_value > ecc_softerr_limit) { 4415*0Sstevel@tonic-gate cmn_err(CE_WARN, "[AFT0] Most recent %d" 4416*0Sstevel@tonic-gate " soft errors from Memory Module" 4417*0Sstevel@tonic-gate " %s exceed threshold (N=%d," 4418*0Sstevel@tonic-gate " T=%dh:%02dm) triggering page" 4419*0Sstevel@tonic-gate " retire", new_value, unum, 4420*0Sstevel@tonic-gate ecc_softerr_limit, 4421*0Sstevel@tonic-gate ecc_softerr_interval / 60, 4422*0Sstevel@tonic-gate ecc_softerr_interval % 60); 4423*0Sstevel@tonic-gate atomic_add_16( 4424*0Sstevel@tonic-gate &psimm[i].leaky_bucket_cnt, -1); 4425*0Sstevel@tonic-gate page_status = PAGE_IS_FAILING; 4426*0Sstevel@tonic-gate } 4427*0Sstevel@tonic-gate } else { /* Intermittent */ 4428*0Sstevel@tonic-gate psimm[i].intermittent_total++; 4429*0Sstevel@tonic-gate } 4430*0Sstevel@tonic-gate break; 4431*0Sstevel@tonic-gate } 4432*0Sstevel@tonic-gate } 4433*0Sstevel@tonic-gate 4434*0Sstevel@tonic-gate if (i >= mem_ce_simm_size) 4435*0Sstevel@tonic-gate cmn_err(CE_CONT, "[AFT0] Softerror: mem_ce_simm[] out of " 4436*0Sstevel@tonic-gate "space.\n"); 4437*0Sstevel@tonic-gate 4438*0Sstevel@tonic-gate return (page_status); 4439*0Sstevel@tonic-gate } 4440*0Sstevel@tonic-gate 4441*0Sstevel@tonic-gate /* 4442*0Sstevel@tonic-gate * Function to support counting of IO detected CEs. 4443*0Sstevel@tonic-gate */ 4444*0Sstevel@tonic-gate void 4445*0Sstevel@tonic-gate cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum) 4446*0Sstevel@tonic-gate { 4447*0Sstevel@tonic-gate if (ce_count_unum(ecc->flt_status, len, unum) == PAGE_IS_FAILING && 4448*0Sstevel@tonic-gate automatic_page_removal) { 4449*0Sstevel@tonic-gate page_t *pp = page_numtopp_nolock((pfn_t) 4450*0Sstevel@tonic-gate (ecc->flt_addr >> MMU_PAGESHIFT)); 4451*0Sstevel@tonic-gate 4452*0Sstevel@tonic-gate if (pp) { 4453*0Sstevel@tonic-gate page_settoxic(pp, PAGE_IS_FAULTY); 4454*0Sstevel@tonic-gate (void) page_retire(pp, PAGE_IS_FAILING); 4455*0Sstevel@tonic-gate } 4456*0Sstevel@tonic-gate } 4457*0Sstevel@tonic-gate } 4458*0Sstevel@tonic-gate 4459*0Sstevel@tonic-gate static int 4460*0Sstevel@tonic-gate ecc_kstat_update(kstat_t *ksp, int rw) 4461*0Sstevel@tonic-gate { 4462*0Sstevel@tonic-gate struct kstat_ecc_mm_info *kceip = ksp->ks_data; 4463*0Sstevel@tonic-gate struct ce_info *ceip = mem_ce_simm; 4464*0Sstevel@tonic-gate int i = ksp->ks_instance; 4465*0Sstevel@tonic-gate 4466*0Sstevel@tonic-gate if (rw == KSTAT_WRITE) 4467*0Sstevel@tonic-gate return (EACCES); 4468*0Sstevel@tonic-gate 4469*0Sstevel@tonic-gate ASSERT(ksp->ks_data != NULL); 4470*0Sstevel@tonic-gate ASSERT(i < mem_ce_simm_size && i >= 0); 4471*0Sstevel@tonic-gate 4472*0Sstevel@tonic-gate /* 4473*0Sstevel@tonic-gate * Since we're not using locks, make sure that we don't get partial 4474*0Sstevel@tonic-gate * data. 
The name is always copied before the counters are incremented 4475*0Sstevel@tonic-gate * so only do this update routine if at least one of the counters is 4476*0Sstevel@tonic-gate * non-zero, which ensures that ce_count_unum() is done, and the 4477*0Sstevel@tonic-gate * string is fully copied. 4478*0Sstevel@tonic-gate */ 4479*0Sstevel@tonic-gate if (ceip[i].intermittent_total == 0 && 4480*0Sstevel@tonic-gate ceip[i].persistent_total == 0 && 4481*0Sstevel@tonic-gate ceip[i].sticky_total == 0) { 4482*0Sstevel@tonic-gate /* 4483*0Sstevel@tonic-gate * Uninitialized or partially initialized. Ignore. 4484*0Sstevel@tonic-gate * The ks_data buffer was allocated via kmem_zalloc, 4485*0Sstevel@tonic-gate * so no need to bzero it. 4486*0Sstevel@tonic-gate */ 4487*0Sstevel@tonic-gate return (0); 4488*0Sstevel@tonic-gate } 4489*0Sstevel@tonic-gate 4490*0Sstevel@tonic-gate kstat_named_setstr(&kceip->name, ceip[i].name); 4491*0Sstevel@tonic-gate kceip->intermittent_total.value.ui64 = ceip[i].intermittent_total; 4492*0Sstevel@tonic-gate kceip->persistent_total.value.ui64 = ceip[i].persistent_total; 4493*0Sstevel@tonic-gate kceip->sticky_total.value.ui64 = ceip[i].sticky_total; 4494*0Sstevel@tonic-gate 4495*0Sstevel@tonic-gate return (0); 4496*0Sstevel@tonic-gate } 4497*0Sstevel@tonic-gate 4498*0Sstevel@tonic-gate #define VIS_BLOCKSIZE 64 4499*0Sstevel@tonic-gate 4500*0Sstevel@tonic-gate int 4501*0Sstevel@tonic-gate dtrace_blksuword32_err(uintptr_t addr, uint32_t *data) 4502*0Sstevel@tonic-gate { 4503*0Sstevel@tonic-gate int ret, watched; 4504*0Sstevel@tonic-gate 4505*0Sstevel@tonic-gate watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); 4506*0Sstevel@tonic-gate ret = dtrace_blksuword32(addr, data, 0); 4507*0Sstevel@tonic-gate if (watched) 4508*0Sstevel@tonic-gate watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); 4509*0Sstevel@tonic-gate 4510*0Sstevel@tonic-gate return (ret); 4511*0Sstevel@tonic-gate } 4512*0Sstevel@tonic-gate 4513*0Sstevel@tonic-gate /*ARGSUSED*/ 4514*0Sstevel@tonic-gate void 4515*0Sstevel@tonic-gate cpu_faulted_enter(struct cpu *cp) 4516*0Sstevel@tonic-gate { 4517*0Sstevel@tonic-gate } 4518*0Sstevel@tonic-gate 4519*0Sstevel@tonic-gate /*ARGSUSED*/ 4520*0Sstevel@tonic-gate void 4521*0Sstevel@tonic-gate cpu_faulted_exit(struct cpu *cp) 4522*0Sstevel@tonic-gate { 4523*0Sstevel@tonic-gate } 4524*0Sstevel@tonic-gate 4525*0Sstevel@tonic-gate static int mmu_disable_ism_large_pages = ((1 << TTE512K) | 4526*0Sstevel@tonic-gate (1 << TTE32M) | (1 << TTE256M)); 4527*0Sstevel@tonic-gate static int mmu_disable_large_pages = ((1 << TTE32M) | (1 << TTE256M)); 4528*0Sstevel@tonic-gate 4529*0Sstevel@tonic-gate /* 4530*0Sstevel@tonic-gate * The function returns the US_II mmu-specific values for the 4531*0Sstevel@tonic-gate * hat's disable_large_pages and disable_ism_large_pages variables. 
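 *
 * For example (derived from the masks above): HAT_LOAD returns
 * mmu_disable_large_pages, disabling 32M and 256M pages, while
 * HAT_LOAD_SHARE (ISM) returns mmu_disable_ism_large_pages, which
 * additionally disables 512K pages.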
4532*0Sstevel@tonic-gate */ 4533*0Sstevel@tonic-gate int 4534*0Sstevel@tonic-gate mmu_large_pages_disabled(uint_t flag) 4535*0Sstevel@tonic-gate { 4536*0Sstevel@tonic-gate int pages_disable = 0; 4537*0Sstevel@tonic-gate 4538*0Sstevel@tonic-gate if (flag == HAT_LOAD) { 4539*0Sstevel@tonic-gate pages_disable = mmu_disable_large_pages; 4540*0Sstevel@tonic-gate } else if (flag == HAT_LOAD_SHARE) { 4541*0Sstevel@tonic-gate pages_disable = mmu_disable_ism_large_pages; 4542*0Sstevel@tonic-gate } 4543*0Sstevel@tonic-gate return (pages_disable); 4544*0Sstevel@tonic-gate } 4545*0Sstevel@tonic-gate 4546*0Sstevel@tonic-gate /*ARGSUSED*/ 4547*0Sstevel@tonic-gate void 4548*0Sstevel@tonic-gate mmu_init_kernel_pgsz(struct hat *hat) 4549*0Sstevel@tonic-gate { 4550*0Sstevel@tonic-gate } 4551*0Sstevel@tonic-gate 4552*0Sstevel@tonic-gate size_t 4553*0Sstevel@tonic-gate mmu_get_kernel_lpsize(size_t lpsize) 4554*0Sstevel@tonic-gate { 4555*0Sstevel@tonic-gate uint_t tte; 4556*0Sstevel@tonic-gate 4557*0Sstevel@tonic-gate if (lpsize == 0) { 4558*0Sstevel@tonic-gate /* no setting for segkmem_lpsize in /etc/system: use default */ 4559*0Sstevel@tonic-gate return (MMU_PAGESIZE4M); 4560*0Sstevel@tonic-gate } 4561*0Sstevel@tonic-gate 4562*0Sstevel@tonic-gate for (tte = TTE8K; tte <= TTE4M; tte++) { 4563*0Sstevel@tonic-gate if (lpsize == TTEBYTES(tte)) 4564*0Sstevel@tonic-gate return (lpsize); 4565*0Sstevel@tonic-gate } 4566*0Sstevel@tonic-gate 4567*0Sstevel@tonic-gate return (TTEBYTES(TTE8K)); 4568*0Sstevel@tonic-gate } 4569
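
/*
 * Usage sketch for mmu_get_kernel_lpsize() (illustrative, not part of the
 * original source): a segkmem_lpsize of 0 (unset in /etc/system) yields
 * MMU_PAGESIZE4M; a supported size such as TTEBYTES(TTE512K) is returned
 * unchanged; any unsupported value, e.g. 2M, falls through the
 * TTE8K..TTE4M loop and is clamped to TTEBYTES(TTE8K).
 */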