/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
250Sstevel@tonic-gate */ 260Sstevel@tonic-gate 270Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 280Sstevel@tonic-gate 290Sstevel@tonic-gate #include <sys/types.h> 300Sstevel@tonic-gate #include <sys/systm.h> 310Sstevel@tonic-gate #include <sys/archsystm.h> 320Sstevel@tonic-gate #include <sys/machparam.h> 330Sstevel@tonic-gate #include <sys/machsystm.h> 340Sstevel@tonic-gate #include <sys/cpu.h> 350Sstevel@tonic-gate #include <sys/elf_SPARC.h> 360Sstevel@tonic-gate #include <vm/hat_sfmmu.h> 370Sstevel@tonic-gate #include <vm/page.h> 380Sstevel@tonic-gate #include <sys/cpuvar.h> 390Sstevel@tonic-gate #include <sys/spitregs.h> 400Sstevel@tonic-gate #include <sys/async.h> 410Sstevel@tonic-gate #include <sys/cmn_err.h> 420Sstevel@tonic-gate #include <sys/debug.h> 430Sstevel@tonic-gate #include <sys/dditypes.h> 440Sstevel@tonic-gate #include <sys/sunddi.h> 450Sstevel@tonic-gate #include <sys/cpu_module.h> 460Sstevel@tonic-gate #include <sys/prom_debug.h> 470Sstevel@tonic-gate #include <sys/vmsystm.h> 480Sstevel@tonic-gate #include <sys/prom_plat.h> 490Sstevel@tonic-gate #include <sys/sysmacros.h> 500Sstevel@tonic-gate #include <sys/intreg.h> 510Sstevel@tonic-gate #include <sys/machtrap.h> 520Sstevel@tonic-gate #include <sys/ontrap.h> 530Sstevel@tonic-gate #include <sys/ivintr.h> 540Sstevel@tonic-gate #include <sys/atomic.h> 550Sstevel@tonic-gate #include <sys/panic.h> 560Sstevel@tonic-gate #include <sys/ndifm.h> 570Sstevel@tonic-gate #include <sys/fm/protocol.h> 580Sstevel@tonic-gate #include <sys/fm/util.h> 590Sstevel@tonic-gate #include <sys/fm/cpu/UltraSPARC-II.h> 600Sstevel@tonic-gate #include <sys/ddi.h> 610Sstevel@tonic-gate #include <sys/ecc_kstat.h> 620Sstevel@tonic-gate #include <sys/watchpoint.h> 630Sstevel@tonic-gate #include <sys/dtrace.h> 640Sstevel@tonic-gate #include <sys/errclassify.h> 650Sstevel@tonic-gate 660Sstevel@tonic-gate uchar_t *ctx_pgsz_array = NULL; 670Sstevel@tonic-gate 680Sstevel@tonic-gate /* 690Sstevel@tonic-gate * Structure for the 
8 byte ecache data dump and the associated AFSR state. 700Sstevel@tonic-gate * There will be 8 of these structures used to dump an ecache line (64 bytes). 710Sstevel@tonic-gate */ 720Sstevel@tonic-gate typedef struct sf_ec_data_elm { 730Sstevel@tonic-gate uint64_t ec_d8; 740Sstevel@tonic-gate uint64_t ec_afsr; 750Sstevel@tonic-gate } ec_data_t; 760Sstevel@tonic-gate 770Sstevel@tonic-gate /* 780Sstevel@tonic-gate * Define spitfire (Ultra I/II) specific asynchronous error structure 790Sstevel@tonic-gate */ 800Sstevel@tonic-gate typedef struct spitfire_async_flt { 810Sstevel@tonic-gate struct async_flt cmn_asyncflt; /* common - see sun4u/sys/async.h */ 820Sstevel@tonic-gate ushort_t flt_type; /* types of faults - cpu specific */ 830Sstevel@tonic-gate ec_data_t flt_ec_data[8]; /* for E$ or mem dump/state */ 840Sstevel@tonic-gate uint64_t flt_ec_tag; /* E$ tag info */ 850Sstevel@tonic-gate int flt_ec_lcnt; /* number of bad E$ lines */ 860Sstevel@tonic-gate ushort_t flt_sdbh; /* UDBH reg */ 870Sstevel@tonic-gate ushort_t flt_sdbl; /* UDBL reg */ 880Sstevel@tonic-gate } spitf_async_flt; 890Sstevel@tonic-gate 900Sstevel@tonic-gate /* 910Sstevel@tonic-gate * Prototypes for support routines in spitfire_asm.s: 920Sstevel@tonic-gate */ 930Sstevel@tonic-gate extern void flush_ecache(uint64_t physaddr, size_t size, size_t linesize); 940Sstevel@tonic-gate extern uint64_t get_lsu(void); 950Sstevel@tonic-gate extern void set_lsu(uint64_t ncc); 960Sstevel@tonic-gate extern void get_ecache_dtag(uint32_t ecache_idx, uint64_t *data, uint64_t *tag, 970Sstevel@tonic-gate uint64_t *oafsr, uint64_t *acc_afsr); 980Sstevel@tonic-gate extern uint64_t check_ecache_line(uint32_t id, uint64_t *acc_afsr); 990Sstevel@tonic-gate extern uint64_t get_ecache_tag(uint32_t id, uint64_t *nafsr, 1000Sstevel@tonic-gate uint64_t *acc_afsr); 1010Sstevel@tonic-gate extern uint64_t read_and_clear_afsr(); 1020Sstevel@tonic-gate extern void write_ec_tag_parity(uint32_t id); 1030Sstevel@tonic-gate extern void 
write_hb_ec_tag_parity(uint32_t id); 1040Sstevel@tonic-gate 1050Sstevel@tonic-gate /* 1060Sstevel@tonic-gate * Spitfire module routines: 1070Sstevel@tonic-gate */ 1080Sstevel@tonic-gate static void cpu_async_log_err(void *flt); 1090Sstevel@tonic-gate /*PRINTFLIKE6*/ 1100Sstevel@tonic-gate static void cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt, 1110Sstevel@tonic-gate uint_t logflags, const char *endstr, const char *fmt, ...); 1120Sstevel@tonic-gate 1130Sstevel@tonic-gate static void cpu_read_paddr(struct async_flt *aflt, short verbose, short ce_err); 1140Sstevel@tonic-gate static void cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum); 1150Sstevel@tonic-gate static void cpu_log_ecmem_info(spitf_async_flt *spf_flt); 1160Sstevel@tonic-gate 1170Sstevel@tonic-gate static void log_ce_err(struct async_flt *aflt, char *unum); 1180Sstevel@tonic-gate static void log_ue_err(struct async_flt *aflt, char *unum); 1190Sstevel@tonic-gate static void check_misc_err(spitf_async_flt *spf_flt); 1200Sstevel@tonic-gate static ushort_t ecc_gen(uint_t high_bytes, uint_t low_bytes); 1210Sstevel@tonic-gate static int check_ecc(struct async_flt *aflt); 1220Sstevel@tonic-gate static uint_t get_cpu_status(uint64_t arg); 1230Sstevel@tonic-gate static uint64_t clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr); 1240Sstevel@tonic-gate static void scan_ecache(uint64_t *afar, ec_data_t *data, uint64_t *tag, 1250Sstevel@tonic-gate int *m, uint64_t *afsr); 1260Sstevel@tonic-gate static void ecache_kstat_init(struct cpu *cp); 1270Sstevel@tonic-gate static void ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag, 1280Sstevel@tonic-gate uint64_t paddr, int mpb, uint64_t); 1290Sstevel@tonic-gate static uint64_t ecache_scrub_misc_err(int, uint64_t); 1300Sstevel@tonic-gate static void ecache_scrub_tag_err(uint64_t, uchar_t, uint32_t); 1310Sstevel@tonic-gate static void ecache_page_retire(void *); 1320Sstevel@tonic-gate static int ecc_kstat_update(kstat_t *ksp, int rw); 
1330Sstevel@tonic-gate static int ce_count_unum(int status, int len, char *unum); 1340Sstevel@tonic-gate static void add_leaky_bucket_timeout(void); 1350Sstevel@tonic-gate static int synd_to_synd_code(int synd_status, ushort_t synd); 1360Sstevel@tonic-gate 1370Sstevel@tonic-gate extern uint_t read_all_memscrub; 1380Sstevel@tonic-gate extern void memscrub_run(void); 1390Sstevel@tonic-gate 1400Sstevel@tonic-gate static uchar_t isus2i; /* set if sabre */ 1410Sstevel@tonic-gate static uchar_t isus2e; /* set if hummingbird */ 1420Sstevel@tonic-gate 1430Sstevel@tonic-gate /* 1440Sstevel@tonic-gate * Default ecache mask and shift settings for Spitfire. If we detect a 1450Sstevel@tonic-gate * different CPU implementation, we will modify these values at boot time. 1460Sstevel@tonic-gate */ 1470Sstevel@tonic-gate static uint64_t cpu_ec_tag_mask = S_ECTAG_MASK; 1480Sstevel@tonic-gate static uint64_t cpu_ec_state_mask = S_ECSTATE_MASK; 1490Sstevel@tonic-gate static uint64_t cpu_ec_par_mask = S_ECPAR_MASK; 1500Sstevel@tonic-gate static int cpu_ec_par_shift = S_ECPAR_SHIFT; 1510Sstevel@tonic-gate static int cpu_ec_tag_shift = S_ECTAG_SHIFT; 1520Sstevel@tonic-gate static int cpu_ec_state_shift = S_ECSTATE_SHIFT; 1530Sstevel@tonic-gate static uchar_t cpu_ec_state_exl = S_ECSTATE_EXL; 1540Sstevel@tonic-gate static uchar_t cpu_ec_state_mod = S_ECSTATE_MOD; 1550Sstevel@tonic-gate static uchar_t cpu_ec_state_shr = S_ECSTATE_SHR; 1560Sstevel@tonic-gate static uchar_t cpu_ec_state_own = S_ECSTATE_OWN; 1570Sstevel@tonic-gate 1580Sstevel@tonic-gate /* 1590Sstevel@tonic-gate * Default ecache state bits for Spitfire. These individual bits indicate if 1600Sstevel@tonic-gate * the given line is in any of the valid or modified states, respectively. 1610Sstevel@tonic-gate * Again, we modify these at boot if we detect a different CPU. 
1620Sstevel@tonic-gate */ 1630Sstevel@tonic-gate static uchar_t cpu_ec_state_valid = S_ECSTATE_VALID; 1640Sstevel@tonic-gate static uchar_t cpu_ec_state_dirty = S_ECSTATE_DIRTY; 1650Sstevel@tonic-gate static uchar_t cpu_ec_parity = S_EC_PARITY; 1660Sstevel@tonic-gate static uchar_t cpu_ec_state_parity = S_ECSTATE_PARITY; 1670Sstevel@tonic-gate 1680Sstevel@tonic-gate /* 1690Sstevel@tonic-gate * This table is used to determine which bit(s) is(are) bad when an ECC 1700Sstevel@tonic-gate * error occurrs. The array is indexed an 8-bit syndrome. The entries 1710Sstevel@tonic-gate * of this array have the following semantics: 1720Sstevel@tonic-gate * 1730Sstevel@tonic-gate * 00-63 The number of the bad bit, when only one bit is bad. 1740Sstevel@tonic-gate * 64 ECC bit C0 is bad. 1750Sstevel@tonic-gate * 65 ECC bit C1 is bad. 1760Sstevel@tonic-gate * 66 ECC bit C2 is bad. 1770Sstevel@tonic-gate * 67 ECC bit C3 is bad. 1780Sstevel@tonic-gate * 68 ECC bit C4 is bad. 1790Sstevel@tonic-gate * 69 ECC bit C5 is bad. 1800Sstevel@tonic-gate * 70 ECC bit C6 is bad. 1810Sstevel@tonic-gate * 71 ECC bit C7 is bad. 1820Sstevel@tonic-gate * 72 Two bits are bad. 1830Sstevel@tonic-gate * 73 Three bits are bad. 1840Sstevel@tonic-gate * 74 Four bits are bad. 1850Sstevel@tonic-gate * 75 More than Four bits are bad. 1860Sstevel@tonic-gate * 76 NO bits are bad. 1870Sstevel@tonic-gate * Based on "Galaxy Memory Subsystem SPECIFICATION" rev 0.6, pg. 28. 
1880Sstevel@tonic-gate */ 1890Sstevel@tonic-gate 1900Sstevel@tonic-gate #define C0 64 1910Sstevel@tonic-gate #define C1 65 1920Sstevel@tonic-gate #define C2 66 1930Sstevel@tonic-gate #define C3 67 1940Sstevel@tonic-gate #define C4 68 1950Sstevel@tonic-gate #define C5 69 1960Sstevel@tonic-gate #define C6 70 1970Sstevel@tonic-gate #define C7 71 1980Sstevel@tonic-gate #define M2 72 1990Sstevel@tonic-gate #define M3 73 2000Sstevel@tonic-gate #define M4 74 2010Sstevel@tonic-gate #define MX 75 2020Sstevel@tonic-gate #define NA 76 2030Sstevel@tonic-gate 2040Sstevel@tonic-gate #define SYND_IS_SINGLE_BIT_DATA(synd_code) ((synd_code >= 0) && \ 2050Sstevel@tonic-gate (synd_code < C0)) 2060Sstevel@tonic-gate #define SYND_IS_SINGLE_BIT_CHK(synd_code) ((synd_code >= C0) && \ 2070Sstevel@tonic-gate (synd_code <= C7)) 2080Sstevel@tonic-gate 2090Sstevel@tonic-gate static char ecc_syndrome_tab[] = 2100Sstevel@tonic-gate { 2110Sstevel@tonic-gate NA, C0, C1, M2, C2, M2, M2, M3, C3, M2, M2, M3, M2, M3, M3, M4, 2120Sstevel@tonic-gate C4, M2, M2, 32, M2, 57, MX, M2, M2, 37, 49, M2, 40, M2, M2, 44, 2130Sstevel@tonic-gate C5, M2, M2, 33, M2, 61, 4, M2, M2, MX, 53, M2, 45, M2, M2, 41, 2140Sstevel@tonic-gate M2, 0, 1, M2, 10, M2, M2, MX, 15, M2, M2, MX, M2, M3, M3, M2, 2150Sstevel@tonic-gate C6, M2, M2, 42, M2, 59, 39, M2, M2, MX, 51, M2, 34, M2, M2, 46, 2160Sstevel@tonic-gate M2, 25, 29, M2, 27, M4, M2, MX, 31, M2, M4, MX, M2, MX, MX, M2, 2170Sstevel@tonic-gate M2, MX, 36, M2, 7, M2, M2, 54, MX, M2, M2, 62, M2, 48, 56, M2, 2180Sstevel@tonic-gate M3, M2, M2, MX, M2, MX, 22, M2, M2, 18, MX, M2, M3, M2, M2, MX, 2190Sstevel@tonic-gate C7, M2, M2, 47, M2, 63, MX, M2, M2, 6, 55, M2, 35, M2, M2, 43, 2200Sstevel@tonic-gate M2, 5, MX, M2, MX, M2, M2, 50, 38, M2, M2, 58, M2, 52, 60, M2, 2210Sstevel@tonic-gate M2, 17, 21, M2, 19, M4, M2, MX, 23, M2, M4, MX, M2, MX, MX, M2, 2220Sstevel@tonic-gate M3, M2, M2, MX, M2, MX, 30, M2, M2, 26, MX, M2, M3, M2, M2, MX, 2230Sstevel@tonic-gate M2, 8, 13, M2, 2, 
M2, M2, M3, 3, M2, M2, M3, M2, MX, MX, M2, 2240Sstevel@tonic-gate M3, M2, M2, M3, M2, MX, 16, M2, M2, 20, MX, M2, MX, M2, M2, MX, 2250Sstevel@tonic-gate M3, M2, M2, M3, M2, MX, 24, M2, M2, 28, MX, M2, MX, M2, M2, MX, 2260Sstevel@tonic-gate M4, 12, 9, M2, 14, M2, M2, MX, 11, M2, M2, MX, M2, MX, MX, M4 2270Sstevel@tonic-gate }; 2280Sstevel@tonic-gate 2290Sstevel@tonic-gate #define SYND_TBL_SIZE 256 2300Sstevel@tonic-gate 2310Sstevel@tonic-gate /* 2320Sstevel@tonic-gate * Hack for determining UDBH/UDBL, for later cpu-specific error reporting. 2330Sstevel@tonic-gate * Cannot use bit 3 in afar, because it is a valid bit on a Sabre/Hummingbird. 2340Sstevel@tonic-gate */ 2350Sstevel@tonic-gate #define UDBL_REG 0x8000 2360Sstevel@tonic-gate #define UDBL(synd) ((synd & UDBL_REG) >> 15) 2370Sstevel@tonic-gate #define SYND(synd) (synd & 0x7FFF) 2380Sstevel@tonic-gate 2390Sstevel@tonic-gate /* 2400Sstevel@tonic-gate * These error types are specific to Spitfire and are used internally for the 2410Sstevel@tonic-gate * spitfire fault structure flt_type field. 
2420Sstevel@tonic-gate */ 2430Sstevel@tonic-gate #define CPU_UE_ERR 0 /* uncorrectable errors - UEs */ 2440Sstevel@tonic-gate #define CPU_EDP_LDP_ERR 1 /* LDP or EDP parity error */ 2450Sstevel@tonic-gate #define CPU_WP_ERR 2 /* WP parity error */ 2460Sstevel@tonic-gate #define CPU_BTO_BERR_ERR 3 /* bus timeout errors */ 2470Sstevel@tonic-gate #define CPU_PANIC_CP_ERR 4 /* cp error from panic polling */ 2480Sstevel@tonic-gate #define CPU_TRAPPING_CP_ERR 5 /* for sabre/hbird only, cp error */ 2490Sstevel@tonic-gate #define CPU_BADLINE_CI_ERR 6 /* E$ clean_bad line when idle */ 2500Sstevel@tonic-gate #define CPU_BADLINE_CB_ERR 7 /* E$ clean_bad line when busy */ 2510Sstevel@tonic-gate #define CPU_BADLINE_DI_ERR 8 /* E$ dirty_bad line when idle */ 2520Sstevel@tonic-gate #define CPU_BADLINE_DB_ERR 9 /* E$ dirty_bad line when busy */ 2530Sstevel@tonic-gate #define CPU_ORPHAN_CP_ERR 10 /* Orphan CP error */ 2540Sstevel@tonic-gate #define CPU_ECACHE_ADDR_PAR_ERR 11 /* Ecache Address parity error */ 2550Sstevel@tonic-gate #define CPU_ECACHE_STATE_ERR 12 /* Ecache state error */ 2560Sstevel@tonic-gate #define CPU_ECACHE_ETP_ETS_ERR 13 /* ETP set but ETS is zero */ 2570Sstevel@tonic-gate #define CPU_ECACHE_TAG_ERR 14 /* Scrub the E$ tag, if state clean */ 2580Sstevel@tonic-gate #define CPU_ADDITIONAL_ERR 15 /* Additional errors occurred */ 2590Sstevel@tonic-gate 2600Sstevel@tonic-gate /* 2610Sstevel@tonic-gate * Macro to access the "Spitfire cpu private" data structure. 
2620Sstevel@tonic-gate */ 2630Sstevel@tonic-gate #define CPU_PRIVATE_PTR(cp, x) (&(((spitfire_private_t *)CPU_PRIVATE(cp))->x)) 2640Sstevel@tonic-gate 2650Sstevel@tonic-gate /* 2660Sstevel@tonic-gate * set to 0 to disable automatic retiring of pages on 2670Sstevel@tonic-gate * DIMMs that have excessive soft errors 2680Sstevel@tonic-gate */ 2690Sstevel@tonic-gate int automatic_page_removal = 1; 2700Sstevel@tonic-gate 2710Sstevel@tonic-gate /* 2720Sstevel@tonic-gate * Heuristic for figuring out which module to replace. 2730Sstevel@tonic-gate * Relative likelihood that this P_SYND indicates that this module is bad. 2740Sstevel@tonic-gate * We call it a "score", though, not a relative likelihood. 2750Sstevel@tonic-gate * 2760Sstevel@tonic-gate * Step 1. 2770Sstevel@tonic-gate * Assign a score to each byte of P_SYND according to the following rules: 2780Sstevel@tonic-gate * If no bits on (0x00) or all bits on (0xFF), then give it a 5. 2790Sstevel@tonic-gate * If one bit on, give it a 95. 2800Sstevel@tonic-gate * If seven bits on, give it a 10. 
2810Sstevel@tonic-gate * If two bits on: 2820Sstevel@tonic-gate * in different nybbles, a 90 2830Sstevel@tonic-gate * in same nybble, but unaligned, 85 2840Sstevel@tonic-gate * in same nybble and as an aligned pair, 80 2850Sstevel@tonic-gate * If six bits on, look at the bits that are off: 2860Sstevel@tonic-gate * in same nybble and as an aligned pair, 15 2870Sstevel@tonic-gate * in same nybble, but unaligned, 20 2880Sstevel@tonic-gate * in different nybbles, a 25 2890Sstevel@tonic-gate * If three bits on: 2900Sstevel@tonic-gate * in diferent nybbles, no aligned pairs, 75 2910Sstevel@tonic-gate * in diferent nybbles, one aligned pair, 70 2920Sstevel@tonic-gate * in the same nybble, 65 2930Sstevel@tonic-gate * If five bits on, look at the bits that are off: 2940Sstevel@tonic-gate * in the same nybble, 30 2950Sstevel@tonic-gate * in diferent nybbles, one aligned pair, 35 2960Sstevel@tonic-gate * in diferent nybbles, no aligned pairs, 40 2970Sstevel@tonic-gate * If four bits on: 2980Sstevel@tonic-gate * all in one nybble, 45 2990Sstevel@tonic-gate * as two aligned pairs, 50 3000Sstevel@tonic-gate * one aligned pair, 55 3010Sstevel@tonic-gate * no aligned pairs, 60 3020Sstevel@tonic-gate * 3030Sstevel@tonic-gate * Step 2: 3040Sstevel@tonic-gate * Take the higher of the two scores (one for each byte) as the score 3050Sstevel@tonic-gate * for the module. 3060Sstevel@tonic-gate * 3070Sstevel@tonic-gate * Print the score for each module, and field service should replace the 3080Sstevel@tonic-gate * module with the highest score. 3090Sstevel@tonic-gate */ 3100Sstevel@tonic-gate 3110Sstevel@tonic-gate /* 3120Sstevel@tonic-gate * In the table below, the first row/column comment indicates the 3130Sstevel@tonic-gate * number of bits on in that nybble; the second row/column comment is 3140Sstevel@tonic-gate * the hex digit. 
3150Sstevel@tonic-gate */ 3160Sstevel@tonic-gate 3170Sstevel@tonic-gate static int 3180Sstevel@tonic-gate p_synd_score_table[256] = { 3190Sstevel@tonic-gate /* 0 1 1 2 1 2 2 3 1 2 2 3 2 3 3 4 */ 3200Sstevel@tonic-gate /* 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F */ 3210Sstevel@tonic-gate /* 0 0 */ 5, 95, 95, 80, 95, 85, 85, 65, 95, 85, 85, 65, 80, 65, 65, 45, 3220Sstevel@tonic-gate /* 1 1 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30, 3230Sstevel@tonic-gate /* 1 2 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30, 3240Sstevel@tonic-gate /* 2 3 */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15, 3250Sstevel@tonic-gate /* 1 4 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30, 3260Sstevel@tonic-gate /* 2 5 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20, 3270Sstevel@tonic-gate /* 2 6 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20, 3280Sstevel@tonic-gate /* 3 7 */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10, 3290Sstevel@tonic-gate /* 1 8 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30, 3300Sstevel@tonic-gate /* 2 9 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20, 3310Sstevel@tonic-gate /* 2 A */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20, 3320Sstevel@tonic-gate /* 3 B */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10, 3330Sstevel@tonic-gate /* 2 C */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15, 3340Sstevel@tonic-gate /* 3 D */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10, 3350Sstevel@tonic-gate /* 3 E */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10, 3360Sstevel@tonic-gate /* 4 F */ 45, 30, 30, 15, 30, 20, 20, 10, 30, 20, 20, 10, 15, 10, 10, 5, 3370Sstevel@tonic-gate }; 3380Sstevel@tonic-gate 3390Sstevel@tonic-gate int 3400Sstevel@tonic-gate ecc_psynd_score(ushort_t p_synd) 3410Sstevel@tonic-gate { 
3420Sstevel@tonic-gate int i, j, a, b; 3430Sstevel@tonic-gate 3440Sstevel@tonic-gate i = p_synd & 0xFF; 3450Sstevel@tonic-gate j = (p_synd >> 8) & 0xFF; 3460Sstevel@tonic-gate 3470Sstevel@tonic-gate a = p_synd_score_table[i]; 3480Sstevel@tonic-gate b = p_synd_score_table[j]; 3490Sstevel@tonic-gate 3500Sstevel@tonic-gate return (a > b ? a : b); 3510Sstevel@tonic-gate } 3520Sstevel@tonic-gate 3530Sstevel@tonic-gate /* 3540Sstevel@tonic-gate * Async Fault Logging 3550Sstevel@tonic-gate * 3560Sstevel@tonic-gate * To ease identifying, reading, and filtering async fault log messages, the 3570Sstevel@tonic-gate * label [AFT#] is now prepended to each async fault message. These messages 3580Sstevel@tonic-gate * and the logging rules are implemented by cpu_aflt_log(), below. 3590Sstevel@tonic-gate * 3600Sstevel@tonic-gate * [AFT0] - Tag for log messages that are associated with corrected ECC errors. 3610Sstevel@tonic-gate * This includes both corrected ECC memory and ecache faults. 3620Sstevel@tonic-gate * 3630Sstevel@tonic-gate * [AFT1] - Tag for log messages that are not ECC corrected (i.e. everything 3640Sstevel@tonic-gate * else except CE errors) with a priority of 1 (highest). This tag 3650Sstevel@tonic-gate * is also used for panic messages that result from an async fault. 3660Sstevel@tonic-gate * 3670Sstevel@tonic-gate * [AFT2] - These are lower priority diagnostic messages for uncorrected ECC 3680Sstevel@tonic-gate * [AFT3] or parity errors. For example, AFT2 is used for the actual dump 3690Sstevel@tonic-gate * of the E-$ data and tags. 3700Sstevel@tonic-gate * 3710Sstevel@tonic-gate * In a non-DEBUG kernel, AFT > 1 logs will be sent to the system log but not 3720Sstevel@tonic-gate * printed on the console. To send all AFT logs to both the log and the 3730Sstevel@tonic-gate * console, set aft_verbose = 1. 
3740Sstevel@tonic-gate */ 3750Sstevel@tonic-gate 3760Sstevel@tonic-gate #define CPU_FLTCPU 0x0001 /* print flt_inst as a CPU id */ 3770Sstevel@tonic-gate #define CPU_SPACE 0x0002 /* print flt_status (data or instr) */ 3780Sstevel@tonic-gate #define CPU_ERRID 0x0004 /* print flt_id */ 3790Sstevel@tonic-gate #define CPU_TL 0x0008 /* print flt_tl */ 3800Sstevel@tonic-gate #define CPU_ERRID_FIRST 0x0010 /* print flt_id first in message */ 3810Sstevel@tonic-gate #define CPU_AFSR 0x0020 /* print flt_stat as decoded %afsr */ 3820Sstevel@tonic-gate #define CPU_AFAR 0x0040 /* print flt_addr as %afar */ 3830Sstevel@tonic-gate #define CPU_AF_PSYND 0x0080 /* print flt_stat %afsr.PSYND */ 3840Sstevel@tonic-gate #define CPU_AF_ETS 0x0100 /* print flt_stat %afsr.ETS */ 3850Sstevel@tonic-gate #define CPU_UDBH 0x0200 /* print flt_sdbh and syndrome */ 3860Sstevel@tonic-gate #define CPU_UDBL 0x0400 /* print flt_sdbl and syndrome */ 3870Sstevel@tonic-gate #define CPU_FAULTPC 0x0800 /* print flt_pc */ 3880Sstevel@tonic-gate #define CPU_SYND 0x1000 /* print flt_synd and unum */ 3890Sstevel@tonic-gate 3900Sstevel@tonic-gate #define CMN_LFLAGS (CPU_FLTCPU | CPU_SPACE | CPU_ERRID | CPU_TL | \ 3910Sstevel@tonic-gate CPU_AFSR | CPU_AFAR | CPU_AF_PSYND | \ 3920Sstevel@tonic-gate CPU_AF_ETS | CPU_UDBH | CPU_UDBL | \ 3930Sstevel@tonic-gate CPU_FAULTPC) 3940Sstevel@tonic-gate #define UE_LFLAGS (CMN_LFLAGS | CPU_SYND) 3950Sstevel@tonic-gate #define CE_LFLAGS (UE_LFLAGS & ~CPU_UDBH & ~CPU_UDBL & ~CPU_TL & \ 3960Sstevel@tonic-gate ~CPU_SPACE) 3970Sstevel@tonic-gate #define PARERR_LFLAGS (CMN_LFLAGS) 3980Sstevel@tonic-gate #define WP_LFLAGS (CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL) 3990Sstevel@tonic-gate #define CP_LFLAGS (CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL & \ 4000Sstevel@tonic-gate ~CPU_FLTCPU & ~CPU_FAULTPC) 4010Sstevel@tonic-gate #define BERRTO_LFLAGS (CMN_LFLAGS) 4020Sstevel@tonic-gate #define NO_LFLAGS (0) 4030Sstevel@tonic-gate 4040Sstevel@tonic-gate #define AFSR_FMTSTR0 "\020\1ME" 
4050Sstevel@tonic-gate #define AFSR_FMTSTR1 "\020\040PRIV\037ISAP\036ETP\035IVUE\034TO" \ 4060Sstevel@tonic-gate "\033BERR\032LDP\031CP\030WP\027EDP\026UE\025CE" 4070Sstevel@tonic-gate #define UDB_FMTSTR "\020\012UE\011CE" 4080Sstevel@tonic-gate 4090Sstevel@tonic-gate /* 4100Sstevel@tonic-gate * Maximum number of contexts for Spitfire. 4110Sstevel@tonic-gate */ 4120Sstevel@tonic-gate #define MAX_NCTXS (1 << 13) 4130Sstevel@tonic-gate 4140Sstevel@tonic-gate /* 4150Sstevel@tonic-gate * Save the cache bootup state for use when internal 4160Sstevel@tonic-gate * caches are to be re-enabled after an error occurs. 4170Sstevel@tonic-gate */ 4180Sstevel@tonic-gate uint64_t cache_boot_state = 0; 4190Sstevel@tonic-gate 4200Sstevel@tonic-gate /* 4210Sstevel@tonic-gate * PA[31:0] represent Displacement in UPA configuration space. 4220Sstevel@tonic-gate */ 4230Sstevel@tonic-gate uint_t root_phys_addr_lo_mask = 0xffffffff; 4240Sstevel@tonic-gate 4250Sstevel@tonic-gate /* 4260Sstevel@tonic-gate * Spitfire legacy globals 4270Sstevel@tonic-gate */ 4280Sstevel@tonic-gate int itlb_entries; 4290Sstevel@tonic-gate int dtlb_entries; 4300Sstevel@tonic-gate 4310Sstevel@tonic-gate void 4320Sstevel@tonic-gate cpu_setup(void) 4330Sstevel@tonic-gate { 4340Sstevel@tonic-gate extern int page_retire_messages; 435*917Selowe extern int page_retire_first_ue; 4360Sstevel@tonic-gate extern int at_flags; 4370Sstevel@tonic-gate #if defined(SF_ERRATA_57) 4380Sstevel@tonic-gate extern caddr_t errata57_limit; 4390Sstevel@tonic-gate #endif 4400Sstevel@tonic-gate extern int disable_text_largepages; 4410Sstevel@tonic-gate extern int disable_initdata_largepages; 4420Sstevel@tonic-gate 4430Sstevel@tonic-gate cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT); 4440Sstevel@tonic-gate 4450Sstevel@tonic-gate at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1; 4460Sstevel@tonic-gate 4470Sstevel@tonic-gate /* 4480Sstevel@tonic-gate * Spitfire isn't currently FMA-aware, so we have to enable the 449*917Selowe * page 
retirement messages. We also change the default policy 450*917Selowe * for UE retirement to allow clearing of transient errors. 4510Sstevel@tonic-gate */ 4520Sstevel@tonic-gate page_retire_messages = 1; 453*917Selowe page_retire_first_ue = 0; 4540Sstevel@tonic-gate 4550Sstevel@tonic-gate /* 4560Sstevel@tonic-gate * save the cache bootup state. 4570Sstevel@tonic-gate */ 4580Sstevel@tonic-gate cache_boot_state = get_lsu() & (LSU_IC | LSU_DC); 4590Sstevel@tonic-gate 4600Sstevel@tonic-gate /* 4610Sstevel@tonic-gate * Use the maximum number of contexts available for Spitfire unless 4620Sstevel@tonic-gate * it has been tuned for debugging. 4630Sstevel@tonic-gate * We are checking against 0 here since this value can be patched 4640Sstevel@tonic-gate * while booting. It can not be patched via /etc/system since it 4650Sstevel@tonic-gate * will be patched too late and thus cause the system to panic. 4660Sstevel@tonic-gate */ 4670Sstevel@tonic-gate if (nctxs == 0) 4680Sstevel@tonic-gate nctxs = MAX_NCTXS; 4690Sstevel@tonic-gate 4700Sstevel@tonic-gate if (use_page_coloring) { 4710Sstevel@tonic-gate do_pg_coloring = 1; 4720Sstevel@tonic-gate if (use_virtual_coloring) 4730Sstevel@tonic-gate do_virtual_coloring = 1; 4740Sstevel@tonic-gate } 4750Sstevel@tonic-gate 4760Sstevel@tonic-gate /* 4770Sstevel@tonic-gate * Tune pp_slots to use up to 1/8th of the tlb entries. 4780Sstevel@tonic-gate */ 4790Sstevel@tonic-gate pp_slots = MIN(8, MAXPP_SLOTS); 4800Sstevel@tonic-gate 4810Sstevel@tonic-gate /* 4820Sstevel@tonic-gate * Block stores invalidate all pages of the d$ so pagecopy 4830Sstevel@tonic-gate * et. al. do not need virtual translations with virtual 4840Sstevel@tonic-gate * coloring taken into consideration. 
4850Sstevel@tonic-gate */ 4860Sstevel@tonic-gate pp_consistent_coloring = 0; 4870Sstevel@tonic-gate 4880Sstevel@tonic-gate isa_list = 4890Sstevel@tonic-gate "sparcv9+vis sparcv9 " 4900Sstevel@tonic-gate "sparcv8plus+vis sparcv8plus " 4910Sstevel@tonic-gate "sparcv8 sparcv8-fsmuld sparcv7 sparc"; 4920Sstevel@tonic-gate 4930Sstevel@tonic-gate cpu_hwcap_flags = AV_SPARC_VIS; 4940Sstevel@tonic-gate 4950Sstevel@tonic-gate /* 4960Sstevel@tonic-gate * On Spitfire, there's a hole in the address space 4970Sstevel@tonic-gate * that we must never map (the hardware only support 44-bits of 4980Sstevel@tonic-gate * virtual address). Later CPUs are expected to have wider 4990Sstevel@tonic-gate * supported address ranges. 5000Sstevel@tonic-gate * 5010Sstevel@tonic-gate * See address map on p23 of the UltraSPARC 1 user's manual. 5020Sstevel@tonic-gate */ 5030Sstevel@tonic-gate hole_start = (caddr_t)0x80000000000ull; 5040Sstevel@tonic-gate hole_end = (caddr_t)0xfffff80000000000ull; 5050Sstevel@tonic-gate 5060Sstevel@tonic-gate /* 5070Sstevel@tonic-gate * A spitfire call bug requires us to be a further 4Gbytes of 5080Sstevel@tonic-gate * firewall from the spec. 5090Sstevel@tonic-gate * 5100Sstevel@tonic-gate * See Spitfire Errata #21 5110Sstevel@tonic-gate */ 5120Sstevel@tonic-gate hole_start = (caddr_t)((uintptr_t)hole_start - (1ul << 32)); 5130Sstevel@tonic-gate hole_end = (caddr_t)((uintptr_t)hole_end + (1ul << 32)); 5140Sstevel@tonic-gate 5150Sstevel@tonic-gate /* 5160Sstevel@tonic-gate * The kpm mapping window. 5170Sstevel@tonic-gate * kpm_size: 5180Sstevel@tonic-gate * The size of a single kpm range. 5190Sstevel@tonic-gate * The overall size will be: kpm_size * vac_colors. 5200Sstevel@tonic-gate * kpm_vbase: 5210Sstevel@tonic-gate * The virtual start address of the kpm range within the kernel 5220Sstevel@tonic-gate * virtual address space. kpm_vbase has to be kpm_size aligned. 
5230Sstevel@tonic-gate */ 5240Sstevel@tonic-gate kpm_size = (size_t)(2ull * 1024 * 1024 * 1024 * 1024); /* 2TB */ 5250Sstevel@tonic-gate kpm_size_shift = 41; 5260Sstevel@tonic-gate kpm_vbase = (caddr_t)0xfffffa0000000000ull; /* 16EB - 6TB */ 5270Sstevel@tonic-gate 5280Sstevel@tonic-gate #if defined(SF_ERRATA_57) 5290Sstevel@tonic-gate errata57_limit = (caddr_t)0x80000000ul; 5300Sstevel@tonic-gate #endif 5310Sstevel@tonic-gate 5320Sstevel@tonic-gate /* 5330Sstevel@tonic-gate * Allow only 8K, 64K and 4M pages for text by default. 5340Sstevel@tonic-gate * Allow only 8K and 64K page for initialized data segments by 5350Sstevel@tonic-gate * default. 5360Sstevel@tonic-gate */ 5370Sstevel@tonic-gate disable_text_largepages = (1 << TTE512K) | (1 << TTE32M) | 5380Sstevel@tonic-gate (1 << TTE256M); 5390Sstevel@tonic-gate disable_initdata_largepages = (1 << TTE512K) | (1 << TTE4M) | 5400Sstevel@tonic-gate (1 << TTE32M) | (1 << TTE256M); 5410Sstevel@tonic-gate } 5420Sstevel@tonic-gate 5430Sstevel@tonic-gate static int 544789Sahrens getintprop(pnode_t node, char *name, int deflt) 5450Sstevel@tonic-gate { 5460Sstevel@tonic-gate int value; 5470Sstevel@tonic-gate 5480Sstevel@tonic-gate switch (prom_getproplen(node, name)) { 5490Sstevel@tonic-gate case 0: 5500Sstevel@tonic-gate value = 1; /* boolean properties */ 5510Sstevel@tonic-gate break; 5520Sstevel@tonic-gate 5530Sstevel@tonic-gate case sizeof (int): 5540Sstevel@tonic-gate (void) prom_getprop(node, name, (caddr_t)&value); 5550Sstevel@tonic-gate break; 5560Sstevel@tonic-gate 5570Sstevel@tonic-gate default: 5580Sstevel@tonic-gate value = deflt; 5590Sstevel@tonic-gate break; 5600Sstevel@tonic-gate } 5610Sstevel@tonic-gate 5620Sstevel@tonic-gate return (value); 5630Sstevel@tonic-gate } 5640Sstevel@tonic-gate 5650Sstevel@tonic-gate /* 5660Sstevel@tonic-gate * Set the magic constants of the implementation. 
5670Sstevel@tonic-gate */ 5680Sstevel@tonic-gate void 569789Sahrens cpu_fiximp(pnode_t dnode) 5700Sstevel@tonic-gate { 5710Sstevel@tonic-gate extern int vac_size, vac_shift; 5720Sstevel@tonic-gate extern uint_t vac_mask; 5730Sstevel@tonic-gate extern int dcache_line_mask; 5740Sstevel@tonic-gate int i, a; 5750Sstevel@tonic-gate static struct { 5760Sstevel@tonic-gate char *name; 5770Sstevel@tonic-gate int *var; 5780Sstevel@tonic-gate } prop[] = { 5790Sstevel@tonic-gate "dcache-size", &dcache_size, 5800Sstevel@tonic-gate "dcache-line-size", &dcache_linesize, 5810Sstevel@tonic-gate "icache-size", &icache_size, 5820Sstevel@tonic-gate "icache-line-size", &icache_linesize, 5830Sstevel@tonic-gate "ecache-size", &ecache_size, 5840Sstevel@tonic-gate "ecache-line-size", &ecache_alignsize, 5850Sstevel@tonic-gate "ecache-associativity", &ecache_associativity, 5860Sstevel@tonic-gate "#itlb-entries", &itlb_entries, 5870Sstevel@tonic-gate "#dtlb-entries", &dtlb_entries, 5880Sstevel@tonic-gate }; 5890Sstevel@tonic-gate 5900Sstevel@tonic-gate for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++) { 5910Sstevel@tonic-gate if ((a = getintprop(dnode, prop[i].name, -1)) != -1) { 5920Sstevel@tonic-gate *prop[i].var = a; 5930Sstevel@tonic-gate } 5940Sstevel@tonic-gate } 5950Sstevel@tonic-gate 5960Sstevel@tonic-gate ecache_setsize = ecache_size / ecache_associativity; 5970Sstevel@tonic-gate 5980Sstevel@tonic-gate vac_size = S_VAC_SIZE; 5990Sstevel@tonic-gate vac_mask = MMU_PAGEMASK & (vac_size - 1); 6000Sstevel@tonic-gate i = 0; a = vac_size; 6010Sstevel@tonic-gate while (a >>= 1) 6020Sstevel@tonic-gate ++i; 6030Sstevel@tonic-gate vac_shift = i; 6040Sstevel@tonic-gate shm_alignment = vac_size; 6050Sstevel@tonic-gate vac = 1; 6060Sstevel@tonic-gate 6070Sstevel@tonic-gate dcache_line_mask = (dcache_size - 1) & ~(dcache_linesize - 1); 6080Sstevel@tonic-gate 6090Sstevel@tonic-gate /* 6100Sstevel@tonic-gate * UltraSPARC I & II have ecache sizes running 6110Sstevel@tonic-gate * as follows: .25 
MB, .5 MB, 1 MB, 2 MB, 4 MB 6120Sstevel@tonic-gate * and 8 MB. Adjust the copyin/copyout limits 6130Sstevel@tonic-gate * according to the cache size. The magic number 6140Sstevel@tonic-gate * of VIS_COPY_THRESHOLD comes from the copyin/copyout code 6150Sstevel@tonic-gate * and its floor of VIS_COPY_THRESHOLD bytes before it will use 6160Sstevel@tonic-gate * VIS instructions. 6170Sstevel@tonic-gate * 6180Sstevel@tonic-gate * We assume that all CPUs on the system have the same size 6190Sstevel@tonic-gate * ecache. We're also called very early in the game. 6200Sstevel@tonic-gate * /etc/system will be parsed *after* we're called so 6210Sstevel@tonic-gate * these values can be overwritten. 6220Sstevel@tonic-gate */ 6230Sstevel@tonic-gate 6240Sstevel@tonic-gate hw_copy_limit_1 = VIS_COPY_THRESHOLD; 6250Sstevel@tonic-gate if (ecache_size <= 524288) { 6260Sstevel@tonic-gate hw_copy_limit_2 = VIS_COPY_THRESHOLD; 6270Sstevel@tonic-gate hw_copy_limit_4 = VIS_COPY_THRESHOLD; 6280Sstevel@tonic-gate hw_copy_limit_8 = VIS_COPY_THRESHOLD; 6290Sstevel@tonic-gate } else if (ecache_size == 1048576) { 6300Sstevel@tonic-gate hw_copy_limit_2 = 1024; 6310Sstevel@tonic-gate hw_copy_limit_4 = 1280; 6320Sstevel@tonic-gate hw_copy_limit_8 = 1536; 6330Sstevel@tonic-gate } else if (ecache_size == 2097152) { 6340Sstevel@tonic-gate hw_copy_limit_2 = 1536; 6350Sstevel@tonic-gate hw_copy_limit_4 = 2048; 6360Sstevel@tonic-gate hw_copy_limit_8 = 2560; 6370Sstevel@tonic-gate } else if (ecache_size == 4194304) { 6380Sstevel@tonic-gate hw_copy_limit_2 = 2048; 6390Sstevel@tonic-gate hw_copy_limit_4 = 2560; 6400Sstevel@tonic-gate hw_copy_limit_8 = 3072; 6410Sstevel@tonic-gate } else { 6420Sstevel@tonic-gate hw_copy_limit_2 = 2560; 6430Sstevel@tonic-gate hw_copy_limit_4 = 3072; 6440Sstevel@tonic-gate hw_copy_limit_8 = 3584; 6450Sstevel@tonic-gate } 6460Sstevel@tonic-gate } 6470Sstevel@tonic-gate 6480Sstevel@tonic-gate /* 6490Sstevel@tonic-gate * Called by setcpudelay 6500Sstevel@tonic-gate */ 
6510Sstevel@tonic-gate void 6520Sstevel@tonic-gate cpu_init_tick_freq(void) 6530Sstevel@tonic-gate { 6540Sstevel@tonic-gate /* 6550Sstevel@tonic-gate * Determine the cpu frequency by calling 6560Sstevel@tonic-gate * tod_get_cpufrequency. Use an approximate freqency 6570Sstevel@tonic-gate * value computed by the prom if the tod module 6580Sstevel@tonic-gate * is not initialized and loaded yet. 6590Sstevel@tonic-gate */ 6600Sstevel@tonic-gate if (tod_ops.tod_get_cpufrequency != NULL) { 6610Sstevel@tonic-gate mutex_enter(&tod_lock); 6620Sstevel@tonic-gate sys_tick_freq = tod_ops.tod_get_cpufrequency(); 6630Sstevel@tonic-gate mutex_exit(&tod_lock); 6640Sstevel@tonic-gate } else { 6650Sstevel@tonic-gate #if defined(HUMMINGBIRD) 6660Sstevel@tonic-gate /* 6670Sstevel@tonic-gate * the hummingbird version of %stick is used as the basis for 6680Sstevel@tonic-gate * low level timing; this provides an independent constant-rate 6690Sstevel@tonic-gate * clock for general system use, and frees power mgmt to set 6700Sstevel@tonic-gate * various cpu clock speeds. 
6710Sstevel@tonic-gate */ 6720Sstevel@tonic-gate if (system_clock_freq == 0) 6730Sstevel@tonic-gate cmn_err(CE_PANIC, "invalid system_clock_freq 0x%lx", 6740Sstevel@tonic-gate system_clock_freq); 6750Sstevel@tonic-gate sys_tick_freq = system_clock_freq; 6760Sstevel@tonic-gate #else /* SPITFIRE */ 6770Sstevel@tonic-gate sys_tick_freq = cpunodes[CPU->cpu_id].clock_freq; 6780Sstevel@tonic-gate #endif 6790Sstevel@tonic-gate } 6800Sstevel@tonic-gate } 6810Sstevel@tonic-gate 6820Sstevel@tonic-gate 6830Sstevel@tonic-gate void shipit(int upaid); 6840Sstevel@tonic-gate extern uint64_t xc_tick_limit; 6850Sstevel@tonic-gate extern uint64_t xc_tick_jump_limit; 6860Sstevel@tonic-gate 6870Sstevel@tonic-gate #ifdef SEND_MONDO_STATS 6880Sstevel@tonic-gate uint64_t x_early[NCPU][64]; 6890Sstevel@tonic-gate #endif 6900Sstevel@tonic-gate 6910Sstevel@tonic-gate /* 6920Sstevel@tonic-gate * Note: A version of this function is used by the debugger via the KDI, 6930Sstevel@tonic-gate * and must be kept in sync with this version. Any changes made to this 6940Sstevel@tonic-gate * function to support new chips or to accomodate errata must also be included 6950Sstevel@tonic-gate * in the KDI-specific version. See spitfire_kdi.c. 
6960Sstevel@tonic-gate */ 6970Sstevel@tonic-gate void 6980Sstevel@tonic-gate send_one_mondo(int cpuid) 6990Sstevel@tonic-gate { 7000Sstevel@tonic-gate uint64_t idsr, starttick, endtick; 7010Sstevel@tonic-gate int upaid, busy, nack; 7020Sstevel@tonic-gate uint64_t tick, tick_prev; 7030Sstevel@tonic-gate ulong_t ticks; 7040Sstevel@tonic-gate 7050Sstevel@tonic-gate CPU_STATS_ADDQ(CPU, sys, xcalls, 1); 7060Sstevel@tonic-gate upaid = CPUID_TO_UPAID(cpuid); 7070Sstevel@tonic-gate tick = starttick = gettick(); 7080Sstevel@tonic-gate shipit(upaid); 7090Sstevel@tonic-gate endtick = starttick + xc_tick_limit; 7100Sstevel@tonic-gate busy = nack = 0; 7110Sstevel@tonic-gate for (;;) { 7120Sstevel@tonic-gate idsr = getidsr(); 7130Sstevel@tonic-gate if (idsr == 0) 7140Sstevel@tonic-gate break; 7150Sstevel@tonic-gate /* 7160Sstevel@tonic-gate * When we detect an irregular tick jump, we adjust 7170Sstevel@tonic-gate * the timer window to the current tick value. 7180Sstevel@tonic-gate */ 7190Sstevel@tonic-gate tick_prev = tick; 7200Sstevel@tonic-gate tick = gettick(); 7210Sstevel@tonic-gate ticks = tick - tick_prev; 7220Sstevel@tonic-gate if (ticks > xc_tick_jump_limit) { 7230Sstevel@tonic-gate endtick = tick + xc_tick_limit; 7240Sstevel@tonic-gate } else if (tick > endtick) { 7250Sstevel@tonic-gate if (panic_quiesce) 7260Sstevel@tonic-gate return; 7270Sstevel@tonic-gate cmn_err(CE_PANIC, 7280Sstevel@tonic-gate "send mondo timeout (target 0x%x) [%d NACK %d BUSY]", 7290Sstevel@tonic-gate upaid, nack, busy); 7300Sstevel@tonic-gate } 7310Sstevel@tonic-gate if (idsr & IDSR_BUSY) { 7320Sstevel@tonic-gate busy++; 7330Sstevel@tonic-gate continue; 7340Sstevel@tonic-gate } 7350Sstevel@tonic-gate drv_usecwait(1); 7360Sstevel@tonic-gate shipit(upaid); 7370Sstevel@tonic-gate nack++; 7380Sstevel@tonic-gate busy = 0; 7390Sstevel@tonic-gate } 7400Sstevel@tonic-gate #ifdef SEND_MONDO_STATS 7410Sstevel@tonic-gate x_early[getprocessorid()][highbit(gettick() - starttick) - 1]++; 7420Sstevel@tonic-gate 
#endif 7430Sstevel@tonic-gate } 7440Sstevel@tonic-gate 7450Sstevel@tonic-gate void 7460Sstevel@tonic-gate send_mondo_set(cpuset_t set) 7470Sstevel@tonic-gate { 7480Sstevel@tonic-gate int i; 7490Sstevel@tonic-gate 7500Sstevel@tonic-gate for (i = 0; i < NCPU; i++) 7510Sstevel@tonic-gate if (CPU_IN_SET(set, i)) { 7520Sstevel@tonic-gate send_one_mondo(i); 7530Sstevel@tonic-gate CPUSET_DEL(set, i); 7540Sstevel@tonic-gate if (CPUSET_ISNULL(set)) 7550Sstevel@tonic-gate break; 7560Sstevel@tonic-gate } 7570Sstevel@tonic-gate } 7580Sstevel@tonic-gate 7590Sstevel@tonic-gate void 7600Sstevel@tonic-gate syncfpu(void) 7610Sstevel@tonic-gate { 7620Sstevel@tonic-gate } 7630Sstevel@tonic-gate 7640Sstevel@tonic-gate /* 7650Sstevel@tonic-gate * Determine the size of the CPU module's error structure in bytes. This is 7660Sstevel@tonic-gate * called once during boot to initialize the error queues. 7670Sstevel@tonic-gate */ 7680Sstevel@tonic-gate int 7690Sstevel@tonic-gate cpu_aflt_size(void) 7700Sstevel@tonic-gate { 7710Sstevel@tonic-gate /* 7720Sstevel@tonic-gate * We need to determine whether this is a sabre, Hummingbird or a 7730Sstevel@tonic-gate * Spitfire/Blackbird impl and set the appropriate state variables for 7740Sstevel@tonic-gate * ecache tag manipulation. We can't do this in cpu_setup() as it is 7750Sstevel@tonic-gate * too early in the boot flow and the cpunodes are not initialized. 7760Sstevel@tonic-gate * This routine will be called once after cpunodes[] is ready, so do 7770Sstevel@tonic-gate * it here. 
7780Sstevel@tonic-gate */ 7790Sstevel@tonic-gate if (cpunodes[CPU->cpu_id].implementation == SABRE_IMPL) { 7800Sstevel@tonic-gate isus2i = 1; 7810Sstevel@tonic-gate cpu_ec_tag_mask = SB_ECTAG_MASK; 7820Sstevel@tonic-gate cpu_ec_state_mask = SB_ECSTATE_MASK; 7830Sstevel@tonic-gate cpu_ec_par_mask = SB_ECPAR_MASK; 7840Sstevel@tonic-gate cpu_ec_par_shift = SB_ECPAR_SHIFT; 7850Sstevel@tonic-gate cpu_ec_tag_shift = SB_ECTAG_SHIFT; 7860Sstevel@tonic-gate cpu_ec_state_shift = SB_ECSTATE_SHIFT; 7870Sstevel@tonic-gate cpu_ec_state_exl = SB_ECSTATE_EXL; 7880Sstevel@tonic-gate cpu_ec_state_mod = SB_ECSTATE_MOD; 7890Sstevel@tonic-gate 7900Sstevel@tonic-gate /* These states do not exist in sabre - set to 0xFF */ 7910Sstevel@tonic-gate cpu_ec_state_shr = 0xFF; 7920Sstevel@tonic-gate cpu_ec_state_own = 0xFF; 7930Sstevel@tonic-gate 7940Sstevel@tonic-gate cpu_ec_state_valid = SB_ECSTATE_VALID; 7950Sstevel@tonic-gate cpu_ec_state_dirty = SB_ECSTATE_DIRTY; 7960Sstevel@tonic-gate cpu_ec_state_parity = SB_ECSTATE_PARITY; 7970Sstevel@tonic-gate cpu_ec_parity = SB_EC_PARITY; 7980Sstevel@tonic-gate } else if (cpunodes[CPU->cpu_id].implementation == HUMMBRD_IMPL) { 7990Sstevel@tonic-gate isus2e = 1; 8000Sstevel@tonic-gate cpu_ec_tag_mask = HB_ECTAG_MASK; 8010Sstevel@tonic-gate cpu_ec_state_mask = HB_ECSTATE_MASK; 8020Sstevel@tonic-gate cpu_ec_par_mask = HB_ECPAR_MASK; 8030Sstevel@tonic-gate cpu_ec_par_shift = HB_ECPAR_SHIFT; 8040Sstevel@tonic-gate cpu_ec_tag_shift = HB_ECTAG_SHIFT; 8050Sstevel@tonic-gate cpu_ec_state_shift = HB_ECSTATE_SHIFT; 8060Sstevel@tonic-gate cpu_ec_state_exl = HB_ECSTATE_EXL; 8070Sstevel@tonic-gate cpu_ec_state_mod = HB_ECSTATE_MOD; 8080Sstevel@tonic-gate 8090Sstevel@tonic-gate /* These states do not exist in hummingbird - set to 0xFF */ 8100Sstevel@tonic-gate cpu_ec_state_shr = 0xFF; 8110Sstevel@tonic-gate cpu_ec_state_own = 0xFF; 8120Sstevel@tonic-gate 8130Sstevel@tonic-gate cpu_ec_state_valid = HB_ECSTATE_VALID; 8140Sstevel@tonic-gate cpu_ec_state_dirty = 
HB_ECSTATE_DIRTY; 8150Sstevel@tonic-gate cpu_ec_state_parity = HB_ECSTATE_PARITY; 8160Sstevel@tonic-gate cpu_ec_parity = HB_EC_PARITY; 8170Sstevel@tonic-gate } 8180Sstevel@tonic-gate 8190Sstevel@tonic-gate return (sizeof (spitf_async_flt)); 8200Sstevel@tonic-gate } 8210Sstevel@tonic-gate 8220Sstevel@tonic-gate 8230Sstevel@tonic-gate /* 8240Sstevel@tonic-gate * Correctable ecc error trap handler 8250Sstevel@tonic-gate */ 8260Sstevel@tonic-gate /*ARGSUSED*/ 8270Sstevel@tonic-gate void 8280Sstevel@tonic-gate cpu_ce_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr, 8290Sstevel@tonic-gate uint_t p_afsr_high, uint_t p_afar_high) 8300Sstevel@tonic-gate { 8310Sstevel@tonic-gate ushort_t sdbh, sdbl; 8320Sstevel@tonic-gate ushort_t e_syndh, e_syndl; 8330Sstevel@tonic-gate spitf_async_flt spf_flt; 8340Sstevel@tonic-gate struct async_flt *ecc; 8350Sstevel@tonic-gate int queue = 1; 8360Sstevel@tonic-gate 8370Sstevel@tonic-gate uint64_t t_afar = p_afar; 8380Sstevel@tonic-gate uint64_t t_afsr = p_afsr; 8390Sstevel@tonic-gate 8400Sstevel@tonic-gate /* 8410Sstevel@tonic-gate * Note: the Spitfire data buffer error registers 8420Sstevel@tonic-gate * (upper and lower halves) are or'ed into the upper 8430Sstevel@tonic-gate * word of the afsr by ce_err(). 
8440Sstevel@tonic-gate */ 8450Sstevel@tonic-gate sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF); 8460Sstevel@tonic-gate sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF); 8470Sstevel@tonic-gate 8480Sstevel@tonic-gate e_syndh = (uchar_t)(sdbh & (uint_t)P_DER_E_SYND); 8490Sstevel@tonic-gate e_syndl = (uchar_t)(sdbl & (uint_t)P_DER_E_SYND); 8500Sstevel@tonic-gate 8510Sstevel@tonic-gate t_afsr &= S_AFSR_MASK; 8520Sstevel@tonic-gate t_afar &= SABRE_AFAR_PA; /* must use Sabre AFAR mask */ 8530Sstevel@tonic-gate 8540Sstevel@tonic-gate /* Setup the async fault structure */ 8550Sstevel@tonic-gate bzero(&spf_flt, sizeof (spitf_async_flt)); 8560Sstevel@tonic-gate ecc = (struct async_flt *)&spf_flt; 8570Sstevel@tonic-gate ecc->flt_id = gethrtime_waitfree(); 8580Sstevel@tonic-gate ecc->flt_stat = t_afsr; 8590Sstevel@tonic-gate ecc->flt_addr = t_afar; 8600Sstevel@tonic-gate ecc->flt_status = ECC_C_TRAP; 8610Sstevel@tonic-gate ecc->flt_bus_id = getprocessorid(); 8620Sstevel@tonic-gate ecc->flt_inst = CPU->cpu_id; 8630Sstevel@tonic-gate ecc->flt_pc = (caddr_t)rp->r_pc; 8640Sstevel@tonic-gate ecc->flt_func = log_ce_err; 8650Sstevel@tonic-gate ecc->flt_in_memory = 8660Sstevel@tonic-gate (pf_is_memory(ecc->flt_addr >> MMU_PAGESHIFT)) ? 1: 0; 8670Sstevel@tonic-gate spf_flt.flt_sdbh = sdbh; 8680Sstevel@tonic-gate spf_flt.flt_sdbl = sdbl; 8690Sstevel@tonic-gate 8700Sstevel@tonic-gate /* 8710Sstevel@tonic-gate * Check for fatal conditions. 
8720Sstevel@tonic-gate */ 8730Sstevel@tonic-gate check_misc_err(&spf_flt); 8740Sstevel@tonic-gate 8750Sstevel@tonic-gate /* 8760Sstevel@tonic-gate * Pananoid checks for valid AFSR and UDBs 8770Sstevel@tonic-gate */ 8780Sstevel@tonic-gate if ((t_afsr & P_AFSR_CE) == 0) { 8790Sstevel@tonic-gate cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS, 8800Sstevel@tonic-gate "** Panic due to CE bit not set in the AFSR", 8810Sstevel@tonic-gate " Corrected Memory Error on"); 8820Sstevel@tonic-gate } 8830Sstevel@tonic-gate 8840Sstevel@tonic-gate /* 8850Sstevel@tonic-gate * We want to skip logging only if ALL the following 8860Sstevel@tonic-gate * conditions are true: 8870Sstevel@tonic-gate * 8880Sstevel@tonic-gate * 1. There is only one error 8890Sstevel@tonic-gate * 2. That error is a correctable memory error 8900Sstevel@tonic-gate * 3. The error is caused by the memory scrubber (in which case 8910Sstevel@tonic-gate * the error will have occurred under on_trap protection) 8920Sstevel@tonic-gate * 4. The error is on a retired page 8930Sstevel@tonic-gate * 8940Sstevel@tonic-gate * Note: OT_DATA_EC is used places other than the memory scrubber. 8950Sstevel@tonic-gate * However, none of those errors should occur on a retired page. 
8960Sstevel@tonic-gate */ 8970Sstevel@tonic-gate if ((ecc->flt_stat & (S_AFSR_ALL_ERRS & ~P_AFSR_ME)) == P_AFSR_CE && 8980Sstevel@tonic-gate curthread->t_ontrap != NULL) { 8990Sstevel@tonic-gate 9000Sstevel@tonic-gate if (curthread->t_ontrap->ot_prot & OT_DATA_EC) { 901*917Selowe if (page_retire_check(ecc->flt_addr, NULL) == 0) { 9020Sstevel@tonic-gate queue = 0; 9030Sstevel@tonic-gate } 9040Sstevel@tonic-gate } 9050Sstevel@tonic-gate } 9060Sstevel@tonic-gate 9070Sstevel@tonic-gate if (((sdbh & P_DER_CE) == 0) && ((sdbl & P_DER_CE) == 0)) { 9080Sstevel@tonic-gate cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS, 9090Sstevel@tonic-gate "** Panic due to CE bits not set in the UDBs", 9100Sstevel@tonic-gate " Corrected Memory Error on"); 9110Sstevel@tonic-gate } 9120Sstevel@tonic-gate 9130Sstevel@tonic-gate if ((sdbh >> 8) & 1) { 9140Sstevel@tonic-gate ecc->flt_synd = e_syndh; 9150Sstevel@tonic-gate ce_scrub(ecc); 9160Sstevel@tonic-gate if (queue) { 9170Sstevel@tonic-gate cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc, 9180Sstevel@tonic-gate sizeof (*ecc), ce_queue, ERRORQ_ASYNC); 9190Sstevel@tonic-gate } 9200Sstevel@tonic-gate } 9210Sstevel@tonic-gate 9220Sstevel@tonic-gate if ((sdbl >> 8) & 1) { 9230Sstevel@tonic-gate ecc->flt_addr = t_afar | 0x8; /* Sabres do not have a UDBL */ 9240Sstevel@tonic-gate ecc->flt_synd = e_syndl | UDBL_REG; 9250Sstevel@tonic-gate ce_scrub(ecc); 9260Sstevel@tonic-gate if (queue) { 9270Sstevel@tonic-gate cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc, 9280Sstevel@tonic-gate sizeof (*ecc), ce_queue, ERRORQ_ASYNC); 9290Sstevel@tonic-gate } 9300Sstevel@tonic-gate } 9310Sstevel@tonic-gate 9320Sstevel@tonic-gate /* 9330Sstevel@tonic-gate * Re-enable all error trapping (CEEN currently cleared). 
9340Sstevel@tonic-gate */ 9350Sstevel@tonic-gate clr_datapath(); 9360Sstevel@tonic-gate set_asyncflt(P_AFSR_CE); 9370Sstevel@tonic-gate set_error_enable(EER_ENABLE); 9380Sstevel@tonic-gate } 9390Sstevel@tonic-gate 9400Sstevel@tonic-gate /* 9410Sstevel@tonic-gate * Cpu specific CE logging routine 9420Sstevel@tonic-gate */ 9430Sstevel@tonic-gate static void 9440Sstevel@tonic-gate log_ce_err(struct async_flt *aflt, char *unum) 9450Sstevel@tonic-gate { 9460Sstevel@tonic-gate spitf_async_flt spf_flt; 9470Sstevel@tonic-gate 9480Sstevel@tonic-gate if ((aflt->flt_stat & P_AFSR_CE) && (ce_verbose_memory == 0)) { 9490Sstevel@tonic-gate return; 9500Sstevel@tonic-gate } 9510Sstevel@tonic-gate 9520Sstevel@tonic-gate spf_flt.cmn_asyncflt = *aflt; 9530Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 0, &spf_flt, CE_LFLAGS, unum, 9540Sstevel@tonic-gate " Corrected Memory Error detected by"); 9550Sstevel@tonic-gate } 9560Sstevel@tonic-gate 9570Sstevel@tonic-gate /* 9580Sstevel@tonic-gate * Spitfire does not perform any further CE classification refinement 9590Sstevel@tonic-gate */ 9600Sstevel@tonic-gate /*ARGSUSED*/ 9610Sstevel@tonic-gate int 9620Sstevel@tonic-gate ce_scrub_xdiag_recirc(struct async_flt *ecc, errorq_t *eqp, errorq_elem_t *eqep, 9630Sstevel@tonic-gate size_t afltoffset) 9640Sstevel@tonic-gate { 9650Sstevel@tonic-gate return (0); 9660Sstevel@tonic-gate } 9670Sstevel@tonic-gate 9680Sstevel@tonic-gate char * 9690Sstevel@tonic-gate flt_to_error_type(struct async_flt *aflt) 9700Sstevel@tonic-gate { 9710Sstevel@tonic-gate if (aflt->flt_status & ECC_INTERMITTENT) 9720Sstevel@tonic-gate return (ERR_TYPE_DESC_INTERMITTENT); 9730Sstevel@tonic-gate if (aflt->flt_status & ECC_PERSISTENT) 9740Sstevel@tonic-gate return (ERR_TYPE_DESC_PERSISTENT); 9750Sstevel@tonic-gate if (aflt->flt_status & ECC_STICKY) 9760Sstevel@tonic-gate return (ERR_TYPE_DESC_STICKY); 9770Sstevel@tonic-gate return (ERR_TYPE_DESC_UNKNOWN); 9780Sstevel@tonic-gate } 9790Sstevel@tonic-gate 9800Sstevel@tonic-gate /* 
9810Sstevel@tonic-gate * Called by correctable ecc error logging code to print out 9820Sstevel@tonic-gate * the stick/persistent/intermittent status of the error. 9830Sstevel@tonic-gate */ 9840Sstevel@tonic-gate static void 9850Sstevel@tonic-gate cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum) 9860Sstevel@tonic-gate { 9870Sstevel@tonic-gate ushort_t status; 9880Sstevel@tonic-gate char *status1_str = "Memory"; 9890Sstevel@tonic-gate char *status2_str = "Intermittent"; 9900Sstevel@tonic-gate struct async_flt *aflt = (struct async_flt *)spf_flt; 9910Sstevel@tonic-gate 9920Sstevel@tonic-gate status = aflt->flt_status; 9930Sstevel@tonic-gate 9940Sstevel@tonic-gate if (status & ECC_ECACHE) 9950Sstevel@tonic-gate status1_str = "Ecache"; 9960Sstevel@tonic-gate 9970Sstevel@tonic-gate if (status & ECC_STICKY) 9980Sstevel@tonic-gate status2_str = "Sticky"; 9990Sstevel@tonic-gate else if (status & ECC_PERSISTENT) 10000Sstevel@tonic-gate status2_str = "Persistent"; 10010Sstevel@tonic-gate 10020Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 0, spf_flt, CPU_ERRID_FIRST, 10030Sstevel@tonic-gate NULL, " Corrected %s Error on %s is %s", 10040Sstevel@tonic-gate status1_str, unum, status2_str); 10050Sstevel@tonic-gate } 10060Sstevel@tonic-gate 10070Sstevel@tonic-gate /* 10080Sstevel@tonic-gate * check for a valid ce syndrome, then call the 10090Sstevel@tonic-gate * displacement flush scrubbing code, and then check the afsr to see if 10100Sstevel@tonic-gate * the error was persistent or intermittent. Reread the afar/afsr to see 10110Sstevel@tonic-gate * if the error was not scrubbed successfully, and is therefore sticky. 
10120Sstevel@tonic-gate */ 10130Sstevel@tonic-gate /*ARGSUSED1*/ 10140Sstevel@tonic-gate void 10150Sstevel@tonic-gate cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t triedcpulogout) 10160Sstevel@tonic-gate { 10170Sstevel@tonic-gate uint64_t eer, afsr; 10180Sstevel@tonic-gate ushort_t status; 10190Sstevel@tonic-gate 10200Sstevel@tonic-gate ASSERT(getpil() > LOCK_LEVEL); 10210Sstevel@tonic-gate 10220Sstevel@tonic-gate /* 10230Sstevel@tonic-gate * It is possible that the flt_addr is not a valid 10240Sstevel@tonic-gate * physical address. To deal with this, we disable 10250Sstevel@tonic-gate * NCEEN while we scrub that address. If this causes 10260Sstevel@tonic-gate * a TIMEOUT/BERR, we know this is an invalid 10270Sstevel@tonic-gate * memory location. 10280Sstevel@tonic-gate */ 10290Sstevel@tonic-gate kpreempt_disable(); 10300Sstevel@tonic-gate eer = get_error_enable(); 10310Sstevel@tonic-gate if (eer & (EER_CEEN | EER_NCEEN)) 10320Sstevel@tonic-gate set_error_enable(eer & ~(EER_CEEN | EER_NCEEN)); 10330Sstevel@tonic-gate 10340Sstevel@tonic-gate /* 10350Sstevel@tonic-gate * To check if the error detected by IO is persistent, sticky or 10360Sstevel@tonic-gate * intermittent. 10370Sstevel@tonic-gate */ 10380Sstevel@tonic-gate if (ecc->flt_status & ECC_IOBUS) { 10390Sstevel@tonic-gate ecc->flt_stat = P_AFSR_CE; 10400Sstevel@tonic-gate } 10410Sstevel@tonic-gate 10420Sstevel@tonic-gate scrubphys(P2ALIGN(ecc->flt_addr, 64), 10430Sstevel@tonic-gate cpunodes[CPU->cpu_id].ecache_size); 10440Sstevel@tonic-gate 10450Sstevel@tonic-gate get_asyncflt(&afsr); 10460Sstevel@tonic-gate if (afsr & (P_AFSR_TO | P_AFSR_BERR)) { 10470Sstevel@tonic-gate /* 10480Sstevel@tonic-gate * Must ensure that we don't get the TIMEOUT/BERR 10490Sstevel@tonic-gate * when we reenable NCEEN, so we clear the AFSR. 
10500Sstevel@tonic-gate */ 10510Sstevel@tonic-gate set_asyncflt(afsr & (P_AFSR_TO | P_AFSR_BERR)); 10520Sstevel@tonic-gate if (eer & (EER_CEEN | EER_NCEEN)) 10530Sstevel@tonic-gate set_error_enable(eer); 10540Sstevel@tonic-gate kpreempt_enable(); 10550Sstevel@tonic-gate return; 10560Sstevel@tonic-gate } 10570Sstevel@tonic-gate 10580Sstevel@tonic-gate if (eer & EER_NCEEN) 10590Sstevel@tonic-gate set_error_enable(eer & ~EER_CEEN); 10600Sstevel@tonic-gate 10610Sstevel@tonic-gate /* 10620Sstevel@tonic-gate * Check and clear any ECC errors from the scrub. If the scrub did 10630Sstevel@tonic-gate * not trip over the error, mark it intermittent. If the scrub did 10640Sstevel@tonic-gate * trip the error again and it did not scrub away, mark it sticky. 10650Sstevel@tonic-gate * Otherwise mark it persistent. 10660Sstevel@tonic-gate */ 10670Sstevel@tonic-gate if (check_ecc(ecc) != 0) { 10680Sstevel@tonic-gate cpu_read_paddr(ecc, 0, 1); 10690Sstevel@tonic-gate 10700Sstevel@tonic-gate if (check_ecc(ecc) != 0) 10710Sstevel@tonic-gate status = ECC_STICKY; 10720Sstevel@tonic-gate else 10730Sstevel@tonic-gate status = ECC_PERSISTENT; 10740Sstevel@tonic-gate } else 10750Sstevel@tonic-gate status = ECC_INTERMITTENT; 10760Sstevel@tonic-gate 10770Sstevel@tonic-gate if (eer & (EER_CEEN | EER_NCEEN)) 10780Sstevel@tonic-gate set_error_enable(eer); 10790Sstevel@tonic-gate kpreempt_enable(); 10800Sstevel@tonic-gate 10810Sstevel@tonic-gate ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY); 10820Sstevel@tonic-gate ecc->flt_status |= status; 10830Sstevel@tonic-gate } 10840Sstevel@tonic-gate 10850Sstevel@tonic-gate /* 10860Sstevel@tonic-gate * get the syndrome and unum, and then call the routines 10870Sstevel@tonic-gate * to check the other cpus and iobuses, and then do the error logging. 
10880Sstevel@tonic-gate */ 10890Sstevel@tonic-gate /*ARGSUSED1*/ 10900Sstevel@tonic-gate void 10910Sstevel@tonic-gate cpu_ce_log_err(struct async_flt *ecc, errorq_elem_t *eqep) 10920Sstevel@tonic-gate { 10930Sstevel@tonic-gate char unum[UNUM_NAMLEN]; 10940Sstevel@tonic-gate int len = 0; 10950Sstevel@tonic-gate int ce_verbose = 0; 1096*917Selowe int err; 10970Sstevel@tonic-gate 10980Sstevel@tonic-gate ASSERT(ecc->flt_func != NULL); 10990Sstevel@tonic-gate 11000Sstevel@tonic-gate /* Get the unum string for logging purposes */ 11010Sstevel@tonic-gate (void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, ecc, unum, 11020Sstevel@tonic-gate UNUM_NAMLEN, &len); 11030Sstevel@tonic-gate 11040Sstevel@tonic-gate /* Call specific error logging routine */ 11050Sstevel@tonic-gate (void) (*ecc->flt_func)(ecc, unum); 11060Sstevel@tonic-gate 11070Sstevel@tonic-gate /* 11080Sstevel@tonic-gate * Count errors per unum. 11090Sstevel@tonic-gate * Non-memory errors are all counted via a special unum string. 11100Sstevel@tonic-gate */ 1111*917Selowe if ((err = ce_count_unum(ecc->flt_status, len, unum)) != PR_OK && 11120Sstevel@tonic-gate automatic_page_removal) { 1113*917Selowe (void) page_retire(ecc->flt_addr, err); 11140Sstevel@tonic-gate } 11150Sstevel@tonic-gate 11160Sstevel@tonic-gate if (ecc->flt_panic) { 11170Sstevel@tonic-gate ce_verbose = 1; 11180Sstevel@tonic-gate } else if ((ecc->flt_class == BUS_FAULT) || 11190Sstevel@tonic-gate (ecc->flt_stat & P_AFSR_CE)) { 11200Sstevel@tonic-gate ce_verbose = (ce_verbose_memory > 0); 11210Sstevel@tonic-gate } else { 11220Sstevel@tonic-gate ce_verbose = 1; 11230Sstevel@tonic-gate } 11240Sstevel@tonic-gate 11250Sstevel@tonic-gate if (ce_verbose) { 11260Sstevel@tonic-gate spitf_async_flt sflt; 11270Sstevel@tonic-gate int synd_code; 11280Sstevel@tonic-gate 11290Sstevel@tonic-gate sflt.cmn_asyncflt = *ecc; /* for cpu_aflt_log() */ 11300Sstevel@tonic-gate 11310Sstevel@tonic-gate cpu_ce_log_status(&sflt, unum); 11320Sstevel@tonic-gate 
11330Sstevel@tonic-gate synd_code = synd_to_synd_code(AFLT_STAT_VALID, 11340Sstevel@tonic-gate SYND(ecc->flt_synd)); 11350Sstevel@tonic-gate 11360Sstevel@tonic-gate if (SYND_IS_SINGLE_BIT_DATA(synd_code)) { 11370Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST, 11380Sstevel@tonic-gate NULL, " ECC Data Bit %2d was in error " 11390Sstevel@tonic-gate "and corrected", synd_code); 11400Sstevel@tonic-gate } else if (SYND_IS_SINGLE_BIT_CHK(synd_code)) { 11410Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST, 11420Sstevel@tonic-gate NULL, " ECC Check Bit %2d was in error " 11430Sstevel@tonic-gate "and corrected", synd_code - C0); 11440Sstevel@tonic-gate } else { 11450Sstevel@tonic-gate /* 11460Sstevel@tonic-gate * These are UE errors - we shouldn't be getting CE 11470Sstevel@tonic-gate * traps for these; handle them in case of bad h/w. 11480Sstevel@tonic-gate */ 11490Sstevel@tonic-gate switch (synd_code) { 11500Sstevel@tonic-gate case M2: 11510Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 0, &sflt, 11520Sstevel@tonic-gate CPU_ERRID_FIRST, NULL, 11530Sstevel@tonic-gate " Two ECC Bits were in error"); 11540Sstevel@tonic-gate break; 11550Sstevel@tonic-gate case M3: 11560Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 0, &sflt, 11570Sstevel@tonic-gate CPU_ERRID_FIRST, NULL, 11580Sstevel@tonic-gate " Three ECC Bits were in error"); 11590Sstevel@tonic-gate break; 11600Sstevel@tonic-gate case M4: 11610Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 0, &sflt, 11620Sstevel@tonic-gate CPU_ERRID_FIRST, NULL, 11630Sstevel@tonic-gate " Four ECC Bits were in error"); 11640Sstevel@tonic-gate break; 11650Sstevel@tonic-gate case MX: 11660Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 0, &sflt, 11670Sstevel@tonic-gate CPU_ERRID_FIRST, NULL, 11680Sstevel@tonic-gate " More than Four ECC bits were " 11690Sstevel@tonic-gate "in error"); 11700Sstevel@tonic-gate break; 11710Sstevel@tonic-gate default: 11720Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 0, &sflt, 11730Sstevel@tonic-gate 
CPU_ERRID_FIRST, NULL, 11740Sstevel@tonic-gate " Unknown fault syndrome %d", 11750Sstevel@tonic-gate synd_code); 11760Sstevel@tonic-gate break; 11770Sstevel@tonic-gate } 11780Sstevel@tonic-gate } 11790Sstevel@tonic-gate } 11800Sstevel@tonic-gate 11810Sstevel@tonic-gate /* Display entire cache line, if valid address */ 11820Sstevel@tonic-gate if (ce_show_data && ecc->flt_addr != AFLT_INV_ADDR) 11830Sstevel@tonic-gate read_ecc_data(ecc, 1, 1); 11840Sstevel@tonic-gate } 11850Sstevel@tonic-gate 11860Sstevel@tonic-gate /* 11870Sstevel@tonic-gate * We route all errors through a single switch statement. 11880Sstevel@tonic-gate */ 11890Sstevel@tonic-gate void 11900Sstevel@tonic-gate cpu_ue_log_err(struct async_flt *aflt) 11910Sstevel@tonic-gate { 11920Sstevel@tonic-gate 11930Sstevel@tonic-gate switch (aflt->flt_class) { 11940Sstevel@tonic-gate case CPU_FAULT: 11950Sstevel@tonic-gate cpu_async_log_err(aflt); 11960Sstevel@tonic-gate break; 11970Sstevel@tonic-gate 11980Sstevel@tonic-gate case BUS_FAULT: 11990Sstevel@tonic-gate bus_async_log_err(aflt); 12000Sstevel@tonic-gate break; 12010Sstevel@tonic-gate 12020Sstevel@tonic-gate default: 12030Sstevel@tonic-gate cmn_err(CE_WARN, "discarding async error 0x%p with invalid " 12040Sstevel@tonic-gate "fault class (0x%x)", (void *)aflt, aflt->flt_class); 12050Sstevel@tonic-gate break; 12060Sstevel@tonic-gate } 12070Sstevel@tonic-gate } 12080Sstevel@tonic-gate 12090Sstevel@tonic-gate /* Values for action variable in cpu_async_error() */ 12100Sstevel@tonic-gate #define ACTION_NONE 0 12110Sstevel@tonic-gate #define ACTION_TRAMPOLINE 1 12120Sstevel@tonic-gate #define ACTION_AST_FLAGS 2 12130Sstevel@tonic-gate 12140Sstevel@tonic-gate /* 12150Sstevel@tonic-gate * Access error trap handler for asynchronous cpu errors. This routine is 12160Sstevel@tonic-gate * called to handle a data or instruction access error. All fatal errors are 12170Sstevel@tonic-gate * completely handled by this routine (by panicking). 
 * Non fatal error logging
 * is queued for later processing either via AST or softint at a lower PIL.
 * In case of panic, the error log queue will also be processed as part of the
 * panic flow to ensure all errors are logged.  This routine is called with all
 * errors disabled at PIL15.  The AFSR bits are cleared and the UDBL and UDBH
 * error bits are also cleared.  The hardware has also disabled the I and
 * D-caches for us, so we must re-enable them before returning.
 *
 * A summary of the handling of tl=0 UE/LDP/EDP/TO/BERR/WP/CP:
 *
 *       _______________________________________________________________
 *      |        Privileged tl0         |         Unprivileged          |
 *      | Protected     | Unprotected   | Protected     | Unprotected   |
 *      |on_trap|lofault|               |               |               |
 * -------------|-------|-------+---------------+---------------+-------------|
 *              |       |       |               |               |
 * UE/LDP/EDP   | L,T,p | L,R,p | L,P           |  n/a          | L,R,p       |
 *              |       |       |               |               |
 * TO/BERR      | T     | S     | L,P           |  n/a          | S           |
 *              |       |       |               |               |
 * WP           | L,M,p | L,M,p | L,M,p         |  n/a          | L,M,p       |
 *              |       |       |               |               |
 * CP (IIi/IIe) | L,P   | L,P   | L,P           |  n/a          | L,P         |
 * ____________________________________________________________________________
 *
 *
 * Action codes:
 *
 * L - log
 * M - kick off memscrubber if flt_in_memory
 * P - panic
 * p - panic if US-IIi or US-IIe (Sabre); overrides R and M
 * R - i)  if aft_panic is set, panic
 *     ii) otherwise, send hwerr event to contract and SIGKILL to process
 * S - send SIGBUS to process
 * T - trampoline
 *
 * Special cases:
 *
 * 1) if aft_testfatal is set, all faults result in a panic regardless
 * of type (even WP), protection (even on_trap), or privilege.
 */
/*ARGSUSED*/
void
cpu_async_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr,
	uint_t p_afsr_high, uint_t p_afar_high)
{
	ushort_t sdbh, sdbl, ttype, tl;
	spitf_async_flt spf_flt;
	struct async_flt *aflt;
	char pr_reason[28];
	uint64_t oafsr;
	uint64_t acc_afsr = 0;			/* accumulated afsr */
	int action = ACTION_NONE;
	uint64_t t_afar = p_afar;
	uint64_t t_afsr = p_afsr;
	int expected = DDI_FM_ERR_UNEXPECTED;
	ddi_acc_hdl_t *hp;

	/*
	 * We need to look at p_flag to determine if the thread detected an
	 * error while dumping core.  We can't grab p_lock here, but it's ok
	 * because we just need a consistent snapshot and we know that everyone
	 * else will store a consistent set of bits while holding p_lock.  We
	 * don't have to worry about a race because SDOCORE is set once prior
	 * to doing i/o from the process's address space and is never cleared.
	 */
	uint_t pflag = ttoproc(curthread)->p_flag;

	pr_reason[0] = '\0';

	/*
	 * Note: the Spitfire data buffer error registers
	 * (upper and lower halves) are or'ed into the upper
	 * word of the afsr by async_err() if P_AFSR_UE is set.
	 */
	sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF);
	sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF);

	/*
	 * Grab the ttype encoded in <63:53> of the saved
	 * afsr passed from async_err()
	 */
	ttype = (ushort_t)((t_afsr >> 53) & 0x1FF);
	tl = (ushort_t)(t_afsr >> 62);

	t_afsr &= S_AFSR_MASK;
	t_afar &= SABRE_AFAR_PA;	/* must use Sabre AFAR mask */

	/*
	 * Initialize most of the common and CPU-specific structure.  We derive
	 * aflt->flt_priv from %tstate, instead of from the AFSR.PRIV bit.  The
	 * initial setting of aflt->flt_panic is based on TL: we must panic if
	 * the error occurred at TL > 0.  We also set flt_panic if the test/demo
	 * tuneable aft_testfatal is set (not the default).
	 */
	bzero(&spf_flt, sizeof (spitf_async_flt));
	aflt = (struct async_flt *)&spf_flt;
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_stat = t_afsr;
	aflt->flt_addr = t_afar;
	aflt->flt_bus_id = getprocessorid();
	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_pc = (caddr_t)rp->r_pc;
	aflt->flt_prot = AFLT_PROT_NONE;
	aflt->flt_class = CPU_FAULT;
	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0;
	aflt->flt_tl = (uchar_t)tl;
	aflt->flt_panic = (tl != 0 || aft_testfatal != 0);
	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;

	/*
	 * Set flt_status based on the trap type.  If we end up here as the
	 * result of a UE detected by the CE handling code, leave status 0.
	 */
	switch (ttype) {
	case T_DATA_ERROR:
		aflt->flt_status = ECC_D_TRAP;
		break;
	case T_INSTR_ERROR:
		aflt->flt_status = ECC_I_TRAP;
		break;
	}

	spf_flt.flt_sdbh = sdbh;
	spf_flt.flt_sdbl = sdbl;

	/*
	 * Check for fatal async errors.
	 */
	check_misc_err(&spf_flt);

	/*
	 * If the trap occurred in privileged mode at TL=0, we need to check to
	 * see if we were executing in the kernel under on_trap() or t_lofault
	 * protection.  If so, modify the saved registers so that we return
	 * from the trap to the appropriate trampoline routine.
	 */
	if (aflt->flt_priv && tl == 0) {
		if (curthread->t_ontrap != NULL) {
			on_trap_data_t *otp = curthread->t_ontrap;

			if (otp->ot_prot & OT_DATA_EC) {
				aflt->flt_prot = AFLT_PROT_EC;
				otp->ot_trap |= OT_DATA_EC;
				rp->r_pc = otp->ot_trampoline;
				rp->r_npc = rp->r_pc + 4;
				action = ACTION_TRAMPOLINE;
			}

			if ((t_afsr & (P_AFSR_TO | P_AFSR_BERR)) &&
			    (otp->ot_prot & OT_DATA_ACCESS)) {
				aflt->flt_prot = AFLT_PROT_ACCESS;
				otp->ot_trap |= OT_DATA_ACCESS;
				rp->r_pc = otp->ot_trampoline;
				rp->r_npc = rp->r_pc + 4;
				action = ACTION_TRAMPOLINE;
				/*
				 * for peeks and caut_gets errors are expected
				 */
				hp = (ddi_acc_hdl_t *)otp->ot_handle;
				if (!hp)
					expected = DDI_FM_ERR_PEEK;
				else if (hp->ah_acc.devacc_attr_access ==
				    DDI_CAUTIOUS_ACC)
					expected = DDI_FM_ERR_EXPECTED;
			}

		} else if (curthread->t_lofault) {
			aflt->flt_prot = AFLT_PROT_COPY;
			rp->r_g1 = EFAULT;
			rp->r_pc = curthread->t_lofault;
			rp->r_npc = rp->r_pc + 4;
			action = ACTION_TRAMPOLINE;
		}
	}

	/*
	 * Determine if this error needs to be treated as fatal.  Note that
	 * multiple errors detected upon entry to this trap handler does not
	 * necessarily warrant a panic.  We only want to panic if the trap
	 * happened in privileged mode and not under t_ontrap or t_lofault
	 * protection.  The exception is WP: if we *only* get WP, it is not
	 * fatal even if the trap occurred in privileged mode, except on Sabre.
	 *
	 * aft_panic, if set, effectively makes us treat usermode
	 * UE/EDP/LDP faults as if they were privileged - so we will
	 * panic instead of sending a contract event.  A lofault-protected
	 * fault will normally follow the contract event; if aft_panic is
	 * set this will be changed to a panic.
	 *
	 * For usermode BERR/BTO errors, eg from processes performing device
	 * control through mapped device memory, we need only deliver
	 * a SIGBUS to the offending process.
	 *
	 * Some additional flt_panic reasons (eg, WP on Sabre) will be
	 * checked later; for now we implement the common reasons.
	 */
	if (aflt->flt_prot == AFLT_PROT_NONE) {
		/*
		 * Beware - multiple bits may be set in AFSR
		 */
		if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP)) {
			if (aflt->flt_priv || aft_panic)
				aflt->flt_panic = 1;
		}

		if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) {
			if (aflt->flt_priv)
				aflt->flt_panic = 1;
		}
	} else if (aflt->flt_prot == AFLT_PROT_COPY && aft_panic) {
		aflt->flt_panic = 1;
	}

	/*
	 * UE/BERR/TO: Call our bus nexus friends to check for
	 * IO errors that may have resulted in this trap.
	 */
	if (t_afsr & (P_AFSR_TO | P_AFSR_BERR | P_AFSR_UE)) {
		cpu_run_bus_error_handlers(aflt, expected);
	}

	/*
	 * Handle UE: If the UE is in memory, we need to flush the bad line from
	 * the E-cache.  We also need to query the bus nexus for fatal errors.
	 * For sabre, we will panic on UEs.  Attempts to do diagnostic read on
	 * caches may introduce more parity errors (especially when the module
	 * is bad) and in sabre there is no guarantee that such errors
	 * (if introduced) are written back as poisoned data.
	 */
	if (t_afsr & P_AFSR_UE) {
		int i;

		(void) strcat(pr_reason, "UE ");

		spf_flt.flt_type = CPU_UE_ERR;
		aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
			MMU_PAGESHIFT)) ? 1: 0;

		/*
		 * With UE, we have the PA of the fault.
		 * Let's do a diagnostic read to get the ecache
		 * data and tag info of the bad line for logging.
		 */
		if (aflt->flt_in_memory) {
			uint32_t ec_set_size;
			uchar_t state;
			uint32_t ecache_idx;
			uint64_t faultpa = P2ALIGN(aflt->flt_addr, 64);

			/* touch the line to put it in ecache */
			acc_afsr |= read_and_clear_afsr();
			(void) lddphys(faultpa);
			acc_afsr |= (read_and_clear_afsr() &
				    ~(P_AFSR_EDP | P_AFSR_UE));

			ec_set_size = cpunodes[CPU->cpu_id].ecache_size /
			    ecache_associativity;

			for (i = 0; i < ecache_associativity; i++) {
				ecache_idx = i * ec_set_size +
				    (aflt->flt_addr % ec_set_size);
				get_ecache_dtag(P2ALIGN(ecache_idx, 64),
					(uint64_t *)&spf_flt.flt_ec_data[0],
					&spf_flt.flt_ec_tag, &oafsr, &acc_afsr);
				acc_afsr |= oafsr;

				state = (uchar_t)((spf_flt.flt_ec_tag &
				    cpu_ec_state_mask) >> cpu_ec_state_shift);

				if ((state & cpu_ec_state_valid) &&
				    ((spf_flt.flt_ec_tag & cpu_ec_tag_mask) ==
				    ((uint64_t)aflt->flt_addr >>
				    cpu_ec_tag_shift)))
					break;
			}

			/*
			 * Check to see if the ecache tag is valid for the
			 * fault PA.  In the very unlikely event where the
			 * line could be victimized, no ecache info will be
			 * available.  If this is the case, capture the line
			 * from memory instead.
			 */
			if ((state & cpu_ec_state_valid) == 0 ||
			    (spf_flt.flt_ec_tag & cpu_ec_tag_mask) !=
			    ((uint64_t)aflt->flt_addr >> cpu_ec_tag_shift)) {
				for (i = 0; i < 8; i++, faultpa += 8) {
					ec_data_t *ecdptr;

					ecdptr = &spf_flt.flt_ec_data[i];
					acc_afsr |= read_and_clear_afsr();
					ecdptr->ec_d8 = lddphys(faultpa);
					acc_afsr |= (read_and_clear_afsr() &
						    ~(P_AFSR_EDP | P_AFSR_UE));
					ecdptr->ec_afsr = 0;
							/* null afsr value */
				}

				/*
				 * Mark tag invalid to indicate mem dump
				 * when we print out the info.
				 */
				spf_flt.flt_ec_tag = AFLT_INV_ADDR;
			}
			spf_flt.flt_ec_lcnt = 1;

			/*
			 * Flush out the bad line
			 */
			flushecacheline(P2ALIGN(aflt->flt_addr, 64),
				cpunodes[CPU->cpu_id].ecache_size);

			acc_afsr |= clear_errors(NULL, NULL);
		}

		/*
		 * Ask our bus nexus friends if they have any fatal errors.  If
		 * so, they will log appropriate error messages and panic as a
		 * result.  We then queue an event for each UDB that reports a
		 * UE.  Each UE reported in a UDB will have its own log message.
		 *
		 * Note from kbn: In the case where there are multiple UEs
		 * (ME bit is set) - the AFAR address is only accurate to
		 * the 16-byte granularity.  One cannot tell whether the AFAR
		 * belongs to the UDBH or UDBL syndromes.  In this case, we
		 * always report the AFAR address to be 16-byte aligned.
		 *
		 * If we're on a Sabre, there is no SDBL, but it will always
		 * read as zero, so the sdbl test below will safely fail.
		 */
		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL || isus2i || isus2e)
			aflt->flt_panic = 1;

		if (sdbh & P_DER_UE) {
			aflt->flt_synd = sdbh & P_DER_E_SYND;
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE,
			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
			    aflt->flt_panic);
		}
		if (sdbl & P_DER_UE) {
			aflt->flt_synd = sdbl & P_DER_E_SYND;
			aflt->flt_synd |= UDBL_REG;	/* indicates UDBL */
			if (!(aflt->flt_stat & P_AFSR_ME))
				aflt->flt_addr |= 0x8;
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE,
			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
			    aflt->flt_panic);
		}

		/*
		 * We got a UE and are panicking, save the fault PA in a known
		 * location so that the platform specific panic code can check
		 * for copyback errors.
		 */
		if (aflt->flt_panic && aflt->flt_in_memory) {
			panic_aflt = *aflt;
		}
	}

	/*
	 * Handle EDP and LDP: Locate the line with bad parity and enqueue an
	 * async error for logging.  For Sabre, we panic on EDP or LDP.
	 */
	if (t_afsr & (P_AFSR_EDP | P_AFSR_LDP)) {
		spf_flt.flt_type = CPU_EDP_LDP_ERR;

		if (t_afsr & P_AFSR_EDP)
			(void) strcat(pr_reason, "EDP ");

		if (t_afsr & P_AFSR_LDP)
			(void) strcat(pr_reason, "LDP ");

		/*
		 * Here we have no PA to work with.
		 * Scan each line in the ecache to look for
		 * the one with bad parity.
		 */
		aflt->flt_addr = AFLT_INV_ADDR;
		scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
			&spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr);
		acc_afsr |= (oafsr & ~P_AFSR_WP);

		/*
		 * If we found a bad PA, update the state to indicate if it is
		 * memory or I/O space.  This code will be important if we ever
		 * support cacheable frame buffers.
		 */
		if (aflt->flt_addr != AFLT_INV_ADDR) {
			aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
				MMU_PAGESHIFT)) ? 1 : 0;
		}

		if (isus2i || isus2e)
			aflt->flt_panic = 1;

		cpu_errorq_dispatch((t_afsr & P_AFSR_EDP) ?
		    FM_EREPORT_CPU_USII_EDP : FM_EREPORT_CPU_USII_LDP,
		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
		    aflt->flt_panic);
	}

	/*
	 * Timeout and bus error handling.
	 * There are two cases to consider:
	 *
	 * (1) If we are in the kernel protected by ddi_peek or ddi_poke, we
	 * have already modified the saved registers so that we will return
	 * from the trap to the appropriate trampoline routine; otherwise panic.
	 *
	 * (2) In user mode, we can simply use our AST mechanism to deliver
	 * a SIGBUS.  We do not log the occurrence - processes performing
	 * device control would generate lots of uninteresting messages.
	 */
	if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) {
		if (t_afsr & P_AFSR_TO)
			(void) strcat(pr_reason, "BTO ");

		if (t_afsr & P_AFSR_BERR)
			(void) strcat(pr_reason, "BERR ");

		spf_flt.flt_type = CPU_BTO_BERR_ERR;
		if (aflt->flt_priv && aflt->flt_prot == AFLT_PROT_NONE) {
			cpu_errorq_dispatch((t_afsr & P_AFSR_TO) ?
			    FM_EREPORT_CPU_USII_TO : FM_EREPORT_CPU_USII_BERR,
			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
			    aflt->flt_panic);
		}
	}

	/*
	 * Handle WP: WP happens when the ecache is victimized and a parity
	 * error was detected on a writeback.  The data in question will be
	 * poisoned as a UE will be written back.  The PA is not logged and
	 * it is possible that it doesn't belong to the trapped thread.  The
	 * WP trap is not fatal, but it could be fatal to someone that
	 * subsequently accesses the toxic page.  We set read_all_memscrub
	 * to force the memscrubber to read all of memory when it awakens.
	 * For Sabre/Hummingbird, WP is fatal because the HW doesn't write a
	 * UE back to poison the data.
	 */
	if (t_afsr & P_AFSR_WP) {
		(void) strcat(pr_reason, "WP ");
		if (isus2i || isus2e) {
			aflt->flt_panic = 1;
		} else {
			read_all_memscrub = 1;
		}
		spf_flt.flt_type = CPU_WP_ERR;
		cpu_errorq_dispatch(FM_EREPORT_CPU_USII_WP,
		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
		    aflt->flt_panic);
	}

	/*
	 * Handle trapping CP error: In Sabre/Hummingbird, parity error in
	 * the ecache on a copyout due to a PCI DMA read is signaled as a CP.
	 * This is fatal.
	 */

	if (t_afsr & P_AFSR_CP) {
		if (isus2i || isus2e) {
			(void) strcat(pr_reason, "CP ");
			aflt->flt_panic = 1;
			spf_flt.flt_type = CPU_TRAPPING_CP_ERR;
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
			    aflt->flt_panic);
		} else {
			/*
			 * Orphan CP: Happens due to signal integrity problem
			 * on a CPU, where a CP is reported, without reporting
			 * its associated UE.  This is handled by locating the
			 * bad parity line and would kick off the memscrubber
			 * to find the UE if in memory or in another's cache.
			 */
			spf_flt.flt_type = CPU_ORPHAN_CP_ERR;
			(void) strcat(pr_reason, "ORPHAN_CP ");

			/*
			 * Here we have no PA to work with.
			 * Scan each line in the ecache to look for
			 * the one with bad parity.
			 */
			aflt->flt_addr = AFLT_INV_ADDR;
			scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
				&spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt,
				&oafsr);
			acc_afsr |= oafsr;

			/*
			 * If we found a bad PA, update the state to indicate
			 * if it is memory or I/O space.
			 */
			if (aflt->flt_addr != AFLT_INV_ADDR) {
				aflt->flt_in_memory =
				    (pf_is_memory(aflt->flt_addr >>
				    MMU_PAGESHIFT)) ? 1 : 0;
			}
			read_all_memscrub = 1;
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
			    aflt->flt_panic);

		}
	}

	/*
	 * If we queued an error other than WP or CP and we are going to return
	 * from the trap and the error was in user mode or inside of a
	 * copy routine, set AST flag so the queue will be drained before
	 * returning to user mode.
	 *
	 * For UE/LDP/EDP, the AST processing will SIGKILL the process
	 * and send an event to its process contract.
	 *
	 * For BERR/BTO, the AST processing will SIGBUS the process.  There
	 * will have been no error queued in this case.
	 */
	if ((t_afsr &
	    (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP | P_AFSR_BERR | P_AFSR_TO)) &&
	    (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY)) {
		int pcb_flag = 0;

		if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP))
			pcb_flag |= ASYNC_HWERR;

		if (t_afsr & P_AFSR_BERR)
			pcb_flag |= ASYNC_BERR;

		if (t_afsr & P_AFSR_TO)
			pcb_flag |= ASYNC_BTO;

		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
		aston(curthread);
		action = ACTION_AST_FLAGS;
	}

	/*
	 * In response to a deferred error, we must do one of three things:
	 * (1) set the AST flags, (2) trampoline, or (3) panic.  action is
	 * set in cases (1) and (2) - check that either action is set or
	 * (3) is true.
	 *
	 * On II, the WP writes poisoned data back to memory, which will
	 * cause a UE and a panic or reboot when read.  In this case, we
	 * don't need to panic at this time.  On IIi and IIe,
	 * aflt->flt_panic is already set above.
	 */
	ASSERT((aflt->flt_panic != 0) || (action != ACTION_NONE) ||
	    (t_afsr & P_AFSR_WP));

	/*
	 * Make a final sanity check to make sure we did not get any more async
	 * errors and accumulate the afsr.
	 */
	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
	    cpunodes[CPU->cpu_id].ecache_linesize);
	(void) clear_errors(&spf_flt, NULL);

	/*
	 * Take care of a special case: If there is a UE in the ecache flush
	 * area, we'll see it in flush_ecache().  This will trigger the
	 * CPU_ADDITIONAL_ERRORS case below.
	 *
	 * This could occur if the original error was a UE in the flush area,
	 * or if the original error was an E$ error that was flushed out of
	 * the E$ in scan_ecache().
	 *
	 * If it's at the same address that we're already logging, then it's
	 * probably one of these cases.  Clear the bit so we don't trip over
	 * it on the additional errors case, which could cause an unnecessary
	 * panic.
	 */
	if ((aflt->flt_stat & P_AFSR_UE) && aflt->flt_addr == t_afar)
		acc_afsr |= aflt->flt_stat & ~P_AFSR_UE;
	else
		acc_afsr |= aflt->flt_stat;

	/*
	 * Check the accumulated afsr for the important bits.
	 * Make sure the spf_flt.flt_type value is set, and
	 * enqueue an error.
	 */
	if (acc_afsr &
	    (P_AFSR_LEVEL1 | P_AFSR_IVUE | P_AFSR_ETP | P_AFSR_ISAP)) {
		if (acc_afsr & (P_AFSR_UE | P_AFSR_EDP | P_AFSR_LDP |
		    P_AFSR_BERR | P_AFSR_TO | P_AFSR_IVUE | P_AFSR_ETP |
		    P_AFSR_ISAP))
			aflt->flt_panic = 1;

		spf_flt.flt_type = CPU_ADDITIONAL_ERR;
		aflt->flt_stat = acc_afsr;
		cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UNKNOWN,
		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
		    aflt->flt_panic);
	}

	/*
	 * If aflt->flt_panic is set at this point, we need to panic as the
	 * result of a trap at TL > 0, or an error we determined to be fatal.
	 * We've already enqueued the error in one of the if-clauses above,
	 * and it will be dequeued and logged as part of the panic flow.
	 */
	if (aflt->flt_panic) {
		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CPU_ERRID_FIRST,
		    "See previous message(s) for details", " %sError(s)",
		    pr_reason);
	}

	/*
	 * Before returning, we must re-enable errors, and
	 * reset the caches to their boot-up state.
	 */
	set_lsu(get_lsu() | cache_boot_state);
	set_error_enable(EER_ENABLE);
}

/*
 * Check for miscellaneous fatal errors and call CE_PANIC if any are seen.
18370Sstevel@tonic-gate * This routine is shared by the CE and UE handling code. 18380Sstevel@tonic-gate */ 18390Sstevel@tonic-gate static void 18400Sstevel@tonic-gate check_misc_err(spitf_async_flt *spf_flt) 18410Sstevel@tonic-gate { 18420Sstevel@tonic-gate struct async_flt *aflt = (struct async_flt *)spf_flt; 18430Sstevel@tonic-gate char *fatal_str = NULL; 18440Sstevel@tonic-gate 18450Sstevel@tonic-gate /* 18460Sstevel@tonic-gate * The ISAP and ETP errors are supposed to cause a POR 18470Sstevel@tonic-gate * from the system, so in theory we never, ever see these messages. 18480Sstevel@tonic-gate * ISAP, ETP and IVUE are considered to be fatal. 18490Sstevel@tonic-gate */ 18500Sstevel@tonic-gate if (aflt->flt_stat & P_AFSR_ISAP) 18510Sstevel@tonic-gate fatal_str = " System Address Parity Error on"; 18520Sstevel@tonic-gate else if (aflt->flt_stat & P_AFSR_ETP) 18530Sstevel@tonic-gate fatal_str = " Ecache Tag Parity Error on"; 18540Sstevel@tonic-gate else if (aflt->flt_stat & P_AFSR_IVUE) 18550Sstevel@tonic-gate fatal_str = " Interrupt Vector Uncorrectable Error on"; 18560Sstevel@tonic-gate if (fatal_str != NULL) { 18570Sstevel@tonic-gate cpu_aflt_log(CE_PANIC, 1, spf_flt, CMN_LFLAGS, 18580Sstevel@tonic-gate NULL, fatal_str); 18590Sstevel@tonic-gate } 18600Sstevel@tonic-gate } 18610Sstevel@tonic-gate 18620Sstevel@tonic-gate /* 18630Sstevel@tonic-gate * Routine to convert a syndrome into a syndrome code. 18640Sstevel@tonic-gate */ 18650Sstevel@tonic-gate static int 18660Sstevel@tonic-gate synd_to_synd_code(int synd_status, ushort_t synd) 18670Sstevel@tonic-gate { 18680Sstevel@tonic-gate if (synd_status != AFLT_STAT_VALID) 18690Sstevel@tonic-gate return (-1); 18700Sstevel@tonic-gate 18710Sstevel@tonic-gate /* 18720Sstevel@tonic-gate * Use the 8-bit syndrome to index the ecc_syndrome_tab 18730Sstevel@tonic-gate * to get the code indicating which bit(s) is(are) bad. 
18740Sstevel@tonic-gate */ 18750Sstevel@tonic-gate if ((synd == 0) || (synd >= SYND_TBL_SIZE)) 18760Sstevel@tonic-gate return (-1); 18770Sstevel@tonic-gate else 18780Sstevel@tonic-gate return (ecc_syndrome_tab[synd]); 18790Sstevel@tonic-gate } 18800Sstevel@tonic-gate 18810Sstevel@tonic-gate /* 18820Sstevel@tonic-gate * Routine to return a string identifying the physical name 18830Sstevel@tonic-gate * associated with a memory/cache error. 18840Sstevel@tonic-gate */ 18850Sstevel@tonic-gate /* ARGSUSED */ 18860Sstevel@tonic-gate int 18870Sstevel@tonic-gate cpu_get_mem_unum(int synd_status, ushort_t synd, uint64_t afsr, 18880Sstevel@tonic-gate uint64_t afar, int cpuid, int flt_in_memory, ushort_t flt_status, 18890Sstevel@tonic-gate char *buf, int buflen, int *lenp) 18900Sstevel@tonic-gate { 18910Sstevel@tonic-gate short synd_code; 18920Sstevel@tonic-gate int ret; 18930Sstevel@tonic-gate 18940Sstevel@tonic-gate if (flt_in_memory) { 18950Sstevel@tonic-gate synd_code = synd_to_synd_code(synd_status, synd); 18960Sstevel@tonic-gate if (synd_code == -1) { 18970Sstevel@tonic-gate ret = EINVAL; 18980Sstevel@tonic-gate } else if (prom_get_unum(synd_code, P2ALIGN(afar, 8), 18990Sstevel@tonic-gate buf, buflen, lenp) != 0) { 19000Sstevel@tonic-gate ret = EIO; 19010Sstevel@tonic-gate } else if (*lenp <= 1) { 19020Sstevel@tonic-gate ret = EINVAL; 19030Sstevel@tonic-gate } else { 19040Sstevel@tonic-gate ret = 0; 19050Sstevel@tonic-gate } 19060Sstevel@tonic-gate } else { 19070Sstevel@tonic-gate ret = ENOTSUP; 19080Sstevel@tonic-gate } 19090Sstevel@tonic-gate 19100Sstevel@tonic-gate if (ret != 0) { 19110Sstevel@tonic-gate buf[0] = '\0'; 19120Sstevel@tonic-gate *lenp = 0; 19130Sstevel@tonic-gate } 19140Sstevel@tonic-gate 19150Sstevel@tonic-gate return (ret); 19160Sstevel@tonic-gate } 19170Sstevel@tonic-gate 19180Sstevel@tonic-gate /* 19190Sstevel@tonic-gate * Wrapper for cpu_get_mem_unum() routine that takes an 19200Sstevel@tonic-gate * async_flt struct rather than explicit arguments. 
19210Sstevel@tonic-gate */ 19220Sstevel@tonic-gate int 19230Sstevel@tonic-gate cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt, 19240Sstevel@tonic-gate char *buf, int buflen, int *lenp) 19250Sstevel@tonic-gate { 19260Sstevel@tonic-gate return (cpu_get_mem_unum(synd_status, SYND(aflt->flt_synd), 19270Sstevel@tonic-gate aflt->flt_stat, aflt->flt_addr, aflt->flt_bus_id, 19280Sstevel@tonic-gate aflt->flt_in_memory, aflt->flt_status, buf, buflen, lenp)); 19290Sstevel@tonic-gate } 19300Sstevel@tonic-gate 19310Sstevel@tonic-gate /* 19320Sstevel@tonic-gate * This routine is a more generic interface to cpu_get_mem_unum(), 19330Sstevel@tonic-gate * that may be used by other modules (e.g. mm). 19340Sstevel@tonic-gate */ 19350Sstevel@tonic-gate int 19360Sstevel@tonic-gate cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar, 19370Sstevel@tonic-gate char *buf, int buflen, int *lenp) 19380Sstevel@tonic-gate { 19390Sstevel@tonic-gate int synd_status, flt_in_memory, ret; 19400Sstevel@tonic-gate char unum[UNUM_NAMLEN]; 19410Sstevel@tonic-gate 19420Sstevel@tonic-gate /* 19430Sstevel@tonic-gate * Check for an invalid address. 19440Sstevel@tonic-gate */ 19450Sstevel@tonic-gate if (afar == (uint64_t)-1) 19460Sstevel@tonic-gate return (ENXIO); 19470Sstevel@tonic-gate 19480Sstevel@tonic-gate if (synd == (uint64_t)-1) 19490Sstevel@tonic-gate synd_status = AFLT_STAT_INVALID; 19500Sstevel@tonic-gate else 19510Sstevel@tonic-gate synd_status = AFLT_STAT_VALID; 19520Sstevel@tonic-gate 19530Sstevel@tonic-gate flt_in_memory = (pf_is_memory(afar >> MMU_PAGESHIFT)) ? 
1 : 0; 19540Sstevel@tonic-gate 19550Sstevel@tonic-gate if ((ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar, 19560Sstevel@tonic-gate CPU->cpu_id, flt_in_memory, 0, unum, UNUM_NAMLEN, lenp)) 19570Sstevel@tonic-gate != 0) 19580Sstevel@tonic-gate return (ret); 19590Sstevel@tonic-gate 19600Sstevel@tonic-gate if (*lenp >= buflen) 19610Sstevel@tonic-gate return (ENAMETOOLONG); 19620Sstevel@tonic-gate 19630Sstevel@tonic-gate (void) strncpy(buf, unum, buflen); 19640Sstevel@tonic-gate 19650Sstevel@tonic-gate return (0); 19660Sstevel@tonic-gate } 19670Sstevel@tonic-gate 19680Sstevel@tonic-gate /* 19690Sstevel@tonic-gate * Routine to return memory information associated 19700Sstevel@tonic-gate * with a physical address and syndrome. 19710Sstevel@tonic-gate */ 19720Sstevel@tonic-gate /* ARGSUSED */ 19730Sstevel@tonic-gate int 19740Sstevel@tonic-gate cpu_get_mem_info(uint64_t synd, uint64_t afar, 19750Sstevel@tonic-gate uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep, 19760Sstevel@tonic-gate int *segsp, int *banksp, int *mcidp) 19770Sstevel@tonic-gate { 19780Sstevel@tonic-gate return (ENOTSUP); 19790Sstevel@tonic-gate } 19800Sstevel@tonic-gate 19810Sstevel@tonic-gate /* 19820Sstevel@tonic-gate * Routine to return a string identifying the physical 19830Sstevel@tonic-gate * name associated with a cpuid. 19840Sstevel@tonic-gate */ 19850Sstevel@tonic-gate /* ARGSUSED */ 19860Sstevel@tonic-gate int 19870Sstevel@tonic-gate cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp) 19880Sstevel@tonic-gate { 19890Sstevel@tonic-gate return (ENOTSUP); 19900Sstevel@tonic-gate } 19910Sstevel@tonic-gate 19920Sstevel@tonic-gate /* 19930Sstevel@tonic-gate * This routine returns the size of the kernel's FRU name buffer. 
 */
size_t
cpu_get_name_bufsize()
{
	return (UNUM_NAMLEN);
}

/*
 * Cpu specific log func for UEs.
 *
 * unum must be a caller-supplied buffer of at least UNUM_NAMLEN bytes;
 * it is filled with the memory unum (if resolvable) and logged.
 */
static void
log_ue_err(struct async_flt *aflt, char *unum)
{
	spitf_async_flt *spf_flt = (spitf_async_flt *)aflt;
	int len = 0;

#ifdef DEBUG
	int afsr_priv = (aflt->flt_stat & P_AFSR_PRIV) ? 1 : 0;

	/*
	 * Paranoid Check for priv mismatch
	 * Only applicable for UEs
	 */
	if (afsr_priv != aflt->flt_priv) {
		/*
		 * The priv bits in %tstate and %afsr did not match; we expect
		 * this to be very rare, so flag it with a message.
		 */
		cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST, NULL,
		    ": PRIV bit in TSTATE and AFSR mismatched; "
		    "TSTATE.PRIV=%d used", (aflt->flt_priv) ? 1 : 0);

		/* update saved afsr to reflect the correct priv */
		aflt->flt_stat &= ~P_AFSR_PRIV;
		if (aflt->flt_priv)
			aflt->flt_stat |= P_AFSR_PRIV;
	}
#endif /* DEBUG */

	/* best-effort: ignore unum lookup failure, unum is cleared on error */
	(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt, unum,
	    UNUM_NAMLEN, &len);

	cpu_aflt_log(CE_WARN, 1, spf_flt, UE_LFLAGS, unum,
	    " Uncorrectable Memory Error on");

	/* syndrome 0x3 is a special case — likely not a DIMM fault */
	if (SYND(aflt->flt_synd) == 0x3) {
		cpu_aflt_log(CE_WARN, 1, spf_flt, CPU_ERRID_FIRST, NULL,
		    " Syndrome 0x3 indicates that this may not be a "
		    "memory module problem");
	}

	if (aflt->flt_in_memory)
		cpu_log_ecmem_info(spf_flt);
}


/*
 * The cpu_async_log_err() function is called via the ue_drain() function to
 * handle logging for CPU events that are dequeued.  As such, it can be invoked
 * from softint context, from AST processing in the trap() flow, or from the
 * panic flow.  We decode the CPU-specific data, and log appropriate messages.
 */
static void
cpu_async_log_err(void *flt)
{
	spitf_async_flt *spf_flt = (spitf_async_flt *)flt;
	struct async_flt *aflt = (struct async_flt *)flt;
	char unum[UNUM_NAMLEN];
	char *space;
	char *ecache_scrub_logstr = NULL;

	switch (spf_flt->flt_type) {
	case CPU_UE_ERR:
		/*
		 * We want to skip logging only if ALL the following
		 * conditions are true:
		 *
		 *	1. We are not panicking
		 *	2. There is only one error
		 *	3. That error is a memory error
		 *	4. The error is caused by the memory scrubber (in
		 *	   which case the error will have occurred under
		 *	   on_trap protection)
		 *	5. The error is on a retired page
		 *
		 * Note 1: AFLT_PROT_EC is used places other than the memory
		 * scrubber.  However, none of those errors should occur
		 * on a retired page.
		 *
		 * Note 2: In the CE case, these errors are discarded before
		 * the errorq.  In the UE case, we must wait until now --
		 * softcall() grabs a mutex, which we can't do at a high PIL.
		 */
		if (!panicstr &&
		    (aflt->flt_stat & S_AFSR_ALL_ERRS) == P_AFSR_UE &&
		    aflt->flt_prot == AFLT_PROT_EC) {
			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
				/* Zero the address to clear the error */
				softcall(ecc_page_zero, (void *)aflt->flt_addr);
				return;
			}
		}

		/*
		 * Log the UE and check for causes of this UE error that
		 * don't cause a trap (Copyback error).  cpu_async_error()
		 * has already checked the i/o buses for us.
		 */
		log_ue_err(aflt, unum);
		if (aflt->flt_in_memory)
			cpu_check_allcpus(aflt);
		break;

	case CPU_EDP_LDP_ERR:
		if (aflt->flt_stat & P_AFSR_EDP)
			cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS,
			    NULL, " EDP event on");

		if (aflt->flt_stat & P_AFSR_LDP)
			cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS,
			    NULL, " LDP event on");

		/* Log ecache info if exist */
		if (spf_flt->flt_ec_lcnt > 0) {
			cpu_log_ecmem_info(spf_flt);

			cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST,
			    NULL, " AFAR was derived from E$Tag");
		} else {
			cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST,
			    NULL, " No error found in ecache (No fault "
			    "PA available)");
		}
		break;

	case CPU_WP_ERR:
		/*
		 * If the memscrub thread hasn't yet read
		 * all of memory, as we requested in the
		 * trap handler, then give it a kick to
		 * make sure it does.
		 */
		if (!isus2i && !isus2e && read_all_memscrub)
			memscrub_run();

		cpu_aflt_log(CE_WARN, 1, spf_flt, WP_LFLAGS, NULL,
		    " WP event on");
		return;

	case CPU_BTO_BERR_ERR:
		/*
		 * A bus timeout or error occurred that was in user mode or not
		 * in a protected kernel code region.
		 */
		if (aflt->flt_stat & P_AFSR_BERR) {
			cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2,
			    spf_flt, BERRTO_LFLAGS, NULL,
			    " Bus Error on System Bus in %s mode from",
			    aflt->flt_priv ? "privileged" : "user");
		}

		if (aflt->flt_stat & P_AFSR_TO) {
			cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2,
			    spf_flt, BERRTO_LFLAGS, NULL,
			    " Timeout on System Bus in %s mode from",
			    aflt->flt_priv ? "privileged" : "user");
		}

		return;

	case CPU_PANIC_CP_ERR:
		/*
		 * Process the Copyback (CP) error info (if any) obtained from
		 * polling all the cpus in the panic flow.  This case is only
		 * entered if we are panicking.
		 */
		ASSERT(panicstr != NULL);
		ASSERT(aflt->flt_id == panic_aflt.flt_id);

		/* See which space - this info may not exist */
		if (panic_aflt.flt_status & ECC_D_TRAP)
			space = "Data ";
		else if (panic_aflt.flt_status & ECC_I_TRAP)
			space = "Instruction ";
		else
			space = "";

		cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL,
		    " AFAR was derived from UE report,"
		    " CP event on CPU%d (caused %saccess error on %s%d)",
		    aflt->flt_inst, space, (panic_aflt.flt_status & ECC_IOBUS) ?
		    "IOBUS" : "CPU", panic_aflt.flt_bus_id);

		if (spf_flt->flt_ec_lcnt > 0)
			cpu_log_ecmem_info(spf_flt);
		else
			cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST,
			    NULL, " No cache dump available");

		return;

	case CPU_TRAPPING_CP_ERR:
		/*
		 * For sabre only.  This is a copyback ecache parity error due
		 * to a PCI DMA read.  We should be panicking if we get here.
		 */
		ASSERT(panicstr != NULL);
		cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL,
		    " AFAR was derived from UE report,"
		    " CP event on CPU%d (caused Data access error "
		    "on PCIBus)", aflt->flt_inst);
		return;

	/*
	 * We log the ecache lines of the following states,
	 * clean_bad_idle, clean_bad_busy, dirty_bad_idle and
	 * dirty_bad_busy if ecache_scrub_verbose is set and panic
	 * in addition to logging if ecache_scrub_panic is set.
	 */
	case CPU_BADLINE_CI_ERR:
		ecache_scrub_logstr = "CBI";
		/* FALLTHRU */

	case CPU_BADLINE_CB_ERR:
		if (ecache_scrub_logstr == NULL)
			ecache_scrub_logstr = "CBB";
		/* FALLTHRU */

	case CPU_BADLINE_DI_ERR:
		if (ecache_scrub_logstr == NULL)
			ecache_scrub_logstr = "DBI";
		/* FALLTHRU */

	case CPU_BADLINE_DB_ERR:
		if (ecache_scrub_logstr == NULL)
			ecache_scrub_logstr = "DBB";

		cpu_aflt_log(CE_NOTE, 2, spf_flt,
		    (CPU_ERRID_FIRST | CPU_FLTCPU), NULL,
		    " %s event on", ecache_scrub_logstr);
		cpu_log_ecmem_info(spf_flt);

		return;

	case CPU_ORPHAN_CP_ERR:
		/*
		 * Orphan CPs, where the CP bit is set, but when a CPU
		 * doesn't report a UE.
		 */
		if (read_all_memscrub)
			memscrub_run();

		cpu_aflt_log(CE_NOTE, 2, spf_flt, (CP_LFLAGS | CPU_FLTCPU),
		    NULL, " Orphan CP event on");

		/* Log ecache info if exist */
		if (spf_flt->flt_ec_lcnt > 0)
			cpu_log_ecmem_info(spf_flt);
		else
			cpu_aflt_log(CE_NOTE, 2, spf_flt,
			    (CP_LFLAGS | CPU_FLTCPU), NULL,
			    " No error found in ecache (No fault "
			    "PA available");
		return;

	case CPU_ECACHE_ADDR_PAR_ERR:
		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
		    " E$ Tag Address Parity error on");
		cpu_log_ecmem_info(spf_flt);
		return;

	case CPU_ECACHE_STATE_ERR:
		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
		    " E$ Tag State Parity error on");
		cpu_log_ecmem_info(spf_flt);
		return;

	case CPU_ECACHE_TAG_ERR:
		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
		    " E$ Tag scrub event on");
		cpu_log_ecmem_info(spf_flt);
		return;

	case CPU_ECACHE_ETP_ETS_ERR:
		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
		    " AFSR.ETP is set and AFSR.ETS is zero on");
		cpu_log_ecmem_info(spf_flt);
		return;


	case CPU_ADDITIONAL_ERR:
		cpu_aflt_log(CE_WARN, 1, spf_flt, CMN_LFLAGS & ~CPU_SPACE, NULL,
		    " Additional errors detected during error processing on");
		return;

	default:
		cmn_err(CE_WARN, "cpu_async_log_err: fault %p has unknown "
		    "fault type %x", (void *)spf_flt, spf_flt->flt_type);
		return;
	}

	/* ... fall through from the UE, EDP, or LDP cases */

	if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
		if (!panicstr) {
			/* retire the faulting page so it is never reused */
			(void) page_retire(aflt->flt_addr, PR_UE);
		} else {
			/*
			 * Clear UEs on panic so that we don't
			 * get haunted by them during panic or
			 * after reboot
			 */
			clearphys(P2ALIGN(aflt->flt_addr, 64),
			    cpunodes[CPU->cpu_id].ecache_size,
			    cpunodes[CPU->cpu_id].ecache_linesize);

			(void) clear_errors(NULL, NULL);
		}
	}

	/*
	 * Log final recover message
	 */
	if (!panicstr) {
		if (!aflt->flt_priv) {
			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
			    NULL, " Above Error is in User Mode"
			    "\n and is fatal: "
			    "will SIGKILL process and notify contract");
		} else if (aflt->flt_prot == AFLT_PROT_COPY && aflt->flt_core) {
			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
			    NULL, " Above Error detected while dumping core;"
			    "\n core file will be truncated");
		} else if (aflt->flt_prot == AFLT_PROT_COPY) {
			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
			    NULL, " Above Error is due to Kernel access"
			    "\n to User space and is fatal: "
			    "will SIGKILL process and notify contract");
		} else if (aflt->flt_prot == AFLT_PROT_EC) {
			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, NULL,
			    " Above Error detected by protected Kernel code"
			    "\n that will try to clear error from system");
		}
	}
}


/*
 * Check all cpus for non-trapping UE-causing errors
 * In Ultra I/II, we look for copyback errors (CPs)
 */
void
cpu_check_allcpus(struct async_flt *aflt)
{
	spitf_async_flt cp;
	spitf_async_flt *spf_cpflt = &cp;
	struct async_flt *cpflt = (struct async_flt *)&cp;
	int pix;

	cpflt->flt_id = aflt->flt_id;
	cpflt->flt_addr = aflt->flt_addr;

	for (pix = 0; pix < NCPU; pix++) {
		if (CPU_XCALL_READY(pix)) {
			/* cross-call fills cpflt with that CPU's AFSR state */
			xc_one(pix, (xcfunc_t *)get_cpu_status,
			    (uint64_t)cpflt, 0);

			if (cpflt->flt_stat & P_AFSR_CP) {
				char *space;

				/* See which space - this info may not exist */
				if (aflt->flt_status & ECC_D_TRAP)
					space = "Data ";
				else if (aflt->flt_status & ECC_I_TRAP)
					space = "Instruction ";
				else
					space = "";

				cpu_aflt_log(CE_WARN, 1, spf_cpflt, CP_LFLAGS,
				    NULL, " AFAR was derived from UE report,"
				    " CP event on CPU%d (caused %saccess "
				    "error on %s%d)", pix, space,
				    (aflt->flt_status & ECC_IOBUS) ?
				    "IOBUS" : "CPU", aflt->flt_bus_id);

				if (spf_cpflt->flt_ec_lcnt > 0)
					cpu_log_ecmem_info(spf_cpflt);
				else
					cpu_aflt_log(CE_WARN, 2, spf_cpflt,
					    CPU_ERRID_FIRST, NULL,
					    " No cache dump available");
			}
		}
	}
}

#ifdef DEBUG
/* when set, forces the CP path in get_cpu_status() for testing */
int test_mp_cp = 0;
#endif

/*
 * Cross-call callback routine to tell a CPU to read its own %afsr to check
 * for copyback errors and capture relevant information.
 */
static uint_t
get_cpu_status(uint64_t arg)
{
	struct async_flt *aflt = (struct async_flt *)arg;
	spitf_async_flt *spf_flt = (spitf_async_flt *)arg;
	uint64_t afsr;
	uint32_t ec_idx;
	uint64_t sdbh, sdbl;
	int i;
	uint32_t ec_set_size;
	uchar_t valid;
	ec_data_t ec_data[8];
	uint64_t ec_tag, flt_addr_tag, oafsr;
	uint64_t *acc_afsr = NULL;

	/* read this CPU's AFSR and merge in any accumulated scrubber AFSR */
	get_asyncflt(&afsr);
	if (CPU_PRIVATE(CPU) != NULL) {
		acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
		afsr |= *acc_afsr;
		*acc_afsr = 0;
	}

#ifdef DEBUG
	if (test_mp_cp)
		afsr |= P_AFSR_CP;
#endif
	aflt->flt_stat = afsr;

	if (afsr & P_AFSR_CP) {
		/*
		 * Capture the UDBs
		 */
		get_udb_errors(&sdbh, &sdbl);
		spf_flt->flt_sdbh = (ushort_t)(sdbh & 0x3FF);
		spf_flt->flt_sdbl = (ushort_t)(sdbl & 0x3FF);

		/*
		 * Clear CP bit before capturing ecache data
		 * and AFSR info.
		 */
		set_asyncflt(P_AFSR_CP);

		/*
		 * See if we can capture the ecache line for the
		 * fault PA.
		 *
		 * Return a valid matching ecache line, if any.
		 * Otherwise, return the first matching ecache
		 * line marked invalid.
		 */
		flt_addr_tag = aflt->flt_addr >> cpu_ec_tag_shift;
		ec_set_size = cpunodes[CPU->cpu_id].ecache_size /
		    ecache_associativity;
		spf_flt->flt_ec_lcnt = 0;

		/* probe each way of the set that could hold the fault PA */
		for (i = 0, ec_idx = (aflt->flt_addr % ec_set_size);
		    i < ecache_associativity; i++, ec_idx += ec_set_size) {
			get_ecache_dtag(P2ALIGN(ec_idx, 64),
			    (uint64_t *)&ec_data[0], &ec_tag, &oafsr,
			    acc_afsr);

			if ((ec_tag & cpu_ec_tag_mask) != flt_addr_tag)
				continue;

			valid = cpu_ec_state_valid &
			    (uchar_t)((ec_tag & cpu_ec_state_mask) >>
			    cpu_ec_state_shift);

			if (valid || spf_flt->flt_ec_lcnt == 0) {
				spf_flt->flt_ec_tag = ec_tag;
				bcopy(&ec_data, &spf_flt->flt_ec_data,
				    sizeof (ec_data));
				spf_flt->flt_ec_lcnt = 1;

				/* a valid match is final; stop searching */
				if (valid)
					break;
			}
		}
	}
	return (0);
}

/*
 * CPU-module callback for the non-panicking CPUs.  This routine is invoked
 * from panic_idle() as part of the other CPUs stopping themselves when a
 * panic occurs.
 * We need to be VERY careful what we do here, since panicstr
 * is NOT set yet and we cannot blow through locks.  If panic_aflt is set
 * (panic_aflt.flt_id is non-zero), we need to read our %afsr to look for
 * CP error information.
 */
void
cpu_async_panic_callb(void)
{
	spitf_async_flt cp;
	struct async_flt *aflt = (struct async_flt *)&cp;
	uint64_t *scrub_afsr;

	if (panic_aflt.flt_id != 0) {
		aflt->flt_addr = panic_aflt.flt_addr;
		(void) get_cpu_status((uint64_t)aflt);

		/* fold in any AFSR state the ecache scrubber accumulated */
		if (CPU_PRIVATE(CPU) != NULL) {
			scrub_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
			if (*scrub_afsr & P_AFSR_CP) {
				aflt->flt_stat |= *scrub_afsr;
				*scrub_afsr = 0;
			}
		}
		/* if a copyback error was seen, queue it for panic logging */
		if (aflt->flt_stat & P_AFSR_CP) {
			aflt->flt_id = panic_aflt.flt_id;
			aflt->flt_panic = 1;
			aflt->flt_inst = CPU->cpu_id;
			aflt->flt_class = CPU_FAULT;
			cp.flt_type = CPU_PANIC_CP_ERR;
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
			    (void *)&cp, sizeof (cp), ue_queue,
			    aflt->flt_panic);
		}
	}
}

/*
 * Turn off all cpu error detection, normally only used for panics.
 */
void
cpu_disable_errors(void)
{
	xt_all(set_error_enable_tl1, EER_DISABLE, EER_SET_ABSOLUTE);
}

/*
 * Enable errors.
 */
void
cpu_enable_errors(void)
{
	xt_all(set_error_enable_tl1, EER_ENABLE, EER_SET_ABSOLUTE);
}

/*
 * Read and log the data at the fault PA.  With verbose set, the whole
 * 64-byte aligned region (8 doublewords) is dumped; for CE errors the
 * recomputed ECC check bits are logged alongside each doubleword.
 */
static void
cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
{
	uint64_t aligned_addr = P2ALIGN(ecc->flt_addr, 8);
	int i, loop = 1;
	ushort_t ecc_0;
	uint64_t paddr;
	uint64_t data;

	if (verbose)
		loop = 8;
	for (i = 0; i < loop; i++) {
		paddr = aligned_addr + (i * 8);
		data = lddphys(paddr);
		if (verbose) {
			if (ce_err) {
				ecc_0 = ecc_gen((uint32_t)(data>>32),
				    (uint32_t)data);
				cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS,
				    NULL, " Paddr 0x%" PRIx64 ", "
				    "Data 0x%08x.%08x, ECC 0x%x", paddr,
				    (uint32_t)(data>>32), (uint32_t)data,
				    ecc_0);
			} else {
				cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS,
				    NULL, " Paddr 0x%" PRIx64 ", "
				    "Data 0x%08x.%08x", paddr,
				    (uint32_t)(data>>32), (uint32_t)data);
			}
		}
	}
}

static struct {	/* sec-ded-s4ed ecc code */
	uint_t hi, lo;
} ecc_code[8] = {
	{ 0xee55de23U, 0x16161161U },
	{ 0x55eede93U, 0x61612212U },
	{ 0xbb557b8cU, 0x49494494U },
	{ 0x55bb7b6cU, 0x94948848U },
	{ 0x16161161U, 0xee55de23U },
	{ 0x61612212U, 0x55eede93U },
	{ 0x49494494U, 0xbb557b8cU },
	{ 0x94948848U, 0x55bb7b6cU }
};

/*
 * Recompute the 8 ECC check bits for a 64-bit doubleword (passed as two
 * 32-bit halves) using the sec-ded-s4ed code table above: check bit i is
 * the parity (XOR) of the data bits selected by the ecc_code[i] masks.
 */
static ushort_t
ecc_gen(uint_t high_bytes, uint_t low_bytes)
{
	int i, j;
	uchar_t checker, bit_mask;
	struct {
		uint_t hi, lo;
	} hex_data, masked_data[8];

	hex_data.hi = high_bytes;
	hex_data.lo = low_bytes;

	/* mask out bits according to sec-ded-s4ed ecc code */
	for (i = 0; i < 8; i++) {
		masked_data[i].hi = hex_data.hi & ecc_code[i].hi;
		masked_data[i].lo = hex_data.lo & ecc_code[i].lo;
	}

	/*
	 * xor all bits in masked_data[i] to get bit_i of checker,
	 * where i = 0 to 7
	 */
	checker = 0;
	for (i = 0; i < 8; i++) {
		bit_mask = 1 << i;
		for (j = 0; j < 32; j++) {
			if (masked_data[i].lo & 1) checker ^= bit_mask;
			if (masked_data[i].hi & 1) checker ^= bit_mask;
			masked_data[i].hi >>= 1;
			masked_data[i].lo >>= 1;
		}
	}
	return (checker);
}

/*
 * Flush the entire ecache using displacement flush by reading through a
 * physical address range as large as the ecache.
 */
void
cpu_flush_ecache(void)
{
	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
	    cpunodes[CPU->cpu_id].ecache_linesize);
}

/*
 * read and display the data in the cache line where the
 * original ce error occurred.
 * This routine is mainly used for debugging new hardware.
 */
void
read_ecc_data(struct async_flt *ecc, short verbose, short ce_err)
{
	/* Pin to this CPU: traps and cache state below are per-CPU. */
	kpreempt_disable();
	/* disable ECC error traps */
	set_error_enable(EER_ECC_DISABLE);

	/*
	 * Displacement-flush the ecache, re-read the data at the fault
	 * address, then check to see if an ECC error occurred again.
	 */
	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
	    cpunodes[CPU->cpu_id].ecache_linesize);
	set_lsu(get_lsu() | cache_boot_state);
	cpu_read_paddr(ecc, verbose, ce_err);
	/* persistence result is only logged, not acted upon here */
	(void) check_ecc(ecc);

	/* enable ECC error traps */
	set_error_enable(EER_ENABLE);
	kpreempt_enable();
}

/*
 * Check the AFSR bits for UE/CE persistence.
 * If UE or CE errors are detected, the routine
 * clears all the AFSR sticky bits (except CP for
 * spitfire/blackbird) and the UDBs.
 * If ce_debug or ue_debug is set, log any ue/ce errors detected.
 */
static int
check_ecc(struct async_flt *ecc)
{
	uint64_t t_afsr;
	uint64_t t_afar;
	uint64_t udbh;
	uint64_t udbl;
	ushort_t udb;
	int persistent = 0;	/* return: 1 if the same error recurred */

	/*
	 * Capture the AFSR, AFAR and UDBs info
	 */
	get_asyncflt(&t_afsr);
	get_asyncaddr(&t_afar);
	t_afar &= SABRE_AFAR_PA;
	get_udb_errors(&udbh, &udbl);

	if ((t_afsr & P_AFSR_UE) || (t_afsr & P_AFSR_CE)) {
		/*
		 * Clear the errors
		 */
		clr_datapath();

		/* on isus2i/isus2e keep CP sticky bit cleared too */
		if (isus2i || isus2e)
			set_asyncflt(t_afsr);
		else
			set_asyncflt(t_afsr & ~P_AFSR_CP);

		/*
		 * determine whether to check UDBH or UDBL for persistence
		 */
		if (ecc->flt_synd & UDBL_REG) {
			/*
			 * UDBL case: bias the captured PA by 8 so it lines
			 * up with the fault address recorded for the low
			 * 8-byte word (NOTE(review): inferred from the
			 * compare against ecc->flt_addr below — confirm).
			 */
			udb = (ushort_t)udbl;
			t_afar |= 0x8;
		} else {
			udb = (ushort_t)udbh;
		}

		if (ce_debug || ue_debug) {
			spitf_async_flt spf_flt;	/* for logging */
			struct async_flt *aflt =
			    (struct async_flt *)&spf_flt;

			/* Package the info nicely in the spf_flt struct */
			bzero(&spf_flt, sizeof (spitf_async_flt));
			aflt->flt_stat = t_afsr;
			aflt->flt_addr = t_afar;
			spf_flt.flt_sdbh = (ushort_t)(udbh & 0x3FF);
			spf_flt.flt_sdbl = (ushort_t)(udbl & 0x3FF);

			cpu_aflt_log(CE_CONT, 0, &spf_flt, (CPU_AFSR |
			    CPU_AFAR | CPU_UDBH | CPU_UDBL), NULL,
			    " check_ecc: Dumping captured error states ...");
		}

		/*
		 * if the fault addresses don't match, not persistent
		 */
		if (t_afar != ecc->flt_addr) {
			return (persistent);
		}

		/*
		 * check for UE persistence
		 * since all DIMMs in the bank are identified for a UE,
		 * there's no reason to check the syndrome
		 */
		if ((ecc->flt_stat & P_AFSR_UE) && (t_afsr & P_AFSR_UE)) {
			persistent = 1;
		}

		/*
		 * check for CE persistence: same address and same syndrome
		 */
		if ((ecc->flt_stat & P_AFSR_CE) && (t_afsr & P_AFSR_CE)) {
			if ((udb & P_DER_E_SYND) ==
			    (ecc->flt_synd & P_DER_E_SYND)) {
				persistent = 1;
			}
		}
	}
	return (persistent);
}

#ifdef HUMMINGBIRD
/* E-star clock divisors: 1 = full speed, 2 = half, 8 = slowest */
#define	HB_FULL_DIV		1
#define	HB_HALF_DIV		2
#define	HB_LOWEST_DIV		8
#define	HB_ECLK_INVALID		0xdeadbad
/* divisor -> HB_ECLK_* register encoding; unsupported divisors are invalid */
static uint64_t hb_eclk[HB_LOWEST_DIV + 1] = {
	HB_ECLK_INVALID, HB_ECLK_1, HB_ECLK_2, HB_ECLK_INVALID,
	HB_ECLK_4, HB_ECLK_INVALID, HB_ECLK_6, HB_ECLK_INVALID,
	HB_ECLK_8 };

#define	HB_SLOW_DOWN		0
#define	HB_SPEED_UP		1

#define	SET_ESTAR_MODE(mode)					\
	stdphysio(HB_ESTAR_MODE, (mode));			\
	/*							\
	 * PLL logic requires minimum of 16 clock		\
	 * cycles to lock to the new clock speed.		\
	 * Wait 1 usec to satisfy this requirement.		\
	 */							\
	drv_usecwait(1);

#define	CHANGE_REFRESH_COUNT(direction, cur_div, new_div)	\
{								\
	volatile uint64_t data;					\
	uint64_t count, new_count;				\
	clock_t delay;						\
	data = lddphysio(HB_MEM_CNTRL0);			\
	count = (data & HB_REFRESH_COUNT_MASK) >>		\
	    HB_REFRESH_COUNT_SHIFT;				\
	new_count = (HB_REFRESH_INTERVAL *			\
	    cpunodes[CPU->cpu_id].clock_freq) /			\
	    (HB_REFRESH_CLOCKS_PER_COUNT * (new_div) * NANOSEC);\
	data = (data & ~HB_REFRESH_COUNT_MASK) |		\
	    (new_count << HB_REFRESH_COUNT_SHIFT);		\
	stdphysio(HB_MEM_CNTRL0, data);				\
	data = lddphysio(HB_MEM_CNTRL0);			\
	/*							\
	 * If we are slowing down the cpu and Memory		\
	 * Self Refresh is not enabled, it is required		\
	 * to wait for old refresh count to count-down and	\
	 * new refresh count to go into effect (let new value	\
	 * counts down once).					\
	 */							\
	if ((direction) == HB_SLOW_DOWN &&			\
	    (data & HB_SELF_REFRESH_MASK) == 0) {		\
		/*						\
		 * Each count takes 64 cpu clock cycles		\
		 * to decrement.  Wait for current refresh	\
		 * count plus new refresh count at current	\
		 * cpu speed to count down to zero.  Round	\
		 * up the delay time.				\
		 */						\
		delay = ((HB_REFRESH_CLOCKS_PER_COUNT *		\
		    (count + new_count) * MICROSEC * (cur_div)) /\
		    cpunodes[CPU->cpu_id].clock_freq) + 1;	\
		drv_usecwait(delay);				\
	}							\
}

#define	SET_SELF_REFRESH(bit)					\
{								\
	volatile uint64_t data;					\
	data = lddphysio(HB_MEM_CNTRL0);			\
	data = (data & ~HB_SELF_REFRESH_MASK) |			\
	    ((bit) << HB_SELF_REFRESH_SHIFT);			\
	stdphysio(HB_MEM_CNTRL0, data);				\
	data = lddphysio(HB_MEM_CNTRL0);			\
}
#endif	/* HUMMINGBIRD */

/*
 * Change the cpu clock divisor (E-star speed scaling).  arg2 is unused.
 * Only Hummingbird supports this; elsewhere the function is a no-op.
 * Memory refresh counts and self-refresh mode must be re-programmed in a
 * strict order around each divisor change (see macro comments above).
 */
/* ARGSUSED */
void
cpu_change_speed(uint64_t new_divisor, uint64_t arg2)
{
#ifdef HUMMINGBIRD
	uint64_t cur_mask, cur_divisor = 0;
	volatile uint64_t reg;
	int index;

	/* reject out-of-range or unsupported divisors */
	if ((new_divisor < HB_FULL_DIV || new_divisor > HB_LOWEST_DIV) ||
	    (hb_eclk[new_divisor] == HB_ECLK_INVALID)) {
		cmn_err(CE_WARN, "cpu_change_speed: bad divisor 0x%lx",
		    new_divisor);
		return;
	}

	/* derive the current divisor from the E-star mode register */
	reg = lddphysio(HB_ESTAR_MODE);
	cur_mask = reg & HB_ECLK_MASK;
	for (index = HB_FULL_DIV; index <= HB_LOWEST_DIV; index++) {
		if (hb_eclk[index] == cur_mask) {
			cur_divisor = index;
			break;
		}
	}

	if (cur_divisor == 0)
		cmn_err(CE_PANIC, "cpu_change_speed: current divisor "
		    "can't be determined!");

	/*
	 * If we are already at the requested divisor speed, just
	 * return.
	 */
	if (cur_divisor == new_divisor)
		return;

	if (cur_divisor == HB_FULL_DIV && new_divisor == HB_HALF_DIV) {
		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE);

	} else if (cur_divisor == HB_HALF_DIV && new_divisor == HB_FULL_DIV) {
		SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor);

	} else if (cur_divisor == HB_FULL_DIV && new_divisor > HB_HALF_DIV) {
		/*
		 * Transition to 1/2 speed first, then to
		 * lower speed.
		 */
		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, HB_HALF_DIV);
		SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]);
		SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE);

		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, HB_HALF_DIV, new_divisor);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);

	} else if (cur_divisor > HB_HALF_DIV && new_divisor == HB_FULL_DIV) {
		/*
		 * Transition to 1/2 speed first, then to
		 * full speed.
		 */
		SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, HB_HALF_DIV);

		SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, HB_HALF_DIV, new_divisor);

	} else if (cur_divisor < new_divisor) {
		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);

	} else if (cur_divisor > new_divisor) {
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor);
	}
	CPU->cpu_m.divisor = (uchar_t)new_divisor;
#endif
}

/*
* Clear the AFSR sticky bits and the UDBs. For Sabre/Spitfire/Blackbird, 29070Sstevel@tonic-gate * we clear all the sticky bits. If a non-null pointer to a async fault 29080Sstevel@tonic-gate * structure argument is passed in, the captured error state (AFSR, AFAR, UDBs) 29090Sstevel@tonic-gate * info will be returned in the structure. If a non-null pointer to a 29100Sstevel@tonic-gate * uint64_t is passed in, this will be updated if the CP bit is set in the 29110Sstevel@tonic-gate * AFSR. The afsr will be returned. 29120Sstevel@tonic-gate */ 29130Sstevel@tonic-gate static uint64_t 29140Sstevel@tonic-gate clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr) 29150Sstevel@tonic-gate { 29160Sstevel@tonic-gate struct async_flt *aflt = (struct async_flt *)spf_flt; 29170Sstevel@tonic-gate uint64_t afsr; 29180Sstevel@tonic-gate uint64_t udbh, udbl; 29190Sstevel@tonic-gate 29200Sstevel@tonic-gate get_asyncflt(&afsr); 29210Sstevel@tonic-gate 29220Sstevel@tonic-gate if ((acc_afsr != NULL) && (afsr & P_AFSR_CP)) 29230Sstevel@tonic-gate *acc_afsr |= afsr; 29240Sstevel@tonic-gate 29250Sstevel@tonic-gate if (spf_flt != NULL) { 29260Sstevel@tonic-gate aflt->flt_stat = afsr; 29270Sstevel@tonic-gate get_asyncaddr(&aflt->flt_addr); 29280Sstevel@tonic-gate aflt->flt_addr &= SABRE_AFAR_PA; 29290Sstevel@tonic-gate 29300Sstevel@tonic-gate get_udb_errors(&udbh, &udbl); 29310Sstevel@tonic-gate spf_flt->flt_sdbh = (ushort_t)(udbh & 0x3FF); 29320Sstevel@tonic-gate spf_flt->flt_sdbl = (ushort_t)(udbl & 0x3FF); 29330Sstevel@tonic-gate } 29340Sstevel@tonic-gate 29350Sstevel@tonic-gate set_asyncflt(afsr); /* clear afsr */ 29360Sstevel@tonic-gate clr_datapath(); /* clear udbs */ 29370Sstevel@tonic-gate return (afsr); 29380Sstevel@tonic-gate } 29390Sstevel@tonic-gate 29400Sstevel@tonic-gate /* 29410Sstevel@tonic-gate * Scan the ecache to look for bad lines. If found, the afsr, afar, e$ data 29420Sstevel@tonic-gate * tag of the first bad line will be returned. 
 * We also return the old-afsr
 * (before clearing the sticky bits).  The linecnt data will be updated to
 * indicate the number of bad lines detected.
 */
static void
scan_ecache(uint64_t *t_afar, ec_data_t *ecache_data,
	uint64_t *ecache_tag, int *linecnt, uint64_t *t_afsr)
{
	ec_data_t t_ecdata[8];		/* one 64-byte line: 8 x 8 bytes */
	uint64_t t_etag, oafsr;
	uint64_t pa = AFLT_INV_ADDR;	/* PA of first bad line, if any */
	uint32_t i, j, ecache_sz;
	uint64_t acc_afsr = 0;		/* accumulated old-afsr bits */
	uint64_t *cpu_afsr = NULL;

	if (CPU_PRIVATE(CPU) != NULL)
		cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);

	*linecnt = 0;
	ecache_sz = cpunodes[CPU->cpu_id].ecache_size;

	/* walk the whole ecache one 64-byte line at a time */
	for (i = 0; i < ecache_sz; i += 64) {
		get_ecache_dtag(i, (uint64_t *)&t_ecdata[0], &t_etag, &oafsr,
		    cpu_afsr);
		acc_afsr |= oafsr;

		/*
		 * Scan through the whole 64 bytes line in 8 8-byte chunks
		 * looking for the first occurrence of an EDP error.  The AFSR
		 * info is captured for each 8-byte chunk.  Note that for
		 * Spitfire/Blackbird, the AFSR.PSYND is captured by h/w in
		 * 16-byte chunk granularity (i.e. the AFSR will be the same
		 * for the high and low 8-byte words within the 16-byte chunk).
		 * For Sabre/Hummingbird, the AFSR.PSYND is captured in 8-byte
		 * granularity and only PSYND bits [7:0] are used.
		 */
		for (j = 0; j < 8; j++) {
			ec_data_t *ecdptr = &t_ecdata[j];

			if (ecdptr->ec_afsr & P_AFSR_EDP) {
				uint64_t errpa;
				ushort_t psynd;
				uint32_t ec_set_size = ecache_sz /
				    ecache_associativity;

				/*
				 * For Spitfire/Blackbird, we need to look at
				 * the PSYND to make sure that this 8-byte chunk
				 * is the right one.  PSYND bits [15:8] belong
				 * to the upper 8-byte (even) chunk.  Bits
				 * [7:0] belong to the lower 8-byte chunk (odd).
				 */
				psynd = ecdptr->ec_afsr & P_AFSR_P_SYND;
				if (!isus2i && !isus2e) {
					if (j & 0x1)
						psynd = psynd & 0xFF;
					else
						psynd = psynd >> 8;

					if (!psynd)
						continue; /* wrong chunk */
				}

				/* Construct the PA */
				errpa = ((t_etag & cpu_ec_tag_mask) <<
				    cpu_ec_tag_shift) | ((i | (j << 3)) %
				    ec_set_size);

				/* clean up the cache line */
				flushecacheline(P2ALIGN(errpa, 64),
				    cpunodes[CPU->cpu_id].ecache_size);

				oafsr = clear_errors(NULL, cpu_afsr);
				acc_afsr |= oafsr;

				(*linecnt)++;

				/*
				 * Capture the PA for the first bad line found.
				 * Return the ecache dump and tag info.
				 */
				if (pa == AFLT_INV_ADDR) {
					int k;

					pa = errpa;
					for (k = 0; k < 8; k++)
						ecache_data[k] = t_ecdata[k];
					*ecache_tag = t_etag;
				}
				break;
			}
		}
	}
	*t_afar = pa;
	*t_afsr = acc_afsr;
}

/*
 * Log the ecache line (or memory fallback) captured in spf_flt: the tag,
 * MOESI-style line state, parity bits, and all 8 data words, flagging the
 * chunk whose parity syndrome indicates the error.
 */
static void
cpu_log_ecmem_info(spitf_async_flt *spf_flt)
{
	struct async_flt *aflt = (struct async_flt *)spf_flt;
	uint64_t ecache_tag = spf_flt->flt_ec_tag;
	char linestr[30];
	char *state_str;
	int i;

	/*
	 * Check the ecache tag to make sure it
	 * is valid.  If invalid, a memory dump was
	 * captured instead of a ecache dump.
	 */
	if (spf_flt->flt_ec_tag != AFLT_INV_ADDR) {
		uchar_t eparity = (uchar_t)
		    ((ecache_tag & cpu_ec_par_mask) >> cpu_ec_par_shift);

		uchar_t estate = (uchar_t)
		    ((ecache_tag & cpu_ec_state_mask) >> cpu_ec_state_shift);

		if (estate == cpu_ec_state_shr)
			state_str = "Shared";
		else if (estate == cpu_ec_state_exl)
			state_str = "Exclusive";
		else if (estate == cpu_ec_state_own)
			state_str = "Owner";
		else if (estate == cpu_ec_state_mod)
			state_str = "Modified";
		else
			state_str = "Invalid";

		if (spf_flt->flt_ec_lcnt > 1) {
			(void) snprintf(linestr, sizeof (linestr),
			    "Badlines found=%d", spf_flt->flt_ec_lcnt);
		} else {
			linestr[0] = '\0';
		}

		cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL,
		    " PA=0x%08x.%08x\n E$tag 0x%08x.%08x E$State: %s "
		    "E$parity 0x%02x %s", (uint32_t)(aflt->flt_addr >> 32),
		    (uint32_t)aflt->flt_addr, (uint32_t)(ecache_tag >> 32),
		    (uint32_t)ecache_tag, state_str,
		    (uint32_t)eparity, linestr);
	} else {
		cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL,
		    " E$tag != PA from AFAR; E$line was victimized"
		    "\n dumping memory from PA 0x%08x.%08x instead",
		    (uint32_t)(P2ALIGN(aflt->flt_addr, 64) >> 32),
		    (uint32_t)P2ALIGN(aflt->flt_addr, 64));
	}

	/*
	 * Dump out all 8 8-byte ecache data captured
	 * For each 8-byte data captured, we check the
	 * captured afsr's parity syndrome to find out
	 * which 8-byte chunk is bad.  For memory dump, the
	 * AFSR values were initialized to 0.
	 */
	for (i = 0; i < 8; i++) {
		ec_data_t *ecdptr;
		uint_t offset;
		ushort_t psynd;
		ushort_t bad;
		uint64_t edp;

		offset = i << 3;	/* multiply by 8 */
		ecdptr = &spf_flt->flt_ec_data[i];
		psynd = ecdptr->ec_afsr & P_AFSR_P_SYND;
		edp = ecdptr->ec_afsr & P_AFSR_EDP;

		/*
		 * For Sabre/Hummingbird, parity synd is captured only
		 * in [7:0] of AFSR.PSYND for each 8-byte chunk.
		 * For spitfire/blackbird, AFSR.PSYND is captured
		 * in 16-byte granularity.  [15:8] represent
		 * the upper 8 byte and [7:0] the lower 8 byte.
		 */
		if (isus2i || isus2e || (i & 0x1))
			bad = (psynd & 0xFF);		/* check bits [7:0] */
		else
			bad = (psynd & 0xFF00);		/* check bits [15:8] */

		if (bad && edp) {
			cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL,
			    " E$Data (0x%02x): 0x%08x.%08x "
			    "*Bad* PSYND=0x%04x", offset,
			    (uint32_t)(ecdptr->ec_d8 >> 32),
			    (uint32_t)ecdptr->ec_d8, psynd);
		} else {
			cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL,
			    " E$Data (0x%02x): 0x%08x.%08x", offset,
			    (uint32_t)(ecdptr->ec_d8 >> 32),
			    (uint32_t)ecdptr->ec_d8);
		}
	}
}

/*
 * Common logging function for all cpu async errors.  This function allows the
 * caller to generate a single cmn_err() call that logs the appropriate items
 * from the fault structure, and implements our rules for AFT logging levels.
 *
 *	ce_code: cmn_err() code (e.g. CE_PANIC, CE_WARN, CE_CONT)
 *	tagnum: 0, 1, 2, .. generate the [AFT#] tag
 *	spflt: pointer to spitfire async fault structure
 *	logflags: bitflags indicating what to output
 *	endstr: a end string to appear at the end of this log
 *	fmt: a format string to appear at the beginning of the log
 *
 * The logflags allows the construction of predetermined output from the spflt
 * structure.
 * The individual data items always appear in a consistent order.
 * Note that either or both of the spflt structure pointer and logflags may be
 * NULL or zero respectively, indicating that the predetermined output
 * substrings are not requested in this log.  The output looks like this:
 *
 *	[AFT#] <CPU_ERRID_FIRST><fmt string><CPU_FLTCPU>
 *	<CPU_SPACE><CPU_ERRID>
 *	newline+4spaces<CPU_AFSR><CPU_AFAR>
 *	newline+4spaces<CPU_AF_PSYND><CPU_AF_ETS><CPU_FAULTPC>
 *	newline+4spaces<CPU_UDBH><CPU_UDBL>
 *	newline+4spaces<CPU_SYND>
 *	newline+4spaces<endstr>
 *
 * Note that <endstr> may not start on a newline if we are logging <CPU_PSYND>;
 * it is assumed that <endstr> will be the unum string in this case.  The size
 * of our intermediate formatting buf[] is based on the worst case of all flags
 * being enabled.  We pass the caller's varargs directly to vcmn_err() for
 * formatting so we don't need additional stack space to format them here.
 */
/*PRINTFLIKE6*/
static void
cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt, uint_t logflags,
	const char *endstr, const char *fmt, ...)
{
	struct async_flt *aflt = (struct async_flt *)spflt;
	char buf[400], *p, *q;	/* see comments about buf[] size above */
	va_list ap;
	int console_log_flag;

	/*
	 * Decide whether this message reaches the console or only the
	 * system log, based on fault class/severity and the AFT/CE
	 * verbosity tunables.
	 */
	if ((aflt == NULL) || ((aflt->flt_class == CPU_FAULT) &&
	    (aflt->flt_stat & P_AFSR_LEVEL1)) ||
	    (aflt->flt_panic)) {
		console_log_flag = (tagnum < 2) || aft_verbose;
	} else {
		int verbose = ((aflt->flt_class == BUS_FAULT) ||
		    (aflt->flt_stat & P_AFSR_CE)) ?
		    ce_verbose_memory : ce_verbose_other;

		if (!verbose)
			return;

		console_log_flag = (verbose > 1);
	}

	/* a leading '!' suppresses console output in cmn_err */
	if (console_log_flag)
		(void) sprintf(buf, "[AFT%d]", tagnum);
	else
		(void) sprintf(buf, "![AFT%d]", tagnum);

	p = buf + strlen(buf);	/* current buffer position */
	q = buf + sizeof (buf);	/* pointer past end of buffer */

	if (spflt != NULL && (logflags & CPU_ERRID_FIRST)) {
		(void) snprintf(p, (size_t)(q - p), " errID 0x%08x.%08x",
		    (uint32_t)(aflt->flt_id >> 32), (uint32_t)aflt->flt_id);
		p += strlen(p);
	}

	/*
	 * Copy the caller's format string verbatim into buf[].  It will be
	 * formatted by the call to vcmn_err() at the end of this function.
	 */
	if (fmt != NULL && p < q) {
		(void) strncpy(p, fmt, (size_t)(q - p - 1));
		buf[sizeof (buf) - 1] = '\0';
		p += strlen(p);
	}

	if (spflt != NULL) {
		if (logflags & CPU_FLTCPU) {
			(void) snprintf(p, (size_t)(q - p), " CPU%d",
			    aflt->flt_inst);
			p += strlen(p);
		}

		if (logflags & CPU_SPACE) {
			if (aflt->flt_status & ECC_D_TRAP)
				(void) snprintf(p, (size_t)(q - p),
				    " Data access");
			else if (aflt->flt_status & ECC_I_TRAP)
				(void) snprintf(p, (size_t)(q - p),
				    " Instruction access");
			p += strlen(p);
		}

		if (logflags & CPU_TL) {
			(void) snprintf(p, (size_t)(q - p), " at TL%s",
			    aflt->flt_tl ? ">0" : "=0");
			p += strlen(p);
		}

		if (logflags & CPU_ERRID) {
			(void) snprintf(p, (size_t)(q - p),
			    ", errID 0x%08x.%08x",
			    (uint32_t)(aflt->flt_id >> 32),
			    (uint32_t)aflt->flt_id);
			p += strlen(p);
		}

		/* %b is the kernel cmn_err bit-decoding format */
		if (logflags & CPU_AFSR) {
			(void) snprintf(p, (size_t)(q - p),
			    "\n AFSR 0x%08b.%08b",
			    (uint32_t)(aflt->flt_stat >> 32), AFSR_FMTSTR0,
			    (uint32_t)aflt->flt_stat, AFSR_FMTSTR1);
			p += strlen(p);
		}

		if (logflags & CPU_AFAR) {
			(void) snprintf(p, (size_t)(q - p), " AFAR 0x%08x.%08x",
			    (uint32_t)(aflt->flt_addr >> 32),
			    (uint32_t)aflt->flt_addr);
			p += strlen(p);
		}

		if (logflags & CPU_AF_PSYND) {
			ushort_t psynd = (ushort_t)
			    (aflt->flt_stat & P_AFSR_P_SYND);

			(void) snprintf(p, (size_t)(q - p),
			    "\n AFSR.PSYND 0x%04x(Score %02d)",
			    psynd, ecc_psynd_score(psynd));
			p += strlen(p);
		}

		if (logflags & CPU_AF_ETS) {
			(void) snprintf(p, (size_t)(q - p), " AFSR.ETS 0x%02x",
			    (uchar_t)((aflt->flt_stat & P_AFSR_ETS) >> 16));
			p += strlen(p);
		}

		if (logflags & CPU_FAULTPC) {
			(void) snprintf(p, (size_t)(q - p), " Fault_PC 0x%p",
			    (void *)aflt->flt_pc);
			p += strlen(p);
		}

		if (logflags & CPU_UDBH) {
			(void) snprintf(p, (size_t)(q - p),
			    "\n UDBH 0x%04b UDBH.ESYND 0x%02x",
			    spflt->flt_sdbh, UDB_FMTSTR,
			    spflt->flt_sdbh & 0xFF);
			p += strlen(p);
		}

		if (logflags & CPU_UDBL) {
			(void) snprintf(p, (size_t)(q - p),
			    " UDBL 0x%04b UDBL.ESYND 0x%02x",
			    spflt->flt_sdbl, UDB_FMTSTR,
			    spflt->flt_sdbl & 0xFF);
			p += strlen(p);
		}

		if (logflags & CPU_SYND) {
			ushort_t synd = SYND(aflt->flt_synd);

			(void) snprintf(p, (size_t)(q - p),
			    "\n %s Syndrome 0x%x Memory Module ",
			    UDBL(aflt->flt_synd) ? "UDBL" : "UDBH", synd);
			p += strlen(p);
		}
	}

	if (endstr != NULL) {
		/* after CPU_SYND, endstr (the unum) continues on same line */
		if (!(logflags & CPU_SYND))
			(void) snprintf(p, (size_t)(q - p), "\n %s", endstr);
		else
			(void) snprintf(p, (size_t)(q - p), "%s", endstr);
		p += strlen(p);
	}

	if (ce_code == CE_CONT && (p < q - 1))
		(void) strcpy(p, "\n");	/* add final \n if needed */

	va_start(ap, fmt);
	vcmn_err(ce_code, buf, ap);
	va_end(ap);
}

/*
 * Ecache Scrubbing
 *
 * The basic idea is to prevent lines from sitting in the ecache long enough
 * to build up soft errors which can lead to ecache parity errors.
 *
 * The following rules are observed when flushing the ecache:
 *
 * 1. When the system is busy, flush bad clean lines
 * 2. When the system is idle, flush all clean lines
 * 3. When the system is idle, flush good dirty lines
 * 4. Never flush bad dirty lines.
 *
 *	modify	parity	busy	idle
 *	----------------------------
 *	clean	good		X
 *	clean	bad	X	X
 *	dirty	good		X
 *	dirty	bad
 *
 * Bad or good refers to whether a line has an E$ parity error or not.
 * Clean or dirty refers to the state of the modified bit.  We currently
 * default the scan rate to 100 (scan 10% of the cache per second).
 *
 * The following are E$ states and actions.
 *
 * We encode our state as a 3-bit number, consisting of:
 *	ECACHE_STATE_MODIFIED	(0=clean, 1=dirty)
 *	ECACHE_STATE_PARITY	(0=good, 1=bad)
 *	ECACHE_STATE_BUSY	(0=idle, 1=busy)
 *
 * We associate a flushing and a logging action with each state.
 *
 * E$ actions are different for Spitfire and Sabre/Hummingbird modules.
 * MIRROR_FLUSH indicates that an E$ line will be flushed for the mirrored
 * E$ only, in addition to value being set by ec_flush.
 */

#define	ALWAYS_FLUSH	0x1	/* flush E$ line on all E$ types */
#define	NEVER_FLUSH	0x0	/* never the flush the E$ line */
#define	MIRROR_FLUSH	0xF	/* flush E$ line on mirrored E$ only */

/*
 * Per-state scrub policy, indexed by the 3-bit (M,P,B) state described
 * above.  scrub_ecache_line() uses the same 3-bit value (mpb) both to
 * index this table and to offset into the per-state kstat counters, so
 * the first 8 fields of ecache_kstat_t below must stay in this order.
 */
struct {
	char	ec_flush;	/* whether to flush or not */
	char	ec_log;		/* ecache logging */
	char	ec_log_type;	/* log type info */
} ec_action[] = {	/* states of the E$ line in M P B */
	{ ALWAYS_FLUSH, 0, 0 },			 /* 0 0 0 clean_good_idle */
	{ MIRROR_FLUSH, 0, 0 },			 /* 0 0 1 clean_good_busy */
	{ ALWAYS_FLUSH, 1, CPU_BADLINE_CI_ERR }, /* 0 1 0 clean_bad_idle */
	{ ALWAYS_FLUSH, 1, CPU_BADLINE_CB_ERR }, /* 0 1 1 clean_bad_busy */
	{ ALWAYS_FLUSH, 0, 0 },			 /* 1 0 0 dirty_good_idle */
	{ MIRROR_FLUSH, 0, 0 },			 /* 1 0 1 dirty_good_busy */
	{ NEVER_FLUSH, 1, CPU_BADLINE_DI_ERR },	 /* 1 1 0 dirty_bad_idle */
	{ NEVER_FLUSH, 1, CPU_BADLINE_DB_ERR }	 /* 1 1 1 dirty_bad_busy */
};

/*
 * Offsets into the ec_action[] that determines clean_good_busy and
 * dirty_good_busy lines.
 */
#define	ECACHE_CGB_LINE		1	/* E$ clean_good_busy line */
#define	ECACHE_DGB_LINE		5	/* E$ dirty_good_busy line */

/*
 * We are flushing lines which are Clean_Good_Busy and also the lines
 * Dirty_Good_Busy. And we only follow it for non-mirrored E$.
 */
#define	CGB(x, m)	(((x) == ECACHE_CGB_LINE) && (m != ECACHE_CPU_MIRROR))
#define	DGB(x, m)	(((x) == ECACHE_DGB_LINE) && (m != ECACHE_CPU_MIRROR))

/* Bit positions of the 3-bit (M,P,B) scrub state. */
#define	ECACHE_STATE_MODIFIED	0x4
#define	ECACHE_STATE_PARITY	0x2
#define	ECACHE_STATE_BUSY	0x1

/*
 * If ecache is mirrored ecache_calls_a_sec and ecache_scan_rate are reduced.
 */
int ecache_calls_a_sec_mirrored = 1;
int ecache_lines_per_call_mirrored = 1;

/*
 * Scrubber tunables; these are expected to be patched via /etc/system or
 * a debugger, which is why they are plain ints rather than constants.
 */
int ecache_scrub_enable = 1;	/* ecache scrubbing is on by default */
int ecache_scrub_verbose = 1;	/* prints clean and dirty lines */
int ecache_scrub_panic = 0;	/* panics on a clean and dirty line */
int ecache_calls_a_sec = 100;	/* scrubber calls per sec */
int ecache_scan_rate = 100;	/* scan rate (in tenths of a percent) */
int ecache_idle_factor = 1;	/* increase the scan rate when idle */
int ecache_flush_clean_good_busy = 50;	/* flush rate (in percent) */
int ecache_flush_dirty_good_busy = 100;	/* flush rate (in percent) */

/* Written by scrub_ecache_line(), read by the do_scrub_ecache_line timeout. */
volatile int ec_timeout_calls = 1;	/* timeout calls */

/*
 * Interrupt number and pil for ecache scrubber cross-trap calls.
 */
static uint_t ecache_scrub_inum;
uint_t ecache_scrub_pil = PIL_9;

/*
 * Kstats for the E$ scrubber.
 * NOTE: the first 8 fields are indexed directly by the 3-bit mpb scrub
 * state in scrub_ecache_line() and must match ec_action[] ordering.
 */
typedef struct ecache_kstat {
	kstat_named_t clean_good_idle;		/* # of lines scrubbed */
	kstat_named_t clean_good_busy;		/* # of lines skipped */
	kstat_named_t clean_bad_idle;		/* # of lines scrubbed */
	kstat_named_t clean_bad_busy;		/* # of lines scrubbed */
	kstat_named_t dirty_good_idle;		/* # of lines scrubbed */
	kstat_named_t dirty_good_busy;		/* # of lines skipped */
	kstat_named_t dirty_bad_idle;		/* # of lines skipped */
	kstat_named_t dirty_bad_busy;		/* # of lines skipped */
	kstat_named_t invalid_lines;		/* # of invalid lines */
	kstat_named_t clean_good_busy_flush;	/* # of lines scrubbed */
	kstat_named_t dirty_good_busy_flush;	/* # of lines scrubbed */
	kstat_named_t tags_cleared;		/* # of E$ tags cleared */
} ecache_kstat_t;

/* Template copied into each CPU's kstat data area by ecache_kstat_init(). */
static ecache_kstat_t ec_kstat_template = {
	{ "clean_good_idle", KSTAT_DATA_ULONG },
	{ "clean_good_busy", KSTAT_DATA_ULONG },
	{ "clean_bad_idle", KSTAT_DATA_ULONG },
	{ "clean_bad_busy", KSTAT_DATA_ULONG },
	{ "dirty_good_idle", KSTAT_DATA_ULONG },
	{ "dirty_good_busy", KSTAT_DATA_ULONG },
	{ "dirty_bad_idle", KSTAT_DATA_ULONG },
	{ "dirty_bad_busy", KSTAT_DATA_ULONG },
	{ "invalid_lines", KSTAT_DATA_ULONG },
	{ "clean_good_busy_flush", KSTAT_DATA_ULONG },
	{ "dirty_good_busy_flush", KSTAT_DATA_ULONG },
	{ "ecache_tags_cleared", KSTAT_DATA_ULONG }
};

struct kmem_cache *sf_private_cache;

/*
 * Called periodically on each CPU to scan the ecache once a sec.
 * adjusting the ecache line index appropriately
 */
void
scrub_ecache_line()
{
	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
	int cpuid = CPU->cpu_id;
	uint32_t index = ssmp->ecache_flush_index;
	uint64_t ec_size = cpunodes[cpuid].ecache_size;
	size_t ec_linesize = cpunodes[cpuid].ecache_linesize;
	int nlines = ssmp->ecache_nlines;
	uint32_t ec_set_size = ec_size / ecache_associativity;
	int ec_mirror = ssmp->ecache_mirror;
	ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data;

	int line, scan_lines, flush_clean_busy = 0, flush_dirty_busy = 0;
	int mpb;		/* encode Modified, Parity, Busy for action */
	uchar_t state;
	uint64_t ec_tag, paddr, oafsr, tafsr, nafsr;
	uint64_t *acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
	ec_data_t ec_data[8];
	kstat_named_t *ec_knp;

	/*
	 * Decide how many lines to scan this call, and how many of the
	 * good-busy lines we may additionally flush (non-mirrored only).
	 */
	switch (ec_mirror) {
	default:
	case ECACHE_CPU_NON_MIRROR:
		/*
		 * The E$ scan rate is expressed in units of tenths of
		 * a percent.  ecache_scan_rate = 1000 (100%) means the
		 * whole cache is scanned every second.
		 */
		scan_lines = (nlines * ecache_scan_rate) /
			(1000 * ecache_calls_a_sec);
		if (!(ssmp->ecache_busy)) {
			if (ecache_idle_factor > 0) {
				scan_lines *= ecache_idle_factor;
			}
		} else {
			flush_clean_busy = (scan_lines *
				ecache_flush_clean_good_busy) / 100;
			flush_dirty_busy = (scan_lines *
				ecache_flush_dirty_good_busy) / 100;
		}

		ec_timeout_calls = (ecache_calls_a_sec ?
			ecache_calls_a_sec : 1);
		break;

	case ECACHE_CPU_MIRROR:
		scan_lines = ecache_lines_per_call_mirrored;
		ec_timeout_calls = (ecache_calls_a_sec_mirrored ?
			ecache_calls_a_sec_mirrored : 1);
		break;
	}

	/*
	 * The ecache scrubber algorithm operates by reading and
	 * decoding the E$ tag to determine whether the corresponding E$ line
	 * can be scrubbed. There is an implicit assumption in the scrubber
	 * logic that the E$ tag is valid. Unfortunately, this assertion is
	 * flawed since the E$ tag may also be corrupted and have parity errors
	 * The scrubber logic is enhanced to check the validity of the E$ tag
	 * before scrubbing. When a parity error is detected in the E$ tag,
	 * it is possible to recover and scrub the tag under certain conditions
	 * so that an ETP error condition can be avoided.
	 */

	for (mpb = line = 0; line < scan_lines; line++, mpb = 0) {
		/*
		 * We get the old-AFSR before clearing the AFSR sticky bits
		 * in {get_ecache_tag, check_ecache_line, get_ecache_dtag}
		 * If CP bit is set in the old-AFSR, we log an Orphan CP event.
		 */
		ec_tag = get_ecache_tag(index, &nafsr, acc_afsr);
		state = (uchar_t)((ec_tag & cpu_ec_state_mask) >>
		    cpu_ec_state_shift);

		/*
		 * ETP is set try to scrub the ecache tag.
		 */
		if (nafsr & P_AFSR_ETP) {
			ecache_scrub_tag_err(nafsr, state, index);
		} else if (state & cpu_ec_state_valid) {
			/*
			 * ETP is not set, E$ tag is valid.
			 * Proceed with the E$ scrubbing.
			 */
			if (state & cpu_ec_state_dirty)
				mpb |= ECACHE_STATE_MODIFIED;

			tafsr = check_ecache_line(index, acc_afsr);

			if (tafsr & P_AFSR_EDP) {
				mpb |= ECACHE_STATE_PARITY;

				/* capture the line's data/tag for logging */
				if (ecache_scrub_verbose ||
				    ecache_scrub_panic) {
					get_ecache_dtag(P2ALIGN(index, 64),
					    (uint64_t *)&ec_data[0],
					    &ec_tag, &oafsr, acc_afsr);
				}
			}

			if (ssmp->ecache_busy)
				mpb |= ECACHE_STATE_BUSY;

			/*
			 * mpb indexes both ec_action[] and the first 8
			 * per-state counters of ecache_kstat_t.
			 */
			ec_knp = (kstat_named_t *)ec_ksp + mpb;
			ec_knp->value.ul++;

			/* reconstruct the PA from the tag and set offset */
			paddr = ((ec_tag & cpu_ec_tag_mask) <<
			    cpu_ec_tag_shift) | (index % ec_set_size);

			/*
			 * We flush the E$ lines depending on the ec_flush,
			 * we additionally flush clean_good_busy and
			 * dirty_good_busy lines for mirrored E$.
			 */
			if (ec_action[mpb].ec_flush == ALWAYS_FLUSH) {
				flushecacheline(paddr, ec_size);
			} else if ((ec_mirror == ECACHE_CPU_MIRROR) &&
				(ec_action[mpb].ec_flush == MIRROR_FLUSH)) {
				flushecacheline(paddr, ec_size);
			} else if (ec_action[mpb].ec_flush == NEVER_FLUSH) {
				/*
				 * dirty+bad line: never flushed; retire the
				 * page via softcall instead.
				 */
				softcall(ecache_page_retire, (void *)paddr);
			}

			/*
			 * Conditionally flush both the clean_good and
			 * dirty_good lines when busy.
			 */
			if (CGB(mpb, ec_mirror) && (flush_clean_busy > 0)) {
				flush_clean_busy--;
				flushecacheline(paddr, ec_size);
				ec_ksp->clean_good_busy_flush.value.ul++;
			} else if (DGB(mpb, ec_mirror) &&
				(flush_dirty_busy > 0)) {
				flush_dirty_busy--;
				flushecacheline(paddr, ec_size);
				ec_ksp->dirty_good_busy_flush.value.ul++;
			}

			if (ec_action[mpb].ec_log && (ecache_scrub_verbose ||
			    ecache_scrub_panic)) {
				ecache_scrub_log(ec_data, ec_tag, paddr, mpb,
				    tafsr);
			}

		} else {
			ec_ksp->invalid_lines.value.ul++;
		}

		/* advance to the next line, wrapping at the cache size */
		if ((index += ec_linesize) >= ec_size)
			index = 0;

	}

	/*
	 * set the ecache scrub index for the next time around
	 */
	ssmp->ecache_flush_index = index;

	/* report any Orphan CP accumulated in the per-CPU sticky AFSR */
	if (*acc_afsr & P_AFSR_CP) {
		uint64_t ret_afsr;

		ret_afsr = ecache_scrub_misc_err(CPU_ORPHAN_CP_ERR, *acc_afsr);
		if ((ret_afsr & P_AFSR_CP) == 0)
			*acc_afsr = 0;
	}
}

/*
 * Handler for ecache_scrub_inum softint. Call scrub_ecache_line until
 * we decrement the outstanding request count to zero.
 */

/*ARGSUSED*/
uint_t
scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
{
	int i;
	int outstanding;
	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
	uint32_t *countp = &ssmp->ec_scrub_outstanding;

	/*
	 * Snapshot the outstanding count, service that many scans, then
	 * atomically subtract what we serviced; loop if more requests
	 * arrived in the meantime.
	 */
	do {
		outstanding = *countp;
		ASSERT(outstanding > 0);
		for (i = 0; i < outstanding; i++)
			scrub_ecache_line();
	} while (atomic_add_32_nv(countp, -outstanding));

	return (DDI_INTR_CLAIMED);
}

/*
 * force each cpu to perform an ecache scrub, called from a timeout
 */
extern xcfunc_t ecache_scrubreq_tl1;

void
do_scrub_ecache_line(void)
{
	long delta;
	/* clamp the (patchable) call rates to the sane range [1, hz] */
	if (ecache_calls_a_sec > hz)
		ecache_calls_a_sec = hz;
	else if (ecache_calls_a_sec <= 0)
		ecache_calls_a_sec = 1;

	if (ecache_calls_a_sec_mirrored > hz)
		ecache_calls_a_sec_mirrored = hz;
	else if (ecache_calls_a_sec_mirrored <= 0)
		ecache_calls_a_sec_mirrored = 1;

	/*
	 * Cross-trap every CPU to request a scrub pass, then re-arm
	 * ourselves; when scrubbing is disabled we still poll once a
	 * second so a re-enable takes effect.
	 */
	if (ecache_scrub_enable) {
		xt_all(ecache_scrubreq_tl1, ecache_scrub_inum, 0);
		delta = hz / ec_timeout_calls;
	} else {
		delta = hz;
	}

	(void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0,
	    delta);
}

/*
 * initialization for ecache scrubbing
 * This routine is called AFTER all cpus have had cpu_init_private called
 * to initialize their private data areas.
 */
void
cpu_init_cache_scrub(void)
{
	if (ecache_calls_a_sec > hz) {
		cmn_err(CE_NOTE, "ecache_calls_a_sec set too high (%d); "
		    "resetting to hz (%d)", ecache_calls_a_sec, hz);
		ecache_calls_a_sec = hz;
	}

	/*
	 * Register softint for ecache scrubbing.
	 */
	ecache_scrub_inum = add_softintr(ecache_scrub_pil,
	    scrub_ecache_line_intr, NULL);

	/*
	 * kick off the scrubbing using realtime timeout
	 */
	(void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0,
	    hz / ecache_calls_a_sec);
}

/*
 * Unset the busy flag for this cpu.
 */
void
cpu_idle_ecache_scrub(struct cpu *cp)
{
	/* cpu_private may not be set up yet (or already torn down) */
	if (CPU_PRIVATE(cp) != NULL) {
		spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp,
		    sfpr_scrub_misc);
		ssmp->ecache_busy = ECACHE_CPU_IDLE;
	}
}

/*
 * Set the busy flag for this cpu.
 */
void
cpu_busy_ecache_scrub(struct cpu *cp)
{
	/* cpu_private may not be set up yet (or already torn down) */
	if (CPU_PRIVATE(cp) != NULL) {
		spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp,
		    sfpr_scrub_misc);
		ssmp->ecache_busy = ECACHE_CPU_BUSY;
	}
}

/*
 * initialize the ecache scrubber data structures
 * The global entry point cpu_init_private replaces this entry point.
 *
 */
static void
cpu_init_ecache_scrub_dr(struct cpu *cp)
{
	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
	int cpuid = cp->cpu_id;

	/*
	 * initialize bookkeeping for cache scrubbing
	 */
	bzero(ssmp, sizeof (spitfire_scrub_misc_t));

	ssmp->ecache_flush_index = 0;

	ssmp->ecache_nlines =
	    cpunodes[cpuid].ecache_size / cpunodes[cpuid].ecache_linesize;

	/*
	 * Determine whether we are running on mirrored SRAM
	 */

	if (cpunodes[cpuid].msram == ECACHE_CPU_MIRROR)
		ssmp->ecache_mirror = ECACHE_CPU_MIRROR;
	else
		ssmp->ecache_mirror = ECACHE_CPU_NON_MIRROR;

	/* start out assuming the cpu is busy; the idle loop will clear it */
	cpu_busy_ecache_scrub(cp);

	/*
	 * initialize the kstats
	 */
	ecache_kstat_init(cp);
}

/*
 * uninitialize the ecache scrubber data structures
 * The global entry point cpu_uninit_private replaces this entry point.
 */
static void
cpu_uninit_ecache_scrub_dr(struct cpu *cp)
{
	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);

	/* tear down the kstat first so nothing reads freed scrub state */
	if (ssmp->ecache_ksp != NULL) {
		kstat_delete(ssmp->ecache_ksp);
		ssmp->ecache_ksp = NULL;
	}

	/*
	 * un-initialize bookkeeping for cache scrubbing
	 */
	bzero(ssmp, sizeof (spitfire_scrub_misc_t));

	cpu_idle_ecache_scrub(cp);
}

/*
 * NOTE(review): this is a second tentative definition of sf_private_cache
 * (one also appears earlier in this file, above scrub_ecache_line).  The
 * two merge legally under C linkage rules, but one of them could be
 * removed for clarity.
 */
struct kmem_cache *sf_private_cache;

/*
 * Cpu private initialization. This includes allocating the cpu_private
 * data structure, initializing it, and initializing the scrubber for this
 * cpu. This is called once for EVERY cpu, including CPU 0. This function
 * calls cpu_init_ecache_scrub_dr to init the scrubber.
 * We use kmem_cache_create for the spitfire private data structure because it
 * needs to be allocated on a S_ECACHE_MAX_LSIZE (64) byte boundary.
 */
void
cpu_init_private(struct cpu *cp)
{
	spitfire_private_t *sfprp;

	ASSERT(CPU_PRIVATE(cp) == NULL);

	/*
	 * If the sf_private_cache has not been created, create it.
	 */
	if (sf_private_cache == NULL) {
		sf_private_cache = kmem_cache_create("sf_private_cache",
		    sizeof (spitfire_private_t), S_ECACHE_MAX_LSIZE, NULL,
		    NULL, NULL, NULL, NULL, 0);
		ASSERT(sf_private_cache);
	}

	sfprp = CPU_PRIVATE(cp) = kmem_cache_alloc(sf_private_cache, KM_SLEEP);

	bzero(sfprp, sizeof (spitfire_private_t));

	cpu_init_ecache_scrub_dr(cp);
}

/*
 * Cpu private unitialization. Uninitialize the Ecache scrubber and
 * deallocate the scrubber data structures and cpu_private data structure.
 * For now, this function just calls cpu_unint_ecache_scrub_dr to uninit
 * the scrubber for the specified cpu.
 */
void
cpu_uninit_private(struct cpu *cp)
{
	ASSERT(CPU_PRIVATE(cp));

	cpu_uninit_ecache_scrub_dr(cp);
	kmem_cache_free(sf_private_cache, CPU_PRIVATE(cp));
	CPU_PRIVATE(cp) = NULL;
}

/*
 * initialize the ecache kstats for each cpu
 */
static void
ecache_kstat_init(struct cpu *cp)
{
	struct kstat *ksp;
	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);

	ASSERT(ssmp != NULL);

	/* a kstat failure is non-fatal; scrubbing continues without stats */
	if ((ksp = kstat_create("unix", cp->cpu_id, "ecache_kstat", "misc",
	    KSTAT_TYPE_NAMED,
	    sizeof (ecache_kstat_t) / sizeof (kstat_named_t),
	    KSTAT_FLAG_WRITABLE)) == NULL) {
		ssmp->ecache_ksp = NULL;
		cmn_err(CE_NOTE, "!ecache_kstat_init(%d) failed\n", cp->cpu_id);
		return;
	}

	ssmp->ecache_ksp = ksp;
	bcopy(&ec_kstat_template, ksp->ks_data, sizeof (ecache_kstat_t));
	kstat_install(ksp);
}

/*
 * log the bad ecache information
 */
static void
ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag, uint64_t paddr, int mpb,
	uint64_t afsr)
{
	spitf_async_flt spf_flt;
	struct async_flt *aflt;
	int i;
	char *class;

	bzero(&spf_flt, sizeof (spitf_async_flt));
	aflt = &spf_flt.cmn_asyncflt;

	/* snapshot the captured E$ line data into the fault structure */
	for (i = 0; i < 8; i++) {
		spf_flt.flt_ec_data[i] = ec_data[i];
	}

	spf_flt.flt_ec_tag = ec_tag;

	/*
	 * mpb within ec_action[] bounds is a scrub-state code; larger
	 * values are passed through as an explicit fault type.
	 */
	if (mpb < (sizeof (ec_action) / sizeof (ec_action[0]))) {
		spf_flt.flt_type = ec_action[mpb].ec_log_type;
	} else spf_flt.flt_type = (ushort_t)mpb;

	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_class = CPU_FAULT;
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_addr = paddr;
	aflt->flt_stat = afsr;
	aflt->flt_panic = (uchar_t)ecache_scrub_panic;

	/* classify the ereport: tag-related errors vs. data errors */
	switch (mpb) {
	case CPU_ECACHE_TAG_ERR:
	case CPU_ECACHE_ADDR_PAR_ERR:
	case CPU_ECACHE_ETP_ETS_ERR:
	case CPU_ECACHE_STATE_ERR:
		class = FM_EREPORT_CPU_USII_ESCRUB_TAG;
		break;
	default:
		class = FM_EREPORT_CPU_USII_ESCRUB_DATA;
		break;
	}

	cpu_errorq_dispatch(class, (void *)&spf_flt, sizeof (spf_flt),
	    ue_queue, aflt->flt_panic);

	if (aflt->flt_panic)
		cmn_err(CE_PANIC, "ecache_scrub_panic set and bad E$"
		    "line detected");
}

/*
 * Process an ecache error that occured during the E$ scrubbing.
 * We do the ecache scan to find the bad line, flush the bad line
 * and start the memscrubber to find any UE (in memory or in another cache)
 *
 * Returns the AFSR value accumulated by the E$ scan (oafsr) so the caller
 * can fold any copyout/other error status into its own bookkeeping.
 */
static uint64_t
ecache_scrub_misc_err(int type, uint64_t afsr)
{
	spitf_async_flt spf_flt;
	struct async_flt *aflt;
	uint64_t oafsr;

	bzero(&spf_flt, sizeof (spitf_async_flt));
	aflt = &spf_flt.cmn_asyncflt;

	/*
	 * Scan each line in the cache to look for the one
	 * with bad parity
	 */
	aflt->flt_addr = AFLT_INV_ADDR;
	scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
	    &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr);

	/* Accumulate any copyout error seen by the scan in the per-CPU AFSR. */
	if (oafsr & P_AFSR_CP) {
		uint64_t *cp_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
		*cp_afsr |= oafsr;
	}

	/*
	 * If we found a bad PA, update the state to indicate if it is
	 * memory or I/O space.
	 */
	if (aflt->flt_addr != AFLT_INV_ADDR) {
		aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
		    MMU_PAGESHIFT)) ? 1 : 0;
	}

	spf_flt.flt_type = (ushort_t)type;

	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_class = CPU_FAULT;
	aflt->flt_id = gethrtime_waitfree();
	/*
	 * NOTE(review): the AFSR is stored in flt_status here, whereas the
	 * sibling ecache_scrub_log() stores it in flt_stat — confirm which
	 * field downstream consumers of this fault type actually read.
	 */
	aflt->flt_status = afsr;
	aflt->flt_panic = (uchar_t)ecache_scrub_panic;

	/*
	 * We have the bad line, flush that line and start
	 * the memscrubber.
	 */
	if (spf_flt.flt_ec_lcnt > 0) {
		flushecacheline(P2ALIGN(aflt->flt_addr, 64),
		    cpunodes[CPU->cpu_id].ecache_size);
		read_all_memscrub = 1;
		memscrub_run();
	}

	cpu_errorq_dispatch((type == CPU_ORPHAN_CP_ERR) ?
	    FM_EREPORT_CPU_USII_CP : FM_EREPORT_CPU_USII_UNKNOWN,
	    (void *)&spf_flt, sizeof (spf_flt), ue_queue, aflt->flt_panic);

	return (oafsr);
}

/*
 * Handle an E$ tag error found by the scrubber at E$ offset 'index'.
 * Reads the line's data/tag via get_ecache_dtag(), reconstructs the
 * physical address from the tag, then classifies the error from the
 * AFSR.ETS syndrome bits: clean lines with a tag parity error are
 * invalidated and logged; all other tag/state parity combinations are
 * logged and then panic, since they cannot be repaired safely.
 */
static void
ecache_scrub_tag_err(uint64_t afsr, uchar_t state, uint32_t index)
{
	ushort_t afsr_ets = (afsr & P_AFSR_ETS) >> P_AFSR_ETS_SHIFT;
	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
	ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data;
	uint64_t ec_tag, paddr, oafsr;
	ec_data_t ec_data[8];
	int cpuid = CPU->cpu_id;
	uint32_t ec_set_size = cpunodes[cpuid].ecache_size /
	    ecache_associativity;
	uint64_t *cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);

	get_ecache_dtag(P2ALIGN(index, 64), (uint64_t *)&ec_data[0], &ec_tag,
	    &oafsr, cpu_afsr);
	/* Rebuild the PA: tag supplies the high bits, index the set offset. */
	paddr = ((ec_tag & cpu_ec_tag_mask) << cpu_ec_tag_shift) |
	    (index % ec_set_size);

	/*
	 * E$ tag state has good parity
	 */
	if ((afsr_ets & cpu_ec_state_parity) == 0) {
		if (afsr_ets & cpu_ec_parity) {
			/*
			 * E$ tag state bits indicate the line is clean,
			 * invalidate the E$ tag and continue.
			 */
			if (!(state & cpu_ec_state_dirty)) {
				/*
				 * Zero the tag and mark the state invalid
				 * with good parity for the tag.
				 */
				if (isus2i || isus2e)
					write_hb_ec_tag_parity(index);
				else
					write_ec_tag_parity(index);

				/* Sync with the dual tag */
				flushecacheline(0,
				    cpunodes[CPU->cpu_id].ecache_size);
				ec_ksp->tags_cleared.value.ul++;
				ecache_scrub_log(ec_data, ec_tag, paddr,
				    CPU_ECACHE_TAG_ERR, afsr);
				return;
			} else {
				/* Dirty line with bad tag parity: fatal. */
				ecache_scrub_log(ec_data, ec_tag, paddr,
				    CPU_ECACHE_ADDR_PAR_ERR, afsr);
				cmn_err(CE_PANIC, " E$ tag address has bad"
				    " parity");
			}
		} else if ((afsr_ets & cpu_ec_parity) == 0) {
			/*
			 * ETS is zero but ETP is set
			 */
			ecache_scrub_log(ec_data, ec_tag, paddr,
			    CPU_ECACHE_ETP_ETS_ERR, afsr);
			cmn_err(CE_PANIC, "AFSR.ETP is set and"
			    " AFSR.ETS is zero");
		}
	} else {
		/*
		 * E$ tag state bit has a bad parity
		 */
		ecache_scrub_log(ec_data, ec_tag, paddr,
		    CPU_ECACHE_STATE_ERR, afsr);
		cmn_err(CE_PANIC, "E$ tag state has bad parity");
	}
}

static void
ecache_page_retire(void *arg)
{
	/* 'arg' carries the faulting physical address, not a pointer. */
	uint64_t paddr = (uint64_t)arg;
	(void) page_retire(paddr, PR_UE);
}

/* Stick-register sync: no-op on Spitfire-class CPUs (no stick register). */
void
sticksync_slave(void)
{}

void
sticksync_master(void)
{}

/* CE checking hook: not implemented for this CPU module. */
/*ARGSUSED*/
void
cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t bpp)
{}

/*
 * Offer a bus error to the FMA handlers registered under the root nexus.
 * If no protection was in place (AFLT_PROT_NONE) and a handler declares
 * the error fatal, mark the fault for panic.
 */
void
cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
{
	int status;
	ddi_fm_error_t de;

	bzero(&de, sizeof (ddi_fm_error_t));

	de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
	    FM_ENA_FMT1);
	de.fme_flag = expected;
	de.fme_bus_specific = (void *)aflt->flt_addr;
	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);

	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
		aflt->flt_panic = 1;
}

/*
 * Tag the fault payload with its ereport class and queue it on 'eqp'.
 * 'payload' must begin with a struct async_flt.
 */
/*ARGSUSED*/
void
cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
    errorq_t *eqp, uint_t flag)
{
	struct async_flt *aflt = (struct async_flt *)payload;

	aflt->flt_erpt_class = error_class;
	errorq_dispatch(eqp, payload, payload_sz, flag);
}

/* Maximum memory modules tracked per CPU in the CE accounting array. */
#define	MAX_SIMM	8

/* Per-unum correctable-error counters; see the legacy-hash comment below. */
struct ce_info {
	char	name[UNUM_NAMLEN];
	uint64_t intermittent_total;
	uint64_t persistent_total;
	uint64_t sticky_total;
	unsigned short leaky_bucket_cnt;
};

/*
 * Separately-defined structure for use in reporting the ce_info
 * to SunVTS without exposing the internal layout and implementation
 * of struct ce_info.
 */
static struct ecc_error_info ecc_error_info_data = {
	{ "version", KSTAT_DATA_UINT32 },
	{ "maxcount", KSTAT_DATA_UINT32 },
	{ "count", KSTAT_DATA_UINT32 }
};
static const size_t ecc_error_info_ndata = sizeof (ecc_error_info_data) /
    sizeof (struct kstat_named);

#if KSTAT_CE_UNUM_NAMLEN < UNUM_NAMLEN
#error "Need to rev ecc_error_info version and update KSTAT_CE_UNUM_NAMLEN"
#endif

/* CE accounting array; allocated in cpu_mp_init(). */
struct ce_info *mem_ce_simm = NULL;
size_t mem_ce_simm_size = 0;

/*
 * Default values for the number of CE's allowed per interval.
41420Sstevel@tonic-gate * Interval is defined in minutes 41430Sstevel@tonic-gate * SOFTERR_MIN_TIMEOUT is defined in microseconds 41440Sstevel@tonic-gate */ 41450Sstevel@tonic-gate #define SOFTERR_LIMIT_DEFAULT 2 41460Sstevel@tonic-gate #define SOFTERR_INTERVAL_DEFAULT 1440 /* This is 24 hours */ 41470Sstevel@tonic-gate #define SOFTERR_MIN_TIMEOUT (60 * MICROSEC) /* This is 1 minute */ 41480Sstevel@tonic-gate #define TIMEOUT_NONE ((timeout_id_t)0) 41490Sstevel@tonic-gate #define TIMEOUT_SET ((timeout_id_t)1) 41500Sstevel@tonic-gate 41510Sstevel@tonic-gate /* 41520Sstevel@tonic-gate * timeout identifer for leaky_bucket 41530Sstevel@tonic-gate */ 41540Sstevel@tonic-gate static timeout_id_t leaky_bucket_timeout_id = TIMEOUT_NONE; 41550Sstevel@tonic-gate 41560Sstevel@tonic-gate /* 41570Sstevel@tonic-gate * Tunables for maximum number of allowed CE's in a given time 41580Sstevel@tonic-gate */ 41590Sstevel@tonic-gate int ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT; 41600Sstevel@tonic-gate int ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT; 41610Sstevel@tonic-gate 41620Sstevel@tonic-gate void 41630Sstevel@tonic-gate cpu_mp_init(void) 41640Sstevel@tonic-gate { 41650Sstevel@tonic-gate size_t size = cpu_aflt_size(); 41660Sstevel@tonic-gate size_t i; 41670Sstevel@tonic-gate kstat_t *ksp; 41680Sstevel@tonic-gate 41690Sstevel@tonic-gate /* 41700Sstevel@tonic-gate * Initialize the CE error handling buffers. 
41710Sstevel@tonic-gate */ 41720Sstevel@tonic-gate mem_ce_simm_size = MAX_SIMM * max_ncpus; 41730Sstevel@tonic-gate size = sizeof (struct ce_info) * mem_ce_simm_size; 41740Sstevel@tonic-gate mem_ce_simm = kmem_zalloc(size, KM_SLEEP); 41750Sstevel@tonic-gate 41760Sstevel@tonic-gate ksp = kstat_create("unix", 0, "ecc-info", "misc", 41770Sstevel@tonic-gate KSTAT_TYPE_NAMED, ecc_error_info_ndata, KSTAT_FLAG_VIRTUAL); 41780Sstevel@tonic-gate if (ksp != NULL) { 41790Sstevel@tonic-gate ksp->ks_data = (struct kstat_named *)&ecc_error_info_data; 41800Sstevel@tonic-gate ecc_error_info_data.version.value.ui32 = KSTAT_CE_INFO_VER; 41810Sstevel@tonic-gate ecc_error_info_data.maxcount.value.ui32 = mem_ce_simm_size; 41820Sstevel@tonic-gate ecc_error_info_data.count.value.ui32 = 0; 41830Sstevel@tonic-gate kstat_install(ksp); 41840Sstevel@tonic-gate } 41850Sstevel@tonic-gate 41860Sstevel@tonic-gate for (i = 0; i < mem_ce_simm_size; i++) { 41870Sstevel@tonic-gate struct kstat_ecc_mm_info *kceip; 41880Sstevel@tonic-gate 41890Sstevel@tonic-gate kceip = kmem_zalloc(sizeof (struct kstat_ecc_mm_info), 41900Sstevel@tonic-gate KM_SLEEP); 41910Sstevel@tonic-gate ksp = kstat_create("mm", i, "ecc-info", "misc", 41920Sstevel@tonic-gate KSTAT_TYPE_NAMED, 41930Sstevel@tonic-gate sizeof (struct kstat_ecc_mm_info) / sizeof (kstat_named_t), 41940Sstevel@tonic-gate KSTAT_FLAG_VIRTUAL); 41950Sstevel@tonic-gate if (ksp != NULL) { 41960Sstevel@tonic-gate /* 41970Sstevel@tonic-gate * Re-declare ks_data_size to include room for the 41980Sstevel@tonic-gate * UNUM name since we don't have KSTAT_FLAG_VAR_SIZE 41990Sstevel@tonic-gate * set. 
42000Sstevel@tonic-gate */ 42010Sstevel@tonic-gate ksp->ks_data_size = sizeof (struct kstat_ecc_mm_info) + 42020Sstevel@tonic-gate KSTAT_CE_UNUM_NAMLEN; 42030Sstevel@tonic-gate ksp->ks_data = kceip; 42040Sstevel@tonic-gate kstat_named_init(&kceip->name, 42050Sstevel@tonic-gate "name", KSTAT_DATA_STRING); 42060Sstevel@tonic-gate kstat_named_init(&kceip->intermittent_total, 42070Sstevel@tonic-gate "intermittent_total", KSTAT_DATA_UINT64); 42080Sstevel@tonic-gate kstat_named_init(&kceip->persistent_total, 42090Sstevel@tonic-gate "persistent_total", KSTAT_DATA_UINT64); 42100Sstevel@tonic-gate kstat_named_init(&kceip->sticky_total, 42110Sstevel@tonic-gate "sticky_total", KSTAT_DATA_UINT64); 42120Sstevel@tonic-gate /* 42130Sstevel@tonic-gate * Use the default snapshot routine as it knows how to 42140Sstevel@tonic-gate * deal with named kstats with long strings. 42150Sstevel@tonic-gate */ 42160Sstevel@tonic-gate ksp->ks_update = ecc_kstat_update; 42170Sstevel@tonic-gate kstat_install(ksp); 42180Sstevel@tonic-gate } else { 42190Sstevel@tonic-gate kmem_free(kceip, sizeof (struct kstat_ecc_mm_info)); 42200Sstevel@tonic-gate } 42210Sstevel@tonic-gate } 42220Sstevel@tonic-gate } 42230Sstevel@tonic-gate 42240Sstevel@tonic-gate /*ARGSUSED*/ 42250Sstevel@tonic-gate static void 42260Sstevel@tonic-gate leaky_bucket_timeout(void *arg) 42270Sstevel@tonic-gate { 42280Sstevel@tonic-gate int i; 42290Sstevel@tonic-gate struct ce_info *psimm = mem_ce_simm; 42300Sstevel@tonic-gate 42310Sstevel@tonic-gate for (i = 0; i < mem_ce_simm_size; i++) { 42320Sstevel@tonic-gate if (psimm[i].leaky_bucket_cnt > 0) 42330Sstevel@tonic-gate atomic_add_16(&psimm[i].leaky_bucket_cnt, -1); 42340Sstevel@tonic-gate } 42350Sstevel@tonic-gate add_leaky_bucket_timeout(); 42360Sstevel@tonic-gate } 42370Sstevel@tonic-gate 42380Sstevel@tonic-gate static void 42390Sstevel@tonic-gate add_leaky_bucket_timeout(void) 42400Sstevel@tonic-gate { 42410Sstevel@tonic-gate long timeout_in_microsecs; 42420Sstevel@tonic-gate 
42430Sstevel@tonic-gate /* 42440Sstevel@tonic-gate * create timeout for next leak. 42450Sstevel@tonic-gate * 42460Sstevel@tonic-gate * The timeout interval is calculated as follows 42470Sstevel@tonic-gate * 42480Sstevel@tonic-gate * (ecc_softerr_interval * 60 * MICROSEC) / ecc_softerr_limit 42490Sstevel@tonic-gate * 42500Sstevel@tonic-gate * ecc_softerr_interval is in minutes, so multiply this by 60 (seconds 42510Sstevel@tonic-gate * in a minute), then multiply this by MICROSEC to get the interval 42520Sstevel@tonic-gate * in microseconds. Divide this total by ecc_softerr_limit so that 42530Sstevel@tonic-gate * the timeout interval is accurate to within a few microseconds. 42540Sstevel@tonic-gate */ 42550Sstevel@tonic-gate 42560Sstevel@tonic-gate if (ecc_softerr_limit <= 0) 42570Sstevel@tonic-gate ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT; 42580Sstevel@tonic-gate if (ecc_softerr_interval <= 0) 42590Sstevel@tonic-gate ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT; 42600Sstevel@tonic-gate 42610Sstevel@tonic-gate timeout_in_microsecs = ((int64_t)ecc_softerr_interval * 60 * MICROSEC) / 42620Sstevel@tonic-gate ecc_softerr_limit; 42630Sstevel@tonic-gate 42640Sstevel@tonic-gate if (timeout_in_microsecs < SOFTERR_MIN_TIMEOUT) 42650Sstevel@tonic-gate timeout_in_microsecs = SOFTERR_MIN_TIMEOUT; 42660Sstevel@tonic-gate 42670Sstevel@tonic-gate leaky_bucket_timeout_id = timeout(leaky_bucket_timeout, 42680Sstevel@tonic-gate (void *)NULL, drv_usectohz((clock_t)timeout_in_microsecs)); 42690Sstevel@tonic-gate } 42700Sstevel@tonic-gate 42710Sstevel@tonic-gate /* 42720Sstevel@tonic-gate * Legacy Correctable ECC Error Hash 42730Sstevel@tonic-gate * 42740Sstevel@tonic-gate * All of the code below this comment is used to implement a legacy array 42750Sstevel@tonic-gate * which counted intermittent, persistent, and sticky CE errors by unum, 42760Sstevel@tonic-gate * and then was later extended to publish the data as a kstat for SunVTS. 
 * All of this code is replaced by FMA, and remains here until such time
 * that the UltraSPARC-I/II CPU code is converted to FMA, or is EOLed.
 *
 * Errors are saved in three buckets per-unum:
 * (1) sticky - scrub was unsuccessful, cannot be scrubbed
 *     This could represent a problem, and is immediately printed out.
 * (2) persistent - was successfully scrubbed
 *     These errors use the leaky bucket algorithm to determine
 *     if there is a serious problem.
 * (3) intermittent - may have originated from the cpu or upa/safari bus,
 *     and does not necessarily indicate any problem with the dimm itself,
 *     is critical information for debugging new hardware.
 *     Because we do not know if it came from the dimm, it would be
 *     inappropriate to include these in the leaky bucket counts.
 *
 * If the E$ line was modified before the scrub operation began, then the
 * displacement flush at the beginning of scrubphys() will cause the modified
 * line to be written out, which will clean up the CE.  Then, any subsequent
 * read will not cause an error, which will cause persistent errors to be
 * identified as intermittent.
 *
 * If a DIMM is going bad, it will produce true persistents as well as
 * false intermittents, so these intermittents can be safely ignored.
 *
 * If the error count is excessive for a DIMM, this function will return
 * PR_MCE, and the CPU module may then decide to remove that page from use.
 */
static int
ce_count_unum(int status, int len, char *unum)
{
	int i;
	struct ce_info *psimm = mem_ce_simm;
	int page_status = PR_OK;

	ASSERT(psimm != NULL);

	/* Nothing to record: empty unum or no recognized ECC status bit. */
	if (len <= 0 ||
	    (status & (ECC_STICKY | ECC_PERSISTENT | ECC_INTERMITTENT)) == 0)
		return (page_status);

	/*
	 * Initialize the leaky_bucket timeout
	 * (first caller wins the compare-and-swap and arms it).
	 */
	if (casptr(&leaky_bucket_timeout_id,
	    TIMEOUT_NONE, TIMEOUT_SET) == TIMEOUT_NONE)
		add_leaky_bucket_timeout();

	/* Linear scan: empty slot terminates the valid entries. */
	for (i = 0; i < mem_ce_simm_size; i++) {
		if (psimm[i].name[0] == '\0') {
			/*
			 * Hit the end of the valid entries, add
			 * a new one.
			 *
			 * NOTE(review): strncpy does not NUL-terminate when
			 * len >= UNUM_NAMLEN; presumably callers pass
			 * len < UNUM_NAMLEN — confirm against callers.
			 */
			(void) strncpy(psimm[i].name, unum, len);
			if (status & ECC_STICKY) {
				/*
				 * Sticky - the leaky bucket is used to track
				 * soft errors.  Since a sticky error is a
				 * hard error and likely to be retired soon,
				 * we do not count it in the leaky bucket.
				 */
				psimm[i].leaky_bucket_cnt = 0;
				psimm[i].intermittent_total = 0;
				psimm[i].persistent_total = 0;
				psimm[i].sticky_total = 1;
				cmn_err(CE_WARN,
				    "[AFT0] Sticky Softerror encountered "
				    "on Memory Module %s\n", unum);
				page_status = PR_MCE;
			} else if (status & ECC_PERSISTENT) {
				psimm[i].leaky_bucket_cnt = 1;
				psimm[i].intermittent_total = 0;
				psimm[i].persistent_total = 1;
				psimm[i].sticky_total = 0;
			} else {
				/*
				 * Intermittent - Because the scrub operation
				 * cannot find the error in the DIMM, we will
				 * not count these in the leaky bucket
				 */
				psimm[i].leaky_bucket_cnt = 0;
				psimm[i].intermittent_total = 1;
				psimm[i].persistent_total = 0;
				psimm[i].sticky_total = 0;
			}
			ecc_error_info_data.count.value.ui32++;
			break;
		} else if (strncmp(unum, psimm[i].name, len) == 0) {
			/*
			 * Found an existing entry for the current
			 * memory module, adjust the counts.
			 */
			if (status & ECC_STICKY) {
				psimm[i].sticky_total++;
				cmn_err(CE_WARN,
				    "[AFT0] Sticky Softerror encountered "
				    "on Memory Module %s\n", unum);
				page_status = PR_MCE;
			} else if (status & ECC_PERSISTENT) {
				int new_value;

				new_value = atomic_add_16_nv(
				    &psimm[i].leaky_bucket_cnt, 1);
				psimm[i].persistent_total++;
				if (new_value > ecc_softerr_limit) {
					cmn_err(CE_WARN, "[AFT0] Most recent %d"
					    " soft errors from Memory Module"
					    " %s exceed threshold (N=%d,"
					    " T=%dh:%02dm) triggering page"
					    " retire", new_value, unum,
					    ecc_softerr_limit,
					    ecc_softerr_interval / 60,
					    ecc_softerr_interval % 60);
					/* Undo the add so we warn again later */
					atomic_add_16(
					    &psimm[i].leaky_bucket_cnt, -1);
					page_status = PR_MCE;
				}
			} else { /* Intermittent */
				psimm[i].intermittent_total++;
			}
			break;
		}
	}

	if (i >= mem_ce_simm_size)
		cmn_err(CE_CONT, "[AFT0] Softerror: mem_ce_simm[] out of "
		    "space.\n");

	return (page_status);
}

/*
 * Function to support counting of IO detected CEs.
 */
void
cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
{
	int err;

	/* ce_count_unum() returns PR_OK, or a PR_* code requesting retire. */
	err = ce_count_unum(ecc->flt_status, len, unum);
	if (err != PR_OK && automatic_page_removal) {
		(void) page_retire(ecc->flt_addr, err);
	}
}

/*
 * kstat ks_update routine for the per-slot "mm:<i>:ecc-info" kstats.
 * Copies the slot's counters from mem_ce_simm into the kstat buffer.
 * Read-only: writes are rejected with EACCES.
 */
static int
ecc_kstat_update(kstat_t *ksp, int rw)
{
	struct kstat_ecc_mm_info *kceip = ksp->ks_data;
	struct ce_info *ceip = mem_ce_simm;
	int i = ksp->ks_instance;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	ASSERT(ksp->ks_data != NULL);
	ASSERT(i < mem_ce_simm_size && i >= 0);

	/*
	 * Since we're not using locks, make sure that we don't get partial
	 * data.  The name is always copied before the counters are incremented
	 * so only do this update routine if at least one of the counters is
	 * non-zero, which ensures that ce_count_unum() is done, and the
	 * string is fully copied.
	 */
	if (ceip[i].intermittent_total == 0 &&
	    ceip[i].persistent_total == 0 &&
	    ceip[i].sticky_total == 0) {
		/*
		 * Uninitialized or partially initialized. Ignore.
		 * The ks_data buffer was allocated via kmem_zalloc,
		 * so no need to bzero it.
		 */
		return (0);
	}

	kstat_named_setstr(&kceip->name, ceip[i].name);
	kceip->intermittent_total.value.ui64 = ceip[i].intermittent_total;
	kceip->persistent_total.value.ui64 = ceip[i].persistent_total;
	kceip->sticky_total.value.ui64 = ceip[i].sticky_total;

	return (0);
}

#define	VIS_BLOCKSIZE	64

/*
 * DTrace fault-recovery path for block stores: retry dtrace_blksuword32()
 * with watchpoints on the target block temporarily disabled.
 */
int
dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
{
	int ret, watched;

	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
	ret = dtrace_blksuword32(addr, data, 0);
	if (watched)
		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);

	return (ret);
}

/* CPU faulted-state transition hooks: no work needed on this CPU module. */
/*ARGSUSED*/
void
cpu_faulted_enter(struct cpu *cp)
{
}

/*ARGSUSED*/
void
cpu_faulted_exit(struct cpu *cp)
{
}

/* Page-size bitmasks of large-page TTE sizes this MMU does not support. */
static int mmu_disable_ism_large_pages = ((1 << TTE512K) |
    (1 << TTE32M) | (1 << TTE256M));
static int mmu_disable_large_pages = ((1 << TTE32M) | (1 << TTE256M));

/*
 * The function returns the US_II mmu-specific values for the
44940Sstevel@tonic-gate * hat's disable_large_pages and disable_ism_large_pages variables. 44950Sstevel@tonic-gate */ 44960Sstevel@tonic-gate int 44970Sstevel@tonic-gate mmu_large_pages_disabled(uint_t flag) 44980Sstevel@tonic-gate { 44990Sstevel@tonic-gate int pages_disable = 0; 45000Sstevel@tonic-gate 45010Sstevel@tonic-gate if (flag == HAT_LOAD) { 45020Sstevel@tonic-gate pages_disable = mmu_disable_large_pages; 45030Sstevel@tonic-gate } else if (flag == HAT_LOAD_SHARE) { 45040Sstevel@tonic-gate pages_disable = mmu_disable_ism_large_pages; 45050Sstevel@tonic-gate } 45060Sstevel@tonic-gate return (pages_disable); 45070Sstevel@tonic-gate } 45080Sstevel@tonic-gate 45090Sstevel@tonic-gate /*ARGSUSED*/ 45100Sstevel@tonic-gate void 45110Sstevel@tonic-gate mmu_init_kernel_pgsz(struct hat *hat) 45120Sstevel@tonic-gate { 45130Sstevel@tonic-gate } 45140Sstevel@tonic-gate 45150Sstevel@tonic-gate size_t 45160Sstevel@tonic-gate mmu_get_kernel_lpsize(size_t lpsize) 45170Sstevel@tonic-gate { 45180Sstevel@tonic-gate uint_t tte; 45190Sstevel@tonic-gate 45200Sstevel@tonic-gate if (lpsize == 0) { 45210Sstevel@tonic-gate /* no setting for segkmem_lpsize in /etc/system: use default */ 45220Sstevel@tonic-gate return (MMU_PAGESIZE4M); 45230Sstevel@tonic-gate } 45240Sstevel@tonic-gate 45250Sstevel@tonic-gate for (tte = TTE8K; tte <= TTE4M; tte++) { 45260Sstevel@tonic-gate if (lpsize == TTEBYTES(tte)) 45270Sstevel@tonic-gate return (lpsize); 45280Sstevel@tonic-gate } 45290Sstevel@tonic-gate 45300Sstevel@tonic-gate return (TTEBYTES(TTE8K)); 45310Sstevel@tonic-gate } 4532