xref: /onnv-gate/usr/src/cmd/fm/modules/common/cpumem-retire/cma_main.c (revision 12467:1f2119e1bc03)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
51493Sgavinm  * Common Development and Distribution License (the "License").
61493Sgavinm  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
211193Smws 
220Sstevel@tonic-gate /*
23*12467STrang.Do@Sun.COM  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate #include <cma.h>
270Sstevel@tonic-gate 
287532SSean.Ye@Sun.COM #include <unistd.h>
297532SSean.Ye@Sun.COM #include <fcntl.h>
300Sstevel@tonic-gate #include <strings.h>
310Sstevel@tonic-gate #include <errno.h>
320Sstevel@tonic-gate #include <time.h>
330Sstevel@tonic-gate #include <fm/fmd_api.h>
340Sstevel@tonic-gate #include <sys/fm/protocol.h>
355084Sjohnlev #include <sys/systeminfo.h>
365084Sjohnlev #include <sys/utsname.h>
370Sstevel@tonic-gate 
386111Scy152378 #ifdef sun4v
396111Scy152378 #include <sys/fm/ldom.h>
406111Scy152378 
416111Scy152378 static fmd_hdl_t *init_hdl;
426111Scy152378 ldom_hdl_t *cma_lhp;
436111Scy152378 #endif
446111Scy152378 
457532SSean.Ye@Sun.COM #ifdef i386
467532SSean.Ye@Sun.COM boolean_t cma_is_native;
477532SSean.Ye@Sun.COM #endif
487532SSean.Ye@Sun.COM 
496111Scy152378 extern const char *fmd_fmri_get_platform();
506111Scy152378 
510Sstevel@tonic-gate cma_t cma;
520Sstevel@tonic-gate 
530Sstevel@tonic-gate cma_stats_t cma_stats = {
540Sstevel@tonic-gate 	{ "cpu_flts", FMD_TYPE_UINT64, "cpu faults resolved" },
556111Scy152378 	{ "cpu_repairs", FMD_TYPE_UINT64, "cpu faults repaired" },
560Sstevel@tonic-gate 	{ "cpu_fails", FMD_TYPE_UINT64, "cpu faults unresolveable" },
570Sstevel@tonic-gate 	{ "cpu_blfails", FMD_TYPE_UINT64, "failed cpu blacklists" },
580Sstevel@tonic-gate 	{ "cpu_supp", FMD_TYPE_UINT64, "cpu offlines suppressed" },
590Sstevel@tonic-gate 	{ "cpu_blsupp", FMD_TYPE_UINT64, "cpu blacklists suppressed" },
600Sstevel@tonic-gate 	{ "page_flts", FMD_TYPE_UINT64, "page faults resolved" },
616111Scy152378 	{ "page_repairs", FMD_TYPE_UINT64, "page faults repaired" },
620Sstevel@tonic-gate 	{ "page_fails", FMD_TYPE_UINT64, "page faults unresolveable" },
630Sstevel@tonic-gate 	{ "page_supp", FMD_TYPE_UINT64, "page retires suppressed" },
640Sstevel@tonic-gate 	{ "page_nonent", FMD_TYPE_UINT64, "retires for non-existent fmris" },
650Sstevel@tonic-gate 	{ "bad_flts", FMD_TYPE_UINT64, "invalid fault events received" },
660Sstevel@tonic-gate 	{ "nop_flts", FMD_TYPE_UINT64, "inapplicable fault events received" },
670Sstevel@tonic-gate 	{ "auto_flts", FMD_TYPE_UINT64, "auto-close faults received" }
680Sstevel@tonic-gate };
690Sstevel@tonic-gate 
700Sstevel@tonic-gate typedef struct cma_subscriber {
710Sstevel@tonic-gate 	const char *subr_class;
720Sstevel@tonic-gate 	const char *subr_sname;
730Sstevel@tonic-gate 	uint_t subr_svers;
746111Scy152378 	int (*subr_func)(fmd_hdl_t *, nvlist_t *, nvlist_t *, const char *,
756111Scy152378 	    boolean_t);
760Sstevel@tonic-gate } cma_subscriber_t;
770Sstevel@tonic-gate 
780Sstevel@tonic-gate static const cma_subscriber_t cma_subrs[] = {
797532SSean.Ye@Sun.COM #if defined(i386)
807532SSean.Ye@Sun.COM 	/*
817532SSean.Ye@Sun.COM 	 * On x86, the ASRUs are expected to be in hc scheme.  When
827532SSean.Ye@Sun.COM 	 * cpumem-retire wants to retire a cpu or mem page, it calls the
837532SSean.Ye@Sun.COM 	 * methods registered in the topo node to do that.  The topo
847532SSean.Ye@Sun.COM 	 * enumerator, which necessarily knows all the config info that
857532SSean.Ye@Sun.COM 	 * we'd ever need in deciding what/how to retire etc.  This takes
867532SSean.Ye@Sun.COM 	 * away much of that complexity from the agent into the entity
877532SSean.Ye@Sun.COM 	 * that knows all config/topo information.
887532SSean.Ye@Sun.COM 	 */
897532SSean.Ye@Sun.COM 	{ "fault.memory.page", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
907532SSean.Ye@Sun.COM 	    cma_page_retire },
917532SSean.Ye@Sun.COM 	{ "fault.memory.page_sb", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
927532SSean.Ye@Sun.COM 	    cma_page_retire },
937532SSean.Ye@Sun.COM 	{ "fault.memory.page_ck", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
947532SSean.Ye@Sun.COM 	    cma_page_retire },
957532SSean.Ye@Sun.COM 	{ "fault.memory.page_ue", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
967532SSean.Ye@Sun.COM 	    cma_page_retire },
977532SSean.Ye@Sun.COM 	{ "fault.memory.generic-x86.page_ce", FM_FMRI_SCHEME_HC,
987532SSean.Ye@Sun.COM 	    FM_HC_SCHEME_VERSION, cma_page_retire },
997532SSean.Ye@Sun.COM 	{ "fault.memory.generic-x86.page_ue", FM_FMRI_SCHEME_HC,
1007532SSean.Ye@Sun.COM 	    FM_HC_SCHEME_VERSION, cma_page_retire },
1017532SSean.Ye@Sun.COM 	{ "fault.memory.intel.page_ce", FM_FMRI_SCHEME_HC,
1027532SSean.Ye@Sun.COM 	    FM_HC_SCHEME_VERSION, cma_page_retire },
1037532SSean.Ye@Sun.COM 	{ "fault.memory.intel.page_ue", FM_FMRI_SCHEME_HC,
1047532SSean.Ye@Sun.COM 	    FM_HC_SCHEME_VERSION, cma_page_retire },
1057532SSean.Ye@Sun.COM 	{ "fault.memory.dimm", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
1067532SSean.Ye@Sun.COM 	    NULL },
1077532SSean.Ye@Sun.COM 	{ "fault.memory.dimm_sb", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
1087532SSean.Ye@Sun.COM 	    NULL },
1097532SSean.Ye@Sun.COM 	{ "fault.memory.dimm_ck", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
1107532SSean.Ye@Sun.COM 	    NULL },
1117532SSean.Ye@Sun.COM 	{ "fault.memory.dimm_ue", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
1127532SSean.Ye@Sun.COM 	    NULL },
1137532SSean.Ye@Sun.COM 	{ "fault.memory.generic-x86.dimm_ce", FM_FMRI_SCHEME_HC,
1147532SSean.Ye@Sun.COM 	    FM_HC_SCHEME_VERSION, NULL },
1157532SSean.Ye@Sun.COM 	{ "fault.memory.generic-x86.dimm_ue", FM_FMRI_SCHEME_HC,
1167532SSean.Ye@Sun.COM 	    FM_HC_SCHEME_VERSION, NULL },
1177532SSean.Ye@Sun.COM 	{ "fault.memory.intel.dimm_ce", FM_FMRI_SCHEME_HC,
1187532SSean.Ye@Sun.COM 	    FM_HC_SCHEME_VERSION, NULL },
1197532SSean.Ye@Sun.COM 	{ "fault.memory.intel.dimm_ue", FM_FMRI_SCHEME_HC,
1207532SSean.Ye@Sun.COM 	    FM_HC_SCHEME_VERSION, NULL },
1217532SSean.Ye@Sun.COM 	{ "fault.memory.intel.fbd.*", FM_FMRI_SCHEME_HC,
1227532SSean.Ye@Sun.COM 	    FM_HC_SCHEME_VERSION, NULL },
1237532SSean.Ye@Sun.COM 	{ "fault.memory.dimm_testfail", FM_FMRI_SCHEME_HC,
1247532SSean.Ye@Sun.COM 	    FM_HC_SCHEME_VERSION, NULL },
1257532SSean.Ye@Sun.COM 	{ "fault.memory.bank", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
1267532SSean.Ye@Sun.COM 	    NULL },
1277532SSean.Ye@Sun.COM 	{ "fault.memory.datapath", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
1287532SSean.Ye@Sun.COM 	    NULL },
12910831SYanmin.Sun@Sun.COM 	{ "fault.cpu.intel.quickpath.mem_scrubbing", FM_FMRI_SCHEME_HC,
13010831SYanmin.Sun@Sun.COM 	    FM_HC_SCHEME_VERSION, cma_page_retire },
1317532SSean.Ye@Sun.COM 	{ "fault.cpu.intel.quickpath.*", FM_FMRI_SCHEME_HC,
1327532SSean.Ye@Sun.COM 	    FM_HC_SCHEME_VERSION, NULL },
1337532SSean.Ye@Sun.COM 	{ "fault.cpu.generic-x86.mc", FM_FMRI_SCHEME_HC,
1347532SSean.Ye@Sun.COM 	    FM_HC_SCHEME_VERSION, NULL },
1357532SSean.Ye@Sun.COM 	{ "fault.cpu.intel.dma", FM_FMRI_SCHEME_HC,
1367532SSean.Ye@Sun.COM 	    FM_HC_SCHEME_VERSION, NULL },
1377532SSean.Ye@Sun.COM 	{ "fault.cpu.intel.dma", FM_FMRI_SCHEME_CPU,
1387532SSean.Ye@Sun.COM 	    FM_CPU_SCHEME_VERSION, NULL },
1397532SSean.Ye@Sun.COM 
1407532SSean.Ye@Sun.COM 	/*
1417532SSean.Ye@Sun.COM 	 * The ASRU for cpu faults are in cpu scheme on native and in hc
1427532SSean.Ye@Sun.COM 	 * scheme on xpv.  So each cpu fault class needs to be listed twice.
1437532SSean.Ye@Sun.COM 	 */
1447532SSean.Ye@Sun.COM 
1457532SSean.Ye@Sun.COM 	/*
1467532SSean.Ye@Sun.COM 	 * The following faults do NOT retire a cpu thread,
1477532SSean.Ye@Sun.COM 	 * and therefore must be intercepted before
1488221SSean.Ye@Sun.COM 	 * the default "fault.cpu.*" dispatch to cma_cpu_hc_retire.
1497532SSean.Ye@Sun.COM 	 */
1507532SSean.Ye@Sun.COM 	{ "fault.cpu.amd.dramchannel", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
1517532SSean.Ye@Sun.COM 	    NULL },
1527532SSean.Ye@Sun.COM 	{ "fault.cpu.amd.dramchannel", FM_FMRI_SCHEME_CPU,
1537532SSean.Ye@Sun.COM 	    FM_CPU_SCHEME_VERSION, NULL },
1547532SSean.Ye@Sun.COM 	{ "fault.cpu.generic-x86.bus_interconnect_memory", FM_FMRI_SCHEME_HC,
1557532SSean.Ye@Sun.COM 	    FM_HC_SCHEME_VERSION, NULL },
1567532SSean.Ye@Sun.COM 	{ "fault.cpu.generic-x86.bus_interconnect_memory", FM_FMRI_SCHEME_CPU,
1577532SSean.Ye@Sun.COM 	    FM_CPU_SCHEME_VERSION, NULL },
1587532SSean.Ye@Sun.COM 	{ "fault.cpu.generic-x86.bus_interconnect_io", FM_FMRI_SCHEME_HC,
1597532SSean.Ye@Sun.COM 	    FM_HC_SCHEME_VERSION, NULL },
1607532SSean.Ye@Sun.COM 	{ "fault.cpu.generic-x86.bus_interconnect_io", FM_FMRI_SCHEME_CPU,
1617532SSean.Ye@Sun.COM 	    FM_CPU_SCHEME_VERSION, NULL },
1627532SSean.Ye@Sun.COM 	{ "fault.cpu.generic-x86.bus_interconnect", FM_FMRI_SCHEME_HC,
1637532SSean.Ye@Sun.COM 	    FM_HC_SCHEME_VERSION, NULL },
1647532SSean.Ye@Sun.COM 	{ "fault.cpu.generic-x86.bus_interconnect", FM_FMRI_SCHEME_CPU,
1657532SSean.Ye@Sun.COM 	    FM_CPU_SCHEME_VERSION, NULL },
1667532SSean.Ye@Sun.COM 	{ "fault.cpu.intel.bus_interconnect_memory", FM_FMRI_SCHEME_HC,
1677532SSean.Ye@Sun.COM 	    FM_HC_SCHEME_VERSION, NULL },
1687532SSean.Ye@Sun.COM 	{ "fault.cpu.intel.bus_interconnect_memory", FM_FMRI_SCHEME_CPU,
1697532SSean.Ye@Sun.COM 	    FM_CPU_SCHEME_VERSION, NULL },
1707532SSean.Ye@Sun.COM 	{ "fault.cpu.intel.bus_interconnect_io", FM_FMRI_SCHEME_HC,
1717532SSean.Ye@Sun.COM 	    FM_HC_SCHEME_VERSION, NULL },
1727532SSean.Ye@Sun.COM 	{ "fault.cpu.intel.bus_interconnect_io", FM_FMRI_SCHEME_CPU,
1737532SSean.Ye@Sun.COM 	    FM_CPU_SCHEME_VERSION, NULL },
1747532SSean.Ye@Sun.COM 	{ "fault.cpu.intel.bus_interconnect", FM_FMRI_SCHEME_HC,
1757532SSean.Ye@Sun.COM 	    FM_HC_SCHEME_VERSION, NULL },
1767532SSean.Ye@Sun.COM 	{ "fault.cpu.intel.bus_interconnect", FM_FMRI_SCHEME_CPU,
1777532SSean.Ye@Sun.COM 	    FM_CPU_SCHEME_VERSION, NULL },
1787532SSean.Ye@Sun.COM 	{ "fault.cpu.intel.nb.*", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
1797532SSean.Ye@Sun.COM 	    NULL },
1807532SSean.Ye@Sun.COM 	{ "fault.cpu.intel.nb.*", FM_FMRI_SCHEME_CPU, FM_CPU_SCHEME_VERSION,
1817532SSean.Ye@Sun.COM 	    NULL },
1827532SSean.Ye@Sun.COM 	{ "fault.cpu.intel.dma", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
1837532SSean.Ye@Sun.COM 	    NULL },
1847532SSean.Ye@Sun.COM 	{ "fault.cpu.intel.dma", FM_FMRI_SCHEME_CPU, FM_CPU_SCHEME_VERSION,
1857532SSean.Ye@Sun.COM 	    NULL },
1867532SSean.Ye@Sun.COM 	{ "fault.cpu.*", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
1877532SSean.Ye@Sun.COM 	    cma_cpu_hc_retire },
1887532SSean.Ye@Sun.COM 	{ "fault.cpu.*", FM_FMRI_SCHEME_CPU, FM_CPU_SCHEME_VERSION,
1897532SSean.Ye@Sun.COM 	    cma_cpu_hc_retire },
1907532SSean.Ye@Sun.COM #elif defined(sun4v)
1918346SScott.Davenport@Sun.COM 	/*
1928346SScott.Davenport@Sun.COM 	 * The following are PI sun4v faults
1938346SScott.Davenport@Sun.COM 	 */
1948346SScott.Davenport@Sun.COM 	{ "fault.memory.memlink", FM_FMRI_SCHEME_HC,
1958346SScott.Davenport@Sun.COM 	    FM_HC_SCHEME_VERSION, NULL },
1968346SScott.Davenport@Sun.COM 	{ "fault.memory.memlink-uc", FM_FMRI_SCHEME_HC,
1978346SScott.Davenport@Sun.COM 	    FM_HC_SCHEME_VERSION, NULL },
1988346SScott.Davenport@Sun.COM 	{ "fault.memory.memlink-failover", FM_FMRI_SCHEME_HC,
1998346SScott.Davenport@Sun.COM 	    FM_HC_SCHEME_VERSION, NULL },
2008346SScott.Davenport@Sun.COM 	{ "fault.memory.dimm-ue-imminent", FM_FMRI_SCHEME_HC,
2018346SScott.Davenport@Sun.COM 	    FM_HC_SCHEME_VERSION, NULL },
2028346SScott.Davenport@Sun.COM 	{ "fault.memory.dram-ue-imminent", FM_FMRI_SCHEME_HC,
2038346SScott.Davenport@Sun.COM 	    FM_HC_SCHEME_VERSION, NULL },
2048346SScott.Davenport@Sun.COM 	{ "fault.memory.dimm-page-retires-excessive", FM_FMRI_SCHEME_HC,
2058346SScott.Davenport@Sun.COM 	    FM_HC_SCHEME_VERSION, NULL },
2066111Scy152378 	{ "fault.memory.page", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
2076111Scy152378 	    cma_page_retire },
2086111Scy152378 	{ "fault.memory.dimm", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
2096111Scy152378 	    NULL },
2106111Scy152378 	{ "fault.memory.dimm_sb", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
2116111Scy152378 	    NULL },
2126111Scy152378 	{ "fault.memory.dimm_ck", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
2136111Scy152378 	    NULL },
2146111Scy152378 	{ "fault.memory.dimm_ue", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
2156111Scy152378 	    NULL },
2168297STom.Pothier@Sun.COM 	{ "fault.memory.dimm-page-retires-excessive", FM_FMRI_SCHEME_MEM,
2178297STom.Pothier@Sun.COM 	    FM_MEM_SCHEME_VERSION, NULL },
2188297STom.Pothier@Sun.COM 	{ "fault.memory.dimm-ue-imminent", FM_FMRI_SCHEME_MEM,
2198297STom.Pothier@Sun.COM 	    FM_MEM_SCHEME_VERSION, NULL },
2208297STom.Pothier@Sun.COM 	{ "fault.memory.dram-ue-imminent", FM_FMRI_SCHEME_MEM,
2218297STom.Pothier@Sun.COM 	    FM_MEM_SCHEME_VERSION, NULL },
2226111Scy152378 	{ "fault.memory.bank", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
2236111Scy152378 	    NULL },
2246111Scy152378 	{ "fault.memory.datapath", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
2256111Scy152378 	    NULL },
226*12467STrang.Do@Sun.COM 	{ "fault.memory.datapath", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
227*12467STrang.Do@Sun.COM 	    NULL },
2286111Scy152378 	{ "fault.memory.link-c", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
2296111Scy152378 	    NULL },
2306111Scy152378 	{ "fault.memory.link-u", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
2316111Scy152378 	    NULL },
2326111Scy152378 	{ "fault.memory.link-f", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
2336111Scy152378 	    NULL },
2347374SJakub.Jermar@Sun.COM 	{ "fault.memory.link-c", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
2357374SJakub.Jermar@Sun.COM 	    NULL },
2367374SJakub.Jermar@Sun.COM 	{ "fault.memory.link-u", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
2377374SJakub.Jermar@Sun.COM 	    NULL },
2387374SJakub.Jermar@Sun.COM 	{ "fault.memory.link-f", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
2397374SJakub.Jermar@Sun.COM 	    NULL },
2406111Scy152378 
2416111Scy152378 	/*
2426111Scy152378 	 * The following ultraSPARC-T1/T2 faults do NOT retire a cpu thread,
2436111Scy152378 	 * and therefore must be intercepted before
2448221SSean.Ye@Sun.COM 	 * the default "fault.cpu.*" dispatch to cma_cpu_hc_retire.
2456111Scy152378 	 */
2466111Scy152378 	{ "fault.cpu.*.l2cachedata", FM_FMRI_SCHEME_CPU,
2476111Scy152378 	    FM_CPU_SCHEME_VERSION, NULL },
2486111Scy152378 	{ "fault.cpu.*.l2cachetag", FM_FMRI_SCHEME_CPU,
2496111Scy152378 	    FM_CPU_SCHEME_VERSION, NULL },
2506111Scy152378 	{ "fault.cpu.*.l2cachectl", FM_FMRI_SCHEME_CPU,
2516111Scy152378 	    FM_CPU_SCHEME_VERSION, NULL },
2526111Scy152378 	{ "fault.cpu.*.l2data-c", FM_FMRI_SCHEME_CPU,
2536111Scy152378 	    FM_CPU_SCHEME_VERSION, NULL },
2546111Scy152378 	{ "fault.cpu.*.l2data-u", FM_FMRI_SCHEME_CPU,
2556111Scy152378 	    FM_CPU_SCHEME_VERSION, NULL },
2566111Scy152378 	{ "fault.cpu.*.mau", FM_FMRI_SCHEME_CPU,
2576111Scy152378 	    FM_CPU_SCHEME_VERSION, NULL },
2586111Scy152378 	{ "fault.cpu.*.lfu-u", FM_FMRI_SCHEME_CPU,
2596111Scy152378 	    FM_CPU_SCHEME_VERSION, NULL },
2606111Scy152378 	{ "fault.cpu.*.lfu-f", FM_FMRI_SCHEME_CPU,
2616111Scy152378 	    FM_CPU_SCHEME_VERSION, NULL },
2626111Scy152378 	{ "fault.cpu.*.lfu-p", FM_FMRI_SCHEME_CPU,
2636111Scy152378 	    FM_CPU_SCHEME_VERSION, NULL },
2647374SJakub.Jermar@Sun.COM 	{ "fault.cpu.ultraSPARC-T1.freg", FM_FMRI_SCHEME_CPU,
2657374SJakub.Jermar@Sun.COM 	    FM_CPU_SCHEME_VERSION, NULL },
2667374SJakub.Jermar@Sun.COM 	{ "fault.cpu.ultraSPARC-T1.l2cachedata", FM_FMRI_SCHEME_CPU,
2677374SJakub.Jermar@Sun.COM 	    FM_CPU_SCHEME_VERSION, NULL },
2687374SJakub.Jermar@Sun.COM 	{ "fault.cpu.ultraSPARC-T1.l2cachetag", FM_FMRI_SCHEME_CPU,
2697374SJakub.Jermar@Sun.COM 	    FM_CPU_SCHEME_VERSION, NULL },
2707374SJakub.Jermar@Sun.COM 	{ "fault.cpu.ultraSPARC-T1.l2cachectl", FM_FMRI_SCHEME_CPU,
2717374SJakub.Jermar@Sun.COM 	    FM_CPU_SCHEME_VERSION, NULL },
2727374SJakub.Jermar@Sun.COM 	{ "fault.cpu.ultraSPARC-T1.mau", FM_FMRI_SCHEME_CPU,
2737374SJakub.Jermar@Sun.COM 	    FM_CPU_SCHEME_VERSION, NULL },
2747374SJakub.Jermar@Sun.COM 	{ "fault.cpu.ultraSPARC-T2plus.chip", FM_FMRI_SCHEME_HC,
2757374SJakub.Jermar@Sun.COM 	    FM_HC_SCHEME_VERSION, NULL },
2768221SSean.Ye@Sun.COM 	{ "fault.cpu.*", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
2778221SSean.Ye@Sun.COM 	    cma_cpu_hc_retire },
2786111Scy152378 	{ "fault.cpu.*", FM_FMRI_SCHEME_CPU, FM_CPU_SCHEME_VERSION,
2798221SSean.Ye@Sun.COM 	    cma_cpu_hc_retire },
2806111Scy152378 #elif defined(opl)
2816111Scy152378 	{ "fault.memory.page", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
2826111Scy152378 	    cma_page_retire },
2836111Scy152378 	{ "fault.memory.dimm", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
2846111Scy152378 	    NULL },
2858297STom.Pothier@Sun.COM 	{ "fault.memory.dimm-page-retires-excessive", FM_FMRI_SCHEME_MEM,
2868297STom.Pothier@Sun.COM 	    FM_MEM_SCHEME_VERSION, NULL },
2878297STom.Pothier@Sun.COM 	{ "fault.memory.dimm-ue-imminent", FM_FMRI_SCHEME_MEM,
2888297STom.Pothier@Sun.COM 	    FM_MEM_SCHEME_VERSION, NULL },
2898297STom.Pothier@Sun.COM 	{ "fault.memory.dram-ue-imminent", FM_FMRI_SCHEME_MEM,
2908297STom.Pothier@Sun.COM 	    FM_MEM_SCHEME_VERSION, NULL },
2916111Scy152378 	{ "fault.memory.bank", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
2926111Scy152378 	    NULL },
2936111Scy152378 	{ "fault.cpu.SPARC64-VI.*", FM_FMRI_SCHEME_CPU, FM_CPU_SCHEME_VERSION,
2948221SSean.Ye@Sun.COM 	    cma_cpu_cpu_retire },
2956111Scy152378 	{ "fault.cpu.SPARC64-VII.*", FM_FMRI_SCHEME_CPU, FM_CPU_SCHEME_VERSION,
2968221SSean.Ye@Sun.COM 	    cma_cpu_cpu_retire },
2976111Scy152378 	{ "fault.chassis.SPARC-Enterprise.cpu.SPARC64-VI.core.se",
2986111Scy152378 		FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION, cma_cpu_hc_retire },
2996111Scy152378 	{ "fault.chassis.SPARC-Enterprise.cpu.SPARC64-VI.core.se-offlinereq",
3006111Scy152378 		FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION, cma_cpu_hc_retire },
3016111Scy152378 	{ "fault.chassis.SPARC-Enterprise.cpu.SPARC64-VI.core.ce",
3026111Scy152378 		FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION, cma_cpu_hc_retire },
3036111Scy152378 	{ "fault.chassis.SPARC-Enterprise.cpu.SPARC64-VI.core.ce-offlinereq",
3046111Scy152378 		FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION, cma_cpu_hc_retire },
3056111Scy152378 	{ "fault.chassis.SPARC-Enterprise.cpu.SPARC64-VII.core.se",
3066111Scy152378 		FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION, cma_cpu_hc_retire },
3076111Scy152378 	{ "fault.chassis.SPARC-Enterprise.cpu.SPARC64-VII.core.se-offlinereq",
3086111Scy152378 		FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION, cma_cpu_hc_retire },
3096111Scy152378 	{ "fault.chassis.SPARC-Enterprise.cpu.SPARC64-VII.core.ce",
3106111Scy152378 		FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION, cma_cpu_hc_retire },
3116111Scy152378 	{ "fault.chassis.SPARC-Enterprise.cpu.SPARC64-VII.core.ce-offlinereq",
3126111Scy152378 		FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION, cma_cpu_hc_retire },
3137532SSean.Ye@Sun.COM #else
3147532SSean.Ye@Sun.COM 	/*
3157532SSean.Ye@Sun.COM 	 * For platforms excluding i386, sun4v and opl.
3167532SSean.Ye@Sun.COM 	 */
3170Sstevel@tonic-gate 	{ "fault.memory.page", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
3180Sstevel@tonic-gate 	    cma_page_retire },
3192869Sgavinm 	{ "fault.memory.page_sb", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
3202869Sgavinm 	    cma_page_retire },
3212869Sgavinm 	{ "fault.memory.page_ck", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
3222869Sgavinm 	    cma_page_retire },
3232869Sgavinm 	{ "fault.memory.page_ue", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
3242869Sgavinm 	    cma_page_retire },
3250Sstevel@tonic-gate 	{ "fault.memory.dimm", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
3260Sstevel@tonic-gate 	    NULL },
3271493Sgavinm 	{ "fault.memory.dimm_sb", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
3281414Scindi 	    NULL },
3291493Sgavinm 	{ "fault.memory.dimm_ck", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
3301414Scindi 	    NULL },
3311493Sgavinm 	{ "fault.memory.dimm_ue", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
3321414Scindi 	    NULL },
3338297STom.Pothier@Sun.COM 	{ "fault.memory.dimm-page-retires-excessive", FM_FMRI_SCHEME_MEM,
3348297STom.Pothier@Sun.COM 	    FM_MEM_SCHEME_VERSION, NULL },
3358297STom.Pothier@Sun.COM 	{ "fault.memory.dimm-ue-imminent", FM_FMRI_SCHEME_MEM,
3368297STom.Pothier@Sun.COM 	    FM_MEM_SCHEME_VERSION, NULL },
3378297STom.Pothier@Sun.COM 	{ "fault.memory.dram-ue-imminent", FM_FMRI_SCHEME_MEM,
3388297STom.Pothier@Sun.COM 	    FM_MEM_SCHEME_VERSION, NULL },
3392869Sgavinm 	{ "fault.memory.dimm_testfail", FM_FMRI_SCHEME_MEM,
3402869Sgavinm 	    FM_MEM_SCHEME_VERSION, NULL },
3410Sstevel@tonic-gate 	{ "fault.memory.bank", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
3420Sstevel@tonic-gate 	    NULL },
3430Sstevel@tonic-gate 	{ "fault.memory.datapath", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
3440Sstevel@tonic-gate 	    NULL },
345*12467STrang.Do@Sun.COM 	{ "fault.memory.datapath", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
346*12467STrang.Do@Sun.COM 	    NULL },
347*12467STrang.Do@Sun.COM 	{ "fault.memory.datapath", FM_FMRI_SCHEME_CPU, FM_CPU_SCHEME_VERSION,
348*12467STrang.Do@Sun.COM 	    NULL },
349962Stsien 
350962Stsien 	/*
3512869Sgavinm 	 * The following faults do NOT retire a cpu thread,
352962Stsien 	 * and therefore must be intercepted before
3538221SSean.Ye@Sun.COM 	 * the default "fault.cpu.*" dispatch to cma_cpu_cpu_retire.
354962Stsien 	 */
3556330Sjc25722 	{ "fault.cpu.ultraSPARC-IVplus.l2cachedata-line",
3566330Sjc25722 	    FM_FMRI_SCHEME_CPU, FM_CPU_SCHEME_VERSION,
3576330Sjc25722 	    cma_cache_way_retire },
3586330Sjc25722 	{ "fault.cpu.ultraSPARC-IVplus.l3cachedata-line",
3596330Sjc25722 	    FM_FMRI_SCHEME_CPU, FM_CPU_SCHEME_VERSION,
3606330Sjc25722 	    cma_cache_way_retire },
3616330Sjc25722 	{ "fault.cpu.ultraSPARC-IVplus.l2cachetag-line",
3626330Sjc25722 	    FM_FMRI_SCHEME_CPU, FM_CPU_SCHEME_VERSION,
3636330Sjc25722 	    cma_cache_way_retire },
3646330Sjc25722 	{ "fault.cpu.ultraSPARC-IVplus.l3cachetag-line",
3656330Sjc25722 	    FM_FMRI_SCHEME_CPU, FM_CPU_SCHEME_VERSION,
3666330Sjc25722 	    cma_cache_way_retire },
3677374SJakub.Jermar@Sun.COM 
3685254Sgavinm 	/*
3697532SSean.Ye@Sun.COM 	 * Default "fault.cpu.*" for "cpu" scheme ASRU dispatch.
3705254Sgavinm 	 */
371962Stsien 	{ "fault.cpu.*", FM_FMRI_SCHEME_CPU, FM_CPU_SCHEME_VERSION,
3728221SSean.Ye@Sun.COM 	    cma_cpu_cpu_retire },
3736111Scy152378 #endif
3740Sstevel@tonic-gate 	{ NULL, NULL, 0, NULL }
3750Sstevel@tonic-gate };
3760Sstevel@tonic-gate 
3770Sstevel@tonic-gate static const cma_subscriber_t *
nvl2subr(fmd_hdl_t * hdl,nvlist_t * nvl,nvlist_t ** asrup)3780Sstevel@tonic-gate nvl2subr(fmd_hdl_t *hdl, nvlist_t *nvl, nvlist_t **asrup)
3790Sstevel@tonic-gate {
3800Sstevel@tonic-gate 	const cma_subscriber_t *sp;
3810Sstevel@tonic-gate 	nvlist_t *asru;
3820Sstevel@tonic-gate 	char *scheme;
3830Sstevel@tonic-gate 	uint8_t version;
3847197Sstephh 	boolean_t retire;
3857197Sstephh 
3867197Sstephh 	if (nvlist_lookup_boolean_value(nvl, FM_SUSPECT_RETIRE, &retire) == 0 &&
3877197Sstephh 	    retire == 0) {
3887197Sstephh 		fmd_hdl_debug(hdl, "cma_recv: retire suppressed");
3897197Sstephh 		return (NULL);
3907197Sstephh 	}
3910Sstevel@tonic-gate 
3920Sstevel@tonic-gate 	if (nvlist_lookup_nvlist(nvl, FM_FAULT_ASRU, &asru) != 0 ||
3930Sstevel@tonic-gate 	    nvlist_lookup_string(asru, FM_FMRI_SCHEME, &scheme) != 0 ||
3940Sstevel@tonic-gate 	    nvlist_lookup_uint8(asru, FM_VERSION, &version) != 0) {
3950Sstevel@tonic-gate 		cma_stats.bad_flts.fmds_value.ui64++;
3960Sstevel@tonic-gate 		return (NULL);
3970Sstevel@tonic-gate 	}
3980Sstevel@tonic-gate 
3990Sstevel@tonic-gate 	for (sp = cma_subrs; sp->subr_class != NULL; sp++) {
4000Sstevel@tonic-gate 		if (fmd_nvl_class_match(hdl, nvl, sp->subr_class) &&
4010Sstevel@tonic-gate 		    strcmp(scheme, sp->subr_sname) == 0 &&
4020Sstevel@tonic-gate 		    version <= sp->subr_svers) {
4030Sstevel@tonic-gate 			*asrup = asru;
4040Sstevel@tonic-gate 			return (sp);
4050Sstevel@tonic-gate 		}
4060Sstevel@tonic-gate 	}
4070Sstevel@tonic-gate 
4080Sstevel@tonic-gate 	cma_stats.nop_flts.fmds_value.ui64++;
4090Sstevel@tonic-gate 	return (NULL);
4100Sstevel@tonic-gate }
4110Sstevel@tonic-gate 
4120Sstevel@tonic-gate static void
cma_recv_list(fmd_hdl_t * hdl,nvlist_t * nvl,const char * class)4137275Sstephh cma_recv_list(fmd_hdl_t *hdl, nvlist_t *nvl, const char *class)
4140Sstevel@tonic-gate {
4150Sstevel@tonic-gate 	char *uuid = NULL;
41610656SStephen.Hanson@Sun.COM 	nvlist_t **nva, **save_nva;
41710656SStephen.Hanson@Sun.COM 	uint_t nvc = 0, save_nvc;
4181772Sjl139090 	uint_t keepopen;
4190Sstevel@tonic-gate 	int err = 0;
4207470SScott.Davenport@Sun.COM 	nvlist_t *asru = NULL;
4217256Sjc25722 	uint32_t index;
4220Sstevel@tonic-gate 
4230Sstevel@tonic-gate 	err |= nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid);
4240Sstevel@tonic-gate 	err |= nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
4250Sstevel@tonic-gate 	    &nva, &nvc);
4260Sstevel@tonic-gate 	if (err != 0) {
4270Sstevel@tonic-gate 		cma_stats.bad_flts.fmds_value.ui64++;
4280Sstevel@tonic-gate 		return;
4290Sstevel@tonic-gate 	}
4300Sstevel@tonic-gate 
43110656SStephen.Hanson@Sun.COM 	save_nvc = keepopen = nvc;
43210656SStephen.Hanson@Sun.COM 	save_nva = nva;
4337275Sstephh 	while (nvc-- != 0 && (strcmp(class, FM_LIST_SUSPECT_CLASS) != 0 ||
4347275Sstephh 	    !fmd_case_uuclosed(hdl, uuid))) {
4350Sstevel@tonic-gate 		nvlist_t *nvl = *nva++;
4360Sstevel@tonic-gate 		const cma_subscriber_t *subr;
4377275Sstephh 		int has_fault;
4380Sstevel@tonic-gate 
4390Sstevel@tonic-gate 		if ((subr = nvl2subr(hdl, nvl, &asru)) == NULL)
4400Sstevel@tonic-gate 			continue;
4410Sstevel@tonic-gate 
4421772Sjl139090 		/*
4431772Sjl139090 		 * A handler returns CMA_RA_SUCCESS to indicate that
4441772Sjl139090 		 * from this suspects  point-of-view the case may be
4451772Sjl139090 		 * closed, CMA_RA_FAILURE otherwise.
4461772Sjl139090 		 * A handler must not close the case itself.
4471772Sjl139090 		 */
4481772Sjl139090 		if (subr->subr_func != NULL) {
4497275Sstephh 			has_fault = fmd_nvl_fmri_has_fault(hdl, asru,
4507275Sstephh 			    FMD_HAS_FAULT_ASRU, NULL);
4517275Sstephh 			if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0) {
4527275Sstephh 				if (has_fault == 1)
4537275Sstephh 					err = subr->subr_func(hdl, nvl, asru,
4547275Sstephh 					    uuid, 0);
4557275Sstephh 			} else {
4567275Sstephh 				if (has_fault == 0)
4577275Sstephh 					err = subr->subr_func(hdl, nvl, asru,
4587275Sstephh 					    uuid, 1);
4597275Sstephh 			}
4601772Sjl139090 			if (err == CMA_RA_SUCCESS)
4611772Sjl139090 				keepopen--;
4621772Sjl139090 		}
4630Sstevel@tonic-gate 	}
4647532SSean.Ye@Sun.COM 
4657256Sjc25722 	/*
46610656SStephen.Hanson@Sun.COM 	 * Run though again to catch any new faults in list.updated.
46710656SStephen.Hanson@Sun.COM 	 */
46810656SStephen.Hanson@Sun.COM 	while (save_nvc-- != 0 && (strcmp(class, FM_LIST_UPDATED_CLASS) == 0)) {
46910656SStephen.Hanson@Sun.COM 		nvlist_t *nvl = *save_nva++;
47010656SStephen.Hanson@Sun.COM 		const cma_subscriber_t *subr;
47110656SStephen.Hanson@Sun.COM 		int has_fault;
47210656SStephen.Hanson@Sun.COM 
47310656SStephen.Hanson@Sun.COM 		if ((subr = nvl2subr(hdl, nvl, &asru)) == NULL)
47410656SStephen.Hanson@Sun.COM 			continue;
47510656SStephen.Hanson@Sun.COM 		if (subr->subr_func != NULL) {
47610656SStephen.Hanson@Sun.COM 			has_fault = fmd_nvl_fmri_has_fault(hdl, asru,
47710656SStephen.Hanson@Sun.COM 			    FMD_HAS_FAULT_ASRU, NULL);
47810656SStephen.Hanson@Sun.COM 			if (has_fault == 1)
47910656SStephen.Hanson@Sun.COM 				err = subr->subr_func(hdl, nvl, asru, uuid, 0);
48010656SStephen.Hanson@Sun.COM 		}
48110656SStephen.Hanson@Sun.COM 	}
48210656SStephen.Hanson@Sun.COM 
48310656SStephen.Hanson@Sun.COM 	/*
4847256Sjc25722 	 * Do not close the case if we are handling cache faults.
4857256Sjc25722 	 */
4867470SScott.Davenport@Sun.COM 	if (asru != NULL) {
4877470SScott.Davenport@Sun.COM 		if (nvlist_lookup_uint32(asru, FM_FMRI_CPU_CACHE_INDEX,
4887470SScott.Davenport@Sun.COM 		    &index) != 0) {
4897470SScott.Davenport@Sun.COM 			if (!keepopen && strcmp(class,
4907470SScott.Davenport@Sun.COM 			    FM_LIST_SUSPECT_CLASS) == 0) {
4917470SScott.Davenport@Sun.COM 				fmd_case_uuclose(hdl, uuid);
4927470SScott.Davenport@Sun.COM 			}
4937256Sjc25722 		}
4947256Sjc25722 	}
4957532SSean.Ye@Sun.COM 
4967275Sstephh 	if (!keepopen && strcmp(class, FM_LIST_REPAIRED_CLASS) == 0)
4977275Sstephh 		fmd_case_uuresolved(hdl, uuid);
4980Sstevel@tonic-gate }
4990Sstevel@tonic-gate 
5000Sstevel@tonic-gate static void
cma_recv_one(fmd_hdl_t * hdl,nvlist_t * nvl)5010Sstevel@tonic-gate cma_recv_one(fmd_hdl_t *hdl, nvlist_t *nvl)
5020Sstevel@tonic-gate {
5030Sstevel@tonic-gate 	const cma_subscriber_t *subr;
5040Sstevel@tonic-gate 	nvlist_t *asru;
5050Sstevel@tonic-gate 
5060Sstevel@tonic-gate 	if ((subr = nvl2subr(hdl, nvl, &asru)) == NULL)
5070Sstevel@tonic-gate 		return;
5080Sstevel@tonic-gate 
5097275Sstephh 	if (subr->subr_func != NULL) {
5107275Sstephh 		if (fmd_nvl_fmri_has_fault(hdl, asru,
5117275Sstephh 		    FMD_HAS_FAULT_ASRU, NULL) == 1)
5127275Sstephh 			(void) subr->subr_func(hdl, nvl, asru, NULL, 0);
5137275Sstephh 	}
5140Sstevel@tonic-gate }
5150Sstevel@tonic-gate 
5160Sstevel@tonic-gate /*ARGSUSED*/
5170Sstevel@tonic-gate static void
cma_recv(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class)5180Sstevel@tonic-gate cma_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
5190Sstevel@tonic-gate {
5200Sstevel@tonic-gate 	fmd_hdl_debug(hdl, "received %s\n", class);
5210Sstevel@tonic-gate 
5229120SStephen.Hanson@Sun.COM 	if (strcmp(class, FM_LIST_RESOLVED_CLASS) == 0)
5239120SStephen.Hanson@Sun.COM 		return;
5249120SStephen.Hanson@Sun.COM 
5256111Scy152378 	if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 ||
5267275Sstephh 	    strcmp(class, FM_LIST_REPAIRED_CLASS) == 0 ||
5277275Sstephh 	    strcmp(class, FM_LIST_UPDATED_CLASS) == 0)
5287275Sstephh 		cma_recv_list(hdl, nvl, class);
5290Sstevel@tonic-gate 	else
5300Sstevel@tonic-gate 		cma_recv_one(hdl, nvl);
5310Sstevel@tonic-gate }
5320Sstevel@tonic-gate 
5330Sstevel@tonic-gate /*ARGSUSED*/
5340Sstevel@tonic-gate static void
cma_timeout(fmd_hdl_t * hdl,id_t id,void * arg)5350Sstevel@tonic-gate cma_timeout(fmd_hdl_t *hdl, id_t id, void *arg)
5360Sstevel@tonic-gate {
5370Sstevel@tonic-gate 	if (id == cma.cma_page_timerid)
5380Sstevel@tonic-gate 		cma_page_retry(hdl);
5396111Scy152378 #ifdef sun4v
5406111Scy152378 	/*
5416111Scy152378 	 * cpu offline/online needs to be retried on sun4v because
5426111Scy152378 	 * ldom request can be asynchronous.
5436111Scy152378 	 */
5446111Scy152378 	else if (id == cma.cma_cpu_timerid)
5456111Scy152378 		cma_cpu_retry(hdl);
5466111Scy152378 #endif
5470Sstevel@tonic-gate }
5480Sstevel@tonic-gate 
5496111Scy152378 #ifdef sun4v
5506111Scy152378 static void *
cma_init_alloc(size_t size)5516111Scy152378 cma_init_alloc(size_t size)
5526111Scy152378 {
5536111Scy152378 	return (fmd_hdl_alloc(init_hdl, size, FMD_SLEEP));
5546111Scy152378 }
5556111Scy152378 
5566111Scy152378 static void
cma_init_free(void * addr,size_t size)5576111Scy152378 cma_init_free(void *addr, size_t size)
5586111Scy152378 {
5596111Scy152378 	fmd_hdl_free(init_hdl, addr, size);
5606111Scy152378 }
5616111Scy152378 #endif
5626111Scy152378 
5630Sstevel@tonic-gate static const fmd_hdl_ops_t fmd_ops = {
5640Sstevel@tonic-gate 	cma_recv,	/* fmdo_recv */
5650Sstevel@tonic-gate 	cma_timeout,	/* fmdo_timeout */
5660Sstevel@tonic-gate 	NULL,		/* fmdo_close */
5670Sstevel@tonic-gate 	NULL,		/* fmdo_stats */
5680Sstevel@tonic-gate 	NULL,		/* fmdo_gc */
5690Sstevel@tonic-gate };
5700Sstevel@tonic-gate 
5710Sstevel@tonic-gate static const fmd_prop_t fmd_props[] = {
5720Sstevel@tonic-gate 	{ "cpu_tries", FMD_TYPE_UINT32, "10" },
5730Sstevel@tonic-gate 	{ "cpu_delay", FMD_TYPE_TIME, "1sec" },
5746111Scy152378 #ifdef sun4v
5756111Scy152378 	{ "cpu_ret_mindelay", FMD_TYPE_TIME, "5sec" },
5766111Scy152378 	{ "cpu_ret_maxdelay", FMD_TYPE_TIME, "5min" },
5776111Scy152378 #endif /* sun4v */
5780Sstevel@tonic-gate 	{ "cpu_offline_enable", FMD_TYPE_BOOL, "true" },
5796111Scy152378 	{ "cpu_online_enable", FMD_TYPE_BOOL, "true" },
5800Sstevel@tonic-gate 	{ "cpu_forced_offline", FMD_TYPE_BOOL, "true" },
5816111Scy152378 #ifdef opl
5826111Scy152378 	{ "cpu_blacklist_enable", FMD_TYPE_BOOL, "false" },
5836111Scy152378 	{ "cpu_unblacklist_enable", FMD_TYPE_BOOL, "false" },
5846111Scy152378 #else
5850Sstevel@tonic-gate 	{ "cpu_blacklist_enable", FMD_TYPE_BOOL, "true" },
5866111Scy152378 	{ "cpu_unblacklist_enable", FMD_TYPE_BOOL, "true" },
5876111Scy152378 #endif /* opl */
5880Sstevel@tonic-gate 	{ "page_ret_mindelay", FMD_TYPE_TIME, "1sec" },
5890Sstevel@tonic-gate 	{ "page_ret_maxdelay", FMD_TYPE_TIME, "5min" },
5900Sstevel@tonic-gate 	{ "page_retire_enable", FMD_TYPE_BOOL, "true" },
5916111Scy152378 	{ "page_unretire_enable", FMD_TYPE_BOOL, "true" },
5920Sstevel@tonic-gate 	{ NULL, 0, NULL }
5930Sstevel@tonic-gate };
5940Sstevel@tonic-gate 
5950Sstevel@tonic-gate static const fmd_hdl_info_t fmd_info = {
5960Sstevel@tonic-gate 	"CPU/Memory Retire Agent", CMA_VERSION, &fmd_ops, fmd_props
5970Sstevel@tonic-gate };
5980Sstevel@tonic-gate 
5990Sstevel@tonic-gate void
_fmd_init(fmd_hdl_t * hdl)6000Sstevel@tonic-gate _fmd_init(fmd_hdl_t *hdl)
6010Sstevel@tonic-gate {
6020Sstevel@tonic-gate 	hrtime_t nsec;
6036111Scy152378 #ifdef i386
6047532SSean.Ye@Sun.COM 	char buf[BUFSIZ];
6057532SSean.Ye@Sun.COM 	const char *dom0 = "control_d";
6067532SSean.Ye@Sun.COM 
6075084Sjohnlev 	/*
6087532SSean.Ye@Sun.COM 	 * Abort the cpumem-retire module if Solaris is running under DomU.
6095084Sjohnlev 	 */
6107532SSean.Ye@Sun.COM 	if (sysinfo(SI_PLATFORM, buf, sizeof (buf)) == -1)
6117532SSean.Ye@Sun.COM 		return;
6127532SSean.Ye@Sun.COM 
6137532SSean.Ye@Sun.COM 	if (strncmp(buf, "i86pc", sizeof (buf)) == 0) {
6147532SSean.Ye@Sun.COM 		cma_is_native = B_TRUE;
6157532SSean.Ye@Sun.COM 	} else if (strncmp(buf, "i86xpv", sizeof (buf)) != 0) {
6165084Sjohnlev 		return;
6177532SSean.Ye@Sun.COM 	} else {
6187532SSean.Ye@Sun.COM 		int fd = open("/dev/xen/domcaps", O_RDONLY);
6197532SSean.Ye@Sun.COM 
6207532SSean.Ye@Sun.COM 		if (fd != -1) {
6217532SSean.Ye@Sun.COM 			if (read(fd, buf, sizeof (buf)) <= 0 ||
6227532SSean.Ye@Sun.COM 			    strncmp(buf, dom0, strlen(dom0)) != 0) {
6237532SSean.Ye@Sun.COM 				(void) close(fd);
6247532SSean.Ye@Sun.COM 				return;
6257532SSean.Ye@Sun.COM 			}
6267532SSean.Ye@Sun.COM 			(void) close(fd);
6277532SSean.Ye@Sun.COM 		}
6287532SSean.Ye@Sun.COM 		cma_is_native = B_FALSE;
6297532SSean.Ye@Sun.COM 	}
6307532SSean.Ye@Sun.COM #endif /* i386 */
6310Sstevel@tonic-gate 
6320Sstevel@tonic-gate 	if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0)
6330Sstevel@tonic-gate 		return; /* invalid data in configuration file */
6340Sstevel@tonic-gate 
6350Sstevel@tonic-gate 	fmd_hdl_subscribe(hdl, "fault.cpu.*");
6360Sstevel@tonic-gate 	fmd_hdl_subscribe(hdl, "fault.memory.*");
6376111Scy152378 #ifdef opl
6386111Scy152378 	fmd_hdl_subscribe(hdl, "fault.chassis.SPARC-Enterprise.cpu.*");
6396111Scy152378 #endif
6400Sstevel@tonic-gate 
6410Sstevel@tonic-gate 	(void) fmd_stat_create(hdl, FMD_STAT_NOALLOC, sizeof (cma_stats) /
6420Sstevel@tonic-gate 	    sizeof (fmd_stat_t), (fmd_stat_t *)&cma_stats);
6430Sstevel@tonic-gate 
6440Sstevel@tonic-gate 	cma.cma_cpu_tries = fmd_prop_get_int32(hdl, "cpu_tries");
6450Sstevel@tonic-gate 
6460Sstevel@tonic-gate 	nsec = fmd_prop_get_int64(hdl, "cpu_delay");
6470Sstevel@tonic-gate 	cma.cma_cpu_delay.tv_sec = nsec / NANOSEC;
6480Sstevel@tonic-gate 	cma.cma_cpu_delay.tv_nsec = nsec % NANOSEC;
6490Sstevel@tonic-gate 
6500Sstevel@tonic-gate 	cma.cma_page_mindelay = fmd_prop_get_int64(hdl, "page_ret_mindelay");
6510Sstevel@tonic-gate 	cma.cma_page_maxdelay = fmd_prop_get_int64(hdl, "page_ret_maxdelay");
6520Sstevel@tonic-gate 
6536111Scy152378 #ifdef sun4v
6546111Scy152378 	cma.cma_cpu_mindelay = fmd_prop_get_int64(hdl, "cpu_ret_mindelay");
6556111Scy152378 	cma.cma_cpu_maxdelay = fmd_prop_get_int64(hdl, "cpu_ret_maxdelay");
6566111Scy152378 #endif
6576111Scy152378 
6580Sstevel@tonic-gate 	cma.cma_cpu_dooffline = fmd_prop_get_int32(hdl, "cpu_offline_enable");
6590Sstevel@tonic-gate 	cma.cma_cpu_forcedoffline = fmd_prop_get_int32(hdl,
6600Sstevel@tonic-gate 	    "cpu_forced_offline");
6616111Scy152378 	cma.cma_cpu_doonline = fmd_prop_get_int32(hdl, "cpu_online_enable");
6620Sstevel@tonic-gate 	cma.cma_cpu_doblacklist = fmd_prop_get_int32(hdl,
6630Sstevel@tonic-gate 	    "cpu_blacklist_enable");
6646111Scy152378 	cma.cma_cpu_dounblacklist = fmd_prop_get_int32(hdl,
6656111Scy152378 	    "cpu_unblacklist_enable");
6660Sstevel@tonic-gate 	cma.cma_page_doretire = fmd_prop_get_int32(hdl, "page_retire_enable");
6676111Scy152378 	cma.cma_page_dounretire = fmd_prop_get_int32(hdl,
6686111Scy152378 	    "page_unretire_enable");
6690Sstevel@tonic-gate 
6700Sstevel@tonic-gate 	if (cma.cma_page_maxdelay < cma.cma_page_mindelay)
6710Sstevel@tonic-gate 		fmd_hdl_abort(hdl, "page retirement delays conflict\n");
6726111Scy152378 
6736111Scy152378 #ifdef sun4v
6746111Scy152378 	init_hdl = hdl;
6756111Scy152378 	cma_lhp = ldom_init(cma_init_alloc, cma_init_free);
6766111Scy152378 #endif
6770Sstevel@tonic-gate }
6780Sstevel@tonic-gate 
6790Sstevel@tonic-gate void
_fmd_fini(fmd_hdl_t * hdl)6800Sstevel@tonic-gate _fmd_fini(fmd_hdl_t *hdl)
6810Sstevel@tonic-gate {
6826111Scy152378 #ifdef sun4v
6836111Scy152378 	ldom_fini(cma_lhp);
6846111Scy152378 	cma_cpu_fini(hdl);
6856111Scy152378 #endif
6860Sstevel@tonic-gate 	cma_page_fini(hdl);
6870Sstevel@tonic-gate }
688