10Sstevel@tonic-gate /*
20Sstevel@tonic-gate * CDDL HEADER START
30Sstevel@tonic-gate *
40Sstevel@tonic-gate * The contents of this file are subject to the terms of the
51538Sgavinm * Common Development and Distribution License (the "License").
61538Sgavinm * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate *
80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate * See the License for the specific language governing permissions
110Sstevel@tonic-gate * and limitations under the License.
120Sstevel@tonic-gate *
130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate *
190Sstevel@tonic-gate * CDDL HEADER END
200Sstevel@tonic-gate */
211414Scindi
220Sstevel@tonic-gate /*
236111Scy152378 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
240Sstevel@tonic-gate * Use is subject to license terms.
250Sstevel@tonic-gate */
260Sstevel@tonic-gate
270Sstevel@tonic-gate /*
280Sstevel@tonic-gate * Page retirement can be an extended process due to the fact that a retirement
290Sstevel@tonic-gate * may not be possible when the original request is made. The kernel will
300Sstevel@tonic-gate * repeatedly attempt to retire a given page, but will not let us know when the
310Sstevel@tonic-gate * page has been retired. We therefore have to poll to see if the retirement
320Sstevel@tonic-gate * has been completed. This poll is implemented with a bounded exponential
330Sstevel@tonic-gate * backoff to reduce the burden which we impose upon the system.
340Sstevel@tonic-gate *
350Sstevel@tonic-gate * To reduce the burden on fmd in the face of retirement storms, we schedule
360Sstevel@tonic-gate * all retries as a group. In the simplest case, we attempt to retire a single
370Sstevel@tonic-gate * page. When forced to retry, we initially schedule a retry at a configurable
380Sstevel@tonic-gate * interval t. If the retry fails, we schedule another at 2 * t, and so on,
390Sstevel@tonic-gate * until t reaches the maximum interval (also configurable). Future retries
400Sstevel@tonic-gate * for that page will occur with t equal to the maximum interval value. We
410Sstevel@tonic-gate * will never give up on a retirement.
420Sstevel@tonic-gate *
430Sstevel@tonic-gate * With multiple retirements, the situation gets slightly more complicated. As
440Sstevel@tonic-gate * indicated above, we schedule retries as a group. We don't want to deny new
450Sstevel@tonic-gate * pages their short retry intervals, so we'll (re)set the retry interval to the
460Sstevel@tonic-gate * value appropriate for the newest page.
470Sstevel@tonic-gate */
480Sstevel@tonic-gate
490Sstevel@tonic-gate #include <cma.h>
500Sstevel@tonic-gate
510Sstevel@tonic-gate #include <time.h>
520Sstevel@tonic-gate #include <errno.h>
530Sstevel@tonic-gate #include <unistd.h>
540Sstevel@tonic-gate #include <strings.h>
550Sstevel@tonic-gate #include <fm/fmd_api.h>
561414Scindi #include <fm/libtopo.h>
577532SSean.Ye@Sun.COM #include <fm/fmd_fmri.h>
587532SSean.Ye@Sun.COM #include <fm/fmd_agent.h>
590Sstevel@tonic-gate #include <sys/fm/protocol.h>
600Sstevel@tonic-gate
610Sstevel@tonic-gate static void
cma_page_free(fmd_hdl_t * hdl,cma_page_t * page)620Sstevel@tonic-gate cma_page_free(fmd_hdl_t *hdl, cma_page_t *page)
630Sstevel@tonic-gate {
64*8221SSean.Ye@Sun.COM nvlist_free(page->pg_asru);
65*8221SSean.Ye@Sun.COM nvlist_free(page->pg_rsrc);
660Sstevel@tonic-gate fmd_hdl_free(hdl, page, sizeof (cma_page_t));
670Sstevel@tonic-gate }
680Sstevel@tonic-gate
691414Scindi /*
701414Scindi * Retire the specified ASRU, referring to a memory page by PA or by DIMM
711414Scindi * offset (i.e. the encoded coordinates internal bank, row, and column).
721414Scindi * In the initial FMA implementation, fault.memory.page exported an ASRU
731414Scindi * with an explicit physical address, which is valid at the initial time of
741414Scindi * diagnosis but may not be later following DR, DIMM removal, or interleave
751414Scindi * changes. On SPARC, this issue was solved by exporting the DIMM offset
761414Scindi * and pushing the entire FMRI to the platform memory controller through
777532SSean.Ye@Sun.COM * /dev/fm so it can derive the current PA from the DIMM and offset.
787532SSean.Ye@Sun.COM * On x86, we also encode DIMM and offset in hc-specific, which is then used
797532SSean.Ye@Sun.COM * by the x64 memory controller driver.
801414Scindi * At some point these three approaches need to be rationalized: all platforms
811414Scindi * should use the same scheme, either with decoding in the kernel or decoding
821414Scindi * in userland (i.e. with a libtopo method to compute and update the PA).
831414Scindi */
840Sstevel@tonic-gate /*ARGSUSED*/
851772Sjl139090 int
cma_page_retire(fmd_hdl_t * hdl,nvlist_t * nvl,nvlist_t * asru,const char * uuid,boolean_t repair)866111Scy152378 cma_page_retire(fmd_hdl_t *hdl, nvlist_t *nvl, nvlist_t *asru,
876111Scy152378 const char *uuid, boolean_t repair)
880Sstevel@tonic-gate {
890Sstevel@tonic-gate cma_page_t *page;
900Sstevel@tonic-gate uint64_t pageaddr;
916111Scy152378 const char *action = repair ? "unretire" : "retire";
927532SSean.Ye@Sun.COM int rc;
93*8221SSean.Ye@Sun.COM nvlist_t *rsrc = NULL, *asrucp = NULL, *hcsp;
940Sstevel@tonic-gate
95*8221SSean.Ye@Sun.COM (void) nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &rsrc);
96*8221SSean.Ye@Sun.COM
97*8221SSean.Ye@Sun.COM if (nvlist_dup(asru, &asrucp, 0) != 0) {
986198Stsien fmd_hdl_debug(hdl, "page retire nvlist dup failed\n");
996198Stsien return (CMA_RA_FAILURE);
1006198Stsien }
1016198Stsien
1020Sstevel@tonic-gate /* It should already be expanded, but we'll do it again anyway */
103*8221SSean.Ye@Sun.COM if (fmd_nvl_fmri_expand(hdl, asrucp) < 0) {
1040Sstevel@tonic-gate fmd_hdl_debug(hdl, "failed to expand page asru\n");
1050Sstevel@tonic-gate cma_stats.bad_flts.fmds_value.ui64++;
106*8221SSean.Ye@Sun.COM nvlist_free(asrucp);
1071772Sjl139090 return (CMA_RA_FAILURE);
1080Sstevel@tonic-gate }
1090Sstevel@tonic-gate
110*8221SSean.Ye@Sun.COM if (!repair && !fmd_nvl_fmri_present(hdl, asrucp)) {
1116111Scy152378 fmd_hdl_debug(hdl, "page retire overtaken by events\n");
1126111Scy152378 cma_stats.page_nonent.fmds_value.ui64++;
113*8221SSean.Ye@Sun.COM nvlist_free(asrucp);
1146111Scy152378 return (CMA_RA_SUCCESS);
1156111Scy152378 }
1166111Scy152378
117*8221SSean.Ye@Sun.COM /* Figure out physaddr from resource or asru */
118*8221SSean.Ye@Sun.COM if (rsrc == NULL ||
119*8221SSean.Ye@Sun.COM nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcsp) != 0 ||
1207532SSean.Ye@Sun.COM (nvlist_lookup_uint64(hcsp, "asru-" FM_FMRI_HC_SPECIFIC_PHYSADDR,
1217532SSean.Ye@Sun.COM &pageaddr) != 0 && nvlist_lookup_uint64(hcsp,
1227532SSean.Ye@Sun.COM FM_FMRI_HC_SPECIFIC_PHYSADDR, &pageaddr) != 0)) {
123*8221SSean.Ye@Sun.COM if (nvlist_lookup_uint64(asrucp, FM_FMRI_MEM_PHYSADDR,
124*8221SSean.Ye@Sun.COM &pageaddr) != 0) {
125*8221SSean.Ye@Sun.COM fmd_hdl_debug(hdl, "mem fault missing 'physaddr'\n");
126*8221SSean.Ye@Sun.COM cma_stats.bad_flts.fmds_value.ui64++;
127*8221SSean.Ye@Sun.COM nvlist_free(asrucp);
128*8221SSean.Ye@Sun.COM return (CMA_RA_FAILURE);
129*8221SSean.Ye@Sun.COM }
1300Sstevel@tonic-gate }
1310Sstevel@tonic-gate
1326111Scy152378 if (repair) {
1336111Scy152378 if (!cma.cma_page_dounretire) {
1346111Scy152378 fmd_hdl_debug(hdl, "suppressed unretire of page %llx\n",
1356111Scy152378 (u_longlong_t)pageaddr);
1366111Scy152378 cma_stats.page_supp.fmds_value.ui64++;
137*8221SSean.Ye@Sun.COM nvlist_free(asrucp);
1386111Scy152378 return (CMA_RA_SUCCESS);
1396111Scy152378 }
140*8221SSean.Ye@Sun.COM /* If unretire via topo fails, we fall back to legacy way */
141*8221SSean.Ye@Sun.COM if (rsrc == NULL || (rc = fmd_nvl_fmri_unretire(hdl, rsrc)) < 0)
142*8221SSean.Ye@Sun.COM rc = cma_fmri_page_unretire(hdl, asrucp);
1436111Scy152378 } else {
1446111Scy152378 if (!cma.cma_page_doretire) {
1456111Scy152378 fmd_hdl_debug(hdl, "suppressed retire of page %llx\n",
1466111Scy152378 (u_longlong_t)pageaddr);
1476111Scy152378 cma_stats.page_supp.fmds_value.ui64++;
148*8221SSean.Ye@Sun.COM nvlist_free(asrucp);
1496111Scy152378 return (CMA_RA_FAILURE);
1506111Scy152378 }
151*8221SSean.Ye@Sun.COM /* If retire via topo fails, we fall back to legacy way */
152*8221SSean.Ye@Sun.COM if (rsrc == NULL || (rc = fmd_nvl_fmri_retire(hdl, rsrc)) < 0)
153*8221SSean.Ye@Sun.COM rc = cma_fmri_page_retire(hdl, asrucp);
1540Sstevel@tonic-gate }
1550Sstevel@tonic-gate
1567532SSean.Ye@Sun.COM if (rc == FMD_AGENT_RETIRE_DONE) {
1576111Scy152378 fmd_hdl_debug(hdl, "%sd page 0x%llx\n",
1586111Scy152378 action, (u_longlong_t)pageaddr);
1596111Scy152378 if (repair)
1606111Scy152378 cma_stats.page_repairs.fmds_value.ui64++;
1616111Scy152378 else
1626111Scy152378 cma_stats.page_flts.fmds_value.ui64++;
163*8221SSean.Ye@Sun.COM nvlist_free(asrucp);
1641772Sjl139090 return (CMA_RA_SUCCESS);
1657532SSean.Ye@Sun.COM } else if (repair || rc != FMD_AGENT_RETIRE_ASYNC) {
1666111Scy152378 fmd_hdl_debug(hdl, "%s of page 0x%llx failed, will not "
1676111Scy152378 "retry: %s\n", action, (u_longlong_t)pageaddr,
1686111Scy152378 strerror(errno));
1696111Scy152378
1706111Scy152378 cma_stats.page_fails.fmds_value.ui64++;
171*8221SSean.Ye@Sun.COM nvlist_free(asrucp);
1721772Sjl139090 return (CMA_RA_FAILURE);
1730Sstevel@tonic-gate }
1740Sstevel@tonic-gate
1750Sstevel@tonic-gate /*
1760Sstevel@tonic-gate * The page didn't immediately retire. We'll need to periodically
1770Sstevel@tonic-gate * check to see if it has been retired.
1780Sstevel@tonic-gate */
1790Sstevel@tonic-gate fmd_hdl_debug(hdl, "page didn't retire - sleeping\n");
1800Sstevel@tonic-gate
1810Sstevel@tonic-gate page = fmd_hdl_zalloc(hdl, sizeof (cma_page_t), FMD_SLEEP);
1820Sstevel@tonic-gate page->pg_addr = pageaddr;
183*8221SSean.Ye@Sun.COM if (rsrc != NULL)
184*8221SSean.Ye@Sun.COM (void) nvlist_dup(rsrc, &page->pg_rsrc, 0);
185*8221SSean.Ye@Sun.COM page->pg_asru = asrucp;
1860Sstevel@tonic-gate if (uuid != NULL)
1870Sstevel@tonic-gate page->pg_uuid = fmd_hdl_strdup(hdl, uuid, FMD_SLEEP);
1880Sstevel@tonic-gate
1890Sstevel@tonic-gate page->pg_next = cma.cma_pages;
1900Sstevel@tonic-gate cma.cma_pages = page;
1910Sstevel@tonic-gate
1920Sstevel@tonic-gate if (cma.cma_page_timerid != 0)
1930Sstevel@tonic-gate fmd_timer_remove(hdl, cma.cma_page_timerid);
1940Sstevel@tonic-gate
1950Sstevel@tonic-gate cma.cma_page_curdelay = cma.cma_page_mindelay;
1960Sstevel@tonic-gate
1970Sstevel@tonic-gate cma.cma_page_timerid =
1980Sstevel@tonic-gate fmd_timer_install(hdl, NULL, NULL, cma.cma_page_curdelay);
1991772Sjl139090
200*8221SSean.Ye@Sun.COM /* Don't free asrucp here. This FMRI will be needed for retry. */
2011772Sjl139090 return (CMA_RA_FAILURE);
2020Sstevel@tonic-gate }
2030Sstevel@tonic-gate
2040Sstevel@tonic-gate static int
page_retry(fmd_hdl_t * hdl,cma_page_t * page)2050Sstevel@tonic-gate page_retry(fmd_hdl_t *hdl, cma_page_t *page)
2060Sstevel@tonic-gate {
2077532SSean.Ye@Sun.COM int rc;
2087532SSean.Ye@Sun.COM
209*8221SSean.Ye@Sun.COM if (page->pg_asru != NULL &&
210*8221SSean.Ye@Sun.COM !fmd_nvl_fmri_present(hdl, page->pg_asru)) {
2110Sstevel@tonic-gate fmd_hdl_debug(hdl, "page retire overtaken by events");
2120Sstevel@tonic-gate cma_stats.page_nonent.fmds_value.ui64++;
2130Sstevel@tonic-gate
2140Sstevel@tonic-gate if (page->pg_uuid != NULL)
2150Sstevel@tonic-gate fmd_case_uuclose(hdl, page->pg_uuid);
2160Sstevel@tonic-gate return (1); /* no longer a page to retire */
2170Sstevel@tonic-gate }
2180Sstevel@tonic-gate
219*8221SSean.Ye@Sun.COM if (page->pg_rsrc == NULL ||
220*8221SSean.Ye@Sun.COM (rc = fmd_nvl_fmri_service_state(hdl, page->pg_rsrc)) < 0)
221*8221SSean.Ye@Sun.COM rc = cma_fmri_page_service_state(hdl, page->pg_asru);
222*8221SSean.Ye@Sun.COM
2237532SSean.Ye@Sun.COM if (rc == FMD_SERVICE_STATE_UNUSABLE) {
2240Sstevel@tonic-gate fmd_hdl_debug(hdl, "retired page 0x%llx on retry %u\n",
2250Sstevel@tonic-gate page->pg_addr, page->pg_nretries);
2260Sstevel@tonic-gate cma_stats.page_flts.fmds_value.ui64++;
2270Sstevel@tonic-gate
2280Sstevel@tonic-gate if (page->pg_uuid != NULL)
2290Sstevel@tonic-gate fmd_case_uuclose(hdl, page->pg_uuid);
2300Sstevel@tonic-gate return (1); /* page retired */
2310Sstevel@tonic-gate }
2320Sstevel@tonic-gate
2337532SSean.Ye@Sun.COM if (rc == FMD_SERVICE_STATE_ISOLATE_PENDING) {
2340Sstevel@tonic-gate fmd_hdl_debug(hdl, "scheduling another retry for 0x%llx\n",
2350Sstevel@tonic-gate page->pg_addr);
2360Sstevel@tonic-gate return (0); /* schedule another retry */
2370Sstevel@tonic-gate } else {
2387532SSean.Ye@Sun.COM fmd_hdl_debug(hdl, "failed to retry page 0x%llx "
2397532SSean.Ye@Sun.COM "retirement: %s\n", page->pg_addr,
2407532SSean.Ye@Sun.COM strerror(errno));
2410Sstevel@tonic-gate
2420Sstevel@tonic-gate cma_stats.page_fails.fmds_value.ui64++;
2430Sstevel@tonic-gate return (1); /* give up */
2440Sstevel@tonic-gate }
2450Sstevel@tonic-gate }
2460Sstevel@tonic-gate
2470Sstevel@tonic-gate void
cma_page_retry(fmd_hdl_t * hdl)2480Sstevel@tonic-gate cma_page_retry(fmd_hdl_t *hdl)
2490Sstevel@tonic-gate {
2500Sstevel@tonic-gate cma_page_t **pagep;
2510Sstevel@tonic-gate
2520Sstevel@tonic-gate cma.cma_page_timerid = 0;
2530Sstevel@tonic-gate
2540Sstevel@tonic-gate fmd_hdl_debug(hdl, "page_retry: timer fired\n");
2550Sstevel@tonic-gate
2560Sstevel@tonic-gate pagep = &cma.cma_pages;
2570Sstevel@tonic-gate while (*pagep != NULL) {
2580Sstevel@tonic-gate cma_page_t *page = *pagep;
2590Sstevel@tonic-gate
2600Sstevel@tonic-gate if (page_retry(hdl, page)) {
2610Sstevel@tonic-gate /*
2620Sstevel@tonic-gate * Successful retry or we're giving up - remove from
2630Sstevel@tonic-gate * the list
2640Sstevel@tonic-gate */
2650Sstevel@tonic-gate *pagep = page->pg_next;
2660Sstevel@tonic-gate
2670Sstevel@tonic-gate if (page->pg_uuid != NULL)
2680Sstevel@tonic-gate fmd_hdl_strfree(hdl, page->pg_uuid);
2690Sstevel@tonic-gate
2700Sstevel@tonic-gate cma_page_free(hdl, page);
2716341Scy152378 } else {
2720Sstevel@tonic-gate page->pg_nretries++;
2730Sstevel@tonic-gate pagep = &page->pg_next;
2740Sstevel@tonic-gate }
2750Sstevel@tonic-gate }
2760Sstevel@tonic-gate
2770Sstevel@tonic-gate if (cma.cma_pages == NULL)
2780Sstevel@tonic-gate return; /* no more retirements */
2790Sstevel@tonic-gate
2800Sstevel@tonic-gate /*
2810Sstevel@tonic-gate * We still have retirements that haven't completed. Back the delay
2820Sstevel@tonic-gate * off, and schedule a retry.
2830Sstevel@tonic-gate */
2840Sstevel@tonic-gate cma.cma_page_curdelay = MIN(cma.cma_page_curdelay * 2,
2850Sstevel@tonic-gate cma.cma_page_maxdelay);
2860Sstevel@tonic-gate
2870Sstevel@tonic-gate fmd_hdl_debug(hdl, "scheduled page retirement retry for %llu secs\n",
2880Sstevel@tonic-gate (u_longlong_t)(cma.cma_page_curdelay / NANOSEC));
2890Sstevel@tonic-gate
2900Sstevel@tonic-gate cma.cma_page_timerid =
2910Sstevel@tonic-gate fmd_timer_install(hdl, NULL, NULL, cma.cma_page_curdelay);
2920Sstevel@tonic-gate }
2930Sstevel@tonic-gate
2940Sstevel@tonic-gate void
cma_page_fini(fmd_hdl_t * hdl)2950Sstevel@tonic-gate cma_page_fini(fmd_hdl_t *hdl)
2960Sstevel@tonic-gate {
2970Sstevel@tonic-gate cma_page_t *page;
2980Sstevel@tonic-gate
2990Sstevel@tonic-gate while ((page = cma.cma_pages) != NULL) {
3000Sstevel@tonic-gate cma.cma_pages = page->pg_next;
301*8221SSean.Ye@Sun.COM if (page->pg_uuid != NULL)
302*8221SSean.Ye@Sun.COM fmd_hdl_strfree(hdl, page->pg_uuid);
3030Sstevel@tonic-gate cma_page_free(hdl, page);
3040Sstevel@tonic-gate }
3050Sstevel@tonic-gate }
306