/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
 */
242082Seschrock
/*
 * The ZFS retire agent is responsible for managing hot spares across all pools.
 * When we see a device fault or a device removal, we try to open the associated
 * pool and look for any hot spares. We iterate over any available hot spares
 * and attempt a 'zpool replace' for each one.
 *
 * For vdevs diagnosed as faulty, the agent is also responsible for proactively
 * marking the vdev FAULTY (for I/O errors) or DEGRADED (for checksum errors).
 */
342082Seschrock
352082Seschrock #include <fm/fmd_api.h>
362082Seschrock #include <sys/fs/zfs.h>
372082Seschrock #include <sys/fm/protocol.h>
382082Seschrock #include <sys/fm/fs/zfs.h>
392082Seschrock #include <libzfs.h>
4010817SEric.Schrock@Sun.COM #include <fm/libtopo.h>
414451Seschrock #include <string.h>
422082Seschrock
/*
 * One node in the singly-linked list of (pool, vdev) GUID pairs for which
 * zfs_vdev_repair() has already been run.
 */
typedef struct zfs_retire_repaired {
	struct zfs_retire_repaired	*zrr_next;	/* next list node */
	uint64_t			zrr_pool;	/* pool GUID */
	uint64_t			zrr_vdev;	/* vdev GUID */
} zfs_retire_repaired_t;
4810817SEric.Schrock@Sun.COM
4910817SEric.Schrock@Sun.COM typedef struct zfs_retire_data {
5010817SEric.Schrock@Sun.COM libzfs_handle_t *zrd_hdl;
5110817SEric.Schrock@Sun.COM zfs_retire_repaired_t *zrd_repaired;
5210817SEric.Schrock@Sun.COM } zfs_retire_data_t;
5310817SEric.Schrock@Sun.COM
5410817SEric.Schrock@Sun.COM static void
zfs_retire_clear_data(fmd_hdl_t * hdl,zfs_retire_data_t * zdp)5510817SEric.Schrock@Sun.COM zfs_retire_clear_data(fmd_hdl_t *hdl, zfs_retire_data_t *zdp)
5610817SEric.Schrock@Sun.COM {
5710817SEric.Schrock@Sun.COM zfs_retire_repaired_t *zrp;
5810817SEric.Schrock@Sun.COM
5910817SEric.Schrock@Sun.COM while ((zrp = zdp->zrd_repaired) != NULL) {
6010817SEric.Schrock@Sun.COM zdp->zrd_repaired = zrp->zrr_next;
6110817SEric.Schrock@Sun.COM fmd_hdl_free(hdl, zrp, sizeof (zfs_retire_repaired_t));
6210817SEric.Schrock@Sun.COM }
6310817SEric.Schrock@Sun.COM }
6410817SEric.Schrock@Sun.COM
652082Seschrock /*
662082Seschrock * Find a pool with a matching GUID.
672082Seschrock */
682082Seschrock typedef struct find_cbdata {
692082Seschrock uint64_t cb_guid;
7010817SEric.Schrock@Sun.COM const char *cb_fru;
712082Seschrock zpool_handle_t *cb_zhp;
7210817SEric.Schrock@Sun.COM nvlist_t *cb_vdev;
732082Seschrock } find_cbdata_t;
742082Seschrock
752082Seschrock static int
find_pool(zpool_handle_t * zhp,void * data)762082Seschrock find_pool(zpool_handle_t *zhp, void *data)
772082Seschrock {
782082Seschrock find_cbdata_t *cbp = data;
792082Seschrock
805094Slling if (cbp->cb_guid ==
815094Slling zpool_get_prop_int(zhp, ZPOOL_PROP_GUID, NULL)) {
822082Seschrock cbp->cb_zhp = zhp;
832082Seschrock return (1);
842082Seschrock }
852082Seschrock
862082Seschrock zpool_close(zhp);
872082Seschrock return (0);
882082Seschrock }
892082Seschrock
902082Seschrock /*
912082Seschrock * Find a vdev within a tree with a matching GUID.
922082Seschrock */
932082Seschrock static nvlist_t *
find_vdev(libzfs_handle_t * zhdl,nvlist_t * nv,const char * search_fru,uint64_t search_guid)9410817SEric.Schrock@Sun.COM find_vdev(libzfs_handle_t *zhdl, nvlist_t *nv, const char *search_fru,
9510817SEric.Schrock@Sun.COM uint64_t search_guid)
962082Seschrock {
972082Seschrock uint64_t guid;
982082Seschrock nvlist_t **child;
992082Seschrock uint_t c, children;
1002082Seschrock nvlist_t *ret;
10110817SEric.Schrock@Sun.COM char *fru;
1022082Seschrock
10310817SEric.Schrock@Sun.COM if (search_fru != NULL) {
10410817SEric.Schrock@Sun.COM if (nvlist_lookup_string(nv, ZPOOL_CONFIG_FRU, &fru) == 0 &&
10510817SEric.Schrock@Sun.COM libzfs_fru_compare(zhdl, fru, search_fru))
10610817SEric.Schrock@Sun.COM return (nv);
10710817SEric.Schrock@Sun.COM } else {
10810817SEric.Schrock@Sun.COM if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 &&
10910817SEric.Schrock@Sun.COM guid == search_guid)
11010817SEric.Schrock@Sun.COM return (nv);
11110817SEric.Schrock@Sun.COM }
1122082Seschrock
1132082Seschrock if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
1142082Seschrock &child, &children) != 0)
1152082Seschrock return (NULL);
1162082Seschrock
1172082Seschrock for (c = 0; c < children; c++) {
11810817SEric.Schrock@Sun.COM if ((ret = find_vdev(zhdl, child[c], search_fru,
11910817SEric.Schrock@Sun.COM search_guid)) != NULL)
1202082Seschrock return (ret);
1212082Seschrock }
1222082Seschrock
1236643Seschrock if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
1246643Seschrock &child, &children) != 0)
1256643Seschrock return (NULL);
1266643Seschrock
1276643Seschrock for (c = 0; c < children; c++) {
12810817SEric.Schrock@Sun.COM if ((ret = find_vdev(zhdl, child[c], search_fru,
12910817SEric.Schrock@Sun.COM search_guid)) != NULL)
1306643Seschrock return (ret);
1316643Seschrock }
1326643Seschrock
1332082Seschrock return (NULL);
1342082Seschrock }
1352082Seschrock
1364451Seschrock /*
1374451Seschrock * Given a (pool, vdev) GUID pair, find the matching pool and vdev.
1384451Seschrock */
1394451Seschrock static zpool_handle_t *
find_by_guid(libzfs_handle_t * zhdl,uint64_t pool_guid,uint64_t vdev_guid,nvlist_t ** vdevp)1404451Seschrock find_by_guid(libzfs_handle_t *zhdl, uint64_t pool_guid, uint64_t vdev_guid,
1414451Seschrock nvlist_t **vdevp)
1424451Seschrock {
1434451Seschrock find_cbdata_t cb;
1444451Seschrock zpool_handle_t *zhp;
1454451Seschrock nvlist_t *config, *nvroot;
1464451Seschrock
1474451Seschrock /*
1484451Seschrock * Find the corresponding pool and make sure the vdev still exists.
1494451Seschrock */
1504451Seschrock cb.cb_guid = pool_guid;
1514451Seschrock if (zpool_iter(zhdl, find_pool, &cb) != 1)
1524451Seschrock return (NULL);
1534451Seschrock
1544451Seschrock zhp = cb.cb_zhp;
1554451Seschrock config = zpool_get_config(zhp, NULL);
1564451Seschrock if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
1574451Seschrock &nvroot) != 0) {
1584451Seschrock zpool_close(zhp);
1594451Seschrock return (NULL);
1604451Seschrock }
1614451Seschrock
16210817SEric.Schrock@Sun.COM if (vdev_guid != 0) {
16310817SEric.Schrock@Sun.COM if ((*vdevp = find_vdev(zhdl, nvroot, NULL,
16410817SEric.Schrock@Sun.COM vdev_guid)) == NULL) {
16510817SEric.Schrock@Sun.COM zpool_close(zhp);
16610817SEric.Schrock@Sun.COM return (NULL);
16710817SEric.Schrock@Sun.COM }
1684451Seschrock }
1694451Seschrock
1704451Seschrock return (zhp);
1714451Seschrock }
1724451Seschrock
17310817SEric.Schrock@Sun.COM static int
search_pool(zpool_handle_t * zhp,void * data)17410817SEric.Schrock@Sun.COM search_pool(zpool_handle_t *zhp, void *data)
17510817SEric.Schrock@Sun.COM {
17610817SEric.Schrock@Sun.COM find_cbdata_t *cbp = data;
17710817SEric.Schrock@Sun.COM nvlist_t *config;
17810817SEric.Schrock@Sun.COM nvlist_t *nvroot;
17910817SEric.Schrock@Sun.COM
18010817SEric.Schrock@Sun.COM config = zpool_get_config(zhp, NULL);
18110817SEric.Schrock@Sun.COM if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
18210817SEric.Schrock@Sun.COM &nvroot) != 0) {
18310817SEric.Schrock@Sun.COM zpool_close(zhp);
18410817SEric.Schrock@Sun.COM return (0);
18510817SEric.Schrock@Sun.COM }
18610817SEric.Schrock@Sun.COM
18710817SEric.Schrock@Sun.COM if ((cbp->cb_vdev = find_vdev(zpool_get_handle(zhp), nvroot,
18810817SEric.Schrock@Sun.COM cbp->cb_fru, 0)) != NULL) {
18910817SEric.Schrock@Sun.COM cbp->cb_zhp = zhp;
19010817SEric.Schrock@Sun.COM return (1);
19110817SEric.Schrock@Sun.COM }
19210817SEric.Schrock@Sun.COM
19310817SEric.Schrock@Sun.COM zpool_close(zhp);
19410817SEric.Schrock@Sun.COM return (0);
19510817SEric.Schrock@Sun.COM }
19610817SEric.Schrock@Sun.COM
19710817SEric.Schrock@Sun.COM /*
19810817SEric.Schrock@Sun.COM * Given a FRU FMRI, find the matching pool and vdev.
19910817SEric.Schrock@Sun.COM */
20010817SEric.Schrock@Sun.COM static zpool_handle_t *
find_by_fru(libzfs_handle_t * zhdl,const char * fru,nvlist_t ** vdevp)20110817SEric.Schrock@Sun.COM find_by_fru(libzfs_handle_t *zhdl, const char *fru, nvlist_t **vdevp)
20210817SEric.Schrock@Sun.COM {
20310817SEric.Schrock@Sun.COM find_cbdata_t cb;
20410817SEric.Schrock@Sun.COM
20510817SEric.Schrock@Sun.COM cb.cb_fru = fru;
20610817SEric.Schrock@Sun.COM cb.cb_zhp = NULL;
20710817SEric.Schrock@Sun.COM if (zpool_iter(zhdl, search_pool, &cb) != 1)
20810817SEric.Schrock@Sun.COM return (NULL);
20910817SEric.Schrock@Sun.COM
21010817SEric.Schrock@Sun.COM *vdevp = cb.cb_vdev;
21110817SEric.Schrock@Sun.COM return (cb.cb_zhp);
21210817SEric.Schrock@Sun.COM }
21310817SEric.Schrock@Sun.COM
2144451Seschrock /*
2154451Seschrock * Given a vdev, attempt to replace it with every known spare until one
2164451Seschrock * succeeds.
2174451Seschrock */
2184451Seschrock static void
replace_with_spare(fmd_hdl_t * hdl,zpool_handle_t * zhp,nvlist_t * vdev)21910817SEric.Schrock@Sun.COM replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)
2204451Seschrock {
2214451Seschrock nvlist_t *config, *nvroot, *replacement;
2224451Seschrock nvlist_t **spares;
2234451Seschrock uint_t s, nspares;
2244451Seschrock char *dev_name;
2254451Seschrock
2264451Seschrock config = zpool_get_config(zhp, NULL);
2274451Seschrock if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
2284451Seschrock &nvroot) != 0)
2294451Seschrock return;
2304451Seschrock
2314451Seschrock /*
2324451Seschrock * Find out if there are any hot spares available in the pool.
2334451Seschrock */
2344451Seschrock if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
2354451Seschrock &spares, &nspares) != 0)
2364451Seschrock return;
2374451Seschrock
23810817SEric.Schrock@Sun.COM replacement = fmd_nvl_alloc(hdl, FMD_SLEEP);
2394451Seschrock
24010817SEric.Schrock@Sun.COM (void) nvlist_add_string(replacement, ZPOOL_CONFIG_TYPE,
24110817SEric.Schrock@Sun.COM VDEV_TYPE_ROOT);
2424451Seschrock
24310594SGeorge.Wilson@Sun.COM dev_name = zpool_vdev_name(NULL, zhp, vdev, B_FALSE);
2444451Seschrock
2454451Seschrock /*
2464451Seschrock * Try to replace each spare, ending when we successfully
2474451Seschrock * replace it.
2484451Seschrock */
2494451Seschrock for (s = 0; s < nspares; s++) {
2504451Seschrock char *spare_name;
2514451Seschrock
2524451Seschrock if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH,
2534451Seschrock &spare_name) != 0)
2544451Seschrock continue;
2554451Seschrock
25610817SEric.Schrock@Sun.COM (void) nvlist_add_nvlist_array(replacement,
25710817SEric.Schrock@Sun.COM ZPOOL_CONFIG_CHILDREN, &spares[s], 1);
2584451Seschrock
2594451Seschrock if (zpool_vdev_attach(zhp, dev_name, spare_name,
2604451Seschrock replacement, B_TRUE) == 0)
2614451Seschrock break;
2624451Seschrock }
2634451Seschrock
2644451Seschrock free(dev_name);
2654451Seschrock nvlist_free(replacement);
2664451Seschrock }
2674451Seschrock
26810817SEric.Schrock@Sun.COM /*
26910817SEric.Schrock@Sun.COM * Repair this vdev if we had diagnosed a 'fault.fs.zfs.device' and
27010817SEric.Schrock@Sun.COM * ASRU is now usable. ZFS has found the device to be present and
27110817SEric.Schrock@Sun.COM * functioning.
27210817SEric.Schrock@Sun.COM */
27310817SEric.Schrock@Sun.COM /*ARGSUSED*/
27410817SEric.Schrock@Sun.COM void
zfs_vdev_repair(fmd_hdl_t * hdl,nvlist_t * nvl)27510817SEric.Schrock@Sun.COM zfs_vdev_repair(fmd_hdl_t *hdl, nvlist_t *nvl)
27610817SEric.Schrock@Sun.COM {
27710817SEric.Schrock@Sun.COM zfs_retire_data_t *zdp = fmd_hdl_getspecific(hdl);
27810817SEric.Schrock@Sun.COM zfs_retire_repaired_t *zrp;
27910817SEric.Schrock@Sun.COM uint64_t pool_guid, vdev_guid;
28010817SEric.Schrock@Sun.COM nvlist_t *asru;
28110817SEric.Schrock@Sun.COM
28210817SEric.Schrock@Sun.COM if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_POOL_GUID,
28310817SEric.Schrock@Sun.COM &pool_guid) != 0 || nvlist_lookup_uint64(nvl,
28410817SEric.Schrock@Sun.COM FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, &vdev_guid) != 0)
28510817SEric.Schrock@Sun.COM return;
28610817SEric.Schrock@Sun.COM
28710817SEric.Schrock@Sun.COM /*
28810817SEric.Schrock@Sun.COM * Before checking the state of the ASRU, go through and see if we've
28910817SEric.Schrock@Sun.COM * already made an attempt to repair this ASRU. This list is cleared
29010817SEric.Schrock@Sun.COM * whenever we receive any kind of list event, and is designed to
29110817SEric.Schrock@Sun.COM * prevent us from generating a feedback loop when we attempt repairs
29210817SEric.Schrock@Sun.COM * against a faulted pool. The problem is that checking the unusable
29310817SEric.Schrock@Sun.COM * state of the ASRU can involve opening the pool, which can post
29410817SEric.Schrock@Sun.COM * statechange events but otherwise leave the pool in the faulted
29510817SEric.Schrock@Sun.COM * state. This list allows us to detect when a statechange event is
29610817SEric.Schrock@Sun.COM * due to our own request.
29710817SEric.Schrock@Sun.COM */
29810817SEric.Schrock@Sun.COM for (zrp = zdp->zrd_repaired; zrp != NULL; zrp = zrp->zrr_next) {
29910817SEric.Schrock@Sun.COM if (zrp->zrr_pool == pool_guid &&
30010817SEric.Schrock@Sun.COM zrp->zrr_vdev == vdev_guid)
30110817SEric.Schrock@Sun.COM return;
30210817SEric.Schrock@Sun.COM }
30310817SEric.Schrock@Sun.COM
30410817SEric.Schrock@Sun.COM asru = fmd_nvl_alloc(hdl, FMD_SLEEP);
30510817SEric.Schrock@Sun.COM
30610817SEric.Schrock@Sun.COM (void) nvlist_add_uint8(asru, FM_VERSION, ZFS_SCHEME_VERSION0);
30710817SEric.Schrock@Sun.COM (void) nvlist_add_string(asru, FM_FMRI_SCHEME, FM_FMRI_SCHEME_ZFS);
30810817SEric.Schrock@Sun.COM (void) nvlist_add_uint64(asru, FM_FMRI_ZFS_POOL, pool_guid);
30910817SEric.Schrock@Sun.COM (void) nvlist_add_uint64(asru, FM_FMRI_ZFS_VDEV, vdev_guid);
31010817SEric.Schrock@Sun.COM
31110817SEric.Schrock@Sun.COM /*
31210817SEric.Schrock@Sun.COM * We explicitly check for the unusable state here to make sure we
31310817SEric.Schrock@Sun.COM * aren't responding to a transient state change. As part of opening a
31410817SEric.Schrock@Sun.COM * vdev, it's possible to see the 'statechange' event, only to be
31510817SEric.Schrock@Sun.COM * followed by a vdev failure later. If we don't check the current
31610817SEric.Schrock@Sun.COM * state of the vdev (or pool) before marking it repaired, then we risk
31710817SEric.Schrock@Sun.COM * generating spurious repair events followed immediately by the same
31810817SEric.Schrock@Sun.COM * diagnosis.
31910817SEric.Schrock@Sun.COM *
32010817SEric.Schrock@Sun.COM * This assumes that the ZFS scheme code associated unusable (i.e.
32110817SEric.Schrock@Sun.COM * isolated) with its own definition of faulty state. In the case of a
32210817SEric.Schrock@Sun.COM * DEGRADED leaf vdev (due to checksum errors), this is not the case.
32310817SEric.Schrock@Sun.COM * This works, however, because the transient state change is not
32410817SEric.Schrock@Sun.COM * posted in this case. This could be made more explicit by not
32510817SEric.Schrock@Sun.COM * relying on the scheme's unusable callback and instead directly
32610817SEric.Schrock@Sun.COM * checking the vdev state, where we could correctly account for
32710817SEric.Schrock@Sun.COM * DEGRADED state.
32810817SEric.Schrock@Sun.COM */
32910817SEric.Schrock@Sun.COM if (!fmd_nvl_fmri_unusable(hdl, asru) && fmd_nvl_fmri_has_fault(hdl,
33010817SEric.Schrock@Sun.COM asru, FMD_HAS_FAULT_ASRU, NULL)) {
33110817SEric.Schrock@Sun.COM topo_hdl_t *thp;
33210817SEric.Schrock@Sun.COM char *fmri = NULL;
33310817SEric.Schrock@Sun.COM int err;
33410817SEric.Schrock@Sun.COM
33510817SEric.Schrock@Sun.COM thp = fmd_hdl_topo_hold(hdl, TOPO_VERSION);
33610817SEric.Schrock@Sun.COM if (topo_fmri_nvl2str(thp, asru, &fmri, &err) == 0)
33710866SEric.Schrock@Sun.COM (void) fmd_repair_asru(hdl, fmri);
33810817SEric.Schrock@Sun.COM fmd_hdl_topo_rele(hdl, thp);
33910817SEric.Schrock@Sun.COM
34010817SEric.Schrock@Sun.COM topo_hdl_strfree(thp, fmri);
34110817SEric.Schrock@Sun.COM }
342*12066SRobert.Johnston@Sun.COM nvlist_free(asru);
34310817SEric.Schrock@Sun.COM zrp = fmd_hdl_alloc(hdl, sizeof (zfs_retire_repaired_t), FMD_SLEEP);
34410817SEric.Schrock@Sun.COM zrp->zrr_next = zdp->zrd_repaired;
34510817SEric.Schrock@Sun.COM zrp->zrr_pool = pool_guid;
34610817SEric.Schrock@Sun.COM zrp->zrr_vdev = vdev_guid;
34710817SEric.Schrock@Sun.COM zdp->zrd_repaired = zrp;
34810817SEric.Schrock@Sun.COM }
34910817SEric.Schrock@Sun.COM
3502082Seschrock /*ARGSUSED*/
3512082Seschrock static void
zfs_retire_recv(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class)3522082Seschrock zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
3532082Seschrock const char *class)
3542082Seschrock {
3552082Seschrock uint64_t pool_guid, vdev_guid;
3562082Seschrock zpool_handle_t *zhp;
35710817SEric.Schrock@Sun.COM nvlist_t *resource, *fault, *fru;
3584451Seschrock nvlist_t **faults;
3594451Seschrock uint_t f, nfaults;
36010817SEric.Schrock@Sun.COM zfs_retire_data_t *zdp = fmd_hdl_getspecific(hdl);
36110817SEric.Schrock@Sun.COM libzfs_handle_t *zhdl = zdp->zrd_hdl;
3624451Seschrock boolean_t fault_device, degrade_device;
3634451Seschrock boolean_t is_repair;
36410817SEric.Schrock@Sun.COM char *scheme, *fmri;
3652082Seschrock nvlist_t *vdev;
3667275Sstephh char *uuid;
3677275Sstephh int repair_done = 0;
3689120SStephen.Hanson@Sun.COM boolean_t retire;
36910817SEric.Schrock@Sun.COM boolean_t is_disk;
37010817SEric.Schrock@Sun.COM vdev_aux_t aux;
37110817SEric.Schrock@Sun.COM topo_hdl_t *thp;
37210817SEric.Schrock@Sun.COM int err;
3732082Seschrock
3742082Seschrock /*
3754451Seschrock * If this is a resource notifying us of device removal, then simply
3764451Seschrock * check for an available spare and continue.
3774451Seschrock */
3784451Seschrock if (strcmp(class, "resource.fs.zfs.removed") == 0) {
3794451Seschrock if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_POOL_GUID,
3804451Seschrock &pool_guid) != 0 ||
3814451Seschrock nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID,
3824451Seschrock &vdev_guid) != 0)
3834451Seschrock return;
3844451Seschrock
3854451Seschrock if ((zhp = find_by_guid(zhdl, pool_guid, vdev_guid,
3864451Seschrock &vdev)) == NULL)
3874451Seschrock return;
3884451Seschrock
3894451Seschrock if (fmd_prop_get_int32(hdl, "spare_on_remove"))
39010817SEric.Schrock@Sun.COM replace_with_spare(hdl, zhp, vdev);
3914451Seschrock zpool_close(zhp);
3924451Seschrock return;
3934451Seschrock }
3944451Seschrock
3959120SStephen.Hanson@Sun.COM if (strcmp(class, FM_LIST_RESOLVED_CLASS) == 0)
3969120SStephen.Hanson@Sun.COM return;
3979120SStephen.Hanson@Sun.COM
39810817SEric.Schrock@Sun.COM if (strcmp(class, "resource.fs.zfs.statechange") == 0 ||
39910817SEric.Schrock@Sun.COM strcmp(class,
40010817SEric.Schrock@Sun.COM "resource.sysevent.EC_zfs.ESC_ZFS_vdev_remove") == 0) {
40110817SEric.Schrock@Sun.COM zfs_vdev_repair(hdl, nvl);
40210817SEric.Schrock@Sun.COM return;
40310817SEric.Schrock@Sun.COM }
40410817SEric.Schrock@Sun.COM
40510817SEric.Schrock@Sun.COM zfs_retire_clear_data(hdl, zdp);
40610817SEric.Schrock@Sun.COM
4077275Sstephh if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0)
4084451Seschrock is_repair = B_TRUE;
4094451Seschrock else
4104451Seschrock is_repair = B_FALSE;
4114451Seschrock
4124451Seschrock /*
4134451Seschrock * We subscribe to zfs faults as well as all repair events.
4142082Seschrock */
4152082Seschrock if (nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
4162082Seschrock &faults, &nfaults) != 0)
4172082Seschrock return;
4182082Seschrock
4192082Seschrock for (f = 0; f < nfaults; f++) {
4204451Seschrock fault = faults[f];
4214451Seschrock
4224451Seschrock fault_device = B_FALSE;
4234451Seschrock degrade_device = B_FALSE;
42410817SEric.Schrock@Sun.COM is_disk = B_FALSE;
4254451Seschrock
4269120SStephen.Hanson@Sun.COM if (nvlist_lookup_boolean_value(fault, FM_SUSPECT_RETIRE,
4279120SStephen.Hanson@Sun.COM &retire) == 0 && retire == 0)
4289120SStephen.Hanson@Sun.COM continue;
4299120SStephen.Hanson@Sun.COM
4304451Seschrock /*
4314451Seschrock * While we subscribe to fault.fs.zfs.*, we only take action
4324451Seschrock * for faults targeting a specific vdev (open failure or SERD
43310817SEric.Schrock@Sun.COM * failure). We also subscribe to fault.io.* events, so that
43410817SEric.Schrock@Sun.COM * faulty disks will be faulted in the ZFS configuration.
4354451Seschrock */
43610817SEric.Schrock@Sun.COM if (fmd_nvl_class_match(hdl, fault, "fault.fs.zfs.vdev.io")) {
43710817SEric.Schrock@Sun.COM fault_device = B_TRUE;
43810817SEric.Schrock@Sun.COM } else if (fmd_nvl_class_match(hdl, fault,
43910817SEric.Schrock@Sun.COM "fault.fs.zfs.vdev.checksum")) {
44010817SEric.Schrock@Sun.COM degrade_device = B_TRUE;
44110817SEric.Schrock@Sun.COM } else if (fmd_nvl_class_match(hdl, fault,
44210817SEric.Schrock@Sun.COM "fault.fs.zfs.device")) {
44310817SEric.Schrock@Sun.COM fault_device = B_FALSE;
44410817SEric.Schrock@Sun.COM } else if (fmd_nvl_class_match(hdl, fault, "fault.io.*")) {
44510817SEric.Schrock@Sun.COM is_disk = B_TRUE;
4464451Seschrock fault_device = B_TRUE;
44710817SEric.Schrock@Sun.COM } else {
4484451Seschrock continue;
44910817SEric.Schrock@Sun.COM }
45010817SEric.Schrock@Sun.COM
45110817SEric.Schrock@Sun.COM if (is_disk) {
45210817SEric.Schrock@Sun.COM /*
45310817SEric.Schrock@Sun.COM * This is a disk fault. Lookup the FRU, convert it to
45410817SEric.Schrock@Sun.COM * an FMRI string, and attempt to find a matching vdev.
45510817SEric.Schrock@Sun.COM */
45610817SEric.Schrock@Sun.COM if (nvlist_lookup_nvlist(fault, FM_FAULT_FRU,
45710817SEric.Schrock@Sun.COM &fru) != 0 ||
45810817SEric.Schrock@Sun.COM nvlist_lookup_string(fru, FM_FMRI_SCHEME,
45910817SEric.Schrock@Sun.COM &scheme) != 0)
46010817SEric.Schrock@Sun.COM continue;
46110817SEric.Schrock@Sun.COM
46210817SEric.Schrock@Sun.COM if (strcmp(scheme, FM_FMRI_SCHEME_HC) != 0)
46310817SEric.Schrock@Sun.COM continue;
46410817SEric.Schrock@Sun.COM
46510817SEric.Schrock@Sun.COM thp = fmd_hdl_topo_hold(hdl, TOPO_VERSION);
46610817SEric.Schrock@Sun.COM if (topo_fmri_nvl2str(thp, fru, &fmri, &err) != 0) {
46710817SEric.Schrock@Sun.COM fmd_hdl_topo_rele(hdl, thp);
46810817SEric.Schrock@Sun.COM continue;
46910817SEric.Schrock@Sun.COM }
47010817SEric.Schrock@Sun.COM
47110817SEric.Schrock@Sun.COM zhp = find_by_fru(zhdl, fmri, &vdev);
47210817SEric.Schrock@Sun.COM topo_hdl_strfree(thp, fmri);
47310817SEric.Schrock@Sun.COM fmd_hdl_topo_rele(hdl, thp);
47410817SEric.Schrock@Sun.COM
47510817SEric.Schrock@Sun.COM if (zhp == NULL)
47610817SEric.Schrock@Sun.COM continue;
4774451Seschrock
47810817SEric.Schrock@Sun.COM (void) nvlist_lookup_uint64(vdev,
47910817SEric.Schrock@Sun.COM ZPOOL_CONFIG_GUID, &vdev_guid);
48010817SEric.Schrock@Sun.COM aux = VDEV_AUX_EXTERNAL;
48110817SEric.Schrock@Sun.COM } else {
48210817SEric.Schrock@Sun.COM /*
48310817SEric.Schrock@Sun.COM * This is a ZFS fault. Lookup the resource, and
48410817SEric.Schrock@Sun.COM * attempt to find the matching vdev.
48510817SEric.Schrock@Sun.COM */
48610817SEric.Schrock@Sun.COM if (nvlist_lookup_nvlist(fault, FM_FAULT_RESOURCE,
48710817SEric.Schrock@Sun.COM &resource) != 0 ||
48810817SEric.Schrock@Sun.COM nvlist_lookup_string(resource, FM_FMRI_SCHEME,
48910817SEric.Schrock@Sun.COM &scheme) != 0)
49010817SEric.Schrock@Sun.COM continue;
4914451Seschrock
49210817SEric.Schrock@Sun.COM if (strcmp(scheme, FM_FMRI_SCHEME_ZFS) != 0)
49310817SEric.Schrock@Sun.COM continue;
49410817SEric.Schrock@Sun.COM
49510817SEric.Schrock@Sun.COM if (nvlist_lookup_uint64(resource, FM_FMRI_ZFS_POOL,
49610817SEric.Schrock@Sun.COM &pool_guid) != 0)
49710817SEric.Schrock@Sun.COM continue;
4984451Seschrock
49910817SEric.Schrock@Sun.COM if (nvlist_lookup_uint64(resource, FM_FMRI_ZFS_VDEV,
50010817SEric.Schrock@Sun.COM &vdev_guid) != 0) {
50110817SEric.Schrock@Sun.COM if (is_repair)
50210817SEric.Schrock@Sun.COM vdev_guid = 0;
50310817SEric.Schrock@Sun.COM else
50410817SEric.Schrock@Sun.COM continue;
50510817SEric.Schrock@Sun.COM }
50610817SEric.Schrock@Sun.COM
50710817SEric.Schrock@Sun.COM if ((zhp = find_by_guid(zhdl, pool_guid, vdev_guid,
50810817SEric.Schrock@Sun.COM &vdev)) == NULL)
50910817SEric.Schrock@Sun.COM continue;
51010817SEric.Schrock@Sun.COM
51110817SEric.Schrock@Sun.COM aux = VDEV_AUX_ERR_EXCEEDED;
51210817SEric.Schrock@Sun.COM }
51310817SEric.Schrock@Sun.COM
51410817SEric.Schrock@Sun.COM if (vdev_guid == 0) {
51510817SEric.Schrock@Sun.COM /*
51610817SEric.Schrock@Sun.COM * For pool-level repair events, clear the entire pool.
51710817SEric.Schrock@Sun.COM */
51810921STim.Haley@Sun.COM (void) zpool_clear(zhp, NULL, NULL);
51910817SEric.Schrock@Sun.COM zpool_close(zhp);
5202082Seschrock continue;
52110817SEric.Schrock@Sun.COM }
5222082Seschrock
5234451Seschrock /*
5244451Seschrock * If this is a repair event, then mark the vdev as repaired and
5254451Seschrock * continue.
5264451Seschrock */
5274451Seschrock if (is_repair) {
5287275Sstephh repair_done = 1;
5294451Seschrock (void) zpool_vdev_clear(zhp, vdev_guid);
5302082Seschrock zpool_close(zhp);
5312082Seschrock continue;
5322082Seschrock }
5332082Seschrock
5342082Seschrock /*
5354451Seschrock * Actively fault the device if needed.
5362082Seschrock */
5374451Seschrock if (fault_device)
53810817SEric.Schrock@Sun.COM (void) zpool_vdev_fault(zhp, vdev_guid, aux);
5394451Seschrock if (degrade_device)
54010817SEric.Schrock@Sun.COM (void) zpool_vdev_degrade(zhp, vdev_guid, aux);
5412082Seschrock
5422082Seschrock /*
5434451Seschrock * Attempt to substitute a hot spare.
5442082Seschrock */
54510817SEric.Schrock@Sun.COM replace_with_spare(hdl, zhp, vdev);
5462082Seschrock zpool_close(zhp);
5472082Seschrock }
5487275Sstephh
5497275Sstephh if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0 && repair_done &&
5507275Sstephh nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0)
5517275Sstephh fmd_case_uuresolved(hdl, uuid);
5522082Seschrock }
5532082Seschrock
5542082Seschrock static const fmd_hdl_ops_t fmd_ops = {
5552082Seschrock zfs_retire_recv, /* fmdo_recv */
5562082Seschrock NULL, /* fmdo_timeout */
5572082Seschrock NULL, /* fmdo_close */
5582082Seschrock NULL, /* fmdo_stats */
5592082Seschrock NULL, /* fmdo_gc */
5602082Seschrock };
5612082Seschrock
5622082Seschrock static const fmd_prop_t fmd_props[] = {
5634451Seschrock { "spare_on_remove", FMD_TYPE_BOOL, "true" },
5642082Seschrock { NULL, 0, NULL }
5652082Seschrock };
5662082Seschrock
5672082Seschrock static const fmd_hdl_info_t fmd_info = {
5682082Seschrock "ZFS Retire Agent", "1.0", &fmd_ops, fmd_props
5692082Seschrock };
5702082Seschrock
5712082Seschrock void
_fmd_init(fmd_hdl_t * hdl)5722082Seschrock _fmd_init(fmd_hdl_t *hdl)
5732082Seschrock {
57410817SEric.Schrock@Sun.COM zfs_retire_data_t *zdp;
5752082Seschrock libzfs_handle_t *zhdl;
5762082Seschrock
5772082Seschrock if ((zhdl = libzfs_init()) == NULL)
5782082Seschrock return;
5792082Seschrock
5802082Seschrock if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) {
5812082Seschrock libzfs_fini(zhdl);
5822082Seschrock return;
5832082Seschrock }
5842082Seschrock
58510817SEric.Schrock@Sun.COM zdp = fmd_hdl_zalloc(hdl, sizeof (zfs_retire_data_t), FMD_SLEEP);
58610817SEric.Schrock@Sun.COM zdp->zrd_hdl = zhdl;
58710817SEric.Schrock@Sun.COM
58810817SEric.Schrock@Sun.COM fmd_hdl_setspecific(hdl, zdp);
5892082Seschrock }
5902082Seschrock
5912082Seschrock void
_fmd_fini(fmd_hdl_t * hdl)5922082Seschrock _fmd_fini(fmd_hdl_t *hdl)
5932082Seschrock {
59410817SEric.Schrock@Sun.COM zfs_retire_data_t *zdp = fmd_hdl_getspecific(hdl);
5952082Seschrock
59610817SEric.Schrock@Sun.COM if (zdp != NULL) {
59710817SEric.Schrock@Sun.COM zfs_retire_clear_data(hdl, zdp);
59810817SEric.Schrock@Sun.COM libzfs_fini(zdp->zrd_hdl);
59910817SEric.Schrock@Sun.COM fmd_hdl_free(hdl, zdp, sizeof (zfs_retire_data_t));
60010817SEric.Schrock@Sun.COM }
6012082Seschrock }
602