1eda14cbcSMatt Macy /* 2eda14cbcSMatt Macy * CDDL HEADER START 3eda14cbcSMatt Macy * 4eda14cbcSMatt Macy * The contents of this file are subject to the terms of the 5eda14cbcSMatt Macy * Common Development and Distribution License (the "License"). 6eda14cbcSMatt Macy * You may not use this file except in compliance with the License. 7eda14cbcSMatt Macy * 8eda14cbcSMatt Macy * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9271171e0SMartin Matuska * or https://opensource.org/licenses/CDDL-1.0. 10eda14cbcSMatt Macy * See the License for the specific language governing permissions 11eda14cbcSMatt Macy * and limitations under the License. 12eda14cbcSMatt Macy * 13eda14cbcSMatt Macy * When distributing Covered Code, include this CDDL HEADER in each 14eda14cbcSMatt Macy * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15eda14cbcSMatt Macy * If applicable, add the following below this CDDL HEADER, with the 16eda14cbcSMatt Macy * fields enclosed by brackets "[]" replaced with your own identifying 17eda14cbcSMatt Macy * information: Portions Copyright [yyyy] [name of copyright owner] 18eda14cbcSMatt Macy * 19eda14cbcSMatt Macy * CDDL HEADER END 20eda14cbcSMatt Macy */ 21eda14cbcSMatt Macy /* 22eda14cbcSMatt Macy * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 23eda14cbcSMatt Macy * 24eda14cbcSMatt Macy * Copyright (c) 2016, Intel Corporation. 25eda14cbcSMatt Macy * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com> 26eda14cbcSMatt Macy */ 27eda14cbcSMatt Macy 28eda14cbcSMatt Macy /* 29eda14cbcSMatt Macy * The ZFS retire agent is responsible for managing hot spares across all pools. 30eda14cbcSMatt Macy * When we see a device fault or a device removal, we try to open the associated 31eda14cbcSMatt Macy * pool and look for any hot spares. We iterate over any available hot spares 32eda14cbcSMatt Macy * and attempt a 'zpool replace' for each one. 33eda14cbcSMatt Macy * 34eda14cbcSMatt Macy * For vdevs diagnosed as faulty, the agent is also responsible for proactively 35eda14cbcSMatt Macy * marking the vdev FAULTY (for I/O errors) or DEGRADED (for checksum errors). 36eda14cbcSMatt Macy */ 37eda14cbcSMatt Macy 38eda14cbcSMatt Macy #include <sys/fs/zfs.h> 39eda14cbcSMatt Macy #include <sys/fm/protocol.h> 40eda14cbcSMatt Macy #include <sys/fm/fs/zfs.h> 413ff01b23SMartin Matuska #include <libzutil.h> 42eda14cbcSMatt Macy #include <libzfs.h> 43eda14cbcSMatt Macy #include <string.h> 4453b70c86SMartin Matuska #include <libgen.h> 45eda14cbcSMatt Macy 46eda14cbcSMatt Macy #include "zfs_agents.h" 47eda14cbcSMatt Macy #include "fmd_api.h" 48eda14cbcSMatt Macy 49eda14cbcSMatt Macy 50eda14cbcSMatt Macy typedef struct zfs_retire_repaired { 51eda14cbcSMatt Macy struct zfs_retire_repaired *zrr_next; 52eda14cbcSMatt Macy uint64_t zrr_pool; 53eda14cbcSMatt Macy uint64_t zrr_vdev; 54eda14cbcSMatt Macy } zfs_retire_repaired_t; 55eda14cbcSMatt Macy 56eda14cbcSMatt Macy typedef struct zfs_retire_data { 57eda14cbcSMatt Macy libzfs_handle_t *zrd_hdl; 58eda14cbcSMatt Macy zfs_retire_repaired_t *zrd_repaired; 59eda14cbcSMatt Macy } zfs_retire_data_t; 60eda14cbcSMatt Macy 61eda14cbcSMatt Macy static void 62eda14cbcSMatt Macy zfs_retire_clear_data(fmd_hdl_t *hdl, zfs_retire_data_t *zdp) 63eda14cbcSMatt Macy { 64eda14cbcSMatt Macy zfs_retire_repaired_t *zrp; 65eda14cbcSMatt Macy 66eda14cbcSMatt Macy while ((zrp = zdp->zrd_repaired) != NULL) { 67eda14cbcSMatt Macy zdp->zrd_repaired = zrp->zrr_next; 68eda14cbcSMatt Macy fmd_hdl_free(hdl, zrp, sizeof (zfs_retire_repaired_t)); 69eda14cbcSMatt Macy } 70eda14cbcSMatt Macy } 71eda14cbcSMatt Macy 72eda14cbcSMatt Macy /* 73eda14cbcSMatt Macy * Find a pool with a matching GUID. 74eda14cbcSMatt Macy */ 75eda14cbcSMatt Macy typedef struct find_cbdata { 76eda14cbcSMatt Macy uint64_t cb_guid; 77eda14cbcSMatt Macy zpool_handle_t *cb_zhp; 78eda14cbcSMatt Macy nvlist_t *cb_vdev; 79*15f0b8c3SMartin Matuska uint64_t cb_vdev_guid; 80*15f0b8c3SMartin Matuska uint64_t cb_num_spares; 81eda14cbcSMatt Macy } find_cbdata_t; 82eda14cbcSMatt Macy 83eda14cbcSMatt Macy static int 84eda14cbcSMatt Macy find_pool(zpool_handle_t *zhp, void *data) 85eda14cbcSMatt Macy { 86eda14cbcSMatt Macy find_cbdata_t *cbp = data; 87eda14cbcSMatt Macy 88eda14cbcSMatt Macy if (cbp->cb_guid == 89eda14cbcSMatt Macy zpool_get_prop_int(zhp, ZPOOL_PROP_GUID, NULL)) { 90eda14cbcSMatt Macy cbp->cb_zhp = zhp; 91eda14cbcSMatt Macy return (1); 92eda14cbcSMatt Macy } 93eda14cbcSMatt Macy 94eda14cbcSMatt Macy zpool_close(zhp); 95eda14cbcSMatt Macy return (0); 96eda14cbcSMatt Macy } 97eda14cbcSMatt Macy 98eda14cbcSMatt Macy /* 99eda14cbcSMatt Macy * Find a vdev within a tree with a matching GUID. 100eda14cbcSMatt Macy */ 101eda14cbcSMatt Macy static nvlist_t * 102eda14cbcSMatt Macy find_vdev(libzfs_handle_t *zhdl, nvlist_t *nv, uint64_t search_guid) 103eda14cbcSMatt Macy { 104eda14cbcSMatt Macy uint64_t guid; 105eda14cbcSMatt Macy nvlist_t **child; 106eda14cbcSMatt Macy uint_t c, children; 107eda14cbcSMatt Macy nvlist_t *ret; 108eda14cbcSMatt Macy 109eda14cbcSMatt Macy if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 && 110eda14cbcSMatt Macy guid == search_guid) { 111eda14cbcSMatt Macy fmd_hdl_debug(fmd_module_hdl("zfs-retire"), 112eda14cbcSMatt Macy "matched vdev %llu", guid); 113eda14cbcSMatt Macy return (nv); 114eda14cbcSMatt Macy } 115eda14cbcSMatt Macy 116eda14cbcSMatt Macy if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 117eda14cbcSMatt Macy &child, &children) != 0) 118eda14cbcSMatt Macy return (NULL); 119eda14cbcSMatt Macy 120eda14cbcSMatt Macy for (c = 0; c < children; c++) { 121eda14cbcSMatt Macy if ((ret = find_vdev(zhdl, child[c], search_guid)) != NULL) 122eda14cbcSMatt Macy return (ret); 123eda14cbcSMatt Macy } 124eda14cbcSMatt Macy 125eda14cbcSMatt Macy if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, 126eda14cbcSMatt Macy &child, &children) != 0) 127eda14cbcSMatt Macy return (NULL); 128eda14cbcSMatt Macy 129eda14cbcSMatt Macy for (c = 0; c < children; c++) { 130eda14cbcSMatt Macy if ((ret = find_vdev(zhdl, child[c], search_guid)) != NULL) 131eda14cbcSMatt Macy return (ret); 132eda14cbcSMatt Macy } 133eda14cbcSMatt Macy 134eda14cbcSMatt Macy if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, 135eda14cbcSMatt Macy &child, &children) != 0) 136eda14cbcSMatt Macy return (NULL); 137eda14cbcSMatt Macy 138eda14cbcSMatt Macy for (c = 0; c < children; c++) { 139eda14cbcSMatt Macy if ((ret = find_vdev(zhdl, child[c], search_guid)) != NULL) 140eda14cbcSMatt Macy return (ret); 141eda14cbcSMatt Macy } 142eda14cbcSMatt Macy 143eda14cbcSMatt Macy return (NULL); 144eda14cbcSMatt Macy } 145eda14cbcSMatt Macy 146*15f0b8c3SMartin Matuska static int 147*15f0b8c3SMartin Matuska remove_spares(zpool_handle_t *zhp, void *data) 148*15f0b8c3SMartin Matuska { 149*15f0b8c3SMartin Matuska nvlist_t *config, *nvroot; 150*15f0b8c3SMartin Matuska nvlist_t **spares; 151*15f0b8c3SMartin Matuska uint_t nspares; 152*15f0b8c3SMartin Matuska char *devname; 153*15f0b8c3SMartin Matuska find_cbdata_t *cbp = data; 154*15f0b8c3SMartin Matuska uint64_t spareguid = 0; 155*15f0b8c3SMartin Matuska vdev_stat_t *vs; 156*15f0b8c3SMartin Matuska unsigned int c; 157*15f0b8c3SMartin Matuska 158*15f0b8c3SMartin Matuska config = zpool_get_config(zhp, NULL); 159*15f0b8c3SMartin Matuska if (nvlist_lookup_nvlist(config, 160*15f0b8c3SMartin Matuska ZPOOL_CONFIG_VDEV_TREE, &nvroot) != 0) { 161*15f0b8c3SMartin Matuska zpool_close(zhp); 162*15f0b8c3SMartin Matuska return (0); 163*15f0b8c3SMartin Matuska } 164*15f0b8c3SMartin Matuska 165*15f0b8c3SMartin Matuska if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 166*15f0b8c3SMartin Matuska &spares, &nspares) != 0) { 167*15f0b8c3SMartin Matuska zpool_close(zhp); 168*15f0b8c3SMartin Matuska return (0); 169*15f0b8c3SMartin Matuska } 170*15f0b8c3SMartin Matuska 171*15f0b8c3SMartin Matuska for (int i = 0; i < nspares; i++) { 172*15f0b8c3SMartin Matuska if (nvlist_lookup_uint64(spares[i], ZPOOL_CONFIG_GUID, 173*15f0b8c3SMartin Matuska &spareguid) == 0 && spareguid == cbp->cb_vdev_guid) { 174*15f0b8c3SMartin Matuska devname = zpool_vdev_name(NULL, zhp, spares[i], 175*15f0b8c3SMartin Matuska B_FALSE); 176*15f0b8c3SMartin Matuska nvlist_lookup_uint64_array(spares[i], 177*15f0b8c3SMartin Matuska ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &c); 178*15f0b8c3SMartin Matuska if (vs->vs_state != VDEV_STATE_REMOVED && 179*15f0b8c3SMartin Matuska zpool_vdev_remove_wanted(zhp, devname) == 0) 180*15f0b8c3SMartin Matuska cbp->cb_num_spares++; 181*15f0b8c3SMartin Matuska break; 182*15f0b8c3SMartin Matuska } 183*15f0b8c3SMartin Matuska } 184*15f0b8c3SMartin Matuska 185*15f0b8c3SMartin Matuska zpool_close(zhp); 186*15f0b8c3SMartin Matuska return (0); 187*15f0b8c3SMartin Matuska } 188*15f0b8c3SMartin Matuska 189*15f0b8c3SMartin Matuska /* 190*15f0b8c3SMartin Matuska * Given a vdev guid, find and remove all spares associated with it. 191*15f0b8c3SMartin Matuska */ 192*15f0b8c3SMartin Matuska static int 193*15f0b8c3SMartin Matuska find_and_remove_spares(libzfs_handle_t *zhdl, uint64_t vdev_guid) 194*15f0b8c3SMartin Matuska { 195*15f0b8c3SMartin Matuska find_cbdata_t cb; 196*15f0b8c3SMartin Matuska 197*15f0b8c3SMartin Matuska cb.cb_num_spares = 0; 198*15f0b8c3SMartin Matuska cb.cb_vdev_guid = vdev_guid; 199*15f0b8c3SMartin Matuska zpool_iter(zhdl, remove_spares, &cb); 200*15f0b8c3SMartin Matuska 201*15f0b8c3SMartin Matuska return (cb.cb_num_spares); 202*15f0b8c3SMartin Matuska } 203*15f0b8c3SMartin Matuska 204eda14cbcSMatt Macy /* 205eda14cbcSMatt Macy * Given a (pool, vdev) GUID pair, find the matching pool and vdev. 206eda14cbcSMatt Macy */ 207eda14cbcSMatt Macy static zpool_handle_t * 208eda14cbcSMatt Macy find_by_guid(libzfs_handle_t *zhdl, uint64_t pool_guid, uint64_t vdev_guid, 209eda14cbcSMatt Macy nvlist_t **vdevp) 210eda14cbcSMatt Macy { 211eda14cbcSMatt Macy find_cbdata_t cb; 212eda14cbcSMatt Macy zpool_handle_t *zhp; 213eda14cbcSMatt Macy nvlist_t *config, *nvroot; 214eda14cbcSMatt Macy 215eda14cbcSMatt Macy /* 216eda14cbcSMatt Macy * Find the corresponding pool and make sure the vdev still exists. 217eda14cbcSMatt Macy */ 218eda14cbcSMatt Macy cb.cb_guid = pool_guid; 219eda14cbcSMatt Macy if (zpool_iter(zhdl, find_pool, &cb) != 1) 220eda14cbcSMatt Macy return (NULL); 221eda14cbcSMatt Macy 222eda14cbcSMatt Macy zhp = cb.cb_zhp; 223eda14cbcSMatt Macy config = zpool_get_config(zhp, NULL); 224eda14cbcSMatt Macy if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 225eda14cbcSMatt Macy &nvroot) != 0) { 226eda14cbcSMatt Macy zpool_close(zhp); 227eda14cbcSMatt Macy return (NULL); 228eda14cbcSMatt Macy } 229eda14cbcSMatt Macy 230eda14cbcSMatt Macy if (vdev_guid != 0) { 231eda14cbcSMatt Macy if ((*vdevp = find_vdev(zhdl, nvroot, vdev_guid)) == NULL) { 232eda14cbcSMatt Macy zpool_close(zhp); 233eda14cbcSMatt Macy return (NULL); 234eda14cbcSMatt Macy } 235eda14cbcSMatt Macy } 236eda14cbcSMatt Macy 237eda14cbcSMatt Macy return (zhp); 238eda14cbcSMatt Macy } 239eda14cbcSMatt Macy 240eda14cbcSMatt Macy /* 241eda14cbcSMatt Macy * Given a vdev, attempt to replace it with every known spare until one 242eda14cbcSMatt Macy * succeeds or we run out of devices to try. 243eda14cbcSMatt Macy * Return whether we were successful or not in replacing the device. 244eda14cbcSMatt Macy */ 245eda14cbcSMatt Macy static boolean_t 246eda14cbcSMatt Macy replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev) 247eda14cbcSMatt Macy { 248eda14cbcSMatt Macy nvlist_t *config, *nvroot, *replacement; 249eda14cbcSMatt Macy nvlist_t **spares; 250eda14cbcSMatt Macy uint_t s, nspares; 251eda14cbcSMatt Macy char *dev_name; 252eda14cbcSMatt Macy zprop_source_t source; 253eda14cbcSMatt Macy int ashift; 254eda14cbcSMatt Macy 255eda14cbcSMatt Macy config = zpool_get_config(zhp, NULL); 256eda14cbcSMatt Macy if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 257eda14cbcSMatt Macy &nvroot) != 0) 258eda14cbcSMatt Macy return (B_FALSE); 259eda14cbcSMatt Macy 260eda14cbcSMatt Macy /* 261eda14cbcSMatt Macy * Find out if there are any hot spares available in the pool. 262eda14cbcSMatt Macy */ 263eda14cbcSMatt Macy if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 264eda14cbcSMatt Macy &spares, &nspares) != 0) 265eda14cbcSMatt Macy return (B_FALSE); 266eda14cbcSMatt Macy 267eda14cbcSMatt Macy /* 268eda14cbcSMatt Macy * lookup "ashift" pool property, we may need it for the replacement 269eda14cbcSMatt Macy */ 270eda14cbcSMatt Macy ashift = zpool_get_prop_int(zhp, ZPOOL_PROP_ASHIFT, &source); 271eda14cbcSMatt Macy 272eda14cbcSMatt Macy replacement = fmd_nvl_alloc(hdl, FMD_SLEEP); 273eda14cbcSMatt Macy 274eda14cbcSMatt Macy (void) nvlist_add_string(replacement, ZPOOL_CONFIG_TYPE, 275eda14cbcSMatt Macy VDEV_TYPE_ROOT); 276eda14cbcSMatt Macy 277eda14cbcSMatt Macy dev_name = zpool_vdev_name(NULL, zhp, vdev, B_FALSE); 278eda14cbcSMatt Macy 279eda14cbcSMatt Macy /* 280eda14cbcSMatt Macy * Try to replace each spare, ending when we successfully 281eda14cbcSMatt Macy * replace it. 282eda14cbcSMatt Macy */ 283eda14cbcSMatt Macy for (s = 0; s < nspares; s++) { 2847877fdebSMatt Macy boolean_t rebuild = B_FALSE; 2857877fdebSMatt Macy char *spare_name, *type; 286eda14cbcSMatt Macy 287eda14cbcSMatt Macy if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH, 288eda14cbcSMatt Macy &spare_name) != 0) 289eda14cbcSMatt Macy continue; 290eda14cbcSMatt Macy 2917877fdebSMatt Macy /* prefer sequential resilvering for distributed spares */ 2927877fdebSMatt Macy if ((nvlist_lookup_string(spares[s], ZPOOL_CONFIG_TYPE, 2937877fdebSMatt Macy &type) == 0) && strcmp(type, VDEV_TYPE_DRAID_SPARE) == 0) 2947877fdebSMatt Macy rebuild = B_TRUE; 2957877fdebSMatt Macy 296eda14cbcSMatt Macy /* if set, add the "ashift" pool property to the spare nvlist */ 297eda14cbcSMatt Macy if (source != ZPROP_SRC_DEFAULT) 298eda14cbcSMatt Macy (void) nvlist_add_uint64(spares[s], 299eda14cbcSMatt Macy ZPOOL_CONFIG_ASHIFT, ashift); 300eda14cbcSMatt Macy 301eda14cbcSMatt Macy (void) nvlist_add_nvlist_array(replacement, 302681ce946SMartin Matuska ZPOOL_CONFIG_CHILDREN, (const nvlist_t **)&spares[s], 1); 303eda14cbcSMatt Macy 304eda14cbcSMatt Macy fmd_hdl_debug(hdl, "zpool_vdev_replace '%s' with spare '%s'", 3053ff01b23SMartin Matuska dev_name, zfs_basename(spare_name)); 306eda14cbcSMatt Macy 307eda14cbcSMatt Macy if (zpool_vdev_attach(zhp, dev_name, spare_name, 3087877fdebSMatt Macy replacement, B_TRUE, rebuild) == 0) { 309eda14cbcSMatt Macy free(dev_name); 310eda14cbcSMatt Macy nvlist_free(replacement); 311eda14cbcSMatt Macy return (B_TRUE); 312eda14cbcSMatt Macy } 313eda14cbcSMatt Macy } 314eda14cbcSMatt Macy 315eda14cbcSMatt Macy free(dev_name); 316eda14cbcSMatt Macy nvlist_free(replacement); 317eda14cbcSMatt Macy 318eda14cbcSMatt Macy return (B_FALSE); 319eda14cbcSMatt Macy } 320eda14cbcSMatt Macy 321eda14cbcSMatt Macy /* 322eda14cbcSMatt Macy * Repair this vdev if we had diagnosed a 'fault.fs.zfs.device' and 323eda14cbcSMatt Macy * ASRU is now usable. ZFS has found the device to be present and 324eda14cbcSMatt Macy * functioning. 325eda14cbcSMatt Macy */ 326eda14cbcSMatt Macy static void 327eda14cbcSMatt Macy zfs_vdev_repair(fmd_hdl_t *hdl, nvlist_t *nvl) 328eda14cbcSMatt Macy { 329eda14cbcSMatt Macy zfs_retire_data_t *zdp = fmd_hdl_getspecific(hdl); 330eda14cbcSMatt Macy zfs_retire_repaired_t *zrp; 331eda14cbcSMatt Macy uint64_t pool_guid, vdev_guid; 332eda14cbcSMatt Macy if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, 333eda14cbcSMatt Macy &pool_guid) != 0 || nvlist_lookup_uint64(nvl, 334eda14cbcSMatt Macy FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, &vdev_guid) != 0) 335eda14cbcSMatt Macy return; 336eda14cbcSMatt Macy 337eda14cbcSMatt Macy /* 338eda14cbcSMatt Macy * Before checking the state of the ASRU, go through and see if we've 339eda14cbcSMatt Macy * already made an attempt to repair this ASRU. This list is cleared 340eda14cbcSMatt Macy * whenever we receive any kind of list event, and is designed to 341eda14cbcSMatt Macy * prevent us from generating a feedback loop when we attempt repairs 342eda14cbcSMatt Macy * against a faulted pool. The problem is that checking the unusable 343eda14cbcSMatt Macy * state of the ASRU can involve opening the pool, which can post 344eda14cbcSMatt Macy * statechange events but otherwise leave the pool in the faulted 345eda14cbcSMatt Macy * state. This list allows us to detect when a statechange event is 346eda14cbcSMatt Macy * due to our own request. 347eda14cbcSMatt Macy */ 348eda14cbcSMatt Macy for (zrp = zdp->zrd_repaired; zrp != NULL; zrp = zrp->zrr_next) { 349eda14cbcSMatt Macy if (zrp->zrr_pool == pool_guid && 350eda14cbcSMatt Macy zrp->zrr_vdev == vdev_guid) 351eda14cbcSMatt Macy return; 352eda14cbcSMatt Macy } 353eda14cbcSMatt Macy 354eda14cbcSMatt Macy zrp = fmd_hdl_alloc(hdl, sizeof (zfs_retire_repaired_t), FMD_SLEEP); 355eda14cbcSMatt Macy zrp->zrr_next = zdp->zrd_repaired; 356eda14cbcSMatt Macy zrp->zrr_pool = pool_guid; 357eda14cbcSMatt Macy zrp->zrr_vdev = vdev_guid; 358eda14cbcSMatt Macy zdp->zrd_repaired = zrp; 359eda14cbcSMatt Macy 360eda14cbcSMatt Macy fmd_hdl_debug(hdl, "marking repaired vdev %llu on pool %llu", 361eda14cbcSMatt Macy vdev_guid, pool_guid); 362eda14cbcSMatt Macy } 363eda14cbcSMatt Macy 364eda14cbcSMatt Macy static void 365eda14cbcSMatt Macy zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, 366eda14cbcSMatt Macy const char *class) 367eda14cbcSMatt Macy { 368e92ffd9bSMartin Matuska (void) ep; 369eda14cbcSMatt Macy uint64_t pool_guid, vdev_guid; 370eda14cbcSMatt Macy zpool_handle_t *zhp; 371eda14cbcSMatt Macy nvlist_t *resource, *fault; 372eda14cbcSMatt Macy nvlist_t **faults; 373eda14cbcSMatt Macy uint_t f, nfaults; 374eda14cbcSMatt Macy zfs_retire_data_t *zdp = fmd_hdl_getspecific(hdl); 375eda14cbcSMatt Macy libzfs_handle_t *zhdl = zdp->zrd_hdl; 376eda14cbcSMatt Macy boolean_t fault_device, degrade_device; 377eda14cbcSMatt Macy boolean_t is_repair; 378*15f0b8c3SMartin Matuska boolean_t l2arc = B_FALSE; 379*15f0b8c3SMartin Matuska boolean_t spare = B_FALSE; 380eda14cbcSMatt Macy char *scheme; 381eda14cbcSMatt Macy nvlist_t *vdev = NULL; 382eda14cbcSMatt Macy char *uuid; 383eda14cbcSMatt Macy int repair_done = 0; 384eda14cbcSMatt Macy boolean_t retire; 385eda14cbcSMatt Macy boolean_t is_disk; 386eda14cbcSMatt Macy vdev_aux_t aux; 387eda14cbcSMatt Macy uint64_t state = 0; 388be181ee2SMartin Matuska vdev_stat_t *vs; 389be181ee2SMartin Matuska unsigned int c; 390eda14cbcSMatt Macy 391eda14cbcSMatt Macy fmd_hdl_debug(hdl, "zfs_retire_recv: '%s'", class); 392eda14cbcSMatt Macy 393be181ee2SMartin Matuska (void) nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_STATE, 394be181ee2SMartin Matuska &state); 395eda14cbcSMatt Macy 396eda14cbcSMatt Macy /* 397eda14cbcSMatt Macy * If this is a resource notifying us of device removal then simply 398eda14cbcSMatt Macy * check for an available spare and continue unless the device is a 399eda14cbcSMatt Macy * l2arc vdev, in which case we just offline it. 400eda14cbcSMatt Macy */ 401eda14cbcSMatt Macy if (strcmp(class, "resource.fs.zfs.removed") == 0 || 402eda14cbcSMatt Macy (strcmp(class, "resource.fs.zfs.statechange") == 0 && 403184c1b94SMartin Matuska (state == VDEV_STATE_REMOVED || state == VDEV_STATE_FAULTED))) { 404eda14cbcSMatt Macy char *devtype; 405eda14cbcSMatt Macy char *devname; 406eda14cbcSMatt Macy 407*15f0b8c3SMartin Matuska if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, 408*15f0b8c3SMartin Matuska &devtype) == 0) { 409*15f0b8c3SMartin Matuska if (strcmp(devtype, VDEV_TYPE_SPARE) == 0) 410*15f0b8c3SMartin Matuska spare = B_TRUE; 411*15f0b8c3SMartin Matuska else if (strcmp(devtype, VDEV_TYPE_L2CACHE) == 0) 412*15f0b8c3SMartin Matuska l2arc = B_TRUE; 413*15f0b8c3SMartin Matuska } 414*15f0b8c3SMartin Matuska 415*15f0b8c3SMartin Matuska if (nvlist_lookup_uint64(nvl, 416*15f0b8c3SMartin Matuska FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, &vdev_guid) != 0) 417*15f0b8c3SMartin Matuska return; 418*15f0b8c3SMartin Matuska 419*15f0b8c3SMartin Matuska if (spare) { 420*15f0b8c3SMartin Matuska int nspares = find_and_remove_spares(zhdl, vdev_guid); 421*15f0b8c3SMartin Matuska fmd_hdl_debug(hdl, "%d spares removed", nspares); 422*15f0b8c3SMartin Matuska return; 423*15f0b8c3SMartin Matuska } 424*15f0b8c3SMartin Matuska 425eda14cbcSMatt Macy if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, 426*15f0b8c3SMartin Matuska &pool_guid) != 0) 427eda14cbcSMatt Macy return; 428eda14cbcSMatt Macy 429eda14cbcSMatt Macy if ((zhp = find_by_guid(zhdl, pool_guid, vdev_guid, 430eda14cbcSMatt Macy &vdev)) == NULL) 431eda14cbcSMatt Macy return; 432eda14cbcSMatt Macy 433eda14cbcSMatt Macy devname = zpool_vdev_name(NULL, zhp, vdev, B_FALSE); 434eda14cbcSMatt Macy 435be181ee2SMartin Matuska nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS, 436be181ee2SMartin Matuska (uint64_t **)&vs, &c); 437be181ee2SMartin Matuska 438be181ee2SMartin Matuska /* 439be181ee2SMartin Matuska * If state removed is requested for already removed vdev, 440be181ee2SMartin Matuska * its a loopback event from spa_async_remove(). Just 441be181ee2SMartin Matuska * ignore it. 442be181ee2SMartin Matuska */ 443be181ee2SMartin Matuska if (vs->vs_state == VDEV_STATE_REMOVED && 444be181ee2SMartin Matuska state == VDEV_STATE_REMOVED) 445be181ee2SMartin Matuska return; 446be181ee2SMartin Matuska 447be181ee2SMartin Matuska /* Remove the vdev since device is unplugged */ 448be181ee2SMartin Matuska if (l2arc || (strcmp(class, "resource.fs.zfs.removed") == 0)) { 449be181ee2SMartin Matuska int status = zpool_vdev_remove_wanted(zhp, devname); 450be181ee2SMartin Matuska fmd_hdl_debug(hdl, "zpool_vdev_remove_wanted '%s'" 451be181ee2SMartin Matuska ", ret:%d", devname, status); 452be181ee2SMartin Matuska } 453be181ee2SMartin Matuska 454be181ee2SMartin Matuska /* Replace the vdev with a spare if its not a l2arc */ 455be181ee2SMartin Matuska if (!l2arc && (!fmd_prop_get_int32(hdl, "spare_on_remove") || 456be181ee2SMartin Matuska replace_with_spare(hdl, zhp, vdev) == B_FALSE)) { 457eda14cbcSMatt Macy /* Could not handle with spare */ 458eda14cbcSMatt Macy fmd_hdl_debug(hdl, "no spare for '%s'", devname); 459eda14cbcSMatt Macy } 460eda14cbcSMatt Macy 461eda14cbcSMatt Macy free(devname); 462eda14cbcSMatt Macy zpool_close(zhp); 463eda14cbcSMatt Macy return; 464eda14cbcSMatt Macy } 465eda14cbcSMatt Macy 466eda14cbcSMatt Macy if (strcmp(class, FM_LIST_RESOLVED_CLASS) == 0) 467eda14cbcSMatt Macy return; 468eda14cbcSMatt Macy 469eda14cbcSMatt Macy /* 470180f8225SMatt Macy * Note: on Linux statechange events are more than just 471eda14cbcSMatt Macy * healthy ones so we need to confirm the actual state value. 472eda14cbcSMatt Macy */ 473eda14cbcSMatt Macy if (strcmp(class, "resource.fs.zfs.statechange") == 0 && 474eda14cbcSMatt Macy state == VDEV_STATE_HEALTHY) { 475eda14cbcSMatt Macy zfs_vdev_repair(hdl, nvl); 476eda14cbcSMatt Macy return; 477eda14cbcSMatt Macy } 478eda14cbcSMatt Macy if (strcmp(class, "sysevent.fs.zfs.vdev_remove") == 0) { 479eda14cbcSMatt Macy zfs_vdev_repair(hdl, nvl); 480eda14cbcSMatt Macy return; 481eda14cbcSMatt Macy } 482eda14cbcSMatt Macy 483eda14cbcSMatt Macy zfs_retire_clear_data(hdl, zdp); 484eda14cbcSMatt Macy 485eda14cbcSMatt Macy if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0) 486eda14cbcSMatt Macy is_repair = B_TRUE; 487eda14cbcSMatt Macy else 488eda14cbcSMatt Macy is_repair = B_FALSE; 489eda14cbcSMatt Macy 490eda14cbcSMatt Macy /* 491eda14cbcSMatt Macy * We subscribe to zfs faults as well as all repair events. 492eda14cbcSMatt Macy */ 493eda14cbcSMatt Macy if (nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST, 494eda14cbcSMatt Macy &faults, &nfaults) != 0) 495eda14cbcSMatt Macy return; 496eda14cbcSMatt Macy 497eda14cbcSMatt Macy for (f = 0; f < nfaults; f++) { 498eda14cbcSMatt Macy fault = faults[f]; 499eda14cbcSMatt Macy 500eda14cbcSMatt Macy fault_device = B_FALSE; 501eda14cbcSMatt Macy degrade_device = B_FALSE; 502eda14cbcSMatt Macy is_disk = B_FALSE; 503eda14cbcSMatt Macy 504eda14cbcSMatt Macy if (nvlist_lookup_boolean_value(fault, FM_SUSPECT_RETIRE, 505eda14cbcSMatt Macy &retire) == 0 && retire == 0) 506eda14cbcSMatt Macy continue; 507eda14cbcSMatt Macy 508eda14cbcSMatt Macy /* 509eda14cbcSMatt Macy * While we subscribe to fault.fs.zfs.*, we only take action 510eda14cbcSMatt Macy * for faults targeting a specific vdev (open failure or SERD 511eda14cbcSMatt Macy * failure). We also subscribe to fault.io.* events, so that 512eda14cbcSMatt Macy * faulty disks will be faulted in the ZFS configuration. 513eda14cbcSMatt Macy */ 514eda14cbcSMatt Macy if (fmd_nvl_class_match(hdl, fault, "fault.fs.zfs.vdev.io")) { 515eda14cbcSMatt Macy fault_device = B_TRUE; 516eda14cbcSMatt Macy } else if (fmd_nvl_class_match(hdl, fault, 517eda14cbcSMatt Macy "fault.fs.zfs.vdev.checksum")) { 518eda14cbcSMatt Macy degrade_device = B_TRUE; 519eda14cbcSMatt Macy } else if (fmd_nvl_class_match(hdl, fault, 520eda14cbcSMatt Macy "fault.fs.zfs.device")) { 521eda14cbcSMatt Macy fault_device = B_FALSE; 522eda14cbcSMatt Macy } else if (fmd_nvl_class_match(hdl, fault, "fault.io.*")) { 523eda14cbcSMatt Macy is_disk = B_TRUE; 524eda14cbcSMatt Macy fault_device = B_TRUE; 525eda14cbcSMatt Macy } else { 526eda14cbcSMatt Macy continue; 527eda14cbcSMatt Macy } 528eda14cbcSMatt Macy 529eda14cbcSMatt Macy if (is_disk) { 530eda14cbcSMatt Macy continue; 531eda14cbcSMatt Macy } else { 532eda14cbcSMatt Macy /* 533eda14cbcSMatt Macy * This is a ZFS fault. Lookup the resource, and 534eda14cbcSMatt Macy * attempt to find the matching vdev. 535eda14cbcSMatt Macy */ 536eda14cbcSMatt Macy if (nvlist_lookup_nvlist(fault, FM_FAULT_RESOURCE, 537eda14cbcSMatt Macy &resource) != 0 || 538eda14cbcSMatt Macy nvlist_lookup_string(resource, FM_FMRI_SCHEME, 539eda14cbcSMatt Macy &scheme) != 0) 540eda14cbcSMatt Macy continue; 541eda14cbcSMatt Macy 542eda14cbcSMatt Macy if (strcmp(scheme, FM_FMRI_SCHEME_ZFS) != 0) 543eda14cbcSMatt Macy continue; 544eda14cbcSMatt Macy 545eda14cbcSMatt Macy if (nvlist_lookup_uint64(resource, FM_FMRI_ZFS_POOL, 546eda14cbcSMatt Macy &pool_guid) != 0) 547eda14cbcSMatt Macy continue; 548eda14cbcSMatt Macy 549eda14cbcSMatt Macy if (nvlist_lookup_uint64(resource, FM_FMRI_ZFS_VDEV, 550eda14cbcSMatt Macy &vdev_guid) != 0) { 551eda14cbcSMatt Macy if (is_repair) 552eda14cbcSMatt Macy vdev_guid = 0; 553eda14cbcSMatt Macy else 554eda14cbcSMatt Macy continue; 555eda14cbcSMatt Macy } 556eda14cbcSMatt Macy 557eda14cbcSMatt Macy if ((zhp = find_by_guid(zhdl, pool_guid, vdev_guid, 558eda14cbcSMatt Macy &vdev)) == NULL) 559eda14cbcSMatt Macy continue; 560eda14cbcSMatt Macy 561eda14cbcSMatt Macy aux = VDEV_AUX_ERR_EXCEEDED; 562eda14cbcSMatt Macy } 563eda14cbcSMatt Macy 564eda14cbcSMatt Macy if (vdev_guid == 0) { 565eda14cbcSMatt Macy /* 566eda14cbcSMatt Macy * For pool-level repair events, clear the entire pool. 567eda14cbcSMatt Macy */ 568eda14cbcSMatt Macy fmd_hdl_debug(hdl, "zpool_clear of pool '%s'", 569eda14cbcSMatt Macy zpool_get_name(zhp)); 570eda14cbcSMatt Macy (void) zpool_clear(zhp, NULL, NULL); 571eda14cbcSMatt Macy zpool_close(zhp); 572eda14cbcSMatt Macy continue; 573eda14cbcSMatt Macy } 574eda14cbcSMatt Macy 575eda14cbcSMatt Macy /* 576eda14cbcSMatt Macy * If this is a repair event, then mark the vdev as repaired and 577eda14cbcSMatt Macy * continue. 578eda14cbcSMatt Macy */ 579eda14cbcSMatt Macy if (is_repair) { 580eda14cbcSMatt Macy repair_done = 1; 581eda14cbcSMatt Macy fmd_hdl_debug(hdl, "zpool_clear of pool '%s' vdev %llu", 582eda14cbcSMatt Macy zpool_get_name(zhp), vdev_guid); 583eda14cbcSMatt Macy (void) zpool_vdev_clear(zhp, vdev_guid); 584eda14cbcSMatt Macy zpool_close(zhp); 585eda14cbcSMatt Macy continue; 586eda14cbcSMatt Macy } 587eda14cbcSMatt Macy 588eda14cbcSMatt Macy /* 589eda14cbcSMatt Macy * Actively fault the device if needed. 590eda14cbcSMatt Macy */ 591eda14cbcSMatt Macy if (fault_device) 592eda14cbcSMatt Macy (void) zpool_vdev_fault(zhp, vdev_guid, aux); 593eda14cbcSMatt Macy if (degrade_device) 594eda14cbcSMatt Macy (void) zpool_vdev_degrade(zhp, vdev_guid, aux); 595eda14cbcSMatt Macy 596eda14cbcSMatt Macy if (fault_device || degrade_device) 597eda14cbcSMatt Macy fmd_hdl_debug(hdl, "zpool_vdev_%s: vdev %llu on '%s'", 598eda14cbcSMatt Macy fault_device ? "fault" : "degrade", vdev_guid, 599eda14cbcSMatt Macy zpool_get_name(zhp)); 600eda14cbcSMatt Macy 601eda14cbcSMatt Macy /* 602eda14cbcSMatt Macy * Attempt to substitute a hot spare. 603eda14cbcSMatt Macy */ 604eda14cbcSMatt Macy (void) replace_with_spare(hdl, zhp, vdev); 6057877fdebSMatt Macy 606eda14cbcSMatt Macy zpool_close(zhp); 607eda14cbcSMatt Macy } 608eda14cbcSMatt Macy 609eda14cbcSMatt Macy if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0 && repair_done && 610eda14cbcSMatt Macy nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0) 611eda14cbcSMatt Macy fmd_case_uuresolved(hdl, uuid); 612eda14cbcSMatt Macy } 613eda14cbcSMatt Macy 614eda14cbcSMatt Macy static const fmd_hdl_ops_t fmd_ops = { 615eda14cbcSMatt Macy zfs_retire_recv, /* fmdo_recv */ 616eda14cbcSMatt Macy NULL, /* fmdo_timeout */ 617eda14cbcSMatt Macy NULL, /* fmdo_close */ 618eda14cbcSMatt Macy NULL, /* fmdo_stats */ 619eda14cbcSMatt Macy NULL, /* fmdo_gc */ 620eda14cbcSMatt Macy }; 621eda14cbcSMatt Macy 622eda14cbcSMatt Macy static const fmd_prop_t fmd_props[] = { 623eda14cbcSMatt Macy { "spare_on_remove", FMD_TYPE_BOOL, "true" }, 624eda14cbcSMatt Macy { NULL, 0, NULL } 625eda14cbcSMatt Macy }; 626eda14cbcSMatt Macy 627eda14cbcSMatt Macy static const fmd_hdl_info_t fmd_info = { 628eda14cbcSMatt Macy "ZFS Retire Agent", "1.0", &fmd_ops, fmd_props 629eda14cbcSMatt Macy }; 630eda14cbcSMatt Macy 631eda14cbcSMatt Macy void 632eda14cbcSMatt Macy _zfs_retire_init(fmd_hdl_t *hdl) 633eda14cbcSMatt Macy { 634eda14cbcSMatt Macy zfs_retire_data_t *zdp; 635eda14cbcSMatt Macy libzfs_handle_t *zhdl; 636eda14cbcSMatt Macy 637eda14cbcSMatt Macy if ((zhdl = libzfs_init()) == NULL) 638eda14cbcSMatt Macy return; 639eda14cbcSMatt Macy 640eda14cbcSMatt Macy if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) { 641eda14cbcSMatt Macy libzfs_fini(zhdl); 642eda14cbcSMatt Macy return; 643eda14cbcSMatt Macy } 644eda14cbcSMatt Macy 645eda14cbcSMatt Macy zdp = fmd_hdl_zalloc(hdl, sizeof (zfs_retire_data_t), FMD_SLEEP); 646eda14cbcSMatt Macy zdp->zrd_hdl = zhdl; 647eda14cbcSMatt Macy 648eda14cbcSMatt Macy fmd_hdl_setspecific(hdl, zdp); 649eda14cbcSMatt Macy } 650eda14cbcSMatt Macy 651eda14cbcSMatt Macy void 652eda14cbcSMatt Macy _zfs_retire_fini(fmd_hdl_t *hdl) 653eda14cbcSMatt Macy { 654eda14cbcSMatt Macy zfs_retire_data_t *zdp = fmd_hdl_getspecific(hdl); 655eda14cbcSMatt Macy 656eda14cbcSMatt Macy if (zdp != NULL) { 657eda14cbcSMatt Macy zfs_retire_clear_data(hdl, zdp); 658eda14cbcSMatt Macy libzfs_fini(zdp->zrd_hdl); 659eda14cbcSMatt Macy fmd_hdl_free(hdl, zdp, sizeof (zfs_retire_data_t)); 660eda14cbcSMatt Macy } 661eda14cbcSMatt Macy } 662