1eda14cbcSMatt Macy /* 2eda14cbcSMatt Macy * CDDL HEADER START 3eda14cbcSMatt Macy * 4eda14cbcSMatt Macy * The contents of this file are subject to the terms of the 5eda14cbcSMatt Macy * Common Development and Distribution License (the "License"). 6eda14cbcSMatt Macy * You may not use this file except in compliance with the License. 7eda14cbcSMatt Macy * 8eda14cbcSMatt Macy * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9271171e0SMartin Matuska * or https://opensource.org/licenses/CDDL-1.0. 10eda14cbcSMatt Macy * See the License for the specific language governing permissions 11eda14cbcSMatt Macy * and limitations under the License. 12eda14cbcSMatt Macy * 13eda14cbcSMatt Macy * When distributing Covered Code, include this CDDL HEADER in each 14eda14cbcSMatt Macy * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15eda14cbcSMatt Macy * If applicable, add the following below this CDDL HEADER, with the 16eda14cbcSMatt Macy * fields enclosed by brackets "[]" replaced with your own identifying 17eda14cbcSMatt Macy * information: Portions Copyright [yyyy] [name of copyright owner] 18eda14cbcSMatt Macy * 19eda14cbcSMatt Macy * CDDL HEADER END 20eda14cbcSMatt Macy */ 21eda14cbcSMatt Macy 22eda14cbcSMatt Macy /* 23eda14cbcSMatt Macy * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 24eda14cbcSMatt Macy * Copyright 2015 Nexenta Systems, Inc. All rights reserved. 25eda14cbcSMatt Macy * Copyright (c) 2016, Intel Corporation. 26e2257b31SMartin Matuska * Copyright (c) 2023, Klara Inc. 27eda14cbcSMatt Macy */ 28eda14cbcSMatt Macy 29eda14cbcSMatt Macy #include <stddef.h> 30eda14cbcSMatt Macy #include <string.h> 31eda14cbcSMatt Macy #include <libuutil.h> 32eda14cbcSMatt Macy #include <libzfs.h> 33eda14cbcSMatt Macy #include <sys/types.h> 34eda14cbcSMatt Macy #include <sys/time.h> 35eda14cbcSMatt Macy #include <sys/fs/zfs.h> 36eda14cbcSMatt Macy #include <sys/fm/protocol.h> 37eda14cbcSMatt Macy #include <sys/fm/fs/zfs.h> 38e3aa18adSMartin Matuska #include <sys/zio.h> 39eda14cbcSMatt Macy 40eda14cbcSMatt Macy #include "zfs_agents.h" 41eda14cbcSMatt Macy #include "fmd_api.h" 42eda14cbcSMatt Macy 43eda14cbcSMatt Macy /* 4415f0b8c3SMartin Matuska * Default values for the serd engine when processing checksum or io errors. The 4515f0b8c3SMartin Matuska * semantics are N <events> in T <seconds>. 4615f0b8c3SMartin Matuska */ 4715f0b8c3SMartin Matuska #define DEFAULT_CHECKSUM_N 10 /* events */ 4815f0b8c3SMartin Matuska #define DEFAULT_CHECKSUM_T 600 /* seconds */ 4915f0b8c3SMartin Matuska #define DEFAULT_IO_N 10 /* events */ 5015f0b8c3SMartin Matuska #define DEFAULT_IO_T 600 /* seconds */ 51e2257b31SMartin Matuska #define DEFAULT_SLOW_IO_N 10 /* events */ 52e2257b31SMartin Matuska #define DEFAULT_SLOW_IO_T 30 /* seconds */ 53e2257b31SMartin Matuska 54e2257b31SMartin Matuska #define CASE_GC_TIMEOUT_SECS 43200 /* 12 hours */ 5515f0b8c3SMartin Matuska 5615f0b8c3SMartin Matuska /* 57e2257b31SMartin Matuska * Our serd engines are named in the following format: 58e2257b31SMartin Matuska * 'zfs_<pool_guid>_<vdev_guid>_{checksum,io,slow_io}' 59e2257b31SMartin Matuska * This #define reserves enough space for two 64-bit hex values plus the 60e2257b31SMartin Matuska * length of the longest string. 61eda14cbcSMatt Macy */ 62eda14cbcSMatt Macy #define MAX_SERDLEN (16 * 2 + sizeof ("zfs___checksum")) 63eda14cbcSMatt Macy 64eda14cbcSMatt Macy /* 65eda14cbcSMatt Macy * On-disk case structure. This must maintain backwards compatibility with 66eda14cbcSMatt Macy * previous versions of the DE. By default, any members appended to the end 67eda14cbcSMatt Macy * will be filled with zeros if they don't exist in a previous version. 68eda14cbcSMatt Macy */ 69eda14cbcSMatt Macy typedef struct zfs_case_data { 70eda14cbcSMatt Macy uint64_t zc_version; 71eda14cbcSMatt Macy uint64_t zc_ena; 72eda14cbcSMatt Macy uint64_t zc_pool_guid; 73eda14cbcSMatt Macy uint64_t zc_vdev_guid; 74*7a7741afSMartin Matuska uint64_t zc_parent_guid; 75eda14cbcSMatt Macy int zc_pool_state; 76eda14cbcSMatt Macy char zc_serd_checksum[MAX_SERDLEN]; 77eda14cbcSMatt Macy char zc_serd_io[MAX_SERDLEN]; 78e2257b31SMartin Matuska char zc_serd_slow_io[MAX_SERDLEN]; 79eda14cbcSMatt Macy int zc_has_remove_timer; 80eda14cbcSMatt Macy } zfs_case_data_t; 81eda14cbcSMatt Macy 82eda14cbcSMatt Macy /* 83eda14cbcSMatt Macy * Time-of-day 84eda14cbcSMatt Macy */ 85eda14cbcSMatt Macy typedef struct er_timeval { 86eda14cbcSMatt Macy uint64_t ertv_sec; 87eda14cbcSMatt Macy uint64_t ertv_nsec; 88eda14cbcSMatt Macy } er_timeval_t; 89eda14cbcSMatt Macy 90eda14cbcSMatt Macy /* 91eda14cbcSMatt Macy * In-core case structure. 92eda14cbcSMatt Macy */ 93eda14cbcSMatt Macy typedef struct zfs_case { 94eda14cbcSMatt Macy boolean_t zc_present; 95eda14cbcSMatt Macy uint32_t zc_version; 96eda14cbcSMatt Macy zfs_case_data_t zc_data; 97eda14cbcSMatt Macy fmd_case_t *zc_case; 98eda14cbcSMatt Macy uu_list_node_t zc_node; 99eda14cbcSMatt Macy id_t zc_remove_timer; 100eda14cbcSMatt Macy char *zc_fru; 101eda14cbcSMatt Macy er_timeval_t zc_when; 102eda14cbcSMatt Macy } zfs_case_t; 103eda14cbcSMatt Macy 104eda14cbcSMatt Macy #define CASE_DATA "data" 105eda14cbcSMatt Macy #define CASE_FRU "fru" 106eda14cbcSMatt Macy #define CASE_DATA_VERSION_INITIAL 1 107eda14cbcSMatt Macy #define CASE_DATA_VERSION_SERD 2 108eda14cbcSMatt Macy 109eda14cbcSMatt Macy typedef struct zfs_de_stats { 110eda14cbcSMatt Macy fmd_stat_t old_drops; 111eda14cbcSMatt Macy fmd_stat_t dev_drops; 112eda14cbcSMatt Macy fmd_stat_t vdev_drops; 113eda14cbcSMatt Macy fmd_stat_t import_drops; 114eda14cbcSMatt Macy fmd_stat_t resource_drops; 115eda14cbcSMatt Macy } zfs_de_stats_t; 116eda14cbcSMatt Macy 117eda14cbcSMatt Macy zfs_de_stats_t zfs_stats = { 118eda14cbcSMatt Macy { "old_drops", FMD_TYPE_UINT64, "ereports dropped (from before load)" }, 119eda14cbcSMatt Macy { "dev_drops", FMD_TYPE_UINT64, "ereports dropped (dev during open)"}, 120eda14cbcSMatt Macy { "vdev_drops", FMD_TYPE_UINT64, "ereports dropped (weird vdev types)"}, 121eda14cbcSMatt Macy { "import_drops", FMD_TYPE_UINT64, "ereports dropped (during import)" }, 122eda14cbcSMatt Macy { "resource_drops", FMD_TYPE_UINT64, "resource related ereports" } 123eda14cbcSMatt Macy }; 124eda14cbcSMatt Macy 125e2257b31SMartin Matuska /* wait 15 seconds after a removal */ 126e2257b31SMartin Matuska static hrtime_t zfs_remove_timeout = SEC2NSEC(15); 127eda14cbcSMatt Macy 128eda14cbcSMatt Macy uu_list_pool_t *zfs_case_pool; 129eda14cbcSMatt Macy uu_list_t *zfs_cases; 130eda14cbcSMatt Macy 131eda14cbcSMatt Macy #define ZFS_MAKE_RSRC(type) \ 132eda14cbcSMatt Macy FM_RSRC_CLASS "." ZFS_ERROR_CLASS "." type 133eda14cbcSMatt Macy #define ZFS_MAKE_EREPORT(type) \ 134eda14cbcSMatt Macy FM_EREPORT_CLASS "." ZFS_ERROR_CLASS "." type 135eda14cbcSMatt Macy 136e2257b31SMartin Matuska static void zfs_purge_cases(fmd_hdl_t *hdl); 137e2257b31SMartin Matuska 138eda14cbcSMatt Macy /* 139eda14cbcSMatt Macy * Write out the persistent representation of an active case. 140eda14cbcSMatt Macy */ 141eda14cbcSMatt Macy static void 142e92ffd9bSMartin Matuska zfs_case_serialize(zfs_case_t *zcp) 143eda14cbcSMatt Macy { 144eda14cbcSMatt Macy zcp->zc_data.zc_version = CASE_DATA_VERSION_SERD; 145eda14cbcSMatt Macy } 146eda14cbcSMatt Macy 147eda14cbcSMatt Macy /* 148eda14cbcSMatt Macy * Read back the persistent representation of an active case. 149eda14cbcSMatt Macy */ 150eda14cbcSMatt Macy static zfs_case_t * 151eda14cbcSMatt Macy zfs_case_unserialize(fmd_hdl_t *hdl, fmd_case_t *cp) 152eda14cbcSMatt Macy { 153eda14cbcSMatt Macy zfs_case_t *zcp; 154eda14cbcSMatt Macy 155eda14cbcSMatt Macy zcp = fmd_hdl_zalloc(hdl, sizeof (zfs_case_t), FMD_SLEEP); 156eda14cbcSMatt Macy zcp->zc_case = cp; 157eda14cbcSMatt Macy 158eda14cbcSMatt Macy fmd_buf_read(hdl, cp, CASE_DATA, &zcp->zc_data, 159eda14cbcSMatt Macy sizeof (zcp->zc_data)); 160eda14cbcSMatt Macy 161eda14cbcSMatt Macy if (zcp->zc_data.zc_version > CASE_DATA_VERSION_SERD) { 162eda14cbcSMatt Macy fmd_hdl_free(hdl, zcp, sizeof (zfs_case_t)); 163eda14cbcSMatt Macy return (NULL); 164eda14cbcSMatt Macy } 165eda14cbcSMatt Macy 166eda14cbcSMatt Macy /* 167eda14cbcSMatt Macy * fmd_buf_read() will have already zeroed out the remainder of the 168eda14cbcSMatt Macy * buffer, so we don't have to do anything special if the version 169eda14cbcSMatt Macy * doesn't include the SERD engine name. 170eda14cbcSMatt Macy */ 171eda14cbcSMatt Macy 172eda14cbcSMatt Macy if (zcp->zc_data.zc_has_remove_timer) 173eda14cbcSMatt Macy zcp->zc_remove_timer = fmd_timer_install(hdl, zcp, 174eda14cbcSMatt Macy NULL, zfs_remove_timeout); 175eda14cbcSMatt Macy 176eda14cbcSMatt Macy uu_list_node_init(zcp, &zcp->zc_node, zfs_case_pool); 177eda14cbcSMatt Macy (void) uu_list_insert_before(zfs_cases, NULL, zcp); 178eda14cbcSMatt Macy 179eda14cbcSMatt Macy fmd_case_setspecific(hdl, cp, zcp); 180eda14cbcSMatt Macy 181eda14cbcSMatt Macy return (zcp); 182eda14cbcSMatt Macy } 183eda14cbcSMatt Macy 184eda14cbcSMatt Macy /* 185*7a7741afSMartin Matuska * Return count of other unique SERD cases under same vdev parent 186e2257b31SMartin Matuska */ 187e2257b31SMartin Matuska static uint_t 188*7a7741afSMartin Matuska zfs_other_serd_cases(fmd_hdl_t *hdl, const zfs_case_data_t *zfs_case) 189e2257b31SMartin Matuska { 190e2257b31SMartin Matuska zfs_case_t *zcp; 191e2257b31SMartin Matuska uint_t cases = 0; 192e2257b31SMartin Matuska static hrtime_t next_check = 0; 193e2257b31SMartin Matuska 194e2257b31SMartin Matuska /* 195e2257b31SMartin Matuska * Note that plumbing in some external GC would require adding locking, 196e2257b31SMartin Matuska * since most of this module code is not thread safe and assumes there 197e2257b31SMartin Matuska * is only one thread running against the module. So we perform GC here 198e2257b31SMartin Matuska * inline periodically so that future delay induced faults will be 199e2257b31SMartin Matuska * possible once the issue causing multiple vdev delays is resolved. 200e2257b31SMartin Matuska */ 201e2257b31SMartin Matuska if (gethrestime_sec() > next_check) { 202e2257b31SMartin Matuska /* Periodically purge old SERD entries and stale cases */ 203e2257b31SMartin Matuska fmd_serd_gc(hdl); 204e2257b31SMartin Matuska zfs_purge_cases(hdl); 205e2257b31SMartin Matuska next_check = gethrestime_sec() + CASE_GC_TIMEOUT_SECS; 206e2257b31SMartin Matuska } 207e2257b31SMartin Matuska 208e2257b31SMartin Matuska for (zcp = uu_list_first(zfs_cases); zcp != NULL; 209e2257b31SMartin Matuska zcp = uu_list_next(zfs_cases, zcp)) { 210*7a7741afSMartin Matuska zfs_case_data_t *zcd = &zcp->zc_data; 211*7a7741afSMartin Matuska 212*7a7741afSMartin Matuska /* 213*7a7741afSMartin Matuska * must be same pool and parent vdev but different leaf vdev 214*7a7741afSMartin Matuska */ 215*7a7741afSMartin Matuska if (zcd->zc_pool_guid != zfs_case->zc_pool_guid || 216*7a7741afSMartin Matuska zcd->zc_parent_guid != zfs_case->zc_parent_guid || 217*7a7741afSMartin Matuska zcd->zc_vdev_guid == zfs_case->zc_vdev_guid) { 218*7a7741afSMartin Matuska continue; 219*7a7741afSMartin Matuska } 220*7a7741afSMartin Matuska 221*7a7741afSMartin Matuska /* 222*7a7741afSMartin Matuska * Check if there is another active serd case besides zfs_case 223*7a7741afSMartin Matuska * 224*7a7741afSMartin Matuska * Only one serd engine will be assigned to the case 225*7a7741afSMartin Matuska */ 226*7a7741afSMartin Matuska if (zcd->zc_serd_checksum[0] == zfs_case->zc_serd_checksum[0] && 227*7a7741afSMartin Matuska fmd_serd_active(hdl, zcd->zc_serd_checksum)) { 228*7a7741afSMartin Matuska cases++; 229*7a7741afSMartin Matuska } 230*7a7741afSMartin Matuska if (zcd->zc_serd_io[0] == zfs_case->zc_serd_io[0] && 231*7a7741afSMartin Matuska fmd_serd_active(hdl, zcd->zc_serd_io)) { 232*7a7741afSMartin Matuska cases++; 233*7a7741afSMartin Matuska } 234*7a7741afSMartin Matuska if (zcd->zc_serd_slow_io[0] == zfs_case->zc_serd_slow_io[0] && 235*7a7741afSMartin Matuska fmd_serd_active(hdl, zcd->zc_serd_slow_io)) { 236e2257b31SMartin Matuska cases++; 237e2257b31SMartin Matuska } 238e2257b31SMartin Matuska } 239e2257b31SMartin Matuska return (cases); 240e2257b31SMartin Matuska } 241e2257b31SMartin Matuska 242e2257b31SMartin Matuska /* 243eda14cbcSMatt Macy * Iterate over any active cases. If any cases are associated with a pool or 244eda14cbcSMatt Macy * vdev which is no longer present on the system, close the associated case. 245eda14cbcSMatt Macy */ 246eda14cbcSMatt Macy static void 247eda14cbcSMatt Macy zfs_mark_vdev(uint64_t pool_guid, nvlist_t *vd, er_timeval_t *loaded) 248eda14cbcSMatt Macy { 249eda14cbcSMatt Macy uint64_t vdev_guid = 0; 250eda14cbcSMatt Macy uint_t c, children; 251eda14cbcSMatt Macy nvlist_t **child; 252eda14cbcSMatt Macy zfs_case_t *zcp; 253eda14cbcSMatt Macy 254eda14cbcSMatt Macy (void) nvlist_lookup_uint64(vd, ZPOOL_CONFIG_GUID, &vdev_guid); 255eda14cbcSMatt Macy 256eda14cbcSMatt Macy /* 257eda14cbcSMatt Macy * Mark any cases associated with this (pool, vdev) pair. 258eda14cbcSMatt Macy */ 259eda14cbcSMatt Macy for (zcp = uu_list_first(zfs_cases); zcp != NULL; 260eda14cbcSMatt Macy zcp = uu_list_next(zfs_cases, zcp)) { 261eda14cbcSMatt Macy if (zcp->zc_data.zc_pool_guid == pool_guid && 262eda14cbcSMatt Macy zcp->zc_data.zc_vdev_guid == vdev_guid) { 263eda14cbcSMatt Macy zcp->zc_present = B_TRUE; 264eda14cbcSMatt Macy zcp->zc_when = *loaded; 265eda14cbcSMatt Macy } 266eda14cbcSMatt Macy } 267eda14cbcSMatt Macy 268eda14cbcSMatt Macy /* 269eda14cbcSMatt Macy * Iterate over all children. 270eda14cbcSMatt Macy */ 271eda14cbcSMatt Macy if (nvlist_lookup_nvlist_array(vd, ZPOOL_CONFIG_CHILDREN, &child, 272eda14cbcSMatt Macy &children) == 0) { 273eda14cbcSMatt Macy for (c = 0; c < children; c++) 274eda14cbcSMatt Macy zfs_mark_vdev(pool_guid, child[c], loaded); 275eda14cbcSMatt Macy } 276eda14cbcSMatt Macy 277eda14cbcSMatt Macy if (nvlist_lookup_nvlist_array(vd, ZPOOL_CONFIG_L2CACHE, &child, 278eda14cbcSMatt Macy &children) == 0) { 279eda14cbcSMatt Macy for (c = 0; c < children; c++) 280eda14cbcSMatt Macy zfs_mark_vdev(pool_guid, child[c], loaded); 281eda14cbcSMatt Macy } 282eda14cbcSMatt Macy 283eda14cbcSMatt Macy if (nvlist_lookup_nvlist_array(vd, ZPOOL_CONFIG_SPARES, &child, 284eda14cbcSMatt Macy &children) == 0) { 285eda14cbcSMatt Macy for (c = 0; c < children; c++) 286eda14cbcSMatt Macy zfs_mark_vdev(pool_guid, child[c], loaded); 287eda14cbcSMatt Macy } 288eda14cbcSMatt Macy } 289eda14cbcSMatt Macy 290eda14cbcSMatt Macy static int 291eda14cbcSMatt Macy zfs_mark_pool(zpool_handle_t *zhp, void *unused) 292eda14cbcSMatt Macy { 293e92ffd9bSMartin Matuska (void) unused; 294eda14cbcSMatt Macy zfs_case_t *zcp; 295eda14cbcSMatt Macy uint64_t pool_guid; 296eda14cbcSMatt Macy uint64_t *tod; 297eda14cbcSMatt Macy er_timeval_t loaded = { 0 }; 298eda14cbcSMatt Macy nvlist_t *config, *vd; 299eda14cbcSMatt Macy uint_t nelem = 0; 300eda14cbcSMatt Macy int ret; 301eda14cbcSMatt Macy 302eda14cbcSMatt Macy pool_guid = zpool_get_prop_int(zhp, ZPOOL_PROP_GUID, NULL); 303eda14cbcSMatt Macy /* 304eda14cbcSMatt Macy * Mark any cases associated with just this pool. 305eda14cbcSMatt Macy */ 306eda14cbcSMatt Macy for (zcp = uu_list_first(zfs_cases); zcp != NULL; 307eda14cbcSMatt Macy zcp = uu_list_next(zfs_cases, zcp)) { 308eda14cbcSMatt Macy if (zcp->zc_data.zc_pool_guid == pool_guid && 309eda14cbcSMatt Macy zcp->zc_data.zc_vdev_guid == 0) 310eda14cbcSMatt Macy zcp->zc_present = B_TRUE; 311eda14cbcSMatt Macy } 312eda14cbcSMatt Macy 313eda14cbcSMatt Macy if ((config = zpool_get_config(zhp, NULL)) == NULL) { 314eda14cbcSMatt Macy zpool_close(zhp); 315eda14cbcSMatt Macy return (-1); 316eda14cbcSMatt Macy } 317eda14cbcSMatt Macy 318eda14cbcSMatt Macy (void) nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_LOADED_TIME, 319eda14cbcSMatt Macy &tod, &nelem); 320eda14cbcSMatt Macy if (nelem == 2) { 321eda14cbcSMatt Macy loaded.ertv_sec = tod[0]; 322eda14cbcSMatt Macy loaded.ertv_nsec = tod[1]; 323eda14cbcSMatt Macy for (zcp = uu_list_first(zfs_cases); zcp != NULL; 324eda14cbcSMatt Macy zcp = uu_list_next(zfs_cases, zcp)) { 325eda14cbcSMatt Macy if (zcp->zc_data.zc_pool_guid == pool_guid && 326eda14cbcSMatt Macy zcp->zc_data.zc_vdev_guid == 0) { 327eda14cbcSMatt Macy zcp->zc_when = loaded; 328eda14cbcSMatt Macy } 329eda14cbcSMatt Macy } 330eda14cbcSMatt Macy } 331eda14cbcSMatt Macy 332eda14cbcSMatt Macy ret = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &vd); 333eda14cbcSMatt Macy if (ret) { 334eda14cbcSMatt Macy zpool_close(zhp); 335eda14cbcSMatt Macy return (-1); 336eda14cbcSMatt Macy } 337eda14cbcSMatt Macy 338eda14cbcSMatt Macy zfs_mark_vdev(pool_guid, vd, &loaded); 339eda14cbcSMatt Macy 340eda14cbcSMatt Macy zpool_close(zhp); 341eda14cbcSMatt Macy 342eda14cbcSMatt Macy return (0); 343eda14cbcSMatt Macy } 344eda14cbcSMatt Macy 345eda14cbcSMatt Macy struct load_time_arg { 346eda14cbcSMatt Macy uint64_t lt_guid; 347eda14cbcSMatt Macy er_timeval_t *lt_time; 348eda14cbcSMatt Macy boolean_t lt_found; 349eda14cbcSMatt Macy }; 350eda14cbcSMatt Macy 351eda14cbcSMatt Macy static int 352eda14cbcSMatt Macy zpool_find_load_time(zpool_handle_t *zhp, void *arg) 353eda14cbcSMatt Macy { 354eda14cbcSMatt Macy struct load_time_arg *lta = arg; 355eda14cbcSMatt Macy uint64_t pool_guid; 356eda14cbcSMatt Macy uint64_t *tod; 357eda14cbcSMatt Macy nvlist_t *config; 358eda14cbcSMatt Macy uint_t nelem; 359eda14cbcSMatt Macy 360eda14cbcSMatt Macy if (lta->lt_found) { 361eda14cbcSMatt Macy zpool_close(zhp); 362eda14cbcSMatt Macy return (0); 363eda14cbcSMatt Macy } 364eda14cbcSMatt Macy 365eda14cbcSMatt Macy pool_guid = zpool_get_prop_int(zhp, ZPOOL_PROP_GUID, NULL); 366eda14cbcSMatt Macy if (pool_guid != lta->lt_guid) { 367eda14cbcSMatt Macy zpool_close(zhp); 368eda14cbcSMatt Macy return (0); 369eda14cbcSMatt Macy } 370eda14cbcSMatt Macy 371eda14cbcSMatt Macy if ((config = zpool_get_config(zhp, NULL)) == NULL) { 372eda14cbcSMatt Macy zpool_close(zhp); 373eda14cbcSMatt Macy return (-1); 374eda14cbcSMatt Macy } 375eda14cbcSMatt Macy 376eda14cbcSMatt Macy if (nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_LOADED_TIME, 377eda14cbcSMatt Macy &tod, &nelem) == 0 && nelem == 2) { 378eda14cbcSMatt Macy lta->lt_found = B_TRUE; 379eda14cbcSMatt Macy lta->lt_time->ertv_sec = tod[0]; 380eda14cbcSMatt Macy lta->lt_time->ertv_nsec = tod[1]; 381eda14cbcSMatt Macy } 382eda14cbcSMatt Macy 383eda14cbcSMatt Macy zpool_close(zhp); 384eda14cbcSMatt Macy 385eda14cbcSMatt Macy return (0); 386eda14cbcSMatt Macy } 387eda14cbcSMatt Macy 388eda14cbcSMatt Macy static void 389eda14cbcSMatt Macy zfs_purge_cases(fmd_hdl_t *hdl) 390eda14cbcSMatt Macy { 391eda14cbcSMatt Macy zfs_case_t *zcp; 392eda14cbcSMatt Macy uu_list_walk_t *walk; 393eda14cbcSMatt Macy libzfs_handle_t *zhdl = fmd_hdl_getspecific(hdl); 394eda14cbcSMatt Macy 395eda14cbcSMatt Macy /* 396eda14cbcSMatt Macy * There is no way to open a pool by GUID, or lookup a vdev by GUID. No 397eda14cbcSMatt Macy * matter what we do, we're going to have to stomach an O(vdevs * cases) 398eda14cbcSMatt Macy * algorithm. In reality, both quantities are likely so small that 399eda14cbcSMatt Macy * neither will matter. Given that iterating over pools is more 400eda14cbcSMatt Macy * expensive than iterating over the in-memory case list, we opt for a 401eda14cbcSMatt Macy * 'present' flag in each case that starts off cleared. We then iterate 402eda14cbcSMatt Macy * over all pools, marking those that are still present, and removing 403eda14cbcSMatt Macy * those that aren't found. 404eda14cbcSMatt Macy * 405eda14cbcSMatt Macy * Note that we could also construct an FMRI and rely on 406eda14cbcSMatt Macy * fmd_nvl_fmri_present(), but this would end up doing the same search. 407eda14cbcSMatt Macy */ 408eda14cbcSMatt Macy 409eda14cbcSMatt Macy /* 410eda14cbcSMatt Macy * Mark the cases as not present. 411eda14cbcSMatt Macy */ 412eda14cbcSMatt Macy for (zcp = uu_list_first(zfs_cases); zcp != NULL; 413eda14cbcSMatt Macy zcp = uu_list_next(zfs_cases, zcp)) 414eda14cbcSMatt Macy zcp->zc_present = B_FALSE; 415eda14cbcSMatt Macy 416eda14cbcSMatt Macy /* 417eda14cbcSMatt Macy * Iterate over all pools and mark the pools and vdevs found. If this 418eda14cbcSMatt Macy * fails (most probably because we're out of memory), then don't close 419eda14cbcSMatt Macy * any of the cases and we cannot be sure they are accurate. 420eda14cbcSMatt Macy */ 421eda14cbcSMatt Macy if (zpool_iter(zhdl, zfs_mark_pool, NULL) != 0) 422eda14cbcSMatt Macy return; 423eda14cbcSMatt Macy 424eda14cbcSMatt Macy /* 425eda14cbcSMatt Macy * Remove those cases which were not found. 426eda14cbcSMatt Macy */ 427eda14cbcSMatt Macy walk = uu_list_walk_start(zfs_cases, UU_WALK_ROBUST); 428eda14cbcSMatt Macy while ((zcp = uu_list_walk_next(walk)) != NULL) { 429eda14cbcSMatt Macy if (!zcp->zc_present) 430eda14cbcSMatt Macy fmd_case_close(hdl, zcp->zc_case); 431eda14cbcSMatt Macy } 432eda14cbcSMatt Macy uu_list_walk_end(walk); 433eda14cbcSMatt Macy } 434eda14cbcSMatt Macy 435eda14cbcSMatt Macy /* 436eda14cbcSMatt Macy * Construct the name of a serd engine given the pool/vdev GUID and type (io or 437eda14cbcSMatt Macy * checksum). 438eda14cbcSMatt Macy */ 439eda14cbcSMatt Macy static void 440eda14cbcSMatt Macy zfs_serd_name(char *buf, uint64_t pool_guid, uint64_t vdev_guid, 441eda14cbcSMatt Macy const char *type) 442eda14cbcSMatt Macy { 443eda14cbcSMatt Macy (void) snprintf(buf, MAX_SERDLEN, "zfs_%llx_%llx_%s", 444eda14cbcSMatt Macy (long long unsigned int)pool_guid, 445eda14cbcSMatt Macy (long long unsigned int)vdev_guid, type); 446eda14cbcSMatt Macy } 447eda14cbcSMatt Macy 448e2257b31SMartin Matuska static void 449e2257b31SMartin Matuska zfs_case_retire(fmd_hdl_t *hdl, zfs_case_t *zcp) 450e2257b31SMartin Matuska { 451e2257b31SMartin Matuska fmd_hdl_debug(hdl, "retiring case"); 452e2257b31SMartin Matuska 453e2257b31SMartin Matuska fmd_case_close(hdl, zcp->zc_case); 454e2257b31SMartin Matuska } 455e2257b31SMartin Matuska 456eda14cbcSMatt Macy /* 457eda14cbcSMatt Macy * Solve a given ZFS case. This first checks to make sure the diagnosis is 458eda14cbcSMatt Macy * still valid, as well as cleaning up any pending timer associated with the 459eda14cbcSMatt Macy * case. 460eda14cbcSMatt Macy */ 461eda14cbcSMatt Macy static void 462e92ffd9bSMartin Matuska zfs_case_solve(fmd_hdl_t *hdl, zfs_case_t *zcp, const char *faultname) 463eda14cbcSMatt Macy { 464eda14cbcSMatt Macy nvlist_t *detector, *fault; 465eda14cbcSMatt Macy boolean_t serialize; 466eda14cbcSMatt Macy nvlist_t *fru = NULL; 467eda14cbcSMatt Macy fmd_hdl_debug(hdl, "solving fault '%s'", faultname); 468eda14cbcSMatt Macy 469eda14cbcSMatt Macy /* 470eda14cbcSMatt Macy * Construct the detector from the case data. The detector is in the 471eda14cbcSMatt Macy * ZFS scheme, and is either the pool or the vdev, depending on whether 472eda14cbcSMatt Macy * this is a vdev or pool fault. 473eda14cbcSMatt Macy */ 474eda14cbcSMatt Macy detector = fmd_nvl_alloc(hdl, FMD_SLEEP); 475eda14cbcSMatt Macy 476eda14cbcSMatt Macy (void) nvlist_add_uint8(detector, FM_VERSION, ZFS_SCHEME_VERSION0); 477eda14cbcSMatt Macy (void) nvlist_add_string(detector, FM_FMRI_SCHEME, FM_FMRI_SCHEME_ZFS); 478eda14cbcSMatt Macy (void) nvlist_add_uint64(detector, FM_FMRI_ZFS_POOL, 479eda14cbcSMatt Macy zcp->zc_data.zc_pool_guid); 480eda14cbcSMatt Macy if (zcp->zc_data.zc_vdev_guid != 0) { 481eda14cbcSMatt Macy (void) nvlist_add_uint64(detector, FM_FMRI_ZFS_VDEV, 482eda14cbcSMatt Macy zcp->zc_data.zc_vdev_guid); 483eda14cbcSMatt Macy } 484eda14cbcSMatt Macy 485eda14cbcSMatt Macy fault = fmd_nvl_create_fault(hdl, faultname, 100, detector, 486eda14cbcSMatt Macy fru, detector); 487eda14cbcSMatt Macy fmd_case_add_suspect(hdl, zcp->zc_case, fault); 488eda14cbcSMatt Macy 489eda14cbcSMatt Macy nvlist_free(fru); 490eda14cbcSMatt Macy 491eda14cbcSMatt Macy fmd_case_solve(hdl, zcp->zc_case); 492eda14cbcSMatt Macy 493eda14cbcSMatt Macy serialize = B_FALSE; 494eda14cbcSMatt Macy if (zcp->zc_data.zc_has_remove_timer) { 495eda14cbcSMatt Macy fmd_timer_remove(hdl, zcp->zc_remove_timer); 496eda14cbcSMatt Macy zcp->zc_data.zc_has_remove_timer = 0; 497eda14cbcSMatt Macy serialize = B_TRUE; 498eda14cbcSMatt Macy } 499eda14cbcSMatt Macy if (serialize) 500e92ffd9bSMartin Matuska zfs_case_serialize(zcp); 501eda14cbcSMatt Macy 502eda14cbcSMatt Macy nvlist_free(detector); 503eda14cbcSMatt Macy } 504eda14cbcSMatt Macy 505eda14cbcSMatt Macy static boolean_t 506eda14cbcSMatt Macy timeval_earlier(er_timeval_t *a, er_timeval_t *b) 507eda14cbcSMatt Macy { 508eda14cbcSMatt Macy return (a->ertv_sec < b->ertv_sec || 509eda14cbcSMatt Macy (a->ertv_sec == b->ertv_sec && a->ertv_nsec < b->ertv_nsec)); 510eda14cbcSMatt Macy } 511eda14cbcSMatt Macy 512eda14cbcSMatt Macy static void 513eda14cbcSMatt Macy zfs_ereport_when(fmd_hdl_t *hdl, nvlist_t *nvl, er_timeval_t *when) 514eda14cbcSMatt Macy { 515e92ffd9bSMartin Matuska (void) hdl; 516eda14cbcSMatt Macy int64_t *tod; 517eda14cbcSMatt Macy uint_t nelem; 518eda14cbcSMatt Macy 519eda14cbcSMatt Macy if (nvlist_lookup_int64_array(nvl, FM_EREPORT_TIME, &tod, 520eda14cbcSMatt Macy &nelem) == 0 && nelem == 2) { 521eda14cbcSMatt Macy when->ertv_sec = tod[0]; 522eda14cbcSMatt Macy when->ertv_nsec = tod[1]; 523eda14cbcSMatt Macy } else { 524eda14cbcSMatt Macy when->ertv_sec = when->ertv_nsec = UINT64_MAX; 525eda14cbcSMatt Macy } 526eda14cbcSMatt Macy } 527eda14cbcSMatt Macy 528eda14cbcSMatt Macy /* 529*7a7741afSMartin Matuska * Record the specified event in the SERD engine and return a 530*7a7741afSMartin Matuska * boolean value indicating whether or not the engine fired as 531*7a7741afSMartin Matuska * the result of inserting this event. 532*7a7741afSMartin Matuska * 533*7a7741afSMartin Matuska * When the pool has similar active cases on other vdevs, then 534*7a7741afSMartin Matuska * the fired state is disregarded and the case is retired. 535*7a7741afSMartin Matuska */ 536*7a7741afSMartin Matuska static int 537*7a7741afSMartin Matuska zfs_fm_serd_record(fmd_hdl_t *hdl, const char *name, fmd_event_t *ep, 538*7a7741afSMartin Matuska zfs_case_t *zcp, const char *err_type) 539*7a7741afSMartin Matuska { 540*7a7741afSMartin Matuska int fired = fmd_serd_record(hdl, name, ep); 541*7a7741afSMartin Matuska int peers = 0; 542*7a7741afSMartin Matuska 543*7a7741afSMartin Matuska if (fired && (peers = zfs_other_serd_cases(hdl, &zcp->zc_data)) > 0) { 544*7a7741afSMartin Matuska fmd_hdl_debug(hdl, "pool %llu is tracking %d other %s cases " 545*7a7741afSMartin Matuska "-- skip faulting the vdev %llu", 546*7a7741afSMartin Matuska (u_longlong_t)zcp->zc_data.zc_pool_guid, 547*7a7741afSMartin Matuska peers, err_type, 548*7a7741afSMartin Matuska (u_longlong_t)zcp->zc_data.zc_vdev_guid); 549*7a7741afSMartin Matuska zfs_case_retire(hdl, zcp); 550*7a7741afSMartin Matuska fired = 0; 551*7a7741afSMartin Matuska } 552*7a7741afSMartin Matuska 553*7a7741afSMartin Matuska return (fired); 554*7a7741afSMartin Matuska } 555*7a7741afSMartin Matuska 556*7a7741afSMartin Matuska /* 557eda14cbcSMatt Macy * Main fmd entry point. 558eda14cbcSMatt Macy */ 559eda14cbcSMatt Macy static void 560eda14cbcSMatt Macy zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class) 561eda14cbcSMatt Macy { 562eda14cbcSMatt Macy zfs_case_t *zcp, *dcp; 563eda14cbcSMatt Macy int32_t pool_state; 564*7a7741afSMartin Matuska uint64_t ena, pool_guid, vdev_guid, parent_guid; 56515f0b8c3SMartin Matuska uint64_t checksum_n, checksum_t; 56615f0b8c3SMartin Matuska uint64_t io_n, io_t; 567eda14cbcSMatt Macy er_timeval_t pool_load; 568eda14cbcSMatt Macy er_timeval_t er_when; 569eda14cbcSMatt Macy nvlist_t *detector; 570eda14cbcSMatt Macy boolean_t pool_found = B_FALSE; 571eda14cbcSMatt Macy boolean_t isresource; 5722a58b312SMartin Matuska const char *type; 573eda14cbcSMatt Macy 574eda14cbcSMatt Macy /* 575eda14cbcSMatt Macy * We subscribe to notifications for vdev or pool removal. In these 576eda14cbcSMatt Macy * cases, there may be cases that no longer apply. Purge any cases 577eda14cbcSMatt Macy * that no longer apply. 578eda14cbcSMatt Macy */ 579eda14cbcSMatt Macy if (fmd_nvl_class_match(hdl, nvl, "sysevent.fs.zfs.*")) { 580eda14cbcSMatt Macy fmd_hdl_debug(hdl, "purging orphaned cases from %s", 581eda14cbcSMatt Macy strrchr(class, '.') + 1); 582eda14cbcSMatt Macy zfs_purge_cases(hdl); 583eda14cbcSMatt Macy zfs_stats.resource_drops.fmds_value.ui64++; 584eda14cbcSMatt Macy return; 585eda14cbcSMatt Macy } 586eda14cbcSMatt Macy 587eda14cbcSMatt Macy isresource = fmd_nvl_class_match(hdl, nvl, "resource.fs.zfs.*"); 588eda14cbcSMatt Macy 589eda14cbcSMatt Macy if (isresource) { 590eda14cbcSMatt Macy /* 591eda14cbcSMatt Macy * For resources, we don't have a normal payload. 592eda14cbcSMatt Macy */ 593eda14cbcSMatt Macy if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, 594eda14cbcSMatt Macy &vdev_guid) != 0) 595eda14cbcSMatt Macy pool_state = SPA_LOAD_OPEN; 596eda14cbcSMatt Macy else 597eda14cbcSMatt Macy pool_state = SPA_LOAD_NONE; 598eda14cbcSMatt Macy detector = NULL; 599eda14cbcSMatt Macy } else { 600eda14cbcSMatt Macy (void) nvlist_lookup_nvlist(nvl, 601eda14cbcSMatt Macy FM_EREPORT_DETECTOR, &detector); 602eda14cbcSMatt Macy (void) nvlist_lookup_int32(nvl, 603eda14cbcSMatt Macy FM_EREPORT_PAYLOAD_ZFS_POOL_CONTEXT, &pool_state); 604eda14cbcSMatt Macy } 605eda14cbcSMatt Macy 606eda14cbcSMatt Macy /* 607eda14cbcSMatt Macy * We also ignore all ereports generated during an import of a pool, 608eda14cbcSMatt Macy * since the only possible fault (.pool) would result in import failure, 609eda14cbcSMatt Macy * and hence no persistent fault. Some day we may want to do something 610eda14cbcSMatt Macy * with these ereports, so we continue generating them internally. 611eda14cbcSMatt Macy */ 612eda14cbcSMatt Macy if (pool_state == SPA_LOAD_IMPORT) { 613eda14cbcSMatt Macy zfs_stats.import_drops.fmds_value.ui64++; 614eda14cbcSMatt Macy fmd_hdl_debug(hdl, "ignoring '%s' during import", class); 615eda14cbcSMatt Macy return; 616eda14cbcSMatt Macy } 617eda14cbcSMatt Macy 618eda14cbcSMatt Macy /* 619eda14cbcSMatt Macy * Device I/O errors are ignored during pool open. 620eda14cbcSMatt Macy */ 621eda14cbcSMatt Macy if (pool_state == SPA_LOAD_OPEN && 622eda14cbcSMatt Macy (fmd_nvl_class_match(hdl, nvl, 623eda14cbcSMatt Macy ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_CHECKSUM)) || 624eda14cbcSMatt Macy fmd_nvl_class_match(hdl, nvl, 625eda14cbcSMatt Macy ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_IO)) || 626eda14cbcSMatt Macy fmd_nvl_class_match(hdl, nvl, 627eda14cbcSMatt Macy ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_PROBE_FAILURE)))) { 628eda14cbcSMatt Macy fmd_hdl_debug(hdl, "ignoring '%s' during pool open", class); 629eda14cbcSMatt Macy zfs_stats.dev_drops.fmds_value.ui64++; 630eda14cbcSMatt Macy return; 631eda14cbcSMatt Macy } 632eda14cbcSMatt Macy 633eda14cbcSMatt Macy /* 634eda14cbcSMatt Macy * We ignore ereports for anything except disks and files. 635eda14cbcSMatt Macy */ 636eda14cbcSMatt Macy if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, 637eda14cbcSMatt Macy &type) == 0) { 638eda14cbcSMatt Macy if (strcmp(type, VDEV_TYPE_DISK) != 0 && 639eda14cbcSMatt Macy strcmp(type, VDEV_TYPE_FILE) != 0) { 640eda14cbcSMatt Macy zfs_stats.vdev_drops.fmds_value.ui64++; 641eda14cbcSMatt Macy return; 642eda14cbcSMatt Macy } 643eda14cbcSMatt Macy } 644eda14cbcSMatt Macy 645eda14cbcSMatt Macy /* 646eda14cbcSMatt Macy * Determine if this ereport corresponds to an open case. 647eda14cbcSMatt Macy * Each vdev or pool can have a single case. 648eda14cbcSMatt Macy */ 649eda14cbcSMatt Macy (void) nvlist_lookup_uint64(nvl, 650eda14cbcSMatt Macy FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, &pool_guid); 651eda14cbcSMatt Macy if (nvlist_lookup_uint64(nvl, 652eda14cbcSMatt Macy FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, &vdev_guid) != 0) 653eda14cbcSMatt Macy vdev_guid = 0; 654*7a7741afSMartin Matuska if (nvlist_lookup_uint64(nvl, 655*7a7741afSMartin Matuska FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID, &parent_guid) != 0) 656*7a7741afSMartin Matuska parent_guid = 0; 657eda14cbcSMatt Macy if (nvlist_lookup_uint64(nvl, FM_EREPORT_ENA, &ena) != 0) 658eda14cbcSMatt Macy ena = 0; 659eda14cbcSMatt Macy 660eda14cbcSMatt Macy zfs_ereport_when(hdl, nvl, &er_when); 661eda14cbcSMatt Macy 662eda14cbcSMatt Macy for (zcp = uu_list_first(zfs_cases); zcp != NULL; 663eda14cbcSMatt Macy zcp = uu_list_next(zfs_cases, zcp)) { 664eda14cbcSMatt Macy if (zcp->zc_data.zc_pool_guid == pool_guid) { 665eda14cbcSMatt Macy pool_found = B_TRUE; 666eda14cbcSMatt Macy pool_load = zcp->zc_when; 667eda14cbcSMatt Macy } 668eda14cbcSMatt Macy if (zcp->zc_data.zc_vdev_guid == vdev_guid) 669eda14cbcSMatt Macy break; 670eda14cbcSMatt Macy } 671eda14cbcSMatt Macy 672eda14cbcSMatt Macy /* 673eda14cbcSMatt Macy * Avoid falsely accusing a pool of being faulty. Do so by 674eda14cbcSMatt Macy * not replaying ereports that were generated prior to the 675eda14cbcSMatt Macy * current import. If the failure that generated them was 676eda14cbcSMatt Macy * transient because the device was actually removed but we 677eda14cbcSMatt Macy * didn't receive the normal asynchronous notification, we 678eda14cbcSMatt Macy * don't want to mark it as faulted and potentially panic. If 679eda14cbcSMatt Macy * there is still a problem we'd expect not to be able to 680eda14cbcSMatt Macy * import the pool, or that new ereports will be generated 681eda14cbcSMatt Macy * once the pool is used. 682eda14cbcSMatt Macy */ 683eda14cbcSMatt Macy if (pool_found && timeval_earlier(&er_when, &pool_load)) { 684eda14cbcSMatt Macy fmd_hdl_debug(hdl, "ignoring pool %llx, " 685eda14cbcSMatt Macy "ereport time %lld.%lld, pool load time = %lld.%lld", 686eda14cbcSMatt Macy pool_guid, er_when.ertv_sec, er_when.ertv_nsec, 687eda14cbcSMatt Macy pool_load.ertv_sec, pool_load.ertv_nsec); 688eda14cbcSMatt Macy zfs_stats.old_drops.fmds_value.ui64++; 689eda14cbcSMatt Macy return; 690eda14cbcSMatt Macy } 691eda14cbcSMatt Macy 692eda14cbcSMatt Macy if (!pool_found) { 693eda14cbcSMatt Macy /* 694eda14cbcSMatt Macy * Haven't yet seen this pool, but same situation 695eda14cbcSMatt Macy * may apply. 696eda14cbcSMatt Macy */ 697eda14cbcSMatt Macy libzfs_handle_t *zhdl = fmd_hdl_getspecific(hdl); 698eda14cbcSMatt Macy struct load_time_arg la; 699eda14cbcSMatt Macy 700eda14cbcSMatt Macy la.lt_guid = pool_guid; 701eda14cbcSMatt Macy la.lt_time = &pool_load; 702eda14cbcSMatt Macy la.lt_found = B_FALSE; 703eda14cbcSMatt Macy 704eda14cbcSMatt Macy if (zhdl != NULL && 705eda14cbcSMatt Macy zpool_iter(zhdl, zpool_find_load_time, &la) == 0 && 706eda14cbcSMatt Macy la.lt_found == B_TRUE) { 707eda14cbcSMatt Macy pool_found = B_TRUE; 708eda14cbcSMatt Macy 709eda14cbcSMatt Macy if (timeval_earlier(&er_when, &pool_load)) { 710eda14cbcSMatt Macy fmd_hdl_debug(hdl, "ignoring pool %llx, " 711eda14cbcSMatt Macy "ereport time %lld.%lld, " 712eda14cbcSMatt Macy "pool load time = %lld.%lld", 713eda14cbcSMatt Macy pool_guid, er_when.ertv_sec, 714eda14cbcSMatt Macy er_when.ertv_nsec, pool_load.ertv_sec, 715eda14cbcSMatt Macy pool_load.ertv_nsec); 716eda14cbcSMatt Macy zfs_stats.old_drops.fmds_value.ui64++; 717eda14cbcSMatt Macy return; 718eda14cbcSMatt Macy } 719eda14cbcSMatt Macy } 720eda14cbcSMatt Macy } 721eda14cbcSMatt Macy 722eda14cbcSMatt Macy if (zcp == NULL) { 723eda14cbcSMatt Macy fmd_case_t *cs; 724eda14cbcSMatt Macy zfs_case_data_t data = { 0 }; 725eda14cbcSMatt Macy 726eda14cbcSMatt Macy /* 727eda14cbcSMatt Macy * If this is one of our 'fake' resource ereports, and there is 728eda14cbcSMatt Macy * no case open, simply discard it. 729eda14cbcSMatt Macy */ 730eda14cbcSMatt Macy if (isresource) { 731eda14cbcSMatt Macy zfs_stats.resource_drops.fmds_value.ui64++; 732eda14cbcSMatt Macy fmd_hdl_debug(hdl, "discarding '%s for vdev %llu", 733eda14cbcSMatt Macy class, vdev_guid); 734eda14cbcSMatt Macy return; 735eda14cbcSMatt Macy } 736eda14cbcSMatt Macy 737eda14cbcSMatt Macy /* 738eda14cbcSMatt Macy * Skip tracking some ereports 739eda14cbcSMatt Macy */ 740eda14cbcSMatt Macy if (strcmp(class, 741eda14cbcSMatt Macy ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_DATA)) == 0 || 742eda14cbcSMatt Macy strcmp(class, 743e2257b31SMartin Matuska ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_CONFIG_CACHE_WRITE)) == 0) { 744eda14cbcSMatt Macy zfs_stats.resource_drops.fmds_value.ui64++; 745eda14cbcSMatt Macy return; 746eda14cbcSMatt Macy } 747eda14cbcSMatt Macy 748eda14cbcSMatt Macy /* 749eda14cbcSMatt Macy * Open a new case. 750eda14cbcSMatt Macy */ 751eda14cbcSMatt Macy cs = fmd_case_open(hdl, NULL); 752eda14cbcSMatt Macy 753eda14cbcSMatt Macy fmd_hdl_debug(hdl, "opening case for vdev %llu due to '%s'", 754eda14cbcSMatt Macy vdev_guid, class); 755eda14cbcSMatt Macy 756eda14cbcSMatt Macy /* 757eda14cbcSMatt Macy * Initialize the case buffer. To commonize code, we actually 758eda14cbcSMatt Macy * create the buffer with existing data, and then call 759eda14cbcSMatt Macy * zfs_case_unserialize() to instantiate the in-core structure. 760eda14cbcSMatt Macy */ 761eda14cbcSMatt Macy fmd_buf_create(hdl, cs, CASE_DATA, sizeof (zfs_case_data_t)); 762eda14cbcSMatt Macy 763eda14cbcSMatt Macy data.zc_version = CASE_DATA_VERSION_SERD; 764eda14cbcSMatt Macy data.zc_ena = ena; 765eda14cbcSMatt Macy data.zc_pool_guid = pool_guid; 766eda14cbcSMatt Macy data.zc_vdev_guid = vdev_guid; 767*7a7741afSMartin Matuska data.zc_parent_guid = parent_guid; 768eda14cbcSMatt Macy data.zc_pool_state = (int)pool_state; 769eda14cbcSMatt Macy 770eda14cbcSMatt Macy fmd_buf_write(hdl, cs, CASE_DATA, &data, sizeof (data)); 771eda14cbcSMatt Macy 772eda14cbcSMatt Macy zcp = zfs_case_unserialize(hdl, cs); 773eda14cbcSMatt Macy assert(zcp != NULL); 774eda14cbcSMatt Macy if (pool_found) 775eda14cbcSMatt Macy zcp->zc_when = pool_load; 776eda14cbcSMatt Macy } 777eda14cbcSMatt Macy 778eda14cbcSMatt Macy if (isresource) { 779eda14cbcSMatt Macy fmd_hdl_debug(hdl, "resource event '%s'", class); 780eda14cbcSMatt Macy 781eda14cbcSMatt Macy if (fmd_nvl_class_match(hdl, nvl, 782eda14cbcSMatt Macy ZFS_MAKE_RSRC(FM_RESOURCE_AUTOREPLACE))) { 783eda14cbcSMatt Macy /* 784eda14cbcSMatt Macy * The 'resource.fs.zfs.autoreplace' event indicates 785eda14cbcSMatt Macy * that the pool was loaded with the 'autoreplace' 786eda14cbcSMatt Macy * property set. In this case, any pending device 787eda14cbcSMatt Macy * failures should be ignored, as the asynchronous 788eda14cbcSMatt Macy * autoreplace handling will take care of them. 789eda14cbcSMatt Macy */ 790eda14cbcSMatt Macy fmd_case_close(hdl, zcp->zc_case); 791eda14cbcSMatt Macy } else if (fmd_nvl_class_match(hdl, nvl, 792eda14cbcSMatt Macy ZFS_MAKE_RSRC(FM_RESOURCE_REMOVED))) { 793eda14cbcSMatt Macy /* 794eda14cbcSMatt Macy * The 'resource.fs.zfs.removed' event indicates that 795eda14cbcSMatt Macy * device removal was detected, and the device was 796eda14cbcSMatt Macy * closed asynchronously. If this is the case, we 797eda14cbcSMatt Macy * assume that any recent I/O errors were due to the 798eda14cbcSMatt Macy * device removal, not any fault of the device itself. 799eda14cbcSMatt Macy * We reset the SERD engine, and cancel any pending 800eda14cbcSMatt Macy * timers. 801eda14cbcSMatt Macy */ 802eda14cbcSMatt Macy if (zcp->zc_data.zc_has_remove_timer) { 803eda14cbcSMatt Macy fmd_timer_remove(hdl, zcp->zc_remove_timer); 804eda14cbcSMatt Macy zcp->zc_data.zc_has_remove_timer = 0; 805e92ffd9bSMartin Matuska zfs_case_serialize(zcp); 806eda14cbcSMatt Macy } 807eda14cbcSMatt Macy if (zcp->zc_data.zc_serd_io[0] != '\0') 808eda14cbcSMatt Macy fmd_serd_reset(hdl, zcp->zc_data.zc_serd_io); 809eda14cbcSMatt Macy if (zcp->zc_data.zc_serd_checksum[0] != '\0') 810eda14cbcSMatt Macy fmd_serd_reset(hdl, 811eda14cbcSMatt Macy zcp->zc_data.zc_serd_checksum); 812e2257b31SMartin Matuska if (zcp->zc_data.zc_serd_slow_io[0] != '\0') 813e2257b31SMartin Matuska fmd_serd_reset(hdl, 814e2257b31SMartin Matuska zcp->zc_data.zc_serd_slow_io); 815eda14cbcSMatt Macy } else if (fmd_nvl_class_match(hdl, nvl, 816eda14cbcSMatt Macy ZFS_MAKE_RSRC(FM_RESOURCE_STATECHANGE))) { 817eda14cbcSMatt Macy uint64_t state = 0; 818eda14cbcSMatt Macy 819eda14cbcSMatt Macy if (zcp != NULL && 820eda14cbcSMatt Macy nvlist_lookup_uint64(nvl, 821eda14cbcSMatt Macy FM_EREPORT_PAYLOAD_ZFS_VDEV_STATE, &state) == 0 && 822eda14cbcSMatt Macy state == VDEV_STATE_HEALTHY) { 823eda14cbcSMatt Macy fmd_hdl_debug(hdl, "closing case after a " 824eda14cbcSMatt Macy "device statechange to healthy"); 825eda14cbcSMatt Macy fmd_case_close(hdl, zcp->zc_case); 826eda14cbcSMatt Macy } 827eda14cbcSMatt Macy } 828eda14cbcSMatt Macy zfs_stats.resource_drops.fmds_value.ui64++; 829eda14cbcSMatt Macy return; 830eda14cbcSMatt Macy } 831eda14cbcSMatt Macy 832eda14cbcSMatt Macy /* 833eda14cbcSMatt Macy * Associate the ereport with this case. 834eda14cbcSMatt Macy */ 835eda14cbcSMatt Macy fmd_case_add_ereport(hdl, zcp->zc_case, ep); 836eda14cbcSMatt Macy 837eda14cbcSMatt Macy /* 838eda14cbcSMatt Macy * Don't do anything else if this case is already solved. 839eda14cbcSMatt Macy */ 840eda14cbcSMatt Macy if (fmd_case_solved(hdl, zcp->zc_case)) 841eda14cbcSMatt Macy return; 842eda14cbcSMatt Macy 843e2257b31SMartin Matuska if (vdev_guid) 844e2257b31SMartin Matuska fmd_hdl_debug(hdl, "error event '%s', vdev %llu", class, 845e2257b31SMartin Matuska vdev_guid); 846e2257b31SMartin Matuska else 847eda14cbcSMatt Macy fmd_hdl_debug(hdl, "error event '%s'", class); 848eda14cbcSMatt Macy 849eda14cbcSMatt Macy /* 850eda14cbcSMatt Macy * Determine if we should solve the case and generate a fault. We solve 851eda14cbcSMatt Macy * a case if: 852eda14cbcSMatt Macy * 853eda14cbcSMatt Macy * a. A pool failed to open (ereport.fs.zfs.pool) 854eda14cbcSMatt Macy * b. A device failed to open (ereport.fs.zfs.pool) while a pool 855eda14cbcSMatt Macy * was up and running. 856eda14cbcSMatt Macy * 857eda14cbcSMatt Macy * We may see a series of ereports associated with a pool open, all 858eda14cbcSMatt Macy * chained together by the same ENA. If the pool open succeeds, then 859eda14cbcSMatt Macy * we'll see no further ereports. To detect when a pool open has 860eda14cbcSMatt Macy * succeeded, we associate a timer with the event. When it expires, we 861eda14cbcSMatt Macy * close the case. 862eda14cbcSMatt Macy */ 863eda14cbcSMatt Macy if (fmd_nvl_class_match(hdl, nvl, 864eda14cbcSMatt Macy ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_POOL))) { 865eda14cbcSMatt Macy /* 866eda14cbcSMatt Macy * Pool level fault. Before solving the case, go through and 867eda14cbcSMatt Macy * close any open device cases that may be pending. 868eda14cbcSMatt Macy */ 869eda14cbcSMatt Macy for (dcp = uu_list_first(zfs_cases); dcp != NULL; 870eda14cbcSMatt Macy dcp = uu_list_next(zfs_cases, dcp)) { 871eda14cbcSMatt Macy if (dcp->zc_data.zc_pool_guid == 872eda14cbcSMatt Macy zcp->zc_data.zc_pool_guid && 873eda14cbcSMatt Macy dcp->zc_data.zc_vdev_guid != 0) 874eda14cbcSMatt Macy fmd_case_close(hdl, dcp->zc_case); 875eda14cbcSMatt Macy } 876eda14cbcSMatt Macy 877e92ffd9bSMartin Matuska zfs_case_solve(hdl, zcp, "fault.fs.zfs.pool"); 878eda14cbcSMatt Macy } else if (fmd_nvl_class_match(hdl, nvl, 879eda14cbcSMatt Macy ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_LOG_REPLAY))) { 880eda14cbcSMatt Macy /* 881eda14cbcSMatt Macy * Pool level fault for reading the intent logs. 882eda14cbcSMatt Macy */ 883e92ffd9bSMartin Matuska zfs_case_solve(hdl, zcp, "fault.fs.zfs.log_replay"); 884eda14cbcSMatt Macy } else if (fmd_nvl_class_match(hdl, nvl, "ereport.fs.zfs.vdev.*")) { 885eda14cbcSMatt Macy /* 886eda14cbcSMatt Macy * Device fault. 887eda14cbcSMatt Macy */ 888e92ffd9bSMartin Matuska zfs_case_solve(hdl, zcp, "fault.fs.zfs.device"); 889eda14cbcSMatt Macy } else if (fmd_nvl_class_match(hdl, nvl, 890eda14cbcSMatt Macy ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_IO)) || 891eda14cbcSMatt Macy fmd_nvl_class_match(hdl, nvl, 892eda14cbcSMatt Macy ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_CHECKSUM)) || 893eda14cbcSMatt Macy fmd_nvl_class_match(hdl, nvl, 894eda14cbcSMatt Macy ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_IO_FAILURE)) || 895eda14cbcSMatt Macy fmd_nvl_class_match(hdl, nvl, 896e2257b31SMartin Matuska ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_DELAY)) || 897e2257b31SMartin Matuska fmd_nvl_class_match(hdl, nvl, 898eda14cbcSMatt Macy ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_PROBE_FAILURE))) { 8992a58b312SMartin Matuska const char *failmode = NULL; 900eda14cbcSMatt Macy boolean_t checkremove = B_FALSE; 901e3aa18adSMartin Matuska uint32_t pri = 0; 902eda14cbcSMatt Macy 903eda14cbcSMatt Macy /* 904eda14cbcSMatt Macy * If this is a checksum or I/O error, then toss it into the 905eda14cbcSMatt Macy * appropriate SERD engine and check to see if it has fired. 906eda14cbcSMatt Macy * Ideally, we want to do something more sophisticated, 907eda14cbcSMatt Macy * (persistent errors for a single data block, etc). For now, 908eda14cbcSMatt Macy * a single SERD engine is sufficient. 909eda14cbcSMatt Macy */ 910eda14cbcSMatt Macy if (fmd_nvl_class_match(hdl, nvl, 911eda14cbcSMatt Macy ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_IO))) { 912eda14cbcSMatt Macy if (zcp->zc_data.zc_serd_io[0] == '\0') { 91315f0b8c3SMartin Matuska if (nvlist_lookup_uint64(nvl, 91415f0b8c3SMartin Matuska FM_EREPORT_PAYLOAD_ZFS_VDEV_IO_N, 91515f0b8c3SMartin Matuska &io_n) != 0) { 91615f0b8c3SMartin Matuska io_n = DEFAULT_IO_N; 91715f0b8c3SMartin Matuska } 91815f0b8c3SMartin Matuska if (nvlist_lookup_uint64(nvl, 91915f0b8c3SMartin Matuska FM_EREPORT_PAYLOAD_ZFS_VDEV_IO_T, 92015f0b8c3SMartin Matuska &io_t) != 0) { 92115f0b8c3SMartin Matuska io_t = DEFAULT_IO_T; 92215f0b8c3SMartin Matuska } 923eda14cbcSMatt Macy zfs_serd_name(zcp->zc_data.zc_serd_io, 924eda14cbcSMatt Macy pool_guid, vdev_guid, "io"); 925eda14cbcSMatt Macy fmd_serd_create(hdl, zcp->zc_data.zc_serd_io, 92615f0b8c3SMartin Matuska io_n, 92715f0b8c3SMartin Matuska SEC2NSEC(io_t)); 928e92ffd9bSMartin Matuska zfs_case_serialize(zcp); 929eda14cbcSMatt Macy } 930*7a7741afSMartin Matuska if (zfs_fm_serd_record(hdl, zcp->zc_data.zc_serd_io, 931*7a7741afSMartin Matuska ep, zcp, "io error")) { 932eda14cbcSMatt Macy checkremove = B_TRUE; 933*7a7741afSMartin Matuska } 934eda14cbcSMatt Macy } else if (fmd_nvl_class_match(hdl, nvl, 935e2257b31SMartin Matuska ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_DELAY))) { 936e2257b31SMartin Matuska uint64_t slow_io_n, slow_io_t; 937e2257b31SMartin Matuska 938e2257b31SMartin Matuska /* 939e2257b31SMartin Matuska * Create a slow io SERD engine when the VDEV has the 940e2257b31SMartin Matuska * 'vdev_slow_io_n' and 'vdev_slow_io_n' properties. 941e2257b31SMartin Matuska */ 942e2257b31SMartin Matuska if (zcp->zc_data.zc_serd_slow_io[0] == '\0' && 943e2257b31SMartin Matuska nvlist_lookup_uint64(nvl, 944e2257b31SMartin Matuska FM_EREPORT_PAYLOAD_ZFS_VDEV_SLOW_IO_N, 945e2257b31SMartin Matuska &slow_io_n) == 0 && 946e2257b31SMartin Matuska nvlist_lookup_uint64(nvl, 947e2257b31SMartin Matuska FM_EREPORT_PAYLOAD_ZFS_VDEV_SLOW_IO_T, 948e2257b31SMartin Matuska &slow_io_t) == 0) { 949e2257b31SMartin Matuska zfs_serd_name(zcp->zc_data.zc_serd_slow_io, 950e2257b31SMartin Matuska pool_guid, vdev_guid, "slow_io"); 951e2257b31SMartin Matuska fmd_serd_create(hdl, 952e2257b31SMartin Matuska zcp->zc_data.zc_serd_slow_io, 953e2257b31SMartin Matuska slow_io_n, 954e2257b31SMartin Matuska SEC2NSEC(slow_io_t)); 955e2257b31SMartin Matuska zfs_case_serialize(zcp); 956e2257b31SMartin Matuska } 957e2257b31SMartin Matuska /* Pass event to SERD engine and see if this triggers */ 958e2257b31SMartin Matuska if (zcp->zc_data.zc_serd_slow_io[0] != '\0' && 959*7a7741afSMartin Matuska zfs_fm_serd_record(hdl, 960*7a7741afSMartin Matuska zcp->zc_data.zc_serd_slow_io, ep, zcp, "slow io")) { 961e2257b31SMartin Matuska zfs_case_solve(hdl, zcp, 962e2257b31SMartin Matuska "fault.fs.zfs.vdev.slow_io"); 963e2257b31SMartin Matuska } 964e2257b31SMartin Matuska } else if (fmd_nvl_class_match(hdl, nvl, 965eda14cbcSMatt Macy ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_CHECKSUM))) { 966e2df9bb4SMartin Matuska uint64_t flags = 0; 967e2df9bb4SMartin Matuska int32_t flags32 = 0; 968e3aa18adSMartin Matuska /* 969e3aa18adSMartin Matuska * We ignore ereports for checksum errors generated by 970e3aa18adSMartin Matuska * scrub/resilver I/O to avoid potentially further 971e3aa18adSMartin Matuska * degrading the pool while it's being repaired. 972e2df9bb4SMartin Matuska * 973e2df9bb4SMartin Matuska * Note that FM_EREPORT_PAYLOAD_ZFS_ZIO_FLAGS used to 974e2df9bb4SMartin Matuska * be int32. To allow newer zed to work on older 975e2df9bb4SMartin Matuska * kernels, if we don't find the flags, we look for 976e2df9bb4SMartin Matuska * the older ones too. 977e3aa18adSMartin Matuska */ 978e3aa18adSMartin Matuska if (((nvlist_lookup_uint32(nvl, 979e3aa18adSMartin Matuska FM_EREPORT_PAYLOAD_ZFS_ZIO_PRIORITY, &pri) == 0) && 980e3aa18adSMartin Matuska (pri == ZIO_PRIORITY_SCRUB || 981e3aa18adSMartin Matuska pri == ZIO_PRIORITY_REBUILD)) || 982e2df9bb4SMartin Matuska ((nvlist_lookup_uint64(nvl, 983e3aa18adSMartin Matuska FM_EREPORT_PAYLOAD_ZFS_ZIO_FLAGS, &flags) == 0) && 984e2df9bb4SMartin Matuska (flags & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER))) || 985e2df9bb4SMartin Matuska ((nvlist_lookup_int32(nvl, 986e2df9bb4SMartin Matuska FM_EREPORT_PAYLOAD_ZFS_ZIO_FLAGS, &flags32) == 0) && 987e2df9bb4SMartin Matuska (flags32 & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER)))) { 988e3aa18adSMartin Matuska fmd_hdl_debug(hdl, "ignoring '%s' for " 989e3aa18adSMartin Matuska "scrub/resilver I/O", class); 990e3aa18adSMartin Matuska return; 991e3aa18adSMartin Matuska } 992e3aa18adSMartin Matuska 993eda14cbcSMatt Macy if (zcp->zc_data.zc_serd_checksum[0] == '\0') { 99415f0b8c3SMartin Matuska if (nvlist_lookup_uint64(nvl, 99515f0b8c3SMartin Matuska FM_EREPORT_PAYLOAD_ZFS_VDEV_CKSUM_N, 99615f0b8c3SMartin Matuska &checksum_n) != 0) { 99715f0b8c3SMartin Matuska checksum_n = DEFAULT_CHECKSUM_N; 99815f0b8c3SMartin Matuska } 99915f0b8c3SMartin Matuska if (nvlist_lookup_uint64(nvl, 100015f0b8c3SMartin Matuska FM_EREPORT_PAYLOAD_ZFS_VDEV_CKSUM_T, 100115f0b8c3SMartin Matuska &checksum_t) != 0) { 100215f0b8c3SMartin Matuska checksum_t = DEFAULT_CHECKSUM_T; 100315f0b8c3SMartin Matuska } 100415f0b8c3SMartin Matuska 1005eda14cbcSMatt Macy zfs_serd_name(zcp->zc_data.zc_serd_checksum, 1006eda14cbcSMatt Macy pool_guid, vdev_guid, "checksum"); 1007eda14cbcSMatt Macy fmd_serd_create(hdl, 1008eda14cbcSMatt Macy zcp->zc_data.zc_serd_checksum, 100915f0b8c3SMartin Matuska checksum_n, 101015f0b8c3SMartin Matuska SEC2NSEC(checksum_t)); 1011e92ffd9bSMartin Matuska zfs_case_serialize(zcp); 1012eda14cbcSMatt Macy } 1013*7a7741afSMartin Matuska if (zfs_fm_serd_record(hdl, 1014*7a7741afSMartin Matuska zcp->zc_data.zc_serd_checksum, ep, zcp, 1015*7a7741afSMartin Matuska "checksum")) { 1016eda14cbcSMatt Macy zfs_case_solve(hdl, zcp, 1017e92ffd9bSMartin Matuska "fault.fs.zfs.vdev.checksum"); 1018eda14cbcSMatt Macy } 1019eda14cbcSMatt Macy } else if (fmd_nvl_class_match(hdl, nvl, 1020eda14cbcSMatt Macy ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_IO_FAILURE)) && 1021eda14cbcSMatt Macy (nvlist_lookup_string(nvl, 1022eda14cbcSMatt Macy FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE, &failmode) == 0) && 1023eda14cbcSMatt Macy failmode != NULL) { 1024eda14cbcSMatt Macy if (strncmp(failmode, FM_EREPORT_FAILMODE_CONTINUE, 1025eda14cbcSMatt Macy strlen(FM_EREPORT_FAILMODE_CONTINUE)) == 0) { 1026eda14cbcSMatt Macy zfs_case_solve(hdl, zcp, 1027e92ffd9bSMartin Matuska "fault.fs.zfs.io_failure_continue"); 1028eda14cbcSMatt Macy } else if (strncmp(failmode, FM_EREPORT_FAILMODE_WAIT, 1029eda14cbcSMatt Macy strlen(FM_EREPORT_FAILMODE_WAIT)) == 0) { 1030eda14cbcSMatt Macy zfs_case_solve(hdl, zcp, 1031e92ffd9bSMartin Matuska "fault.fs.zfs.io_failure_wait"); 1032eda14cbcSMatt Macy } 1033eda14cbcSMatt Macy } else if (fmd_nvl_class_match(hdl, nvl, 1034eda14cbcSMatt Macy ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_PROBE_FAILURE))) { 1035eda14cbcSMatt Macy #ifndef __linux__ 1036eda14cbcSMatt Macy /* This causes an unexpected fault diagnosis on linux */ 1037eda14cbcSMatt Macy checkremove = B_TRUE; 1038eda14cbcSMatt Macy #endif 1039eda14cbcSMatt Macy } 1040eda14cbcSMatt Macy 1041eda14cbcSMatt Macy /* 1042eda14cbcSMatt Macy * Because I/O errors may be due to device removal, we postpone 1043eda14cbcSMatt Macy * any diagnosis until we're sure that we aren't about to 1044eda14cbcSMatt Macy * receive a 'resource.fs.zfs.removed' event. 1045eda14cbcSMatt Macy */ 1046eda14cbcSMatt Macy if (checkremove) { 1047eda14cbcSMatt Macy if (zcp->zc_data.zc_has_remove_timer) 1048eda14cbcSMatt Macy fmd_timer_remove(hdl, zcp->zc_remove_timer); 1049eda14cbcSMatt Macy zcp->zc_remove_timer = fmd_timer_install(hdl, zcp, NULL, 1050eda14cbcSMatt Macy zfs_remove_timeout); 1051eda14cbcSMatt Macy if (!zcp->zc_data.zc_has_remove_timer) { 1052eda14cbcSMatt Macy zcp->zc_data.zc_has_remove_timer = 1; 1053e92ffd9bSMartin Matuska zfs_case_serialize(zcp); 1054eda14cbcSMatt Macy } 1055eda14cbcSMatt Macy } 1056eda14cbcSMatt Macy } 1057eda14cbcSMatt Macy } 1058eda14cbcSMatt Macy 1059eda14cbcSMatt Macy /* 1060eda14cbcSMatt Macy * The timeout is fired when we diagnosed an I/O error, and it was not due to 1061eda14cbcSMatt Macy * device removal (which would cause the timeout to be cancelled). 1062eda14cbcSMatt Macy */ 1063eda14cbcSMatt Macy static void 1064eda14cbcSMatt Macy zfs_fm_timeout(fmd_hdl_t *hdl, id_t id, void *data) 1065eda14cbcSMatt Macy { 1066eda14cbcSMatt Macy zfs_case_t *zcp = data; 1067eda14cbcSMatt Macy 1068eda14cbcSMatt Macy if (id == zcp->zc_remove_timer) 1069e92ffd9bSMartin Matuska zfs_case_solve(hdl, zcp, "fault.fs.zfs.vdev.io"); 1070eda14cbcSMatt Macy } 1071eda14cbcSMatt Macy 1072eda14cbcSMatt Macy /* 1073eda14cbcSMatt Macy * The specified case has been closed and any case-specific 1074eda14cbcSMatt Macy * data structures should be deallocated. 1075eda14cbcSMatt Macy */ 1076eda14cbcSMatt Macy static void 1077eda14cbcSMatt Macy zfs_fm_close(fmd_hdl_t *hdl, fmd_case_t *cs) 1078eda14cbcSMatt Macy { 1079eda14cbcSMatt Macy zfs_case_t *zcp = fmd_case_getspecific(hdl, cs); 1080eda14cbcSMatt Macy 1081eda14cbcSMatt Macy if (zcp->zc_data.zc_serd_checksum[0] != '\0') 1082eda14cbcSMatt Macy fmd_serd_destroy(hdl, zcp->zc_data.zc_serd_checksum); 1083eda14cbcSMatt Macy if (zcp->zc_data.zc_serd_io[0] != '\0') 1084eda14cbcSMatt Macy fmd_serd_destroy(hdl, zcp->zc_data.zc_serd_io); 1085e2257b31SMartin Matuska if (zcp->zc_data.zc_serd_slow_io[0] != '\0') 1086e2257b31SMartin Matuska fmd_serd_destroy(hdl, zcp->zc_data.zc_serd_slow_io); 1087eda14cbcSMatt Macy if (zcp->zc_data.zc_has_remove_timer) 1088eda14cbcSMatt Macy fmd_timer_remove(hdl, zcp->zc_remove_timer); 1089eda14cbcSMatt Macy 1090eda14cbcSMatt Macy uu_list_remove(zfs_cases, zcp); 1091eda14cbcSMatt Macy uu_list_node_fini(zcp, &zcp->zc_node, zfs_case_pool); 1092eda14cbcSMatt Macy fmd_hdl_free(hdl, zcp, sizeof (zfs_case_t)); 1093eda14cbcSMatt Macy } 1094eda14cbcSMatt Macy 1095eda14cbcSMatt Macy static const fmd_hdl_ops_t fmd_ops = { 1096eda14cbcSMatt Macy zfs_fm_recv, /* fmdo_recv */ 1097eda14cbcSMatt Macy zfs_fm_timeout, /* fmdo_timeout */ 1098eda14cbcSMatt Macy zfs_fm_close, /* fmdo_close */ 1099eda14cbcSMatt Macy NULL, /* fmdo_stats */ 1100e2257b31SMartin Matuska NULL, /* fmdo_gc */ 1101eda14cbcSMatt Macy }; 1102eda14cbcSMatt Macy 1103eda14cbcSMatt Macy static const fmd_prop_t fmd_props[] = { 1104eda14cbcSMatt Macy { NULL, 0, NULL } 1105eda14cbcSMatt Macy }; 1106eda14cbcSMatt Macy 1107eda14cbcSMatt Macy static const fmd_hdl_info_t fmd_info = { 1108eda14cbcSMatt Macy "ZFS Diagnosis Engine", "1.0", &fmd_ops, fmd_props 1109eda14cbcSMatt Macy }; 1110eda14cbcSMatt Macy 1111eda14cbcSMatt Macy void 1112eda14cbcSMatt Macy _zfs_diagnosis_init(fmd_hdl_t *hdl) 1113eda14cbcSMatt Macy { 1114eda14cbcSMatt Macy libzfs_handle_t *zhdl; 1115eda14cbcSMatt Macy 1116eda14cbcSMatt Macy if ((zhdl = libzfs_init()) == NULL) 1117eda14cbcSMatt Macy return; 1118eda14cbcSMatt Macy 1119eda14cbcSMatt Macy if ((zfs_case_pool = uu_list_pool_create("zfs_case_pool", 1120eda14cbcSMatt Macy sizeof (zfs_case_t), offsetof(zfs_case_t, zc_node), 1121eda14cbcSMatt Macy NULL, UU_LIST_POOL_DEBUG)) == NULL) { 1122eda14cbcSMatt Macy libzfs_fini(zhdl); 1123eda14cbcSMatt Macy return; 1124eda14cbcSMatt Macy } 1125eda14cbcSMatt Macy 1126eda14cbcSMatt Macy if ((zfs_cases = uu_list_create(zfs_case_pool, NULL, 1127eda14cbcSMatt Macy UU_LIST_DEBUG)) == NULL) { 1128eda14cbcSMatt Macy uu_list_pool_destroy(zfs_case_pool); 1129eda14cbcSMatt Macy libzfs_fini(zhdl); 1130eda14cbcSMatt Macy return; 1131eda14cbcSMatt Macy } 1132eda14cbcSMatt Macy 1133eda14cbcSMatt Macy if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) { 1134eda14cbcSMatt Macy uu_list_destroy(zfs_cases); 1135eda14cbcSMatt Macy uu_list_pool_destroy(zfs_case_pool); 1136eda14cbcSMatt Macy libzfs_fini(zhdl); 1137eda14cbcSMatt Macy return; 1138eda14cbcSMatt Macy } 1139eda14cbcSMatt Macy 1140eda14cbcSMatt Macy fmd_hdl_setspecific(hdl, zhdl); 1141eda14cbcSMatt Macy 1142eda14cbcSMatt Macy (void) fmd_stat_create(hdl, FMD_STAT_NOALLOC, sizeof (zfs_stats) / 1143eda14cbcSMatt Macy sizeof (fmd_stat_t), (fmd_stat_t *)&zfs_stats); 1144eda14cbcSMatt Macy } 1145eda14cbcSMatt Macy 1146eda14cbcSMatt Macy void 1147eda14cbcSMatt Macy _zfs_diagnosis_fini(fmd_hdl_t *hdl) 1148eda14cbcSMatt Macy { 1149eda14cbcSMatt Macy zfs_case_t *zcp; 1150eda14cbcSMatt Macy uu_list_walk_t *walk; 1151eda14cbcSMatt Macy libzfs_handle_t *zhdl; 1152eda14cbcSMatt Macy 1153eda14cbcSMatt Macy /* 1154eda14cbcSMatt Macy * Remove all active cases. 1155eda14cbcSMatt Macy */ 1156eda14cbcSMatt Macy walk = uu_list_walk_start(zfs_cases, UU_WALK_ROBUST); 1157eda14cbcSMatt Macy while ((zcp = uu_list_walk_next(walk)) != NULL) { 1158eda14cbcSMatt Macy fmd_hdl_debug(hdl, "removing case ena %llu", 1159eda14cbcSMatt Macy (long long unsigned)zcp->zc_data.zc_ena); 1160eda14cbcSMatt Macy uu_list_remove(zfs_cases, zcp); 1161eda14cbcSMatt Macy uu_list_node_fini(zcp, &zcp->zc_node, zfs_case_pool); 1162eda14cbcSMatt Macy fmd_hdl_free(hdl, zcp, sizeof (zfs_case_t)); 1163eda14cbcSMatt Macy } 1164eda14cbcSMatt Macy uu_list_walk_end(walk); 1165eda14cbcSMatt Macy 1166eda14cbcSMatt Macy uu_list_destroy(zfs_cases); 1167eda14cbcSMatt Macy uu_list_pool_destroy(zfs_case_pool); 1168eda14cbcSMatt Macy 1169eda14cbcSMatt Macy zhdl = fmd_hdl_getspecific(hdl); 1170eda14cbcSMatt Macy libzfs_fini(zhdl); 1171eda14cbcSMatt Macy } 1172