xref: /freebsd-src/sys/contrib/openzfs/cmd/zed/agents/zfs_diagnosis.c (revision 7a7741af18d6c8a804cc643cb7ecda9d730c6aa6)
1eda14cbcSMatt Macy /*
2eda14cbcSMatt Macy  * CDDL HEADER START
3eda14cbcSMatt Macy  *
4eda14cbcSMatt Macy  * The contents of this file are subject to the terms of the
5eda14cbcSMatt Macy  * Common Development and Distribution License (the "License").
6eda14cbcSMatt Macy  * You may not use this file except in compliance with the License.
7eda14cbcSMatt Macy  *
8eda14cbcSMatt Macy  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9271171e0SMartin Matuska  * or https://opensource.org/licenses/CDDL-1.0.
10eda14cbcSMatt Macy  * See the License for the specific language governing permissions
11eda14cbcSMatt Macy  * and limitations under the License.
12eda14cbcSMatt Macy  *
13eda14cbcSMatt Macy  * When distributing Covered Code, include this CDDL HEADER in each
14eda14cbcSMatt Macy  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15eda14cbcSMatt Macy  * If applicable, add the following below this CDDL HEADER, with the
16eda14cbcSMatt Macy  * fields enclosed by brackets "[]" replaced with your own identifying
17eda14cbcSMatt Macy  * information: Portions Copyright [yyyy] [name of copyright owner]
18eda14cbcSMatt Macy  *
19eda14cbcSMatt Macy  * CDDL HEADER END
20eda14cbcSMatt Macy  */
21eda14cbcSMatt Macy 
22eda14cbcSMatt Macy /*
23eda14cbcSMatt Macy  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24eda14cbcSMatt Macy  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
25eda14cbcSMatt Macy  * Copyright (c) 2016, Intel Corporation.
26e2257b31SMartin Matuska  * Copyright (c) 2023, Klara Inc.
27eda14cbcSMatt Macy  */
28eda14cbcSMatt Macy 
29eda14cbcSMatt Macy #include <stddef.h>
30eda14cbcSMatt Macy #include <string.h>
31eda14cbcSMatt Macy #include <libuutil.h>
32eda14cbcSMatt Macy #include <libzfs.h>
33eda14cbcSMatt Macy #include <sys/types.h>
34eda14cbcSMatt Macy #include <sys/time.h>
35eda14cbcSMatt Macy #include <sys/fs/zfs.h>
36eda14cbcSMatt Macy #include <sys/fm/protocol.h>
37eda14cbcSMatt Macy #include <sys/fm/fs/zfs.h>
38e3aa18adSMartin Matuska #include <sys/zio.h>
39eda14cbcSMatt Macy 
40eda14cbcSMatt Macy #include "zfs_agents.h"
41eda14cbcSMatt Macy #include "fmd_api.h"
42eda14cbcSMatt Macy 
43eda14cbcSMatt Macy /*
4415f0b8c3SMartin Matuska  * Default values for the serd engine when processing checksum or io errors. The
4515f0b8c3SMartin Matuska  * semantics are N <events> in T <seconds>.
4615f0b8c3SMartin Matuska  */
4715f0b8c3SMartin Matuska #define	DEFAULT_CHECKSUM_N	10	/* events */
4815f0b8c3SMartin Matuska #define	DEFAULT_CHECKSUM_T	600	/* seconds */
4915f0b8c3SMartin Matuska #define	DEFAULT_IO_N		10	/* events */
5015f0b8c3SMartin Matuska #define	DEFAULT_IO_T		600	/* seconds */
51e2257b31SMartin Matuska #define	DEFAULT_SLOW_IO_N	10	/* events */
52e2257b31SMartin Matuska #define	DEFAULT_SLOW_IO_T	30	/* seconds */
53e2257b31SMartin Matuska 
54e2257b31SMartin Matuska #define	CASE_GC_TIMEOUT_SECS	43200	/* 12 hours */
5515f0b8c3SMartin Matuska 
5615f0b8c3SMartin Matuska /*
57e2257b31SMartin Matuska  * Our serd engines are named in the following format:
58e2257b31SMartin Matuska  *     'zfs_<pool_guid>_<vdev_guid>_{checksum,io,slow_io}'
59e2257b31SMartin Matuska  * This #define reserves enough space for two 64-bit hex values plus the
60e2257b31SMartin Matuska  * length of the longest string.
61eda14cbcSMatt Macy  */
62eda14cbcSMatt Macy #define	MAX_SERDLEN	(16 * 2 + sizeof ("zfs___checksum"))
63eda14cbcSMatt Macy 
64eda14cbcSMatt Macy /*
65eda14cbcSMatt Macy  * On-disk case structure.  This must maintain backwards compatibility with
66eda14cbcSMatt Macy  * previous versions of the DE.  By default, any members appended to the end
67eda14cbcSMatt Macy  * will be filled with zeros if they don't exist in a previous version.
68eda14cbcSMatt Macy  */
69eda14cbcSMatt Macy typedef struct zfs_case_data {
70eda14cbcSMatt Macy 	uint64_t	zc_version;
71eda14cbcSMatt Macy 	uint64_t	zc_ena;
72eda14cbcSMatt Macy 	uint64_t	zc_pool_guid;
73eda14cbcSMatt Macy 	uint64_t	zc_vdev_guid;
74*7a7741afSMartin Matuska 	uint64_t	zc_parent_guid;
75eda14cbcSMatt Macy 	int		zc_pool_state;
76eda14cbcSMatt Macy 	char		zc_serd_checksum[MAX_SERDLEN];
77eda14cbcSMatt Macy 	char		zc_serd_io[MAX_SERDLEN];
78e2257b31SMartin Matuska 	char		zc_serd_slow_io[MAX_SERDLEN];
79eda14cbcSMatt Macy 	int		zc_has_remove_timer;
80eda14cbcSMatt Macy } zfs_case_data_t;
81eda14cbcSMatt Macy 
/*
 * Time-of-day stamp, kept as separate seconds and nanoseconds counters.
 */
typedef struct er_timeval {
	uint64_t	ertv_sec;	/* seconds part */
	uint64_t	ertv_nsec;	/* nanoseconds part */
} er_timeval_t;
89eda14cbcSMatt Macy 
90eda14cbcSMatt Macy /*
91eda14cbcSMatt Macy  * In-core case structure.
92eda14cbcSMatt Macy  */
93eda14cbcSMatt Macy typedef struct zfs_case {
94eda14cbcSMatt Macy 	boolean_t	zc_present;
95eda14cbcSMatt Macy 	uint32_t	zc_version;
96eda14cbcSMatt Macy 	zfs_case_data_t	zc_data;
97eda14cbcSMatt Macy 	fmd_case_t	*zc_case;
98eda14cbcSMatt Macy 	uu_list_node_t	zc_node;
99eda14cbcSMatt Macy 	id_t		zc_remove_timer;
100eda14cbcSMatt Macy 	char		*zc_fru;
101eda14cbcSMatt Macy 	er_timeval_t	zc_when;
102eda14cbcSMatt Macy } zfs_case_t;
103eda14cbcSMatt Macy 
104eda14cbcSMatt Macy #define	CASE_DATA			"data"
105eda14cbcSMatt Macy #define	CASE_FRU			"fru"
106eda14cbcSMatt Macy #define	CASE_DATA_VERSION_INITIAL	1
107eda14cbcSMatt Macy #define	CASE_DATA_VERSION_SERD		2
108eda14cbcSMatt Macy 
109eda14cbcSMatt Macy typedef struct zfs_de_stats {
110eda14cbcSMatt Macy 	fmd_stat_t	old_drops;
111eda14cbcSMatt Macy 	fmd_stat_t	dev_drops;
112eda14cbcSMatt Macy 	fmd_stat_t	vdev_drops;
113eda14cbcSMatt Macy 	fmd_stat_t	import_drops;
114eda14cbcSMatt Macy 	fmd_stat_t	resource_drops;
115eda14cbcSMatt Macy } zfs_de_stats_t;
116eda14cbcSMatt Macy 
117eda14cbcSMatt Macy zfs_de_stats_t zfs_stats = {
118eda14cbcSMatt Macy 	{ "old_drops", FMD_TYPE_UINT64, "ereports dropped (from before load)" },
119eda14cbcSMatt Macy 	{ "dev_drops", FMD_TYPE_UINT64, "ereports dropped (dev during open)"},
120eda14cbcSMatt Macy 	{ "vdev_drops", FMD_TYPE_UINT64, "ereports dropped (weird vdev types)"},
121eda14cbcSMatt Macy 	{ "import_drops", FMD_TYPE_UINT64, "ereports dropped (during import)" },
122eda14cbcSMatt Macy 	{ "resource_drops", FMD_TYPE_UINT64, "resource related ereports" }
123eda14cbcSMatt Macy };
124eda14cbcSMatt Macy 
125e2257b31SMartin Matuska /* wait 15 seconds after a removal */
126e2257b31SMartin Matuska static hrtime_t zfs_remove_timeout = SEC2NSEC(15);
127eda14cbcSMatt Macy 
128eda14cbcSMatt Macy uu_list_pool_t *zfs_case_pool;
129eda14cbcSMatt Macy uu_list_t *zfs_cases;
130eda14cbcSMatt Macy 
131eda14cbcSMatt Macy #define	ZFS_MAKE_RSRC(type)	\
132eda14cbcSMatt Macy     FM_RSRC_CLASS "." ZFS_ERROR_CLASS "." type
133eda14cbcSMatt Macy #define	ZFS_MAKE_EREPORT(type)	\
134eda14cbcSMatt Macy     FM_EREPORT_CLASS "." ZFS_ERROR_CLASS "." type
135eda14cbcSMatt Macy 
136e2257b31SMartin Matuska static void zfs_purge_cases(fmd_hdl_t *hdl);
137e2257b31SMartin Matuska 
138eda14cbcSMatt Macy /*
139eda14cbcSMatt Macy  * Write out the persistent representation of an active case.
140eda14cbcSMatt Macy  */
141eda14cbcSMatt Macy static void
142e92ffd9bSMartin Matuska zfs_case_serialize(zfs_case_t *zcp)
143eda14cbcSMatt Macy {
144eda14cbcSMatt Macy 	zcp->zc_data.zc_version = CASE_DATA_VERSION_SERD;
145eda14cbcSMatt Macy }
146eda14cbcSMatt Macy 
147eda14cbcSMatt Macy /*
148eda14cbcSMatt Macy  * Read back the persistent representation of an active case.
149eda14cbcSMatt Macy  */
150eda14cbcSMatt Macy static zfs_case_t *
151eda14cbcSMatt Macy zfs_case_unserialize(fmd_hdl_t *hdl, fmd_case_t *cp)
152eda14cbcSMatt Macy {
153eda14cbcSMatt Macy 	zfs_case_t *zcp;
154eda14cbcSMatt Macy 
155eda14cbcSMatt Macy 	zcp = fmd_hdl_zalloc(hdl, sizeof (zfs_case_t), FMD_SLEEP);
156eda14cbcSMatt Macy 	zcp->zc_case = cp;
157eda14cbcSMatt Macy 
158eda14cbcSMatt Macy 	fmd_buf_read(hdl, cp, CASE_DATA, &zcp->zc_data,
159eda14cbcSMatt Macy 	    sizeof (zcp->zc_data));
160eda14cbcSMatt Macy 
161eda14cbcSMatt Macy 	if (zcp->zc_data.zc_version > CASE_DATA_VERSION_SERD) {
162eda14cbcSMatt Macy 		fmd_hdl_free(hdl, zcp, sizeof (zfs_case_t));
163eda14cbcSMatt Macy 		return (NULL);
164eda14cbcSMatt Macy 	}
165eda14cbcSMatt Macy 
166eda14cbcSMatt Macy 	/*
167eda14cbcSMatt Macy 	 * fmd_buf_read() will have already zeroed out the remainder of the
168eda14cbcSMatt Macy 	 * buffer, so we don't have to do anything special if the version
169eda14cbcSMatt Macy 	 * doesn't include the SERD engine name.
170eda14cbcSMatt Macy 	 */
171eda14cbcSMatt Macy 
172eda14cbcSMatt Macy 	if (zcp->zc_data.zc_has_remove_timer)
173eda14cbcSMatt Macy 		zcp->zc_remove_timer = fmd_timer_install(hdl, zcp,
174eda14cbcSMatt Macy 		    NULL, zfs_remove_timeout);
175eda14cbcSMatt Macy 
176eda14cbcSMatt Macy 	uu_list_node_init(zcp, &zcp->zc_node, zfs_case_pool);
177eda14cbcSMatt Macy 	(void) uu_list_insert_before(zfs_cases, NULL, zcp);
178eda14cbcSMatt Macy 
179eda14cbcSMatt Macy 	fmd_case_setspecific(hdl, cp, zcp);
180eda14cbcSMatt Macy 
181eda14cbcSMatt Macy 	return (zcp);
182eda14cbcSMatt Macy }
183eda14cbcSMatt Macy 
184eda14cbcSMatt Macy /*
185*7a7741afSMartin Matuska  * Return count of other unique SERD cases under same vdev parent
186e2257b31SMartin Matuska  */
187e2257b31SMartin Matuska static uint_t
188*7a7741afSMartin Matuska zfs_other_serd_cases(fmd_hdl_t *hdl, const zfs_case_data_t *zfs_case)
189e2257b31SMartin Matuska {
190e2257b31SMartin Matuska 	zfs_case_t *zcp;
191e2257b31SMartin Matuska 	uint_t cases = 0;
192e2257b31SMartin Matuska 	static hrtime_t next_check = 0;
193e2257b31SMartin Matuska 
194e2257b31SMartin Matuska 	/*
195e2257b31SMartin Matuska 	 * Note that plumbing in some external GC would require adding locking,
196e2257b31SMartin Matuska 	 * since most of this module code is not thread safe and assumes there
197e2257b31SMartin Matuska 	 * is only one thread running against the module. So we perform GC here
198e2257b31SMartin Matuska 	 * inline periodically so that future delay induced faults will be
199e2257b31SMartin Matuska 	 * possible once the issue causing multiple vdev delays is resolved.
200e2257b31SMartin Matuska 	 */
201e2257b31SMartin Matuska 	if (gethrestime_sec() > next_check) {
202e2257b31SMartin Matuska 		/* Periodically purge old SERD entries and stale cases */
203e2257b31SMartin Matuska 		fmd_serd_gc(hdl);
204e2257b31SMartin Matuska 		zfs_purge_cases(hdl);
205e2257b31SMartin Matuska 		next_check = gethrestime_sec() + CASE_GC_TIMEOUT_SECS;
206e2257b31SMartin Matuska 	}
207e2257b31SMartin Matuska 
208e2257b31SMartin Matuska 	for (zcp = uu_list_first(zfs_cases); zcp != NULL;
209e2257b31SMartin Matuska 	    zcp = uu_list_next(zfs_cases, zcp)) {
210*7a7741afSMartin Matuska 		zfs_case_data_t *zcd = &zcp->zc_data;
211*7a7741afSMartin Matuska 
212*7a7741afSMartin Matuska 		/*
213*7a7741afSMartin Matuska 		 * must be same pool and parent vdev but different leaf vdev
214*7a7741afSMartin Matuska 		 */
215*7a7741afSMartin Matuska 		if (zcd->zc_pool_guid != zfs_case->zc_pool_guid ||
216*7a7741afSMartin Matuska 		    zcd->zc_parent_guid != zfs_case->zc_parent_guid ||
217*7a7741afSMartin Matuska 		    zcd->zc_vdev_guid == zfs_case->zc_vdev_guid) {
218*7a7741afSMartin Matuska 			continue;
219*7a7741afSMartin Matuska 		}
220*7a7741afSMartin Matuska 
221*7a7741afSMartin Matuska 		/*
222*7a7741afSMartin Matuska 		 * Check if there is another active serd case besides zfs_case
223*7a7741afSMartin Matuska 		 *
224*7a7741afSMartin Matuska 		 * Only one serd engine will be assigned to the case
225*7a7741afSMartin Matuska 		 */
226*7a7741afSMartin Matuska 		if (zcd->zc_serd_checksum[0] == zfs_case->zc_serd_checksum[0] &&
227*7a7741afSMartin Matuska 		    fmd_serd_active(hdl, zcd->zc_serd_checksum)) {
228*7a7741afSMartin Matuska 			cases++;
229*7a7741afSMartin Matuska 		}
230*7a7741afSMartin Matuska 		if (zcd->zc_serd_io[0] == zfs_case->zc_serd_io[0] &&
231*7a7741afSMartin Matuska 		    fmd_serd_active(hdl, zcd->zc_serd_io)) {
232*7a7741afSMartin Matuska 			cases++;
233*7a7741afSMartin Matuska 		}
234*7a7741afSMartin Matuska 		if (zcd->zc_serd_slow_io[0] == zfs_case->zc_serd_slow_io[0] &&
235*7a7741afSMartin Matuska 		    fmd_serd_active(hdl, zcd->zc_serd_slow_io)) {
236e2257b31SMartin Matuska 			cases++;
237e2257b31SMartin Matuska 		}
238e2257b31SMartin Matuska 	}
239e2257b31SMartin Matuska 	return (cases);
240e2257b31SMartin Matuska }
241e2257b31SMartin Matuska 
242e2257b31SMartin Matuska /*
243eda14cbcSMatt Macy  * Iterate over any active cases.  If any cases are associated with a pool or
244eda14cbcSMatt Macy  * vdev which is no longer present on the system, close the associated case.
245eda14cbcSMatt Macy  */
246eda14cbcSMatt Macy static void
247eda14cbcSMatt Macy zfs_mark_vdev(uint64_t pool_guid, nvlist_t *vd, er_timeval_t *loaded)
248eda14cbcSMatt Macy {
249eda14cbcSMatt Macy 	uint64_t vdev_guid = 0;
250eda14cbcSMatt Macy 	uint_t c, children;
251eda14cbcSMatt Macy 	nvlist_t **child;
252eda14cbcSMatt Macy 	zfs_case_t *zcp;
253eda14cbcSMatt Macy 
254eda14cbcSMatt Macy 	(void) nvlist_lookup_uint64(vd, ZPOOL_CONFIG_GUID, &vdev_guid);
255eda14cbcSMatt Macy 
256eda14cbcSMatt Macy 	/*
257eda14cbcSMatt Macy 	 * Mark any cases associated with this (pool, vdev) pair.
258eda14cbcSMatt Macy 	 */
259eda14cbcSMatt Macy 	for (zcp = uu_list_first(zfs_cases); zcp != NULL;
260eda14cbcSMatt Macy 	    zcp = uu_list_next(zfs_cases, zcp)) {
261eda14cbcSMatt Macy 		if (zcp->zc_data.zc_pool_guid == pool_guid &&
262eda14cbcSMatt Macy 		    zcp->zc_data.zc_vdev_guid == vdev_guid) {
263eda14cbcSMatt Macy 			zcp->zc_present = B_TRUE;
264eda14cbcSMatt Macy 			zcp->zc_when = *loaded;
265eda14cbcSMatt Macy 		}
266eda14cbcSMatt Macy 	}
267eda14cbcSMatt Macy 
268eda14cbcSMatt Macy 	/*
269eda14cbcSMatt Macy 	 * Iterate over all children.
270eda14cbcSMatt Macy 	 */
271eda14cbcSMatt Macy 	if (nvlist_lookup_nvlist_array(vd, ZPOOL_CONFIG_CHILDREN, &child,
272eda14cbcSMatt Macy 	    &children) == 0) {
273eda14cbcSMatt Macy 		for (c = 0; c < children; c++)
274eda14cbcSMatt Macy 			zfs_mark_vdev(pool_guid, child[c], loaded);
275eda14cbcSMatt Macy 	}
276eda14cbcSMatt Macy 
277eda14cbcSMatt Macy 	if (nvlist_lookup_nvlist_array(vd, ZPOOL_CONFIG_L2CACHE, &child,
278eda14cbcSMatt Macy 	    &children) == 0) {
279eda14cbcSMatt Macy 		for (c = 0; c < children; c++)
280eda14cbcSMatt Macy 			zfs_mark_vdev(pool_guid, child[c], loaded);
281eda14cbcSMatt Macy 	}
282eda14cbcSMatt Macy 
283eda14cbcSMatt Macy 	if (nvlist_lookup_nvlist_array(vd, ZPOOL_CONFIG_SPARES, &child,
284eda14cbcSMatt Macy 	    &children) == 0) {
285eda14cbcSMatt Macy 		for (c = 0; c < children; c++)
286eda14cbcSMatt Macy 			zfs_mark_vdev(pool_guid, child[c], loaded);
287eda14cbcSMatt Macy 	}
288eda14cbcSMatt Macy }
289eda14cbcSMatt Macy 
290eda14cbcSMatt Macy static int
291eda14cbcSMatt Macy zfs_mark_pool(zpool_handle_t *zhp, void *unused)
292eda14cbcSMatt Macy {
293e92ffd9bSMartin Matuska 	(void) unused;
294eda14cbcSMatt Macy 	zfs_case_t *zcp;
295eda14cbcSMatt Macy 	uint64_t pool_guid;
296eda14cbcSMatt Macy 	uint64_t *tod;
297eda14cbcSMatt Macy 	er_timeval_t loaded = { 0 };
298eda14cbcSMatt Macy 	nvlist_t *config, *vd;
299eda14cbcSMatt Macy 	uint_t nelem = 0;
300eda14cbcSMatt Macy 	int ret;
301eda14cbcSMatt Macy 
302eda14cbcSMatt Macy 	pool_guid = zpool_get_prop_int(zhp, ZPOOL_PROP_GUID, NULL);
303eda14cbcSMatt Macy 	/*
304eda14cbcSMatt Macy 	 * Mark any cases associated with just this pool.
305eda14cbcSMatt Macy 	 */
306eda14cbcSMatt Macy 	for (zcp = uu_list_first(zfs_cases); zcp != NULL;
307eda14cbcSMatt Macy 	    zcp = uu_list_next(zfs_cases, zcp)) {
308eda14cbcSMatt Macy 		if (zcp->zc_data.zc_pool_guid == pool_guid &&
309eda14cbcSMatt Macy 		    zcp->zc_data.zc_vdev_guid == 0)
310eda14cbcSMatt Macy 			zcp->zc_present = B_TRUE;
311eda14cbcSMatt Macy 	}
312eda14cbcSMatt Macy 
313eda14cbcSMatt Macy 	if ((config = zpool_get_config(zhp, NULL)) == NULL) {
314eda14cbcSMatt Macy 		zpool_close(zhp);
315eda14cbcSMatt Macy 		return (-1);
316eda14cbcSMatt Macy 	}
317eda14cbcSMatt Macy 
318eda14cbcSMatt Macy 	(void) nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_LOADED_TIME,
319eda14cbcSMatt Macy 	    &tod, &nelem);
320eda14cbcSMatt Macy 	if (nelem == 2) {
321eda14cbcSMatt Macy 		loaded.ertv_sec = tod[0];
322eda14cbcSMatt Macy 		loaded.ertv_nsec = tod[1];
323eda14cbcSMatt Macy 		for (zcp = uu_list_first(zfs_cases); zcp != NULL;
324eda14cbcSMatt Macy 		    zcp = uu_list_next(zfs_cases, zcp)) {
325eda14cbcSMatt Macy 			if (zcp->zc_data.zc_pool_guid == pool_guid &&
326eda14cbcSMatt Macy 			    zcp->zc_data.zc_vdev_guid == 0) {
327eda14cbcSMatt Macy 				zcp->zc_when = loaded;
328eda14cbcSMatt Macy 			}
329eda14cbcSMatt Macy 		}
330eda14cbcSMatt Macy 	}
331eda14cbcSMatt Macy 
332eda14cbcSMatt Macy 	ret = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &vd);
333eda14cbcSMatt Macy 	if (ret) {
334eda14cbcSMatt Macy 		zpool_close(zhp);
335eda14cbcSMatt Macy 		return (-1);
336eda14cbcSMatt Macy 	}
337eda14cbcSMatt Macy 
338eda14cbcSMatt Macy 	zfs_mark_vdev(pool_guid, vd, &loaded);
339eda14cbcSMatt Macy 
340eda14cbcSMatt Macy 	zpool_close(zhp);
341eda14cbcSMatt Macy 
342eda14cbcSMatt Macy 	return (0);
343eda14cbcSMatt Macy }
344eda14cbcSMatt Macy 
345eda14cbcSMatt Macy struct load_time_arg {
346eda14cbcSMatt Macy 	uint64_t lt_guid;
347eda14cbcSMatt Macy 	er_timeval_t *lt_time;
348eda14cbcSMatt Macy 	boolean_t lt_found;
349eda14cbcSMatt Macy };
350eda14cbcSMatt Macy 
351eda14cbcSMatt Macy static int
352eda14cbcSMatt Macy zpool_find_load_time(zpool_handle_t *zhp, void *arg)
353eda14cbcSMatt Macy {
354eda14cbcSMatt Macy 	struct load_time_arg *lta = arg;
355eda14cbcSMatt Macy 	uint64_t pool_guid;
356eda14cbcSMatt Macy 	uint64_t *tod;
357eda14cbcSMatt Macy 	nvlist_t *config;
358eda14cbcSMatt Macy 	uint_t nelem;
359eda14cbcSMatt Macy 
360eda14cbcSMatt Macy 	if (lta->lt_found) {
361eda14cbcSMatt Macy 		zpool_close(zhp);
362eda14cbcSMatt Macy 		return (0);
363eda14cbcSMatt Macy 	}
364eda14cbcSMatt Macy 
365eda14cbcSMatt Macy 	pool_guid = zpool_get_prop_int(zhp, ZPOOL_PROP_GUID, NULL);
366eda14cbcSMatt Macy 	if (pool_guid != lta->lt_guid) {
367eda14cbcSMatt Macy 		zpool_close(zhp);
368eda14cbcSMatt Macy 		return (0);
369eda14cbcSMatt Macy 	}
370eda14cbcSMatt Macy 
371eda14cbcSMatt Macy 	if ((config = zpool_get_config(zhp, NULL)) == NULL) {
372eda14cbcSMatt Macy 		zpool_close(zhp);
373eda14cbcSMatt Macy 		return (-1);
374eda14cbcSMatt Macy 	}
375eda14cbcSMatt Macy 
376eda14cbcSMatt Macy 	if (nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_LOADED_TIME,
377eda14cbcSMatt Macy 	    &tod, &nelem) == 0 && nelem == 2) {
378eda14cbcSMatt Macy 		lta->lt_found = B_TRUE;
379eda14cbcSMatt Macy 		lta->lt_time->ertv_sec = tod[0];
380eda14cbcSMatt Macy 		lta->lt_time->ertv_nsec = tod[1];
381eda14cbcSMatt Macy 	}
382eda14cbcSMatt Macy 
383eda14cbcSMatt Macy 	zpool_close(zhp);
384eda14cbcSMatt Macy 
385eda14cbcSMatt Macy 	return (0);
386eda14cbcSMatt Macy }
387eda14cbcSMatt Macy 
388eda14cbcSMatt Macy static void
389eda14cbcSMatt Macy zfs_purge_cases(fmd_hdl_t *hdl)
390eda14cbcSMatt Macy {
391eda14cbcSMatt Macy 	zfs_case_t *zcp;
392eda14cbcSMatt Macy 	uu_list_walk_t *walk;
393eda14cbcSMatt Macy 	libzfs_handle_t *zhdl = fmd_hdl_getspecific(hdl);
394eda14cbcSMatt Macy 
395eda14cbcSMatt Macy 	/*
396eda14cbcSMatt Macy 	 * There is no way to open a pool by GUID, or lookup a vdev by GUID.  No
397eda14cbcSMatt Macy 	 * matter what we do, we're going to have to stomach an O(vdevs * cases)
398eda14cbcSMatt Macy 	 * algorithm.  In reality, both quantities are likely so small that
399eda14cbcSMatt Macy 	 * neither will matter. Given that iterating over pools is more
400eda14cbcSMatt Macy 	 * expensive than iterating over the in-memory case list, we opt for a
401eda14cbcSMatt Macy 	 * 'present' flag in each case that starts off cleared.  We then iterate
402eda14cbcSMatt Macy 	 * over all pools, marking those that are still present, and removing
403eda14cbcSMatt Macy 	 * those that aren't found.
404eda14cbcSMatt Macy 	 *
405eda14cbcSMatt Macy 	 * Note that we could also construct an FMRI and rely on
406eda14cbcSMatt Macy 	 * fmd_nvl_fmri_present(), but this would end up doing the same search.
407eda14cbcSMatt Macy 	 */
408eda14cbcSMatt Macy 
409eda14cbcSMatt Macy 	/*
410eda14cbcSMatt Macy 	 * Mark the cases as not present.
411eda14cbcSMatt Macy 	 */
412eda14cbcSMatt Macy 	for (zcp = uu_list_first(zfs_cases); zcp != NULL;
413eda14cbcSMatt Macy 	    zcp = uu_list_next(zfs_cases, zcp))
414eda14cbcSMatt Macy 		zcp->zc_present = B_FALSE;
415eda14cbcSMatt Macy 
416eda14cbcSMatt Macy 	/*
417eda14cbcSMatt Macy 	 * Iterate over all pools and mark the pools and vdevs found.  If this
418eda14cbcSMatt Macy 	 * fails (most probably because we're out of memory), then don't close
419eda14cbcSMatt Macy 	 * any of the cases and we cannot be sure they are accurate.
420eda14cbcSMatt Macy 	 */
421eda14cbcSMatt Macy 	if (zpool_iter(zhdl, zfs_mark_pool, NULL) != 0)
422eda14cbcSMatt Macy 		return;
423eda14cbcSMatt Macy 
424eda14cbcSMatt Macy 	/*
425eda14cbcSMatt Macy 	 * Remove those cases which were not found.
426eda14cbcSMatt Macy 	 */
427eda14cbcSMatt Macy 	walk = uu_list_walk_start(zfs_cases, UU_WALK_ROBUST);
428eda14cbcSMatt Macy 	while ((zcp = uu_list_walk_next(walk)) != NULL) {
429eda14cbcSMatt Macy 		if (!zcp->zc_present)
430eda14cbcSMatt Macy 			fmd_case_close(hdl, zcp->zc_case);
431eda14cbcSMatt Macy 	}
432eda14cbcSMatt Macy 	uu_list_walk_end(walk);
433eda14cbcSMatt Macy }
434eda14cbcSMatt Macy 
435eda14cbcSMatt Macy /*
436eda14cbcSMatt Macy  * Construct the name of a serd engine given the pool/vdev GUID and type (io or
437eda14cbcSMatt Macy  * checksum).
438eda14cbcSMatt Macy  */
439eda14cbcSMatt Macy static void
440eda14cbcSMatt Macy zfs_serd_name(char *buf, uint64_t pool_guid, uint64_t vdev_guid,
441eda14cbcSMatt Macy     const char *type)
442eda14cbcSMatt Macy {
443eda14cbcSMatt Macy 	(void) snprintf(buf, MAX_SERDLEN, "zfs_%llx_%llx_%s",
444eda14cbcSMatt Macy 	    (long long unsigned int)pool_guid,
445eda14cbcSMatt Macy 	    (long long unsigned int)vdev_guid, type);
446eda14cbcSMatt Macy }
447eda14cbcSMatt Macy 
448e2257b31SMartin Matuska static void
449e2257b31SMartin Matuska zfs_case_retire(fmd_hdl_t *hdl, zfs_case_t *zcp)
450e2257b31SMartin Matuska {
451e2257b31SMartin Matuska 	fmd_hdl_debug(hdl, "retiring case");
452e2257b31SMartin Matuska 
453e2257b31SMartin Matuska 	fmd_case_close(hdl, zcp->zc_case);
454e2257b31SMartin Matuska }
455e2257b31SMartin Matuska 
456eda14cbcSMatt Macy /*
457eda14cbcSMatt Macy  * Solve a given ZFS case.  This first checks to make sure the diagnosis is
458eda14cbcSMatt Macy  * still valid, as well as cleaning up any pending timer associated with the
459eda14cbcSMatt Macy  * case.
460eda14cbcSMatt Macy  */
461eda14cbcSMatt Macy static void
462e92ffd9bSMartin Matuska zfs_case_solve(fmd_hdl_t *hdl, zfs_case_t *zcp, const char *faultname)
463eda14cbcSMatt Macy {
464eda14cbcSMatt Macy 	nvlist_t *detector, *fault;
465eda14cbcSMatt Macy 	boolean_t serialize;
466eda14cbcSMatt Macy 	nvlist_t *fru = NULL;
467eda14cbcSMatt Macy 	fmd_hdl_debug(hdl, "solving fault '%s'", faultname);
468eda14cbcSMatt Macy 
469eda14cbcSMatt Macy 	/*
470eda14cbcSMatt Macy 	 * Construct the detector from the case data.  The detector is in the
471eda14cbcSMatt Macy 	 * ZFS scheme, and is either the pool or the vdev, depending on whether
472eda14cbcSMatt Macy 	 * this is a vdev or pool fault.
473eda14cbcSMatt Macy 	 */
474eda14cbcSMatt Macy 	detector = fmd_nvl_alloc(hdl, FMD_SLEEP);
475eda14cbcSMatt Macy 
476eda14cbcSMatt Macy 	(void) nvlist_add_uint8(detector, FM_VERSION, ZFS_SCHEME_VERSION0);
477eda14cbcSMatt Macy 	(void) nvlist_add_string(detector, FM_FMRI_SCHEME, FM_FMRI_SCHEME_ZFS);
478eda14cbcSMatt Macy 	(void) nvlist_add_uint64(detector, FM_FMRI_ZFS_POOL,
479eda14cbcSMatt Macy 	    zcp->zc_data.zc_pool_guid);
480eda14cbcSMatt Macy 	if (zcp->zc_data.zc_vdev_guid != 0) {
481eda14cbcSMatt Macy 		(void) nvlist_add_uint64(detector, FM_FMRI_ZFS_VDEV,
482eda14cbcSMatt Macy 		    zcp->zc_data.zc_vdev_guid);
483eda14cbcSMatt Macy 	}
484eda14cbcSMatt Macy 
485eda14cbcSMatt Macy 	fault = fmd_nvl_create_fault(hdl, faultname, 100, detector,
486eda14cbcSMatt Macy 	    fru, detector);
487eda14cbcSMatt Macy 	fmd_case_add_suspect(hdl, zcp->zc_case, fault);
488eda14cbcSMatt Macy 
489eda14cbcSMatt Macy 	nvlist_free(fru);
490eda14cbcSMatt Macy 
491eda14cbcSMatt Macy 	fmd_case_solve(hdl, zcp->zc_case);
492eda14cbcSMatt Macy 
493eda14cbcSMatt Macy 	serialize = B_FALSE;
494eda14cbcSMatt Macy 	if (zcp->zc_data.zc_has_remove_timer) {
495eda14cbcSMatt Macy 		fmd_timer_remove(hdl, zcp->zc_remove_timer);
496eda14cbcSMatt Macy 		zcp->zc_data.zc_has_remove_timer = 0;
497eda14cbcSMatt Macy 		serialize = B_TRUE;
498eda14cbcSMatt Macy 	}
499eda14cbcSMatt Macy 	if (serialize)
500e92ffd9bSMartin Matuska 		zfs_case_serialize(zcp);
501eda14cbcSMatt Macy 
502eda14cbcSMatt Macy 	nvlist_free(detector);
503eda14cbcSMatt Macy }
504eda14cbcSMatt Macy 
505eda14cbcSMatt Macy static boolean_t
506eda14cbcSMatt Macy timeval_earlier(er_timeval_t *a, er_timeval_t *b)
507eda14cbcSMatt Macy {
508eda14cbcSMatt Macy 	return (a->ertv_sec < b->ertv_sec ||
509eda14cbcSMatt Macy 	    (a->ertv_sec == b->ertv_sec && a->ertv_nsec < b->ertv_nsec));
510eda14cbcSMatt Macy }
511eda14cbcSMatt Macy 
512eda14cbcSMatt Macy static void
513eda14cbcSMatt Macy zfs_ereport_when(fmd_hdl_t *hdl, nvlist_t *nvl, er_timeval_t *when)
514eda14cbcSMatt Macy {
515e92ffd9bSMartin Matuska 	(void) hdl;
516eda14cbcSMatt Macy 	int64_t *tod;
517eda14cbcSMatt Macy 	uint_t	nelem;
518eda14cbcSMatt Macy 
519eda14cbcSMatt Macy 	if (nvlist_lookup_int64_array(nvl, FM_EREPORT_TIME, &tod,
520eda14cbcSMatt Macy 	    &nelem) == 0 && nelem == 2) {
521eda14cbcSMatt Macy 		when->ertv_sec = tod[0];
522eda14cbcSMatt Macy 		when->ertv_nsec = tod[1];
523eda14cbcSMatt Macy 	} else {
524eda14cbcSMatt Macy 		when->ertv_sec = when->ertv_nsec = UINT64_MAX;
525eda14cbcSMatt Macy 	}
526eda14cbcSMatt Macy }
527eda14cbcSMatt Macy 
528eda14cbcSMatt Macy /*
529*7a7741afSMartin Matuska  * Record the specified event in the SERD engine and return a
530*7a7741afSMartin Matuska  * boolean value indicating whether or not the engine fired as
531*7a7741afSMartin Matuska  * the result of inserting this event.
532*7a7741afSMartin Matuska  *
533*7a7741afSMartin Matuska  * When the pool has similar active cases on other vdevs, then
534*7a7741afSMartin Matuska  * the fired state is disregarded and the case is retired.
535*7a7741afSMartin Matuska  */
536*7a7741afSMartin Matuska static int
537*7a7741afSMartin Matuska zfs_fm_serd_record(fmd_hdl_t *hdl, const char *name, fmd_event_t *ep,
538*7a7741afSMartin Matuska     zfs_case_t *zcp, const char *err_type)
539*7a7741afSMartin Matuska {
540*7a7741afSMartin Matuska 	int fired = fmd_serd_record(hdl, name, ep);
541*7a7741afSMartin Matuska 	int peers = 0;
542*7a7741afSMartin Matuska 
543*7a7741afSMartin Matuska 	if (fired && (peers = zfs_other_serd_cases(hdl, &zcp->zc_data)) > 0) {
544*7a7741afSMartin Matuska 		fmd_hdl_debug(hdl, "pool %llu is tracking %d other %s cases "
545*7a7741afSMartin Matuska 		    "-- skip faulting the vdev %llu",
546*7a7741afSMartin Matuska 		    (u_longlong_t)zcp->zc_data.zc_pool_guid,
547*7a7741afSMartin Matuska 		    peers, err_type,
548*7a7741afSMartin Matuska 		    (u_longlong_t)zcp->zc_data.zc_vdev_guid);
549*7a7741afSMartin Matuska 		zfs_case_retire(hdl, zcp);
550*7a7741afSMartin Matuska 		fired = 0;
551*7a7741afSMartin Matuska 	}
552*7a7741afSMartin Matuska 
553*7a7741afSMartin Matuska 	return (fired);
554*7a7741afSMartin Matuska }
555*7a7741afSMartin Matuska 
556*7a7741afSMartin Matuska /*
557eda14cbcSMatt Macy  * Main fmd entry point.
558eda14cbcSMatt Macy  */
559eda14cbcSMatt Macy static void
560eda14cbcSMatt Macy zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
561eda14cbcSMatt Macy {
562eda14cbcSMatt Macy 	zfs_case_t *zcp, *dcp;
563eda14cbcSMatt Macy 	int32_t pool_state;
564*7a7741afSMartin Matuska 	uint64_t ena, pool_guid, vdev_guid, parent_guid;
56515f0b8c3SMartin Matuska 	uint64_t checksum_n, checksum_t;
56615f0b8c3SMartin Matuska 	uint64_t io_n, io_t;
567eda14cbcSMatt Macy 	er_timeval_t pool_load;
568eda14cbcSMatt Macy 	er_timeval_t er_when;
569eda14cbcSMatt Macy 	nvlist_t *detector;
570eda14cbcSMatt Macy 	boolean_t pool_found = B_FALSE;
571eda14cbcSMatt Macy 	boolean_t isresource;
5722a58b312SMartin Matuska 	const char *type;
573eda14cbcSMatt Macy 
574eda14cbcSMatt Macy 	/*
575eda14cbcSMatt Macy 	 * We subscribe to notifications for vdev or pool removal.  In these
576eda14cbcSMatt Macy 	 * cases, there may be cases that no longer apply.  Purge any cases
577eda14cbcSMatt Macy 	 * that no longer apply.
578eda14cbcSMatt Macy 	 */
579eda14cbcSMatt Macy 	if (fmd_nvl_class_match(hdl, nvl, "sysevent.fs.zfs.*")) {
580eda14cbcSMatt Macy 		fmd_hdl_debug(hdl, "purging orphaned cases from %s",
581eda14cbcSMatt Macy 		    strrchr(class, '.') + 1);
582eda14cbcSMatt Macy 		zfs_purge_cases(hdl);
583eda14cbcSMatt Macy 		zfs_stats.resource_drops.fmds_value.ui64++;
584eda14cbcSMatt Macy 		return;
585eda14cbcSMatt Macy 	}
586eda14cbcSMatt Macy 
587eda14cbcSMatt Macy 	isresource = fmd_nvl_class_match(hdl, nvl, "resource.fs.zfs.*");
588eda14cbcSMatt Macy 
589eda14cbcSMatt Macy 	if (isresource) {
590eda14cbcSMatt Macy 		/*
591eda14cbcSMatt Macy 		 * For resources, we don't have a normal payload.
592eda14cbcSMatt Macy 		 */
593eda14cbcSMatt Macy 		if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID,
594eda14cbcSMatt Macy 		    &vdev_guid) != 0)
595eda14cbcSMatt Macy 			pool_state = SPA_LOAD_OPEN;
596eda14cbcSMatt Macy 		else
597eda14cbcSMatt Macy 			pool_state = SPA_LOAD_NONE;
598eda14cbcSMatt Macy 		detector = NULL;
599eda14cbcSMatt Macy 	} else {
600eda14cbcSMatt Macy 		(void) nvlist_lookup_nvlist(nvl,
601eda14cbcSMatt Macy 		    FM_EREPORT_DETECTOR, &detector);
602eda14cbcSMatt Macy 		(void) nvlist_lookup_int32(nvl,
603eda14cbcSMatt Macy 		    FM_EREPORT_PAYLOAD_ZFS_POOL_CONTEXT, &pool_state);
604eda14cbcSMatt Macy 	}
605eda14cbcSMatt Macy 
606eda14cbcSMatt Macy 	/*
607eda14cbcSMatt Macy 	 * We also ignore all ereports generated during an import of a pool,
608eda14cbcSMatt Macy 	 * since the only possible fault (.pool) would result in import failure,
609eda14cbcSMatt Macy 	 * and hence no persistent fault.  Some day we may want to do something
610eda14cbcSMatt Macy 	 * with these ereports, so we continue generating them internally.
611eda14cbcSMatt Macy 	 */
612eda14cbcSMatt Macy 	if (pool_state == SPA_LOAD_IMPORT) {
613eda14cbcSMatt Macy 		zfs_stats.import_drops.fmds_value.ui64++;
614eda14cbcSMatt Macy 		fmd_hdl_debug(hdl, "ignoring '%s' during import", class);
615eda14cbcSMatt Macy 		return;
616eda14cbcSMatt Macy 	}
617eda14cbcSMatt Macy 
618eda14cbcSMatt Macy 	/*
619eda14cbcSMatt Macy 	 * Device I/O errors are ignored during pool open.
620eda14cbcSMatt Macy 	 */
621eda14cbcSMatt Macy 	if (pool_state == SPA_LOAD_OPEN &&
622eda14cbcSMatt Macy 	    (fmd_nvl_class_match(hdl, nvl,
623eda14cbcSMatt Macy 	    ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_CHECKSUM)) ||
624eda14cbcSMatt Macy 	    fmd_nvl_class_match(hdl, nvl,
625eda14cbcSMatt Macy 	    ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_IO)) ||
626eda14cbcSMatt Macy 	    fmd_nvl_class_match(hdl, nvl,
627eda14cbcSMatt Macy 	    ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_PROBE_FAILURE)))) {
628eda14cbcSMatt Macy 		fmd_hdl_debug(hdl, "ignoring '%s' during pool open", class);
629eda14cbcSMatt Macy 		zfs_stats.dev_drops.fmds_value.ui64++;
630eda14cbcSMatt Macy 		return;
631eda14cbcSMatt Macy 	}
632eda14cbcSMatt Macy 
633eda14cbcSMatt Macy 	/*
634eda14cbcSMatt Macy 	 * We ignore ereports for anything except disks and files.
635eda14cbcSMatt Macy 	 */
636eda14cbcSMatt Macy 	if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
637eda14cbcSMatt Macy 	    &type) == 0) {
638eda14cbcSMatt Macy 		if (strcmp(type, VDEV_TYPE_DISK) != 0 &&
639eda14cbcSMatt Macy 		    strcmp(type, VDEV_TYPE_FILE) != 0) {
640eda14cbcSMatt Macy 			zfs_stats.vdev_drops.fmds_value.ui64++;
641eda14cbcSMatt Macy 			return;
642eda14cbcSMatt Macy 		}
643eda14cbcSMatt Macy 	}
644eda14cbcSMatt Macy 
645eda14cbcSMatt Macy 	/*
646eda14cbcSMatt Macy 	 * Determine if this ereport corresponds to an open case.
647eda14cbcSMatt Macy 	 * Each vdev or pool can have a single case.
648eda14cbcSMatt Macy 	 */
649eda14cbcSMatt Macy 	(void) nvlist_lookup_uint64(nvl,
650eda14cbcSMatt Macy 	    FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, &pool_guid);
651eda14cbcSMatt Macy 	if (nvlist_lookup_uint64(nvl,
652eda14cbcSMatt Macy 	    FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, &vdev_guid) != 0)
653eda14cbcSMatt Macy 		vdev_guid = 0;
654*7a7741afSMartin Matuska 	if (nvlist_lookup_uint64(nvl,
655*7a7741afSMartin Matuska 	    FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID, &parent_guid) != 0)
656*7a7741afSMartin Matuska 		parent_guid = 0;
657eda14cbcSMatt Macy 	if (nvlist_lookup_uint64(nvl, FM_EREPORT_ENA, &ena) != 0)
658eda14cbcSMatt Macy 		ena = 0;
659eda14cbcSMatt Macy 
660eda14cbcSMatt Macy 	zfs_ereport_when(hdl, nvl, &er_when);
661eda14cbcSMatt Macy 
662eda14cbcSMatt Macy 	for (zcp = uu_list_first(zfs_cases); zcp != NULL;
663eda14cbcSMatt Macy 	    zcp = uu_list_next(zfs_cases, zcp)) {
664eda14cbcSMatt Macy 		if (zcp->zc_data.zc_pool_guid == pool_guid) {
665eda14cbcSMatt Macy 			pool_found = B_TRUE;
666eda14cbcSMatt Macy 			pool_load = zcp->zc_when;
667eda14cbcSMatt Macy 		}
668eda14cbcSMatt Macy 		if (zcp->zc_data.zc_vdev_guid == vdev_guid)
669eda14cbcSMatt Macy 			break;
670eda14cbcSMatt Macy 	}
671eda14cbcSMatt Macy 
672eda14cbcSMatt Macy 	/*
673eda14cbcSMatt Macy 	 * Avoid falsely accusing a pool of being faulty.  Do so by
674eda14cbcSMatt Macy 	 * not replaying ereports that were generated prior to the
675eda14cbcSMatt Macy 	 * current import.  If the failure that generated them was
676eda14cbcSMatt Macy 	 * transient because the device was actually removed but we
677eda14cbcSMatt Macy 	 * didn't receive the normal asynchronous notification, we
678eda14cbcSMatt Macy 	 * don't want to mark it as faulted and potentially panic. If
679eda14cbcSMatt Macy 	 * there is still a problem we'd expect not to be able to
680eda14cbcSMatt Macy 	 * import the pool, or that new ereports will be generated
681eda14cbcSMatt Macy 	 * once the pool is used.
682eda14cbcSMatt Macy 	 */
683eda14cbcSMatt Macy 	if (pool_found && timeval_earlier(&er_when, &pool_load)) {
684eda14cbcSMatt Macy 		fmd_hdl_debug(hdl, "ignoring pool %llx, "
685eda14cbcSMatt Macy 		    "ereport time %lld.%lld, pool load time = %lld.%lld",
686eda14cbcSMatt Macy 		    pool_guid, er_when.ertv_sec, er_when.ertv_nsec,
687eda14cbcSMatt Macy 		    pool_load.ertv_sec, pool_load.ertv_nsec);
688eda14cbcSMatt Macy 		zfs_stats.old_drops.fmds_value.ui64++;
689eda14cbcSMatt Macy 		return;
690eda14cbcSMatt Macy 	}
691eda14cbcSMatt Macy 
692eda14cbcSMatt Macy 	if (!pool_found) {
693eda14cbcSMatt Macy 		/*
694eda14cbcSMatt Macy 		 * Haven't yet seen this pool, but same situation
695eda14cbcSMatt Macy 		 * may apply.
696eda14cbcSMatt Macy 		 */
697eda14cbcSMatt Macy 		libzfs_handle_t *zhdl = fmd_hdl_getspecific(hdl);
698eda14cbcSMatt Macy 		struct load_time_arg la;
699eda14cbcSMatt Macy 
700eda14cbcSMatt Macy 		la.lt_guid = pool_guid;
701eda14cbcSMatt Macy 		la.lt_time = &pool_load;
702eda14cbcSMatt Macy 		la.lt_found = B_FALSE;
703eda14cbcSMatt Macy 
704eda14cbcSMatt Macy 		if (zhdl != NULL &&
705eda14cbcSMatt Macy 		    zpool_iter(zhdl, zpool_find_load_time, &la) == 0 &&
706eda14cbcSMatt Macy 		    la.lt_found == B_TRUE) {
707eda14cbcSMatt Macy 			pool_found = B_TRUE;
708eda14cbcSMatt Macy 
709eda14cbcSMatt Macy 			if (timeval_earlier(&er_when, &pool_load)) {
710eda14cbcSMatt Macy 				fmd_hdl_debug(hdl, "ignoring pool %llx, "
711eda14cbcSMatt Macy 				    "ereport time %lld.%lld, "
712eda14cbcSMatt Macy 				    "pool load time = %lld.%lld",
713eda14cbcSMatt Macy 				    pool_guid, er_when.ertv_sec,
714eda14cbcSMatt Macy 				    er_when.ertv_nsec, pool_load.ertv_sec,
715eda14cbcSMatt Macy 				    pool_load.ertv_nsec);
716eda14cbcSMatt Macy 				zfs_stats.old_drops.fmds_value.ui64++;
717eda14cbcSMatt Macy 				return;
718eda14cbcSMatt Macy 			}
719eda14cbcSMatt Macy 		}
720eda14cbcSMatt Macy 	}
721eda14cbcSMatt Macy 
722eda14cbcSMatt Macy 	if (zcp == NULL) {
723eda14cbcSMatt Macy 		fmd_case_t *cs;
724eda14cbcSMatt Macy 		zfs_case_data_t data = { 0 };
725eda14cbcSMatt Macy 
726eda14cbcSMatt Macy 		/*
727eda14cbcSMatt Macy 		 * If this is one of our 'fake' resource ereports, and there is
728eda14cbcSMatt Macy 		 * no case open, simply discard it.
729eda14cbcSMatt Macy 		 */
730eda14cbcSMatt Macy 		if (isresource) {
731eda14cbcSMatt Macy 			zfs_stats.resource_drops.fmds_value.ui64++;
732eda14cbcSMatt Macy 			fmd_hdl_debug(hdl, "discarding '%s for vdev %llu",
733eda14cbcSMatt Macy 			    class, vdev_guid);
734eda14cbcSMatt Macy 			return;
735eda14cbcSMatt Macy 		}
736eda14cbcSMatt Macy 
737eda14cbcSMatt Macy 		/*
738eda14cbcSMatt Macy 		 * Skip tracking some ereports
739eda14cbcSMatt Macy 		 */
740eda14cbcSMatt Macy 		if (strcmp(class,
741eda14cbcSMatt Macy 		    ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_DATA)) == 0 ||
742eda14cbcSMatt Macy 		    strcmp(class,
743e2257b31SMartin Matuska 		    ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_CONFIG_CACHE_WRITE)) == 0) {
744eda14cbcSMatt Macy 			zfs_stats.resource_drops.fmds_value.ui64++;
745eda14cbcSMatt Macy 			return;
746eda14cbcSMatt Macy 		}
747eda14cbcSMatt Macy 
748eda14cbcSMatt Macy 		/*
749eda14cbcSMatt Macy 		 * Open a new case.
750eda14cbcSMatt Macy 		 */
751eda14cbcSMatt Macy 		cs = fmd_case_open(hdl, NULL);
752eda14cbcSMatt Macy 
753eda14cbcSMatt Macy 		fmd_hdl_debug(hdl, "opening case for vdev %llu due to '%s'",
754eda14cbcSMatt Macy 		    vdev_guid, class);
755eda14cbcSMatt Macy 
756eda14cbcSMatt Macy 		/*
757eda14cbcSMatt Macy 		 * Initialize the case buffer.  To commonize code, we actually
758eda14cbcSMatt Macy 		 * create the buffer with existing data, and then call
759eda14cbcSMatt Macy 		 * zfs_case_unserialize() to instantiate the in-core structure.
760eda14cbcSMatt Macy 		 */
761eda14cbcSMatt Macy 		fmd_buf_create(hdl, cs, CASE_DATA, sizeof (zfs_case_data_t));
762eda14cbcSMatt Macy 
763eda14cbcSMatt Macy 		data.zc_version = CASE_DATA_VERSION_SERD;
764eda14cbcSMatt Macy 		data.zc_ena = ena;
765eda14cbcSMatt Macy 		data.zc_pool_guid = pool_guid;
766eda14cbcSMatt Macy 		data.zc_vdev_guid = vdev_guid;
767*7a7741afSMartin Matuska 		data.zc_parent_guid = parent_guid;
768eda14cbcSMatt Macy 		data.zc_pool_state = (int)pool_state;
769eda14cbcSMatt Macy 
770eda14cbcSMatt Macy 		fmd_buf_write(hdl, cs, CASE_DATA, &data, sizeof (data));
771eda14cbcSMatt Macy 
772eda14cbcSMatt Macy 		zcp = zfs_case_unserialize(hdl, cs);
773eda14cbcSMatt Macy 		assert(zcp != NULL);
774eda14cbcSMatt Macy 		if (pool_found)
775eda14cbcSMatt Macy 			zcp->zc_when = pool_load;
776eda14cbcSMatt Macy 	}
777eda14cbcSMatt Macy 
778eda14cbcSMatt Macy 	if (isresource) {
779eda14cbcSMatt Macy 		fmd_hdl_debug(hdl, "resource event '%s'", class);
780eda14cbcSMatt Macy 
781eda14cbcSMatt Macy 		if (fmd_nvl_class_match(hdl, nvl,
782eda14cbcSMatt Macy 		    ZFS_MAKE_RSRC(FM_RESOURCE_AUTOREPLACE))) {
783eda14cbcSMatt Macy 			/*
784eda14cbcSMatt Macy 			 * The 'resource.fs.zfs.autoreplace' event indicates
785eda14cbcSMatt Macy 			 * that the pool was loaded with the 'autoreplace'
786eda14cbcSMatt Macy 			 * property set.  In this case, any pending device
787eda14cbcSMatt Macy 			 * failures should be ignored, as the asynchronous
788eda14cbcSMatt Macy 			 * autoreplace handling will take care of them.
789eda14cbcSMatt Macy 			 */
790eda14cbcSMatt Macy 			fmd_case_close(hdl, zcp->zc_case);
791eda14cbcSMatt Macy 		} else if (fmd_nvl_class_match(hdl, nvl,
792eda14cbcSMatt Macy 		    ZFS_MAKE_RSRC(FM_RESOURCE_REMOVED))) {
793eda14cbcSMatt Macy 			/*
794eda14cbcSMatt Macy 			 * The 'resource.fs.zfs.removed' event indicates that
795eda14cbcSMatt Macy 			 * device removal was detected, and the device was
796eda14cbcSMatt Macy 			 * closed asynchronously.  If this is the case, we
797eda14cbcSMatt Macy 			 * assume that any recent I/O errors were due to the
798eda14cbcSMatt Macy 			 * device removal, not any fault of the device itself.
799eda14cbcSMatt Macy 			 * We reset the SERD engine, and cancel any pending
800eda14cbcSMatt Macy 			 * timers.
801eda14cbcSMatt Macy 			 */
802eda14cbcSMatt Macy 			if (zcp->zc_data.zc_has_remove_timer) {
803eda14cbcSMatt Macy 				fmd_timer_remove(hdl, zcp->zc_remove_timer);
804eda14cbcSMatt Macy 				zcp->zc_data.zc_has_remove_timer = 0;
805e92ffd9bSMartin Matuska 				zfs_case_serialize(zcp);
806eda14cbcSMatt Macy 			}
807eda14cbcSMatt Macy 			if (zcp->zc_data.zc_serd_io[0] != '\0')
808eda14cbcSMatt Macy 				fmd_serd_reset(hdl, zcp->zc_data.zc_serd_io);
809eda14cbcSMatt Macy 			if (zcp->zc_data.zc_serd_checksum[0] != '\0')
810eda14cbcSMatt Macy 				fmd_serd_reset(hdl,
811eda14cbcSMatt Macy 				    zcp->zc_data.zc_serd_checksum);
812e2257b31SMartin Matuska 			if (zcp->zc_data.zc_serd_slow_io[0] != '\0')
813e2257b31SMartin Matuska 				fmd_serd_reset(hdl,
814e2257b31SMartin Matuska 				    zcp->zc_data.zc_serd_slow_io);
815eda14cbcSMatt Macy 		} else if (fmd_nvl_class_match(hdl, nvl,
816eda14cbcSMatt Macy 		    ZFS_MAKE_RSRC(FM_RESOURCE_STATECHANGE))) {
817eda14cbcSMatt Macy 			uint64_t state = 0;
818eda14cbcSMatt Macy 
819eda14cbcSMatt Macy 			if (zcp != NULL &&
820eda14cbcSMatt Macy 			    nvlist_lookup_uint64(nvl,
821eda14cbcSMatt Macy 			    FM_EREPORT_PAYLOAD_ZFS_VDEV_STATE, &state) == 0 &&
822eda14cbcSMatt Macy 			    state == VDEV_STATE_HEALTHY) {
823eda14cbcSMatt Macy 				fmd_hdl_debug(hdl, "closing case after a "
824eda14cbcSMatt Macy 				    "device statechange to healthy");
825eda14cbcSMatt Macy 				fmd_case_close(hdl, zcp->zc_case);
826eda14cbcSMatt Macy 			}
827eda14cbcSMatt Macy 		}
828eda14cbcSMatt Macy 		zfs_stats.resource_drops.fmds_value.ui64++;
829eda14cbcSMatt Macy 		return;
830eda14cbcSMatt Macy 	}
831eda14cbcSMatt Macy 
832eda14cbcSMatt Macy 	/*
833eda14cbcSMatt Macy 	 * Associate the ereport with this case.
834eda14cbcSMatt Macy 	 */
835eda14cbcSMatt Macy 	fmd_case_add_ereport(hdl, zcp->zc_case, ep);
836eda14cbcSMatt Macy 
837eda14cbcSMatt Macy 	/*
838eda14cbcSMatt Macy 	 * Don't do anything else if this case is already solved.
839eda14cbcSMatt Macy 	 */
840eda14cbcSMatt Macy 	if (fmd_case_solved(hdl, zcp->zc_case))
841eda14cbcSMatt Macy 		return;
842eda14cbcSMatt Macy 
843e2257b31SMartin Matuska 	if (vdev_guid)
844e2257b31SMartin Matuska 		fmd_hdl_debug(hdl, "error event '%s', vdev %llu", class,
845e2257b31SMartin Matuska 		    vdev_guid);
846e2257b31SMartin Matuska 	else
847eda14cbcSMatt Macy 		fmd_hdl_debug(hdl, "error event '%s'", class);
848eda14cbcSMatt Macy 
849eda14cbcSMatt Macy 	/*
850eda14cbcSMatt Macy 	 * Determine if we should solve the case and generate a fault.  We solve
851eda14cbcSMatt Macy 	 * a case if:
852eda14cbcSMatt Macy 	 *
853eda14cbcSMatt Macy 	 * 	a. A pool failed to open (ereport.fs.zfs.pool)
854eda14cbcSMatt Macy 	 * 	b. A device failed to open (ereport.fs.zfs.pool) while a pool
855eda14cbcSMatt Macy 	 *	   was up and running.
856eda14cbcSMatt Macy 	 *
857eda14cbcSMatt Macy 	 * We may see a series of ereports associated with a pool open, all
858eda14cbcSMatt Macy 	 * chained together by the same ENA.  If the pool open succeeds, then
859eda14cbcSMatt Macy 	 * we'll see no further ereports.  To detect when a pool open has
860eda14cbcSMatt Macy 	 * succeeded, we associate a timer with the event.  When it expires, we
861eda14cbcSMatt Macy 	 * close the case.
862eda14cbcSMatt Macy 	 */
863eda14cbcSMatt Macy 	if (fmd_nvl_class_match(hdl, nvl,
864eda14cbcSMatt Macy 	    ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_POOL))) {
865eda14cbcSMatt Macy 		/*
866eda14cbcSMatt Macy 		 * Pool level fault.  Before solving the case, go through and
867eda14cbcSMatt Macy 		 * close any open device cases that may be pending.
868eda14cbcSMatt Macy 		 */
869eda14cbcSMatt Macy 		for (dcp = uu_list_first(zfs_cases); dcp != NULL;
870eda14cbcSMatt Macy 		    dcp = uu_list_next(zfs_cases, dcp)) {
871eda14cbcSMatt Macy 			if (dcp->zc_data.zc_pool_guid ==
872eda14cbcSMatt Macy 			    zcp->zc_data.zc_pool_guid &&
873eda14cbcSMatt Macy 			    dcp->zc_data.zc_vdev_guid != 0)
874eda14cbcSMatt Macy 				fmd_case_close(hdl, dcp->zc_case);
875eda14cbcSMatt Macy 		}
876eda14cbcSMatt Macy 
877e92ffd9bSMartin Matuska 		zfs_case_solve(hdl, zcp, "fault.fs.zfs.pool");
878eda14cbcSMatt Macy 	} else if (fmd_nvl_class_match(hdl, nvl,
879eda14cbcSMatt Macy 	    ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_LOG_REPLAY))) {
880eda14cbcSMatt Macy 		/*
881eda14cbcSMatt Macy 		 * Pool level fault for reading the intent logs.
882eda14cbcSMatt Macy 		 */
883e92ffd9bSMartin Matuska 		zfs_case_solve(hdl, zcp, "fault.fs.zfs.log_replay");
884eda14cbcSMatt Macy 	} else if (fmd_nvl_class_match(hdl, nvl, "ereport.fs.zfs.vdev.*")) {
885eda14cbcSMatt Macy 		/*
886eda14cbcSMatt Macy 		 * Device fault.
887eda14cbcSMatt Macy 		 */
888e92ffd9bSMartin Matuska 		zfs_case_solve(hdl, zcp, "fault.fs.zfs.device");
889eda14cbcSMatt Macy 	} else if (fmd_nvl_class_match(hdl, nvl,
890eda14cbcSMatt Macy 	    ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_IO)) ||
891eda14cbcSMatt Macy 	    fmd_nvl_class_match(hdl, nvl,
892eda14cbcSMatt Macy 	    ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_CHECKSUM)) ||
893eda14cbcSMatt Macy 	    fmd_nvl_class_match(hdl, nvl,
894eda14cbcSMatt Macy 	    ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_IO_FAILURE)) ||
895eda14cbcSMatt Macy 	    fmd_nvl_class_match(hdl, nvl,
896e2257b31SMartin Matuska 	    ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_DELAY)) ||
897e2257b31SMartin Matuska 	    fmd_nvl_class_match(hdl, nvl,
898eda14cbcSMatt Macy 	    ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_PROBE_FAILURE))) {
8992a58b312SMartin Matuska 		const char *failmode = NULL;
900eda14cbcSMatt Macy 		boolean_t checkremove = B_FALSE;
901e3aa18adSMartin Matuska 		uint32_t pri = 0;
902eda14cbcSMatt Macy 
903eda14cbcSMatt Macy 		/*
904eda14cbcSMatt Macy 		 * If this is a checksum or I/O error, then toss it into the
905eda14cbcSMatt Macy 		 * appropriate SERD engine and check to see if it has fired.
906eda14cbcSMatt Macy 		 * Ideally, we want to do something more sophisticated,
907eda14cbcSMatt Macy 		 * (persistent errors for a single data block, etc).  For now,
908eda14cbcSMatt Macy 		 * a single SERD engine is sufficient.
909eda14cbcSMatt Macy 		 */
910eda14cbcSMatt Macy 		if (fmd_nvl_class_match(hdl, nvl,
911eda14cbcSMatt Macy 		    ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_IO))) {
912eda14cbcSMatt Macy 			if (zcp->zc_data.zc_serd_io[0] == '\0') {
91315f0b8c3SMartin Matuska 				if (nvlist_lookup_uint64(nvl,
91415f0b8c3SMartin Matuska 				    FM_EREPORT_PAYLOAD_ZFS_VDEV_IO_N,
91515f0b8c3SMartin Matuska 				    &io_n) != 0) {
91615f0b8c3SMartin Matuska 					io_n = DEFAULT_IO_N;
91715f0b8c3SMartin Matuska 				}
91815f0b8c3SMartin Matuska 				if (nvlist_lookup_uint64(nvl,
91915f0b8c3SMartin Matuska 				    FM_EREPORT_PAYLOAD_ZFS_VDEV_IO_T,
92015f0b8c3SMartin Matuska 				    &io_t) != 0) {
92115f0b8c3SMartin Matuska 					io_t = DEFAULT_IO_T;
92215f0b8c3SMartin Matuska 				}
923eda14cbcSMatt Macy 				zfs_serd_name(zcp->zc_data.zc_serd_io,
924eda14cbcSMatt Macy 				    pool_guid, vdev_guid, "io");
925eda14cbcSMatt Macy 				fmd_serd_create(hdl, zcp->zc_data.zc_serd_io,
92615f0b8c3SMartin Matuska 				    io_n,
92715f0b8c3SMartin Matuska 				    SEC2NSEC(io_t));
928e92ffd9bSMartin Matuska 				zfs_case_serialize(zcp);
929eda14cbcSMatt Macy 			}
930*7a7741afSMartin Matuska 			if (zfs_fm_serd_record(hdl, zcp->zc_data.zc_serd_io,
931*7a7741afSMartin Matuska 			    ep, zcp, "io error")) {
932eda14cbcSMatt Macy 				checkremove = B_TRUE;
933*7a7741afSMartin Matuska 			}
934eda14cbcSMatt Macy 		} else if (fmd_nvl_class_match(hdl, nvl,
935e2257b31SMartin Matuska 		    ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_DELAY))) {
936e2257b31SMartin Matuska 			uint64_t slow_io_n, slow_io_t;
937e2257b31SMartin Matuska 
938e2257b31SMartin Matuska 			/*
939e2257b31SMartin Matuska 			 * Create a slow io SERD engine when the VDEV has the
940e2257b31SMartin Matuska 			 * 'vdev_slow_io_n' and 'vdev_slow_io_t' properties.
941e2257b31SMartin Matuska 			 */
942e2257b31SMartin Matuska 			if (zcp->zc_data.zc_serd_slow_io[0] == '\0' &&
943e2257b31SMartin Matuska 			    nvlist_lookup_uint64(nvl,
944e2257b31SMartin Matuska 			    FM_EREPORT_PAYLOAD_ZFS_VDEV_SLOW_IO_N,
945e2257b31SMartin Matuska 			    &slow_io_n) == 0 &&
946e2257b31SMartin Matuska 			    nvlist_lookup_uint64(nvl,
947e2257b31SMartin Matuska 			    FM_EREPORT_PAYLOAD_ZFS_VDEV_SLOW_IO_T,
948e2257b31SMartin Matuska 			    &slow_io_t) == 0) {
949e2257b31SMartin Matuska 				zfs_serd_name(zcp->zc_data.zc_serd_slow_io,
950e2257b31SMartin Matuska 				    pool_guid, vdev_guid, "slow_io");
951e2257b31SMartin Matuska 				fmd_serd_create(hdl,
952e2257b31SMartin Matuska 				    zcp->zc_data.zc_serd_slow_io,
953e2257b31SMartin Matuska 				    slow_io_n,
954e2257b31SMartin Matuska 				    SEC2NSEC(slow_io_t));
955e2257b31SMartin Matuska 				zfs_case_serialize(zcp);
956e2257b31SMartin Matuska 			}
957e2257b31SMartin Matuska 			/* Pass event to SERD engine and see if this triggers */
958e2257b31SMartin Matuska 			if (zcp->zc_data.zc_serd_slow_io[0] != '\0' &&
959*7a7741afSMartin Matuska 			    zfs_fm_serd_record(hdl,
960*7a7741afSMartin Matuska 			    zcp->zc_data.zc_serd_slow_io, ep, zcp, "slow io")) {
961e2257b31SMartin Matuska 				zfs_case_solve(hdl, zcp,
962e2257b31SMartin Matuska 				    "fault.fs.zfs.vdev.slow_io");
963e2257b31SMartin Matuska 			}
964e2257b31SMartin Matuska 		} else if (fmd_nvl_class_match(hdl, nvl,
965eda14cbcSMatt Macy 		    ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_CHECKSUM))) {
966e2df9bb4SMartin Matuska 			uint64_t flags = 0;
967e2df9bb4SMartin Matuska 			int32_t flags32 = 0;
968e3aa18adSMartin Matuska 			/*
969e3aa18adSMartin Matuska 			 * We ignore ereports for checksum errors generated by
970e3aa18adSMartin Matuska 			 * scrub/resilver I/O to avoid potentially further
971e3aa18adSMartin Matuska 			 * degrading the pool while it's being repaired.
972e2df9bb4SMartin Matuska 			 *
973e2df9bb4SMartin Matuska 			 * Note that FM_EREPORT_PAYLOAD_ZFS_ZIO_FLAGS used to
974e2df9bb4SMartin Matuska 			 * be int32. To allow newer zed to work on older
975e2df9bb4SMartin Matuska 			 * kernels, if we don't find the flags, we look for
976e2df9bb4SMartin Matuska 			 * the older ones too.
977e3aa18adSMartin Matuska 			 */
978e3aa18adSMartin Matuska 			if (((nvlist_lookup_uint32(nvl,
979e3aa18adSMartin Matuska 			    FM_EREPORT_PAYLOAD_ZFS_ZIO_PRIORITY, &pri) == 0) &&
980e3aa18adSMartin Matuska 			    (pri == ZIO_PRIORITY_SCRUB ||
981e3aa18adSMartin Matuska 			    pri == ZIO_PRIORITY_REBUILD)) ||
982e2df9bb4SMartin Matuska 			    ((nvlist_lookup_uint64(nvl,
983e3aa18adSMartin Matuska 			    FM_EREPORT_PAYLOAD_ZFS_ZIO_FLAGS, &flags) == 0) &&
984e2df9bb4SMartin Matuska 			    (flags & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER))) ||
985e2df9bb4SMartin Matuska 			    ((nvlist_lookup_int32(nvl,
986e2df9bb4SMartin Matuska 			    FM_EREPORT_PAYLOAD_ZFS_ZIO_FLAGS, &flags32) == 0) &&
987e2df9bb4SMartin Matuska 			    (flags32 & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER)))) {
988e3aa18adSMartin Matuska 				fmd_hdl_debug(hdl, "ignoring '%s' for "
989e3aa18adSMartin Matuska 				    "scrub/resilver I/O", class);
990e3aa18adSMartin Matuska 				return;
991e3aa18adSMartin Matuska 			}
992e3aa18adSMartin Matuska 
993eda14cbcSMatt Macy 			if (zcp->zc_data.zc_serd_checksum[0] == '\0') {
99415f0b8c3SMartin Matuska 				if (nvlist_lookup_uint64(nvl,
99515f0b8c3SMartin Matuska 				    FM_EREPORT_PAYLOAD_ZFS_VDEV_CKSUM_N,
99615f0b8c3SMartin Matuska 				    &checksum_n) != 0) {
99715f0b8c3SMartin Matuska 					checksum_n = DEFAULT_CHECKSUM_N;
99815f0b8c3SMartin Matuska 				}
99915f0b8c3SMartin Matuska 				if (nvlist_lookup_uint64(nvl,
100015f0b8c3SMartin Matuska 				    FM_EREPORT_PAYLOAD_ZFS_VDEV_CKSUM_T,
100115f0b8c3SMartin Matuska 				    &checksum_t) != 0) {
100215f0b8c3SMartin Matuska 					checksum_t = DEFAULT_CHECKSUM_T;
100315f0b8c3SMartin Matuska 				}
100415f0b8c3SMartin Matuska 
1005eda14cbcSMatt Macy 				zfs_serd_name(zcp->zc_data.zc_serd_checksum,
1006eda14cbcSMatt Macy 				    pool_guid, vdev_guid, "checksum");
1007eda14cbcSMatt Macy 				fmd_serd_create(hdl,
1008eda14cbcSMatt Macy 				    zcp->zc_data.zc_serd_checksum,
100915f0b8c3SMartin Matuska 				    checksum_n,
101015f0b8c3SMartin Matuska 				    SEC2NSEC(checksum_t));
1011e92ffd9bSMartin Matuska 				zfs_case_serialize(zcp);
1012eda14cbcSMatt Macy 			}
1013*7a7741afSMartin Matuska 			if (zfs_fm_serd_record(hdl,
1014*7a7741afSMartin Matuska 			    zcp->zc_data.zc_serd_checksum, ep, zcp,
1015*7a7741afSMartin Matuska 			    "checksum")) {
1016eda14cbcSMatt Macy 				zfs_case_solve(hdl, zcp,
1017e92ffd9bSMartin Matuska 				    "fault.fs.zfs.vdev.checksum");
1018eda14cbcSMatt Macy 			}
1019eda14cbcSMatt Macy 		} else if (fmd_nvl_class_match(hdl, nvl,
1020eda14cbcSMatt Macy 		    ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_IO_FAILURE)) &&
1021eda14cbcSMatt Macy 		    (nvlist_lookup_string(nvl,
1022eda14cbcSMatt Macy 		    FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE, &failmode) == 0) &&
1023eda14cbcSMatt Macy 		    failmode != NULL) {
1024eda14cbcSMatt Macy 			if (strncmp(failmode, FM_EREPORT_FAILMODE_CONTINUE,
1025eda14cbcSMatt Macy 			    strlen(FM_EREPORT_FAILMODE_CONTINUE)) == 0) {
1026eda14cbcSMatt Macy 				zfs_case_solve(hdl, zcp,
1027e92ffd9bSMartin Matuska 				    "fault.fs.zfs.io_failure_continue");
1028eda14cbcSMatt Macy 			} else if (strncmp(failmode, FM_EREPORT_FAILMODE_WAIT,
1029eda14cbcSMatt Macy 			    strlen(FM_EREPORT_FAILMODE_WAIT)) == 0) {
1030eda14cbcSMatt Macy 				zfs_case_solve(hdl, zcp,
1031e92ffd9bSMartin Matuska 				    "fault.fs.zfs.io_failure_wait");
1032eda14cbcSMatt Macy 			}
1033eda14cbcSMatt Macy 		} else if (fmd_nvl_class_match(hdl, nvl,
1034eda14cbcSMatt Macy 		    ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_PROBE_FAILURE))) {
1035eda14cbcSMatt Macy #ifndef __linux__
1036eda14cbcSMatt Macy 			/* This causes an unexpected fault diagnosis on linux */
1037eda14cbcSMatt Macy 			checkremove = B_TRUE;
1038eda14cbcSMatt Macy #endif
1039eda14cbcSMatt Macy 		}
1040eda14cbcSMatt Macy 
1041eda14cbcSMatt Macy 		/*
1042eda14cbcSMatt Macy 		 * Because I/O errors may be due to device removal, we postpone
1043eda14cbcSMatt Macy 		 * any diagnosis until we're sure that we aren't about to
1044eda14cbcSMatt Macy 		 * receive a 'resource.fs.zfs.removed' event.
1045eda14cbcSMatt Macy 		 */
1046eda14cbcSMatt Macy 		if (checkremove) {
1047eda14cbcSMatt Macy 			if (zcp->zc_data.zc_has_remove_timer)
1048eda14cbcSMatt Macy 				fmd_timer_remove(hdl, zcp->zc_remove_timer);
1049eda14cbcSMatt Macy 			zcp->zc_remove_timer = fmd_timer_install(hdl, zcp, NULL,
1050eda14cbcSMatt Macy 			    zfs_remove_timeout);
1051eda14cbcSMatt Macy 			if (!zcp->zc_data.zc_has_remove_timer) {
1052eda14cbcSMatt Macy 				zcp->zc_data.zc_has_remove_timer = 1;
1053e92ffd9bSMartin Matuska 				zfs_case_serialize(zcp);
1054eda14cbcSMatt Macy 			}
1055eda14cbcSMatt Macy 		}
1056eda14cbcSMatt Macy 	}
1057eda14cbcSMatt Macy }
1058eda14cbcSMatt Macy 
1059eda14cbcSMatt Macy /*
1060eda14cbcSMatt Macy  * The timeout is fired when we diagnosed an I/O error, and it was not due to
1061eda14cbcSMatt Macy  * device removal (which would cause the timeout to be cancelled).
1062eda14cbcSMatt Macy  */
1063eda14cbcSMatt Macy static void
1064eda14cbcSMatt Macy zfs_fm_timeout(fmd_hdl_t *hdl, id_t id, void *data)
1065eda14cbcSMatt Macy {
1066eda14cbcSMatt Macy 	zfs_case_t *zcp = data;
1067eda14cbcSMatt Macy 
1068eda14cbcSMatt Macy 	if (id == zcp->zc_remove_timer)
1069e92ffd9bSMartin Matuska 		zfs_case_solve(hdl, zcp, "fault.fs.zfs.vdev.io");
1070eda14cbcSMatt Macy }
1071eda14cbcSMatt Macy 
1072eda14cbcSMatt Macy /*
1073eda14cbcSMatt Macy  * The specified case has been closed and any case-specific
1074eda14cbcSMatt Macy  * data structures should be deallocated.
1075eda14cbcSMatt Macy  */
1076eda14cbcSMatt Macy static void
1077eda14cbcSMatt Macy zfs_fm_close(fmd_hdl_t *hdl, fmd_case_t *cs)
1078eda14cbcSMatt Macy {
1079eda14cbcSMatt Macy 	zfs_case_t *zcp = fmd_case_getspecific(hdl, cs);
1080eda14cbcSMatt Macy 
1081eda14cbcSMatt Macy 	if (zcp->zc_data.zc_serd_checksum[0] != '\0')
1082eda14cbcSMatt Macy 		fmd_serd_destroy(hdl, zcp->zc_data.zc_serd_checksum);
1083eda14cbcSMatt Macy 	if (zcp->zc_data.zc_serd_io[0] != '\0')
1084eda14cbcSMatt Macy 		fmd_serd_destroy(hdl, zcp->zc_data.zc_serd_io);
1085e2257b31SMartin Matuska 	if (zcp->zc_data.zc_serd_slow_io[0] != '\0')
1086e2257b31SMartin Matuska 		fmd_serd_destroy(hdl, zcp->zc_data.zc_serd_slow_io);
1087eda14cbcSMatt Macy 	if (zcp->zc_data.zc_has_remove_timer)
1088eda14cbcSMatt Macy 		fmd_timer_remove(hdl, zcp->zc_remove_timer);
1089eda14cbcSMatt Macy 
1090eda14cbcSMatt Macy 	uu_list_remove(zfs_cases, zcp);
1091eda14cbcSMatt Macy 	uu_list_node_fini(zcp, &zcp->zc_node, zfs_case_pool);
1092eda14cbcSMatt Macy 	fmd_hdl_free(hdl, zcp, sizeof (zfs_case_t));
1093eda14cbcSMatt Macy }
1094eda14cbcSMatt Macy 
1095eda14cbcSMatt Macy static const fmd_hdl_ops_t fmd_ops = {
1096eda14cbcSMatt Macy 	zfs_fm_recv,	/* fmdo_recv */
1097eda14cbcSMatt Macy 	zfs_fm_timeout,	/* fmdo_timeout */
1098eda14cbcSMatt Macy 	zfs_fm_close,	/* fmdo_close */
1099eda14cbcSMatt Macy 	NULL,		/* fmdo_stats */
1100e2257b31SMartin Matuska 	NULL,	/* fmdo_gc */
1101eda14cbcSMatt Macy };
1102eda14cbcSMatt Macy 
1103eda14cbcSMatt Macy static const fmd_prop_t fmd_props[] = {
1104eda14cbcSMatt Macy 	{ NULL, 0, NULL }
1105eda14cbcSMatt Macy };
1106eda14cbcSMatt Macy 
1107eda14cbcSMatt Macy static const fmd_hdl_info_t fmd_info = {
1108eda14cbcSMatt Macy 	"ZFS Diagnosis Engine", "1.0", &fmd_ops, fmd_props
1109eda14cbcSMatt Macy };
1110eda14cbcSMatt Macy 
1111eda14cbcSMatt Macy void
1112eda14cbcSMatt Macy _zfs_diagnosis_init(fmd_hdl_t *hdl)
1113eda14cbcSMatt Macy {
1114eda14cbcSMatt Macy 	libzfs_handle_t *zhdl;
1115eda14cbcSMatt Macy 
1116eda14cbcSMatt Macy 	if ((zhdl = libzfs_init()) == NULL)
1117eda14cbcSMatt Macy 		return;
1118eda14cbcSMatt Macy 
1119eda14cbcSMatt Macy 	if ((zfs_case_pool = uu_list_pool_create("zfs_case_pool",
1120eda14cbcSMatt Macy 	    sizeof (zfs_case_t), offsetof(zfs_case_t, zc_node),
1121eda14cbcSMatt Macy 	    NULL, UU_LIST_POOL_DEBUG)) == NULL) {
1122eda14cbcSMatt Macy 		libzfs_fini(zhdl);
1123eda14cbcSMatt Macy 		return;
1124eda14cbcSMatt Macy 	}
1125eda14cbcSMatt Macy 
1126eda14cbcSMatt Macy 	if ((zfs_cases = uu_list_create(zfs_case_pool, NULL,
1127eda14cbcSMatt Macy 	    UU_LIST_DEBUG)) == NULL) {
1128eda14cbcSMatt Macy 		uu_list_pool_destroy(zfs_case_pool);
1129eda14cbcSMatt Macy 		libzfs_fini(zhdl);
1130eda14cbcSMatt Macy 		return;
1131eda14cbcSMatt Macy 	}
1132eda14cbcSMatt Macy 
1133eda14cbcSMatt Macy 	if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) {
1134eda14cbcSMatt Macy 		uu_list_destroy(zfs_cases);
1135eda14cbcSMatt Macy 		uu_list_pool_destroy(zfs_case_pool);
1136eda14cbcSMatt Macy 		libzfs_fini(zhdl);
1137eda14cbcSMatt Macy 		return;
1138eda14cbcSMatt Macy 	}
1139eda14cbcSMatt Macy 
1140eda14cbcSMatt Macy 	fmd_hdl_setspecific(hdl, zhdl);
1141eda14cbcSMatt Macy 
1142eda14cbcSMatt Macy 	(void) fmd_stat_create(hdl, FMD_STAT_NOALLOC, sizeof (zfs_stats) /
1143eda14cbcSMatt Macy 	    sizeof (fmd_stat_t), (fmd_stat_t *)&zfs_stats);
1144eda14cbcSMatt Macy }
1145eda14cbcSMatt Macy 
1146eda14cbcSMatt Macy void
1147eda14cbcSMatt Macy _zfs_diagnosis_fini(fmd_hdl_t *hdl)
1148eda14cbcSMatt Macy {
1149eda14cbcSMatt Macy 	zfs_case_t *zcp;
1150eda14cbcSMatt Macy 	uu_list_walk_t *walk;
1151eda14cbcSMatt Macy 	libzfs_handle_t *zhdl;
1152eda14cbcSMatt Macy 
1153eda14cbcSMatt Macy 	/*
1154eda14cbcSMatt Macy 	 * Remove all active cases.
1155eda14cbcSMatt Macy 	 */
1156eda14cbcSMatt Macy 	walk = uu_list_walk_start(zfs_cases, UU_WALK_ROBUST);
1157eda14cbcSMatt Macy 	while ((zcp = uu_list_walk_next(walk)) != NULL) {
1158eda14cbcSMatt Macy 		fmd_hdl_debug(hdl, "removing case ena %llu",
1159eda14cbcSMatt Macy 		    (long long unsigned)zcp->zc_data.zc_ena);
1160eda14cbcSMatt Macy 		uu_list_remove(zfs_cases, zcp);
1161eda14cbcSMatt Macy 		uu_list_node_fini(zcp, &zcp->zc_node, zfs_case_pool);
1162eda14cbcSMatt Macy 		fmd_hdl_free(hdl, zcp, sizeof (zfs_case_t));
1163eda14cbcSMatt Macy 	}
1164eda14cbcSMatt Macy 	uu_list_walk_end(walk);
1165eda14cbcSMatt Macy 
1166eda14cbcSMatt Macy 	uu_list_destroy(zfs_cases);
1167eda14cbcSMatt Macy 	uu_list_pool_destroy(zfs_case_pool);
1168eda14cbcSMatt Macy 
1169eda14cbcSMatt Macy 	zhdl = fmd_hdl_getspecific(hdl);
1170eda14cbcSMatt Macy 	libzfs_fini(zhdl);
1171eda14cbcSMatt Macy }
1172