1789Sahrens /*
2789Sahrens  * CDDL HEADER START
3789Sahrens  *
4789Sahrens  * The contents of this file are subject to the terms of the
51544Seschrock  * Common Development and Distribution License (the "License").
61544Seschrock  * You may not use this file except in compliance with the License.
7789Sahrens  *
8789Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9789Sahrens  * or http://www.opensolaris.org/os/licensing.
10789Sahrens  * See the License for the specific language governing permissions
11789Sahrens  * and limitations under the License.
12789Sahrens  *
13789Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
14789Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15789Sahrens  * If applicable, add the following below this CDDL HEADER, with the
16789Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
17789Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
18789Sahrens  *
19789Sahrens  * CDDL HEADER END
20789Sahrens  */
21789Sahrens /*
22*3975Sek110237  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23789Sahrens  * Use is subject to license terms.
24789Sahrens  */
25789Sahrens 
26789Sahrens #pragma ident	"%Z%%M%	%I%	%E% SMI"
27789Sahrens 
28789Sahrens /*
 * This file contains the functions which analyze the status of a pool.  This
 * includes both the status of an active pool, as well as the status of
 * exported pools.  Returns one of the ZPOOL_STATUS_* defines describing the
 * status of the pool.  This status is independent (to a certain degree) from
 * the state of the pool.  A pool's state describes only whether or not it is
 * capable of providing the necessary fault tolerance for data.  The status
 * describes the overall status of devices.  A pool that is online can still
 * have a device that is experiencing errors.
37789Sahrens  *
 * Only a subset of the possible faults can be detected using 'zpool status',
 * and not all possible errors correspond to an FMA message ID.  The
 * explanation is left up to the caller, depending on whether it is a live
 * pool or an import.
42789Sahrens  */
43789Sahrens 
44789Sahrens #include <libzfs.h>
45789Sahrens #include <string.h>
46*3975Sek110237 #include <unistd.h>
47789Sahrens #include "libzfs_impl.h"
48789Sahrens 
/*
 * Message ID table, indexed by pool status value.  This must be kept in sync
 * with the ZPOOL_STATUS_* defines in libzfs.h: entry N is the message ID
 * reported for status value N.  Some status results have values past the end
 * of this table, and hence have no associated message ID.
 */
static char *zfs_msgid_table[] = {
	"ZFS-8000-14",		/* 0 */
	"ZFS-8000-2Q",		/* 1 */
	"ZFS-8000-3C",		/* 2 */
	"ZFS-8000-4J",		/* 3 */
	"ZFS-8000-5E",		/* 4 */
	"ZFS-8000-6X",		/* 5 */
	"ZFS-8000-72",		/* 6 */
	"ZFS-8000-8A",		/* 7 */
	"ZFS-8000-9P",		/* 8 */
	"ZFS-8000-A5",		/* 9 */
	"ZFS-8000-EY",		/* 10 */
};
67789Sahrens 
/*
 * If the pool is active, a certain class of static errors is overridden by the
 * faults as analyzed by FMA.  These faults have separate knowledge articles,
 * and the article referred to by 'zpool status' must match that indicated by
 * the syslog error message.  We override missing data as well as corrupt pool.
 *
 * This table is indexed by the same status value as zfs_msgid_table, so it
 * must contain one entry for every entry in that table.  Entries that are not
 * marked as overridden repeat the ID found at the same index of
 * zfs_msgid_table.
 */
static char *zfs_msgid_table_active[] = {
	"ZFS-8000-14",
	"ZFS-8000-D3",		/* overridden */
	"ZFS-8000-D3",		/* overridden */
	"ZFS-8000-4J",
	"ZFS-8000-5E",
	"ZFS-8000-6X",
	"ZFS-8000-CS",		/* overridden */
	"ZFS-8000-8A",
	"ZFS-8000-9P",
	"ZFS-8000-CS",		/* overridden */
	"ZFS-8000-EY"
};

/* Number of entries in zfs_msgid_table. */
#define	NMSGID	(sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0]))
88789Sahrens 
89789Sahrens /* ARGSUSED */
90789Sahrens static int
91789Sahrens vdev_missing(uint64_t state, uint64_t aux, uint64_t errs)
92789Sahrens {
93789Sahrens 	return (state == VDEV_STATE_CANT_OPEN &&
94789Sahrens 	    aux == VDEV_AUX_OPEN_FAILED);
95789Sahrens }
96789Sahrens 
/*
 * Predicate for find_vdev_problem(): returns non-zero when the supplied error
 * count is non-zero.  The state and auxiliary state are not consulted.
 */
/* ARGSUSED */
static int
vdev_errors(uint64_t state, uint64_t aux, uint64_t errs)
{
	/* errs is unsigned, so "greater than zero" means "any errors". */
	return (errs > 0);
}
103789Sahrens 
104789Sahrens /* ARGSUSED */
105789Sahrens static int
106789Sahrens vdev_broken(uint64_t state, uint64_t aux, uint64_t errs)
107789Sahrens {
108789Sahrens 	return (state == VDEV_STATE_CANT_OPEN);
109789Sahrens }
110789Sahrens 
111789Sahrens /* ARGSUSED */
112789Sahrens static int
113789Sahrens vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs)
114789Sahrens {
115789Sahrens 	return (state == VDEV_STATE_OFFLINE);
116789Sahrens }
117789Sahrens 
118789Sahrens /*
119789Sahrens  * Detect if any leaf devices that have seen errors or could not be opened.
120789Sahrens  */
1212082Seschrock static boolean_t
122789Sahrens find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t))
123789Sahrens {
124789Sahrens 	nvlist_t **child;
125789Sahrens 	vdev_stat_t *vs;
126789Sahrens 	uint_t c, children;
127789Sahrens 	char *type;
128789Sahrens 
129789Sahrens 	/*
130789Sahrens 	 * Ignore problems within a 'replacing' vdev, since we're presumably in
131789Sahrens 	 * the process of repairing any such errors, and don't want to call them
132789Sahrens 	 * out again.  We'll pick up the fact that a resilver is happening
133789Sahrens 	 * later.
134789Sahrens 	 */
135789Sahrens 	verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0);
136789Sahrens 	if (strcmp(type, VDEV_TYPE_REPLACING) == 0)
1372082Seschrock 		return (B_FALSE);
138789Sahrens 
139789Sahrens 	if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child,
140789Sahrens 	    &children) == 0) {
141789Sahrens 		for (c = 0; c < children; c++)
142789Sahrens 			if (find_vdev_problem(child[c], func))
1432082Seschrock 				return (B_TRUE);
144789Sahrens 	} else {
145789Sahrens 		verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS,
146789Sahrens 		    (uint64_t **)&vs, &c) == 0);
147789Sahrens 
148789Sahrens 		if (func(vs->vs_state, vs->vs_aux,
149789Sahrens 		    vs->vs_read_errors +
150789Sahrens 		    vs->vs_write_errors +
151789Sahrens 		    vs->vs_checksum_errors))
1522082Seschrock 			return (B_TRUE);
153789Sahrens 	}
154789Sahrens 
1552082Seschrock 	return (B_FALSE);
156789Sahrens }
157789Sahrens 
158789Sahrens /*
159789Sahrens  * Active pool health status.
160789Sahrens  *
161789Sahrens  * To determine the status for a pool, we make several passes over the config,
162789Sahrens  * picking the most egregious error we find.  In order of importance, we do the
163789Sahrens  * following:
164789Sahrens  *
165789Sahrens  *	- Check for a complete and valid configuration
1661544Seschrock  *	- Look for any missing devices in a non-replicated config
1671544Seschrock  *	- Check for any data errors
1681544Seschrock  *	- Check for any missing devices in a replicated config
169789Sahrens  *	- Look for any devices showing errors
170789Sahrens  *	- Check for any resilvering devices
171789Sahrens  *
172789Sahrens  * There can obviously be multiple errors within a single pool, so this routine
173789Sahrens  * only picks the most damaging of all the current errors to report.
174789Sahrens  */
175789Sahrens static zpool_status_t
1762082Seschrock check_status(nvlist_t *config, boolean_t isimport)
177789Sahrens {
178789Sahrens 	nvlist_t *nvroot;
179789Sahrens 	vdev_stat_t *vs;
180789Sahrens 	uint_t vsc;
1811544Seschrock 	uint64_t nerr;
1821760Seschrock 	uint64_t version;
183*3975Sek110237 	uint64_t stateval;
184*3975Sek110237 	uint64_t hostid = 0;
185789Sahrens 
1861760Seschrock 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
1871760Seschrock 	    &version) == 0);
188789Sahrens 	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
189789Sahrens 	    &nvroot) == 0);
190789Sahrens 	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
191789Sahrens 	    (uint64_t **)&vs, &vsc) == 0);
192*3975Sek110237 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
193*3975Sek110237 	    &stateval) == 0);
194*3975Sek110237 	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
195*3975Sek110237 
196*3975Sek110237 	/*
197*3975Sek110237 	 * Pool last accessed by another system.
198*3975Sek110237 	 */
199*3975Sek110237 	if (hostid != 0 && (unsigned long)hostid != gethostid() &&
200*3975Sek110237 	    stateval == POOL_STATE_ACTIVE)
201*3975Sek110237 		return (ZPOOL_STATUS_HOSTID_MISMATCH);
202789Sahrens 
203789Sahrens 	/*
2041760Seschrock 	 * Newer on-disk version.
2051760Seschrock 	 */
2061760Seschrock 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
2071760Seschrock 	    vs->vs_aux == VDEV_AUX_VERSION_NEWER)
2081760Seschrock 		return (ZPOOL_STATUS_VERSION_NEWER);
2091760Seschrock 
2101760Seschrock 	/*
211789Sahrens 	 * Check that the config is complete.
212789Sahrens 	 */
213789Sahrens 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
2141544Seschrock 	    vs->vs_aux == VDEV_AUX_BAD_GUID_SUM)
215789Sahrens 		return (ZPOOL_STATUS_BAD_GUID_SUM);
2161544Seschrock 
2171544Seschrock 	/*
2181544Seschrock 	 * Missing devices in non-replicated config.
2191544Seschrock 	 */
2201544Seschrock 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
2211544Seschrock 	    find_vdev_problem(nvroot, vdev_missing))
2221544Seschrock 		return (ZPOOL_STATUS_MISSING_DEV_NR);
2231544Seschrock 
2241544Seschrock 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
2251544Seschrock 	    find_vdev_problem(nvroot, vdev_broken))
2261544Seschrock 		return (ZPOOL_STATUS_CORRUPT_LABEL_NR);
2271544Seschrock 
2281544Seschrock 	/*
2291544Seschrock 	 * Corrupted pool metadata
2301544Seschrock 	 */
2311544Seschrock 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
2321544Seschrock 	    vs->vs_aux == VDEV_AUX_CORRUPT_DATA)
2331544Seschrock 		return (ZPOOL_STATUS_CORRUPT_POOL);
2341544Seschrock 
2351544Seschrock 	/*
2361544Seschrock 	 * Persistent data errors.
2371544Seschrock 	 */
2381544Seschrock 	if (!isimport) {
2391544Seschrock 		if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT,
2401544Seschrock 		    &nerr) == 0 && nerr != 0)
2411544Seschrock 			return (ZPOOL_STATUS_CORRUPT_DATA);
242789Sahrens 	}
243789Sahrens 
244789Sahrens 	/*
2451544Seschrock 	 * Missing devices in a replicated config.
246789Sahrens 	 */
2471544Seschrock 	if (find_vdev_problem(nvroot, vdev_missing))
2481544Seschrock 		return (ZPOOL_STATUS_MISSING_DEV_R);
2491544Seschrock 	if (find_vdev_problem(nvroot, vdev_broken))
2501544Seschrock 		return (ZPOOL_STATUS_CORRUPT_LABEL_R);
251789Sahrens 
252789Sahrens 	/*
253789Sahrens 	 * Devices with errors
254789Sahrens 	 */
255789Sahrens 	if (!isimport && find_vdev_problem(nvroot, vdev_errors))
256789Sahrens 		return (ZPOOL_STATUS_FAILING_DEV);
257789Sahrens 
258789Sahrens 	/*
259789Sahrens 	 * Offlined devices
260789Sahrens 	 */
261789Sahrens 	if (find_vdev_problem(nvroot, vdev_offlined))
262789Sahrens 		return (ZPOOL_STATUS_OFFLINE_DEV);
263789Sahrens 
264789Sahrens 	/*
265789Sahrens 	 * Currently resilvering
266789Sahrens 	 */
267789Sahrens 	if (!vs->vs_scrub_complete && vs->vs_scrub_type == POOL_SCRUB_RESILVER)
268789Sahrens 		return (ZPOOL_STATUS_RESILVERING);
269789Sahrens 
270789Sahrens 	/*
2711760Seschrock 	 * Outdated, but usable, version
272789Sahrens 	 */
2731760Seschrock 	if (version < ZFS_VERSION)
2741760Seschrock 		return (ZPOOL_STATUS_VERSION_OLDER);
275789Sahrens 
276789Sahrens 	return (ZPOOL_STATUS_OK);
277789Sahrens }
278789Sahrens 
279789Sahrens zpool_status_t
280789Sahrens zpool_get_status(zpool_handle_t *zhp, char **msgid)
281789Sahrens {
2822082Seschrock 	zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE);
283789Sahrens 
284789Sahrens 	if (ret >= NMSGID)
285789Sahrens 		*msgid = NULL;
286789Sahrens 	else
287*3975Sek110237 		*msgid = zfs_msgid_table_active[ret];
288789Sahrens 
289789Sahrens 	return (ret);
290789Sahrens }
291789Sahrens 
292789Sahrens zpool_status_t
293789Sahrens zpool_import_status(nvlist_t *config, char **msgid)
294789Sahrens {
2952082Seschrock 	zpool_status_t ret = check_status(config, B_TRUE);
296789Sahrens 
297789Sahrens 	if (ret >= NMSGID)
298789Sahrens 		*msgid = NULL;
299789Sahrens 	else
300*3975Sek110237 		*msgid = zfs_msgid_table[ret];
301789Sahrens 
302789Sahrens 	return (ret);
303789Sahrens }
304