1789Sahrens /*
2789Sahrens  * CDDL HEADER START
3789Sahrens  *
4789Sahrens  * The contents of this file are subject to the terms of the
51544Seschrock  * Common Development and Distribution License (the "License").
61544Seschrock  * You may not use this file except in compliance with the License.
7789Sahrens  *
8789Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9789Sahrens  * or http://www.opensolaris.org/os/licensing.
10789Sahrens  * See the License for the specific language governing permissions
11789Sahrens  * and limitations under the License.
12789Sahrens  *
13789Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
14789Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15789Sahrens  * If applicable, add the following below this CDDL HEADER, with the
16789Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
17789Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
18789Sahrens  *
19789Sahrens  * CDDL HEADER END
20789Sahrens  */
21789Sahrens /*
226523Sek110237  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23789Sahrens  * Use is subject to license terms.
24789Sahrens  */
25789Sahrens 
/*
 * This file contains the functions which analyze the status of a pool.  This
 * includes both the status of an active pool, as well as the status of
 * exported pools.  Returns one of the ZPOOL_STATUS_* defines describing the
 * status of the pool.  This status is independent (to a certain degree) from
 * the state of the pool.  A pool's state describes only whether or not it is
 * capable of providing the necessary fault tolerance for data.  The status
 * describes the overall status of devices.  A pool that is online can still
 * have a device that is experiencing errors.
 *
 * Only a subset of the possible faults can be detected using 'zpool status',
 * and not all possible errors correspond to an FMA message ID.  The
 * explanation is left up to the caller, depending on whether it is a live
 * pool or an import.
 */
41789Sahrens 
42789Sahrens #include <libzfs.h>
43789Sahrens #include <string.h>
443975Sek110237 #include <unistd.h>
45789Sahrens #include "libzfs_impl.h"
46789Sahrens 
/*
 * Message ID table.  This must be kept in sync with the ZPOOL_STATUS_* defines
 * in libzfs.h.  Note that there are some status results which go past the end
 * of this table, and hence have no associated message ID.
 *
 * The table is indexed directly by the zpool_status_t value produced by
 * check_status(); see zpool_get_status() and zpool_import_status().
 */
static char *zfs_msgid_table[] = {
	"ZFS-8000-14",
	"ZFS-8000-2Q",
	"ZFS-8000-3C",
	"ZFS-8000-4J",
	"ZFS-8000-5E",
	"ZFS-8000-6X",
	"ZFS-8000-72",
	"ZFS-8000-8A",
	"ZFS-8000-9P",
	"ZFS-8000-A5",
	"ZFS-8000-EY",
	"ZFS-8000-HC",
	"ZFS-8000-JQ",
	"ZFS-8000-K4",
};

/*
 * Number of entries in zfs_msgid_table.  A status value greater than or equal
 * to this has no message ID, and the lookup routines hand back NULL instead.
 */
#define	NMSGID	(sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0]))
70789Sahrens 
71789Sahrens /* ARGSUSED */
72789Sahrens static int
73789Sahrens vdev_missing(uint64_t state, uint64_t aux, uint64_t errs)
74789Sahrens {
75789Sahrens 	return (state == VDEV_STATE_CANT_OPEN &&
76789Sahrens 	    aux == VDEV_AUX_OPEN_FAILED);
77789Sahrens }
78789Sahrens 
79789Sahrens /* ARGSUSED */
80789Sahrens static int
814451Seschrock vdev_faulted(uint64_t state, uint64_t aux, uint64_t errs)
824451Seschrock {
834451Seschrock 	return (state == VDEV_STATE_FAULTED);
844451Seschrock }
854451Seschrock 
864451Seschrock /* ARGSUSED */
874451Seschrock static int
88789Sahrens vdev_errors(uint64_t state, uint64_t aux, uint64_t errs)
89789Sahrens {
904451Seschrock 	return (state == VDEV_STATE_DEGRADED || errs != 0);
91789Sahrens }
92789Sahrens 
93789Sahrens /* ARGSUSED */
94789Sahrens static int
95789Sahrens vdev_broken(uint64_t state, uint64_t aux, uint64_t errs)
96789Sahrens {
97789Sahrens 	return (state == VDEV_STATE_CANT_OPEN);
98789Sahrens }
99789Sahrens 
100789Sahrens /* ARGSUSED */
101789Sahrens static int
102789Sahrens vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs)
103789Sahrens {
104789Sahrens 	return (state == VDEV_STATE_OFFLINE);
105789Sahrens }
106789Sahrens 
107789Sahrens /*
108789Sahrens  * Detect if any leaf devices that have seen errors or could not be opened.
109789Sahrens  */
1102082Seschrock static boolean_t
111789Sahrens find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t))
112789Sahrens {
113789Sahrens 	nvlist_t **child;
114789Sahrens 	vdev_stat_t *vs;
115789Sahrens 	uint_t c, children;
116789Sahrens 	char *type;
117789Sahrens 
118789Sahrens 	/*
119789Sahrens 	 * Ignore problems within a 'replacing' vdev, since we're presumably in
120789Sahrens 	 * the process of repairing any such errors, and don't want to call them
121789Sahrens 	 * out again.  We'll pick up the fact that a resilver is happening
122789Sahrens 	 * later.
123789Sahrens 	 */
124789Sahrens 	verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0);
125789Sahrens 	if (strcmp(type, VDEV_TYPE_REPLACING) == 0)
1262082Seschrock 		return (B_FALSE);
127789Sahrens 
128789Sahrens 	if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child,
129789Sahrens 	    &children) == 0) {
130789Sahrens 		for (c = 0; c < children; c++)
131789Sahrens 			if (find_vdev_problem(child[c], func))
1322082Seschrock 				return (B_TRUE);
133789Sahrens 	} else {
134789Sahrens 		verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS,
135789Sahrens 		    (uint64_t **)&vs, &c) == 0);
136789Sahrens 
137789Sahrens 		if (func(vs->vs_state, vs->vs_aux,
138789Sahrens 		    vs->vs_read_errors +
139789Sahrens 		    vs->vs_write_errors +
140789Sahrens 		    vs->vs_checksum_errors))
1412082Seschrock 			return (B_TRUE);
142789Sahrens 	}
143789Sahrens 
1442082Seschrock 	return (B_FALSE);
145789Sahrens }
146789Sahrens 
147789Sahrens /*
148789Sahrens  * Active pool health status.
149789Sahrens  *
150789Sahrens  * To determine the status for a pool, we make several passes over the config,
151789Sahrens  * picking the most egregious error we find.  In order of importance, we do the
152789Sahrens  * following:
153789Sahrens  *
154789Sahrens  *	- Check for a complete and valid configuration
1554451Seschrock  *	- Look for any faulted or missing devices in a non-replicated config
1561544Seschrock  *	- Check for any data errors
1574451Seschrock  *	- Check for any faulted or missing devices in a replicated config
158789Sahrens  *	- Look for any devices showing errors
159789Sahrens  *	- Check for any resilvering devices
160789Sahrens  *
161789Sahrens  * There can obviously be multiple errors within a single pool, so this routine
162789Sahrens  * only picks the most damaging of all the current errors to report.
163789Sahrens  */
164789Sahrens static zpool_status_t
165*7754SJeff.Bonwick@Sun.COM check_status(nvlist_t *config, boolean_t isimport)
166789Sahrens {
167789Sahrens 	nvlist_t *nvroot;
168789Sahrens 	vdev_stat_t *vs;
169789Sahrens 	uint_t vsc;
1701544Seschrock 	uint64_t nerr;
1711760Seschrock 	uint64_t version;
1723975Sek110237 	uint64_t stateval;
173*7754SJeff.Bonwick@Sun.COM 	uint64_t suspended;
1743975Sek110237 	uint64_t hostid = 0;
175789Sahrens 
1761760Seschrock 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
1771760Seschrock 	    &version) == 0);
178789Sahrens 	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
179789Sahrens 	    &nvroot) == 0);
180789Sahrens 	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
181789Sahrens 	    (uint64_t **)&vs, &vsc) == 0);
1823975Sek110237 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
1833975Sek110237 	    &stateval) == 0);
1843975Sek110237 	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
1853975Sek110237 
1863975Sek110237 	/*
1873975Sek110237 	 * Pool last accessed by another system.
1883975Sek110237 	 */
1893975Sek110237 	if (hostid != 0 && (unsigned long)hostid != gethostid() &&
1903975Sek110237 	    stateval == POOL_STATE_ACTIVE)
1913975Sek110237 		return (ZPOOL_STATUS_HOSTID_MISMATCH);
192789Sahrens 
193789Sahrens 	/*
1941760Seschrock 	 * Newer on-disk version.
1951760Seschrock 	 */
1961760Seschrock 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
1971760Seschrock 	    vs->vs_aux == VDEV_AUX_VERSION_NEWER)
1981760Seschrock 		return (ZPOOL_STATUS_VERSION_NEWER);
1991760Seschrock 
2001760Seschrock 	/*
201789Sahrens 	 * Check that the config is complete.
202789Sahrens 	 */
203789Sahrens 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
2041544Seschrock 	    vs->vs_aux == VDEV_AUX_BAD_GUID_SUM)
205789Sahrens 		return (ZPOOL_STATUS_BAD_GUID_SUM);
2061544Seschrock 
2071544Seschrock 	/*
208*7754SJeff.Bonwick@Sun.COM 	 * Check whether the pool has suspended due to failed I/O.
2096523Sek110237 	 */
210*7754SJeff.Bonwick@Sun.COM 	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED,
211*7754SJeff.Bonwick@Sun.COM 	    &suspended) == 0) {
212*7754SJeff.Bonwick@Sun.COM 		if (suspended == ZIO_FAILURE_MODE_CONTINUE)
2136523Sek110237 			return (ZPOOL_STATUS_IO_FAILURE_CONTINUE);
214*7754SJeff.Bonwick@Sun.COM 		return (ZPOOL_STATUS_IO_FAILURE_WAIT);
2156523Sek110237 	}
2166523Sek110237 
2176523Sek110237 	/*
2187294Sperrin 	 * Could not read a log.
2197294Sperrin 	 */
2207294Sperrin 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
2217294Sperrin 	    vs->vs_aux == VDEV_AUX_BAD_LOG) {
2227294Sperrin 		return (ZPOOL_STATUS_BAD_LOG);
2237294Sperrin 	}
2247294Sperrin 
2257294Sperrin 	/*
2264451Seschrock 	 * Bad devices in non-replicated config.
2271544Seschrock 	 */
2281544Seschrock 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
2294451Seschrock 	    find_vdev_problem(nvroot, vdev_faulted))
2304451Seschrock 		return (ZPOOL_STATUS_FAULTED_DEV_NR);
2314451Seschrock 
2324451Seschrock 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
2331544Seschrock 	    find_vdev_problem(nvroot, vdev_missing))
2341544Seschrock 		return (ZPOOL_STATUS_MISSING_DEV_NR);
2351544Seschrock 
2361544Seschrock 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
2371544Seschrock 	    find_vdev_problem(nvroot, vdev_broken))
2381544Seschrock 		return (ZPOOL_STATUS_CORRUPT_LABEL_NR);
2391544Seschrock 
2401544Seschrock 	/*
2411544Seschrock 	 * Corrupted pool metadata
2421544Seschrock 	 */
2431544Seschrock 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
2441544Seschrock 	    vs->vs_aux == VDEV_AUX_CORRUPT_DATA)
2451544Seschrock 		return (ZPOOL_STATUS_CORRUPT_POOL);
2461544Seschrock 
2471544Seschrock 	/*
2481544Seschrock 	 * Persistent data errors.
2491544Seschrock 	 */
2501544Seschrock 	if (!isimport) {
2511544Seschrock 		if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT,
2521544Seschrock 		    &nerr) == 0 && nerr != 0)
2531544Seschrock 			return (ZPOOL_STATUS_CORRUPT_DATA);
254789Sahrens 	}
255789Sahrens 
256789Sahrens 	/*
2571544Seschrock 	 * Missing devices in a replicated config.
258789Sahrens 	 */
2594451Seschrock 	if (find_vdev_problem(nvroot, vdev_faulted))
2604451Seschrock 		return (ZPOOL_STATUS_FAULTED_DEV_R);
2611544Seschrock 	if (find_vdev_problem(nvroot, vdev_missing))
2621544Seschrock 		return (ZPOOL_STATUS_MISSING_DEV_R);
2631544Seschrock 	if (find_vdev_problem(nvroot, vdev_broken))
2641544Seschrock 		return (ZPOOL_STATUS_CORRUPT_LABEL_R);
265789Sahrens 
266789Sahrens 	/*
267789Sahrens 	 * Devices with errors
268789Sahrens 	 */
269789Sahrens 	if (!isimport && find_vdev_problem(nvroot, vdev_errors))
270789Sahrens 		return (ZPOOL_STATUS_FAILING_DEV);
271789Sahrens 
272789Sahrens 	/*
273789Sahrens 	 * Offlined devices
274789Sahrens 	 */
275789Sahrens 	if (find_vdev_problem(nvroot, vdev_offlined))
276789Sahrens 		return (ZPOOL_STATUS_OFFLINE_DEV);
277789Sahrens 
278789Sahrens 	/*
279789Sahrens 	 * Currently resilvering
280789Sahrens 	 */
281789Sahrens 	if (!vs->vs_scrub_complete && vs->vs_scrub_type == POOL_SCRUB_RESILVER)
282789Sahrens 		return (ZPOOL_STATUS_RESILVERING);
283789Sahrens 
284789Sahrens 	/*
2851760Seschrock 	 * Outdated, but usable, version
286789Sahrens 	 */
2874577Sahrens 	if (version < SPA_VERSION)
2881760Seschrock 		return (ZPOOL_STATUS_VERSION_OLDER);
289789Sahrens 
290789Sahrens 	return (ZPOOL_STATUS_OK);
291789Sahrens }
292789Sahrens 
293789Sahrens zpool_status_t
294789Sahrens zpool_get_status(zpool_handle_t *zhp, char **msgid)
295789Sahrens {
296*7754SJeff.Bonwick@Sun.COM 	zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE);
297789Sahrens 
298789Sahrens 	if (ret >= NMSGID)
299789Sahrens 		*msgid = NULL;
300789Sahrens 	else
3014451Seschrock 		*msgid = zfs_msgid_table[ret];
302789Sahrens 
303789Sahrens 	return (ret);
304789Sahrens }
305789Sahrens 
306789Sahrens zpool_status_t
307789Sahrens zpool_import_status(nvlist_t *config, char **msgid)
308789Sahrens {
309*7754SJeff.Bonwick@Sun.COM 	zpool_status_t ret = check_status(config, B_TRUE);
310789Sahrens 
311789Sahrens 	if (ret >= NMSGID)
312789Sahrens 		*msgid = NULL;
313789Sahrens 	else
3143975Sek110237 		*msgid = zfs_msgid_table[ret];
315789Sahrens 
316789Sahrens 	return (ret);
317789Sahrens }
318