1789Sahrens /*
2789Sahrens  * CDDL HEADER START
3789Sahrens  *
4789Sahrens  * The contents of this file are subject to the terms of the
51544Seschrock  * Common Development and Distribution License (the "License").
61544Seschrock  * You may not use this file except in compliance with the License.
7789Sahrens  *
8789Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9789Sahrens  * or http://www.opensolaris.org/os/licensing.
10789Sahrens  * See the License for the specific language governing permissions
11789Sahrens  * and limitations under the License.
12789Sahrens  *
13789Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
14789Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15789Sahrens  * If applicable, add the following below this CDDL HEADER, with the
16789Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
17789Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
18789Sahrens  *
19789Sahrens  * CDDL HEADER END
20789Sahrens  */
21789Sahrens /*
226523Sek110237  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23789Sahrens  * Use is subject to license terms.
24789Sahrens  */
25789Sahrens 
26789Sahrens #pragma ident	"%Z%%M%	%I%	%E% SMI"
27789Sahrens 
/*
 * This file contains the functions which analyze the status of a pool.  This
 * includes both the status of an active pool and the status of exported
 * pools.  Returns one of the ZPOOL_STATUS_* defines describing the status of
 * the pool.  This status is independent (to a certain degree) from the state of
 * the pool.  A pool's state describes only whether or not it is capable of
 * providing the necessary fault tolerance for data.  The status describes the
 * overall status of devices.  A pool that is online can still have a device
 * that is experiencing errors.
 *
 * Only a subset of the possible faults can be detected using 'zpool status',
 * and not all possible errors correspond to an FMA message ID.  The
 * explanation is left up to the caller, depending on whether it is a live
 * pool or an import.
 */
43789Sahrens 
44789Sahrens #include <libzfs.h>
45789Sahrens #include <string.h>
463975Sek110237 #include <unistd.h>
47789Sahrens #include "libzfs_impl.h"
48789Sahrens 
/*
 * FMA message IDs, indexed directly by zpool_status_t value.  The order of
 * the entries is therefore significant and has to track the ZPOOL_STATUS_*
 * defines in libzfs.h.  Status values at or beyond NMSGID have no message ID
 * associated with them.
 */
static char *zfs_msgid_table[] = {
	"ZFS-8000-14",
	"ZFS-8000-2Q",
	"ZFS-8000-3C",
	"ZFS-8000-4J",
	"ZFS-8000-5E",
	"ZFS-8000-6X",
	"ZFS-8000-72",
	"ZFS-8000-8A",
	"ZFS-8000-9P",
	"ZFS-8000-A5",
	"ZFS-8000-EY",
	"ZFS-8000-HC",
	"ZFS-8000-JQ",
	"ZFS-8000-K4",
};

/* Number of entries in zfs_msgid_table. */
#define	NMSGID	(sizeof (zfs_msgid_table) / sizeof (*zfs_msgid_table))
72789Sahrens 
73789Sahrens /* ARGSUSED */
74789Sahrens static int
75789Sahrens vdev_missing(uint64_t state, uint64_t aux, uint64_t errs)
76789Sahrens {
77789Sahrens 	return (state == VDEV_STATE_CANT_OPEN &&
78789Sahrens 	    aux == VDEV_AUX_OPEN_FAILED);
79789Sahrens }
80789Sahrens 
81789Sahrens /* ARGSUSED */
82789Sahrens static int
834451Seschrock vdev_faulted(uint64_t state, uint64_t aux, uint64_t errs)
844451Seschrock {
854451Seschrock 	return (state == VDEV_STATE_FAULTED);
864451Seschrock }
874451Seschrock 
884451Seschrock /* ARGSUSED */
894451Seschrock static int
90789Sahrens vdev_errors(uint64_t state, uint64_t aux, uint64_t errs)
91789Sahrens {
924451Seschrock 	return (state == VDEV_STATE_DEGRADED || errs != 0);
93789Sahrens }
94789Sahrens 
95789Sahrens /* ARGSUSED */
96789Sahrens static int
97789Sahrens vdev_broken(uint64_t state, uint64_t aux, uint64_t errs)
98789Sahrens {
99789Sahrens 	return (state == VDEV_STATE_CANT_OPEN);
100789Sahrens }
101789Sahrens 
102789Sahrens /* ARGSUSED */
103789Sahrens static int
104789Sahrens vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs)
105789Sahrens {
106789Sahrens 	return (state == VDEV_STATE_OFFLINE);
107789Sahrens }
108789Sahrens 
109789Sahrens /*
110789Sahrens  * Detect if any leaf devices that have seen errors or could not be opened.
111789Sahrens  */
1122082Seschrock static boolean_t
113789Sahrens find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t))
114789Sahrens {
115789Sahrens 	nvlist_t **child;
116789Sahrens 	vdev_stat_t *vs;
117789Sahrens 	uint_t c, children;
118789Sahrens 	char *type;
119789Sahrens 
120789Sahrens 	/*
121789Sahrens 	 * Ignore problems within a 'replacing' vdev, since we're presumably in
122789Sahrens 	 * the process of repairing any such errors, and don't want to call them
123789Sahrens 	 * out again.  We'll pick up the fact that a resilver is happening
124789Sahrens 	 * later.
125789Sahrens 	 */
126789Sahrens 	verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0);
127789Sahrens 	if (strcmp(type, VDEV_TYPE_REPLACING) == 0)
1282082Seschrock 		return (B_FALSE);
129789Sahrens 
130789Sahrens 	if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child,
131789Sahrens 	    &children) == 0) {
132789Sahrens 		for (c = 0; c < children; c++)
133789Sahrens 			if (find_vdev_problem(child[c], func))
1342082Seschrock 				return (B_TRUE);
135789Sahrens 	} else {
136789Sahrens 		verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS,
137789Sahrens 		    (uint64_t **)&vs, &c) == 0);
138789Sahrens 
139789Sahrens 		if (func(vs->vs_state, vs->vs_aux,
140789Sahrens 		    vs->vs_read_errors +
141789Sahrens 		    vs->vs_write_errors +
142789Sahrens 		    vs->vs_checksum_errors))
1432082Seschrock 			return (B_TRUE);
144789Sahrens 	}
145789Sahrens 
1462082Seschrock 	return (B_FALSE);
147789Sahrens }
148789Sahrens 
149789Sahrens /*
150789Sahrens  * Active pool health status.
151789Sahrens  *
152789Sahrens  * To determine the status for a pool, we make several passes over the config,
153789Sahrens  * picking the most egregious error we find.  In order of importance, we do the
154789Sahrens  * following:
155789Sahrens  *
156789Sahrens  *	- Check for a complete and valid configuration
1574451Seschrock  *	- Look for any faulted or missing devices in a non-replicated config
1581544Seschrock  *	- Check for any data errors
1594451Seschrock  *	- Check for any faulted or missing devices in a replicated config
160789Sahrens  *	- Look for any devices showing errors
161789Sahrens  *	- Check for any resilvering devices
162789Sahrens  *
163789Sahrens  * There can obviously be multiple errors within a single pool, so this routine
164789Sahrens  * only picks the most damaging of all the current errors to report.
165789Sahrens  */
166789Sahrens static zpool_status_t
1676523Sek110237 check_status(zpool_handle_t *zhp, nvlist_t *config, boolean_t isimport)
168789Sahrens {
169789Sahrens 	nvlist_t *nvroot;
170789Sahrens 	vdev_stat_t *vs;
171789Sahrens 	uint_t vsc;
1721544Seschrock 	uint64_t nerr;
1731760Seschrock 	uint64_t version;
1743975Sek110237 	uint64_t stateval;
1753975Sek110237 	uint64_t hostid = 0;
176789Sahrens 
1771760Seschrock 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
1781760Seschrock 	    &version) == 0);
179789Sahrens 	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
180789Sahrens 	    &nvroot) == 0);
181789Sahrens 	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
182789Sahrens 	    (uint64_t **)&vs, &vsc) == 0);
1833975Sek110237 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
1843975Sek110237 	    &stateval) == 0);
1853975Sek110237 	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
1863975Sek110237 
1873975Sek110237 	/*
1883975Sek110237 	 * Pool last accessed by another system.
1893975Sek110237 	 */
1903975Sek110237 	if (hostid != 0 && (unsigned long)hostid != gethostid() &&
1913975Sek110237 	    stateval == POOL_STATE_ACTIVE)
1923975Sek110237 		return (ZPOOL_STATUS_HOSTID_MISMATCH);
193789Sahrens 
194789Sahrens 	/*
1951760Seschrock 	 * Newer on-disk version.
1961760Seschrock 	 */
1971760Seschrock 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
1981760Seschrock 	    vs->vs_aux == VDEV_AUX_VERSION_NEWER)
1991760Seschrock 		return (ZPOOL_STATUS_VERSION_NEWER);
2001760Seschrock 
2011760Seschrock 	/*
202789Sahrens 	 * Check that the config is complete.
203789Sahrens 	 */
204789Sahrens 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
2051544Seschrock 	    vs->vs_aux == VDEV_AUX_BAD_GUID_SUM)
206789Sahrens 		return (ZPOOL_STATUS_BAD_GUID_SUM);
2071544Seschrock 
2081544Seschrock 	/*
2096523Sek110237 	 * Pool has experienced failed I/O.
2106523Sek110237 	 */
2116523Sek110237 	if (stateval == POOL_STATE_IO_FAILURE) {
2126523Sek110237 		zpool_handle_t *tmp_zhp = NULL;
2136523Sek110237 		libzfs_handle_t *hdl = NULL;
2146523Sek110237 		char property[ZPOOL_MAXPROPLEN];
2156523Sek110237 		char *failmode = NULL;
2166523Sek110237 
2176523Sek110237 		if (zhp == NULL) {
2186523Sek110237 			char *poolname;
2196523Sek110237 
2206523Sek110237 			verify(nvlist_lookup_string(config,
2216523Sek110237 			    ZPOOL_CONFIG_POOL_NAME, &poolname) == 0);
2226523Sek110237 			if ((hdl = libzfs_init()) == NULL)
2236523Sek110237 				return (ZPOOL_STATUS_IO_FAILURE_WAIT);
2246523Sek110237 			tmp_zhp = zpool_open_canfail(hdl, poolname);
2256523Sek110237 			if (tmp_zhp == NULL) {
2266523Sek110237 				libzfs_fini(hdl);
2276523Sek110237 				return (ZPOOL_STATUS_IO_FAILURE_WAIT);
2286523Sek110237 			}
2296523Sek110237 		}
2306523Sek110237 		if (zpool_get_prop(zhp ? zhp : tmp_zhp, ZPOOL_PROP_FAILUREMODE,
2316523Sek110237 		    property, sizeof (property), NULL) == 0)
2326523Sek110237 			failmode = property;
2336523Sek110237 		if (tmp_zhp != NULL)
2346523Sek110237 			zpool_close(tmp_zhp);
2356523Sek110237 		if (hdl != NULL)
2366523Sek110237 			libzfs_fini(hdl);
2376523Sek110237 		if (failmode == NULL)
2386523Sek110237 			return (ZPOOL_STATUS_IO_FAILURE_WAIT);
2396523Sek110237 
2406523Sek110237 		if (strncmp(failmode, "continue", strlen("continue")) == 0)
2416523Sek110237 			return (ZPOOL_STATUS_IO_FAILURE_CONTINUE);
2426523Sek110237 		else
2436523Sek110237 			return (ZPOOL_STATUS_IO_FAILURE_WAIT);
2446523Sek110237 	}
2456523Sek110237 
2466523Sek110237 	/*
247*7294Sperrin 	 * Could not read a log.
248*7294Sperrin 	 */
249*7294Sperrin 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
250*7294Sperrin 	    vs->vs_aux == VDEV_AUX_BAD_LOG) {
251*7294Sperrin 		return (ZPOOL_STATUS_BAD_LOG);
252*7294Sperrin 	}
253*7294Sperrin 
254*7294Sperrin 	/*
2554451Seschrock 	 * Bad devices in non-replicated config.
2561544Seschrock 	 */
2571544Seschrock 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
2584451Seschrock 	    find_vdev_problem(nvroot, vdev_faulted))
2594451Seschrock 		return (ZPOOL_STATUS_FAULTED_DEV_NR);
2604451Seschrock 
2614451Seschrock 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
2621544Seschrock 	    find_vdev_problem(nvroot, vdev_missing))
2631544Seschrock 		return (ZPOOL_STATUS_MISSING_DEV_NR);
2641544Seschrock 
2651544Seschrock 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
2661544Seschrock 	    find_vdev_problem(nvroot, vdev_broken))
2671544Seschrock 		return (ZPOOL_STATUS_CORRUPT_LABEL_NR);
2681544Seschrock 
2691544Seschrock 	/*
2701544Seschrock 	 * Corrupted pool metadata
2711544Seschrock 	 */
2721544Seschrock 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
2731544Seschrock 	    vs->vs_aux == VDEV_AUX_CORRUPT_DATA)
2741544Seschrock 		return (ZPOOL_STATUS_CORRUPT_POOL);
2751544Seschrock 
2761544Seschrock 	/*
2771544Seschrock 	 * Persistent data errors.
2781544Seschrock 	 */
2791544Seschrock 	if (!isimport) {
2801544Seschrock 		if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT,
2811544Seschrock 		    &nerr) == 0 && nerr != 0)
2821544Seschrock 			return (ZPOOL_STATUS_CORRUPT_DATA);
283789Sahrens 	}
284789Sahrens 
285789Sahrens 	/*
2861544Seschrock 	 * Missing devices in a replicated config.
287789Sahrens 	 */
2884451Seschrock 	if (find_vdev_problem(nvroot, vdev_faulted))
2894451Seschrock 		return (ZPOOL_STATUS_FAULTED_DEV_R);
2901544Seschrock 	if (find_vdev_problem(nvroot, vdev_missing))
2911544Seschrock 		return (ZPOOL_STATUS_MISSING_DEV_R);
2921544Seschrock 	if (find_vdev_problem(nvroot, vdev_broken))
2931544Seschrock 		return (ZPOOL_STATUS_CORRUPT_LABEL_R);
294789Sahrens 
295789Sahrens 	/*
296789Sahrens 	 * Devices with errors
297789Sahrens 	 */
298789Sahrens 	if (!isimport && find_vdev_problem(nvroot, vdev_errors))
299789Sahrens 		return (ZPOOL_STATUS_FAILING_DEV);
300789Sahrens 
301789Sahrens 	/*
302789Sahrens 	 * Offlined devices
303789Sahrens 	 */
304789Sahrens 	if (find_vdev_problem(nvroot, vdev_offlined))
305789Sahrens 		return (ZPOOL_STATUS_OFFLINE_DEV);
306789Sahrens 
307789Sahrens 	/*
308789Sahrens 	 * Currently resilvering
309789Sahrens 	 */
310789Sahrens 	if (!vs->vs_scrub_complete && vs->vs_scrub_type == POOL_SCRUB_RESILVER)
311789Sahrens 		return (ZPOOL_STATUS_RESILVERING);
312789Sahrens 
313789Sahrens 	/*
3141760Seschrock 	 * Outdated, but usable, version
315789Sahrens 	 */
3164577Sahrens 	if (version < SPA_VERSION)
3171760Seschrock 		return (ZPOOL_STATUS_VERSION_OLDER);
318789Sahrens 
319789Sahrens 	return (ZPOOL_STATUS_OK);
320789Sahrens }
321789Sahrens 
322789Sahrens zpool_status_t
323789Sahrens zpool_get_status(zpool_handle_t *zhp, char **msgid)
324789Sahrens {
3256523Sek110237 	zpool_status_t ret = check_status(zhp, zhp->zpool_config, B_FALSE);
326789Sahrens 
327789Sahrens 	if (ret >= NMSGID)
328789Sahrens 		*msgid = NULL;
329789Sahrens 	else
3304451Seschrock 		*msgid = zfs_msgid_table[ret];
331789Sahrens 
332789Sahrens 	return (ret);
333789Sahrens }
334789Sahrens 
335789Sahrens zpool_status_t
336789Sahrens zpool_import_status(nvlist_t *config, char **msgid)
337789Sahrens {
3386523Sek110237 	zpool_status_t ret = check_status(NULL, config, B_TRUE);
339789Sahrens 
340789Sahrens 	if (ret >= NMSGID)
341789Sahrens 		*msgid = NULL;
342789Sahrens 	else
3433975Sek110237 		*msgid = zfs_msgid_table[ret];
344789Sahrens 
345789Sahrens 	return (ret);
346789Sahrens }
347