1789Sahrens /*
2789Sahrens  * CDDL HEADER START
3789Sahrens  *
4789Sahrens  * The contents of this file are subject to the terms of the
51544Seschrock  * Common Development and Distribution License (the "License").
61544Seschrock  * You may not use this file except in compliance with the License.
7789Sahrens  *
8789Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9789Sahrens  * or http://www.opensolaris.org/os/licensing.
10789Sahrens  * See the License for the specific language governing permissions
11789Sahrens  * and limitations under the License.
12789Sahrens  *
13789Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
14789Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15789Sahrens  * If applicable, add the following below this CDDL HEADER, with the
16789Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
17789Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
18789Sahrens  *
19789Sahrens  * CDDL HEADER END
20789Sahrens  */
21789Sahrens /*
22*10151SGeorge.Wilson@Sun.COM  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23789Sahrens  * Use is subject to license terms.
24789Sahrens  */
25789Sahrens 
/*
 * This file contains the functions which analyze the status of a pool.  This
 * includes both the status of an active pool and the status of exported
 * pools.  Each returns one of the ZPOOL_STATUS_* defines describing the status
 * of the pool.  This status is independent (to a certain degree) from the
 * state of the pool.  A pool's state describes only whether or not it is
 * capable of providing the necessary fault tolerance for data.  The status
 * describes the overall status of devices.  A pool that is online can still
 * have a device that is experiencing errors.
 *
 * Only a subset of the possible faults can be detected using 'zpool status',
 * and not all possible errors correspond to an FMA message ID.  The
 * explanation is left up to the caller, depending on whether it is a live
 * pool or an import.
 */
41789Sahrens 
42789Sahrens #include <libzfs.h>
43789Sahrens #include <string.h>
443975Sek110237 #include <unistd.h>
45789Sahrens #include "libzfs_impl.h"
46789Sahrens 
/*
 * Message ID table, indexed by zpool_status_t value.  The order of the
 * entries must be kept in sync with the ZPOOL_STATUS_* defines in libzfs.h.
 * Status values at or beyond NMSGID have no associated message ID; callers
 * must bounds-check against NMSGID before indexing.
 */
static char *zfs_msgid_table[] = {
	"ZFS-8000-14",
	"ZFS-8000-2Q",
	"ZFS-8000-3C",
	"ZFS-8000-4J",
	"ZFS-8000-5E",
	"ZFS-8000-6X",
	"ZFS-8000-72",
	"ZFS-8000-8A",
	"ZFS-8000-9P",
	"ZFS-8000-A5",
	"ZFS-8000-EY",
	"ZFS-8000-HC",
	"ZFS-8000-JQ",
	"ZFS-8000-K4",
};

/* Number of entries in zfs_msgid_table. */
#define	NMSGID	(sizeof (zfs_msgid_table) / sizeof (*zfs_msgid_table))
70789Sahrens 
71789Sahrens /* ARGSUSED */
72789Sahrens static int
73789Sahrens vdev_missing(uint64_t state, uint64_t aux, uint64_t errs)
74789Sahrens {
75789Sahrens 	return (state == VDEV_STATE_CANT_OPEN &&
76789Sahrens 	    aux == VDEV_AUX_OPEN_FAILED);
77789Sahrens }
78789Sahrens 
79789Sahrens /* ARGSUSED */
80789Sahrens static int
814451Seschrock vdev_faulted(uint64_t state, uint64_t aux, uint64_t errs)
824451Seschrock {
834451Seschrock 	return (state == VDEV_STATE_FAULTED);
844451Seschrock }
854451Seschrock 
864451Seschrock /* ARGSUSED */
874451Seschrock static int
88789Sahrens vdev_errors(uint64_t state, uint64_t aux, uint64_t errs)
89789Sahrens {
904451Seschrock 	return (state == VDEV_STATE_DEGRADED || errs != 0);
91789Sahrens }
92789Sahrens 
93789Sahrens /* ARGSUSED */
94789Sahrens static int
95789Sahrens vdev_broken(uint64_t state, uint64_t aux, uint64_t errs)
96789Sahrens {
97789Sahrens 	return (state == VDEV_STATE_CANT_OPEN);
98789Sahrens }
99789Sahrens 
100789Sahrens /* ARGSUSED */
101789Sahrens static int
102789Sahrens vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs)
103789Sahrens {
104789Sahrens 	return (state == VDEV_STATE_OFFLINE);
105789Sahrens }
106789Sahrens 
107*10151SGeorge.Wilson@Sun.COM /* ARGSUSED */
108*10151SGeorge.Wilson@Sun.COM static int
109*10151SGeorge.Wilson@Sun.COM vdev_removed(uint64_t state, uint64_t aux, uint64_t errs)
110*10151SGeorge.Wilson@Sun.COM {
111*10151SGeorge.Wilson@Sun.COM 	return (state == VDEV_STATE_REMOVED);
112*10151SGeorge.Wilson@Sun.COM }
113*10151SGeorge.Wilson@Sun.COM 
114789Sahrens /*
115789Sahrens  * Detect if any leaf devices that have seen errors or could not be opened.
116789Sahrens  */
1172082Seschrock static boolean_t
118789Sahrens find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t))
119789Sahrens {
120789Sahrens 	nvlist_t **child;
121789Sahrens 	vdev_stat_t *vs;
122789Sahrens 	uint_t c, children;
123789Sahrens 	char *type;
124789Sahrens 
125789Sahrens 	/*
126789Sahrens 	 * Ignore problems within a 'replacing' vdev, since we're presumably in
127789Sahrens 	 * the process of repairing any such errors, and don't want to call them
128789Sahrens 	 * out again.  We'll pick up the fact that a resilver is happening
129789Sahrens 	 * later.
130789Sahrens 	 */
131789Sahrens 	verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0);
132789Sahrens 	if (strcmp(type, VDEV_TYPE_REPLACING) == 0)
1332082Seschrock 		return (B_FALSE);
134789Sahrens 
135789Sahrens 	if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child,
136789Sahrens 	    &children) == 0) {
137789Sahrens 		for (c = 0; c < children; c++)
138789Sahrens 			if (find_vdev_problem(child[c], func))
1392082Seschrock 				return (B_TRUE);
140789Sahrens 	} else {
141789Sahrens 		verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS,
142789Sahrens 		    (uint64_t **)&vs, &c) == 0);
143789Sahrens 
144789Sahrens 		if (func(vs->vs_state, vs->vs_aux,
145789Sahrens 		    vs->vs_read_errors +
146789Sahrens 		    vs->vs_write_errors +
147789Sahrens 		    vs->vs_checksum_errors))
1482082Seschrock 			return (B_TRUE);
149789Sahrens 	}
150789Sahrens 
1512082Seschrock 	return (B_FALSE);
152789Sahrens }
153789Sahrens 
154789Sahrens /*
155789Sahrens  * Active pool health status.
156789Sahrens  *
157789Sahrens  * To determine the status for a pool, we make several passes over the config,
158789Sahrens  * picking the most egregious error we find.  In order of importance, we do the
159789Sahrens  * following:
160789Sahrens  *
161789Sahrens  *	- Check for a complete and valid configuration
1624451Seschrock  *	- Look for any faulted or missing devices in a non-replicated config
1631544Seschrock  *	- Check for any data errors
1644451Seschrock  *	- Check for any faulted or missing devices in a replicated config
165789Sahrens  *	- Look for any devices showing errors
166789Sahrens  *	- Check for any resilvering devices
167789Sahrens  *
168789Sahrens  * There can obviously be multiple errors within a single pool, so this routine
169789Sahrens  * only picks the most damaging of all the current errors to report.
170789Sahrens  */
171789Sahrens static zpool_status_t
1727754SJeff.Bonwick@Sun.COM check_status(nvlist_t *config, boolean_t isimport)
173789Sahrens {
174789Sahrens 	nvlist_t *nvroot;
175789Sahrens 	vdev_stat_t *vs;
176789Sahrens 	uint_t vsc;
1771544Seschrock 	uint64_t nerr;
1781760Seschrock 	uint64_t version;
1793975Sek110237 	uint64_t stateval;
1807754SJeff.Bonwick@Sun.COM 	uint64_t suspended;
1813975Sek110237 	uint64_t hostid = 0;
182789Sahrens 
1831760Seschrock 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
1841760Seschrock 	    &version) == 0);
185789Sahrens 	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
186789Sahrens 	    &nvroot) == 0);
187789Sahrens 	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
188789Sahrens 	    (uint64_t **)&vs, &vsc) == 0);
1893975Sek110237 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
1903975Sek110237 	    &stateval) == 0);
1913975Sek110237 	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
1923975Sek110237 
1933975Sek110237 	/*
1943975Sek110237 	 * Pool last accessed by another system.
1953975Sek110237 	 */
1963975Sek110237 	if (hostid != 0 && (unsigned long)hostid != gethostid() &&
1973975Sek110237 	    stateval == POOL_STATE_ACTIVE)
1983975Sek110237 		return (ZPOOL_STATUS_HOSTID_MISMATCH);
199789Sahrens 
200789Sahrens 	/*
2011760Seschrock 	 * Newer on-disk version.
2021760Seschrock 	 */
2031760Seschrock 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
2041760Seschrock 	    vs->vs_aux == VDEV_AUX_VERSION_NEWER)
2051760Seschrock 		return (ZPOOL_STATUS_VERSION_NEWER);
2061760Seschrock 
2071760Seschrock 	/*
208789Sahrens 	 * Check that the config is complete.
209789Sahrens 	 */
210789Sahrens 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
2111544Seschrock 	    vs->vs_aux == VDEV_AUX_BAD_GUID_SUM)
212789Sahrens 		return (ZPOOL_STATUS_BAD_GUID_SUM);
2131544Seschrock 
2141544Seschrock 	/*
2157754SJeff.Bonwick@Sun.COM 	 * Check whether the pool has suspended due to failed I/O.
2166523Sek110237 	 */
2177754SJeff.Bonwick@Sun.COM 	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED,
2187754SJeff.Bonwick@Sun.COM 	    &suspended) == 0) {
2197754SJeff.Bonwick@Sun.COM 		if (suspended == ZIO_FAILURE_MODE_CONTINUE)
2206523Sek110237 			return (ZPOOL_STATUS_IO_FAILURE_CONTINUE);
2217754SJeff.Bonwick@Sun.COM 		return (ZPOOL_STATUS_IO_FAILURE_WAIT);
2226523Sek110237 	}
2236523Sek110237 
2246523Sek110237 	/*
2257294Sperrin 	 * Could not read a log.
2267294Sperrin 	 */
2277294Sperrin 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
2287294Sperrin 	    vs->vs_aux == VDEV_AUX_BAD_LOG) {
2297294Sperrin 		return (ZPOOL_STATUS_BAD_LOG);
2307294Sperrin 	}
2317294Sperrin 
2327294Sperrin 	/*
2334451Seschrock 	 * Bad devices in non-replicated config.
2341544Seschrock 	 */
2351544Seschrock 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
2364451Seschrock 	    find_vdev_problem(nvroot, vdev_faulted))
2374451Seschrock 		return (ZPOOL_STATUS_FAULTED_DEV_NR);
2384451Seschrock 
2394451Seschrock 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
2401544Seschrock 	    find_vdev_problem(nvroot, vdev_missing))
2411544Seschrock 		return (ZPOOL_STATUS_MISSING_DEV_NR);
2421544Seschrock 
2431544Seschrock 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
2441544Seschrock 	    find_vdev_problem(nvroot, vdev_broken))
2451544Seschrock 		return (ZPOOL_STATUS_CORRUPT_LABEL_NR);
2461544Seschrock 
2471544Seschrock 	/*
2481544Seschrock 	 * Corrupted pool metadata
2491544Seschrock 	 */
2501544Seschrock 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
2511544Seschrock 	    vs->vs_aux == VDEV_AUX_CORRUPT_DATA)
2521544Seschrock 		return (ZPOOL_STATUS_CORRUPT_POOL);
2531544Seschrock 
2541544Seschrock 	/*
2551544Seschrock 	 * Persistent data errors.
2561544Seschrock 	 */
2571544Seschrock 	if (!isimport) {
2581544Seschrock 		if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT,
2591544Seschrock 		    &nerr) == 0 && nerr != 0)
2601544Seschrock 			return (ZPOOL_STATUS_CORRUPT_DATA);
261789Sahrens 	}
262789Sahrens 
263789Sahrens 	/*
2641544Seschrock 	 * Missing devices in a replicated config.
265789Sahrens 	 */
2664451Seschrock 	if (find_vdev_problem(nvroot, vdev_faulted))
2674451Seschrock 		return (ZPOOL_STATUS_FAULTED_DEV_R);
2681544Seschrock 	if (find_vdev_problem(nvroot, vdev_missing))
2691544Seschrock 		return (ZPOOL_STATUS_MISSING_DEV_R);
2701544Seschrock 	if (find_vdev_problem(nvroot, vdev_broken))
2711544Seschrock 		return (ZPOOL_STATUS_CORRUPT_LABEL_R);
272789Sahrens 
273789Sahrens 	/*
274789Sahrens 	 * Devices with errors
275789Sahrens 	 */
276789Sahrens 	if (!isimport && find_vdev_problem(nvroot, vdev_errors))
277789Sahrens 		return (ZPOOL_STATUS_FAILING_DEV);
278789Sahrens 
279789Sahrens 	/*
280789Sahrens 	 * Offlined devices
281789Sahrens 	 */
282789Sahrens 	if (find_vdev_problem(nvroot, vdev_offlined))
283789Sahrens 		return (ZPOOL_STATUS_OFFLINE_DEV);
284789Sahrens 
285789Sahrens 	/*
286*10151SGeorge.Wilson@Sun.COM 	 * Removed device
287*10151SGeorge.Wilson@Sun.COM 	 */
288*10151SGeorge.Wilson@Sun.COM 	if (find_vdev_problem(nvroot, vdev_removed))
289*10151SGeorge.Wilson@Sun.COM 		return (ZPOOL_STATUS_REMOVED_DEV);
290*10151SGeorge.Wilson@Sun.COM 
291*10151SGeorge.Wilson@Sun.COM 	/*
292789Sahrens 	 * Currently resilvering
293789Sahrens 	 */
294789Sahrens 	if (!vs->vs_scrub_complete && vs->vs_scrub_type == POOL_SCRUB_RESILVER)
295789Sahrens 		return (ZPOOL_STATUS_RESILVERING);
296789Sahrens 
297789Sahrens 	/*
2981760Seschrock 	 * Outdated, but usable, version
299789Sahrens 	 */
3004577Sahrens 	if (version < SPA_VERSION)
3011760Seschrock 		return (ZPOOL_STATUS_VERSION_OLDER);
302789Sahrens 
303789Sahrens 	return (ZPOOL_STATUS_OK);
304789Sahrens }
305789Sahrens 
306789Sahrens zpool_status_t
307789Sahrens zpool_get_status(zpool_handle_t *zhp, char **msgid)
308789Sahrens {
3097754SJeff.Bonwick@Sun.COM 	zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE);
310789Sahrens 
311789Sahrens 	if (ret >= NMSGID)
312789Sahrens 		*msgid = NULL;
313789Sahrens 	else
3144451Seschrock 		*msgid = zfs_msgid_table[ret];
315789Sahrens 
316789Sahrens 	return (ret);
317789Sahrens }
318789Sahrens 
319789Sahrens zpool_status_t
320789Sahrens zpool_import_status(nvlist_t *config, char **msgid)
321789Sahrens {
3227754SJeff.Bonwick@Sun.COM 	zpool_status_t ret = check_status(config, B_TRUE);
323789Sahrens 
324789Sahrens 	if (ret >= NMSGID)
325789Sahrens 		*msgid = NULL;
326789Sahrens 	else
3273975Sek110237 		*msgid = zfs_msgid_table[ret];
328789Sahrens 
329789Sahrens 	return (ret);
330789Sahrens }
331