1789Sahrens /*
2789Sahrens * CDDL HEADER START
3789Sahrens *
4789Sahrens * The contents of this file are subject to the terms of the
51544Seschrock * Common Development and Distribution License (the "License").
61544Seschrock * You may not use this file except in compliance with the License.
7789Sahrens *
8789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9789Sahrens * or http://www.opensolaris.org/os/licensing.
10789Sahrens * See the License for the specific language governing permissions
11789Sahrens * and limitations under the License.
12789Sahrens *
13789Sahrens * When distributing Covered Code, include this CDDL HEADER in each
14789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15789Sahrens * If applicable, add the following below this CDDL HEADER, with the
16789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying
17789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner]
18789Sahrens *
19789Sahrens * CDDL HEADER END
20789Sahrens */
21789Sahrens /*
22*12296SLin.Ling@Sun.COM * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23789Sahrens */
24789Sahrens
25789Sahrens /*
 * This file contains the functions which analyze the status of a pool.  This
 * includes both the status of an active pool and the status of exported
 * pools.  Returns one of the ZPOOL_STATUS_* defines describing the status of
29789Sahrens * the pool. This status is independent (to a certain degree) from the state of
304451Seschrock * the pool. A pool's state describes only whether or not it is capable of
31789Sahrens * providing the necessary fault tolerance for data. The status describes the
32789Sahrens * overall status of devices. A pool that is online can still have a device
33789Sahrens * that is experiencing errors.
34789Sahrens *
35789Sahrens * Only a subset of the possible faults can be detected using 'zpool status',
 * and not all possible errors correspond to an FMA message ID.  The explanation
37789Sahrens * is left up to the caller, depending on whether it is a live pool or an
38789Sahrens * import.
39789Sahrens */
40789Sahrens
41789Sahrens #include <libzfs.h>
42789Sahrens #include <string.h>
433975Sek110237 #include <unistd.h>
44789Sahrens #include "libzfs_impl.h"
45789Sahrens
/*
 * Message ID table, indexed by pool status value (see zpool_get_status() and
 * zpool_import_status(), which look entries up by status).  The order of the
 * entries must be kept in sync with the ZPOOL_STATUS_* defines in libzfs.h.
 * Some status results have values past the end of this table; those have no
 * associated message ID.
 */
static char *zfs_msgid_table[] = {
	"ZFS-8000-14",		/* index 0 */
	"ZFS-8000-2Q",		/* index 1 */
	"ZFS-8000-3C",		/* index 2 */
	"ZFS-8000-4J",		/* index 3 */
	"ZFS-8000-5E",		/* index 4 */
	"ZFS-8000-6X",		/* index 5 */
	"ZFS-8000-72",		/* index 6 */
	"ZFS-8000-8A",		/* index 7 */
	"ZFS-8000-9P",		/* index 8 */
	"ZFS-8000-A5",		/* index 9 */
	"ZFS-8000-EY",		/* index 10 */
	"ZFS-8000-HC",		/* index 11 */
	"ZFS-8000-JQ",		/* index 12 */
	"ZFS-8000-K4",		/* index 13 */
};

/* Number of entries in zfs_msgid_table. */
#define	NMSGID	(sizeof (zfs_msgid_table) / sizeof (*zfs_msgid_table))
69789Sahrens
70789Sahrens /* ARGSUSED */
71789Sahrens static int
vdev_missing(uint64_t state,uint64_t aux,uint64_t errs)72789Sahrens vdev_missing(uint64_t state, uint64_t aux, uint64_t errs)
73789Sahrens {
74789Sahrens return (state == VDEV_STATE_CANT_OPEN &&
75789Sahrens aux == VDEV_AUX_OPEN_FAILED);
76789Sahrens }
77789Sahrens
78789Sahrens /* ARGSUSED */
79789Sahrens static int
vdev_faulted(uint64_t state,uint64_t aux,uint64_t errs)804451Seschrock vdev_faulted(uint64_t state, uint64_t aux, uint64_t errs)
814451Seschrock {
824451Seschrock return (state == VDEV_STATE_FAULTED);
834451Seschrock }
844451Seschrock
854451Seschrock /* ARGSUSED */
864451Seschrock static int
vdev_errors(uint64_t state,uint64_t aux,uint64_t errs)87789Sahrens vdev_errors(uint64_t state, uint64_t aux, uint64_t errs)
88789Sahrens {
894451Seschrock return (state == VDEV_STATE_DEGRADED || errs != 0);
90789Sahrens }
91789Sahrens
92789Sahrens /* ARGSUSED */
93789Sahrens static int
vdev_broken(uint64_t state,uint64_t aux,uint64_t errs)94789Sahrens vdev_broken(uint64_t state, uint64_t aux, uint64_t errs)
95789Sahrens {
96789Sahrens return (state == VDEV_STATE_CANT_OPEN);
97789Sahrens }
98789Sahrens
99789Sahrens /* ARGSUSED */
100789Sahrens static int
vdev_offlined(uint64_t state,uint64_t aux,uint64_t errs)101789Sahrens vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs)
102789Sahrens {
103789Sahrens return (state == VDEV_STATE_OFFLINE);
104789Sahrens }
105789Sahrens
10610151SGeorge.Wilson@Sun.COM /* ARGSUSED */
10710151SGeorge.Wilson@Sun.COM static int
vdev_removed(uint64_t state,uint64_t aux,uint64_t errs)10810151SGeorge.Wilson@Sun.COM vdev_removed(uint64_t state, uint64_t aux, uint64_t errs)
10910151SGeorge.Wilson@Sun.COM {
11010151SGeorge.Wilson@Sun.COM return (state == VDEV_STATE_REMOVED);
11110151SGeorge.Wilson@Sun.COM }
11210151SGeorge.Wilson@Sun.COM
113789Sahrens /*
114789Sahrens * Detect if any leaf devices that have seen errors or could not be opened.
115789Sahrens */
1162082Seschrock static boolean_t
find_vdev_problem(nvlist_t * vdev,int (* func)(uint64_t,uint64_t,uint64_t))117789Sahrens find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t))
118789Sahrens {
119789Sahrens nvlist_t **child;
120789Sahrens vdev_stat_t *vs;
121789Sahrens uint_t c, children;
122789Sahrens char *type;
123789Sahrens
124789Sahrens /*
125789Sahrens * Ignore problems within a 'replacing' vdev, since we're presumably in
126789Sahrens * the process of repairing any such errors, and don't want to call them
127789Sahrens * out again. We'll pick up the fact that a resilver is happening
128789Sahrens * later.
129789Sahrens */
130789Sahrens verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0);
131789Sahrens if (strcmp(type, VDEV_TYPE_REPLACING) == 0)
1322082Seschrock return (B_FALSE);
133789Sahrens
134789Sahrens if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child,
135789Sahrens &children) == 0) {
136789Sahrens for (c = 0; c < children; c++)
137789Sahrens if (find_vdev_problem(child[c], func))
1382082Seschrock return (B_TRUE);
139789Sahrens } else {
140*12296SLin.Ling@Sun.COM verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS,
141789Sahrens (uint64_t **)&vs, &c) == 0);
142789Sahrens
143789Sahrens if (func(vs->vs_state, vs->vs_aux,
144789Sahrens vs->vs_read_errors +
145789Sahrens vs->vs_write_errors +
146789Sahrens vs->vs_checksum_errors))
1472082Seschrock return (B_TRUE);
148789Sahrens }
149789Sahrens
1502082Seschrock return (B_FALSE);
151789Sahrens }
152789Sahrens
153789Sahrens /*
154789Sahrens * Active pool health status.
155789Sahrens *
156789Sahrens * To determine the status for a pool, we make several passes over the config,
157789Sahrens * picking the most egregious error we find. In order of importance, we do the
158789Sahrens * following:
159789Sahrens *
160789Sahrens * - Check for a complete and valid configuration
1614451Seschrock * - Look for any faulted or missing devices in a non-replicated config
1621544Seschrock * - Check for any data errors
1634451Seschrock * - Check for any faulted or missing devices in a replicated config
164789Sahrens * - Look for any devices showing errors
165789Sahrens * - Check for any resilvering devices
166789Sahrens *
167789Sahrens * There can obviously be multiple errors within a single pool, so this routine
168789Sahrens * only picks the most damaging of all the current errors to report.
169789Sahrens */
170789Sahrens static zpool_status_t
check_status(nvlist_t * config,boolean_t isimport)1717754SJeff.Bonwick@Sun.COM check_status(nvlist_t *config, boolean_t isimport)
172789Sahrens {
173789Sahrens nvlist_t *nvroot;
174789Sahrens vdev_stat_t *vs;
175*12296SLin.Ling@Sun.COM pool_scan_stat_t *ps = NULL;
176*12296SLin.Ling@Sun.COM uint_t vsc, psc;
1771544Seschrock uint64_t nerr;
1781760Seschrock uint64_t version;
1793975Sek110237 uint64_t stateval;
1807754SJeff.Bonwick@Sun.COM uint64_t suspended;
1813975Sek110237 uint64_t hostid = 0;
182789Sahrens
1831760Seschrock verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
1841760Seschrock &version) == 0);
185789Sahrens verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
186789Sahrens &nvroot) == 0);
187*12296SLin.Ling@Sun.COM verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
188789Sahrens (uint64_t **)&vs, &vsc) == 0);
1893975Sek110237 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
1903975Sek110237 &stateval) == 0);
191*12296SLin.Ling@Sun.COM
192*12296SLin.Ling@Sun.COM /*
193*12296SLin.Ling@Sun.COM * Currently resilvering a vdev
194*12296SLin.Ling@Sun.COM */
195*12296SLin.Ling@Sun.COM (void) nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_SCAN_STATS,
196*12296SLin.Ling@Sun.COM (uint64_t **)&ps, &psc);
197*12296SLin.Ling@Sun.COM if (ps && ps->pss_func == POOL_SCAN_RESILVER &&
198*12296SLin.Ling@Sun.COM ps->pss_state == DSS_SCANNING)
199*12296SLin.Ling@Sun.COM return (ZPOOL_STATUS_RESILVERING);
2003975Sek110237
2013975Sek110237 /*
2023975Sek110237 * Pool last accessed by another system.
2033975Sek110237 */
204*12296SLin.Ling@Sun.COM (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
2053975Sek110237 if (hostid != 0 && (unsigned long)hostid != gethostid() &&
2063975Sek110237 stateval == POOL_STATE_ACTIVE)
2073975Sek110237 return (ZPOOL_STATUS_HOSTID_MISMATCH);
208789Sahrens
209789Sahrens /*
2101760Seschrock * Newer on-disk version.
2111760Seschrock */
2121760Seschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
2131760Seschrock vs->vs_aux == VDEV_AUX_VERSION_NEWER)
2141760Seschrock return (ZPOOL_STATUS_VERSION_NEWER);
2151760Seschrock
2161760Seschrock /*
217789Sahrens * Check that the config is complete.
218789Sahrens */
219789Sahrens if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
2201544Seschrock vs->vs_aux == VDEV_AUX_BAD_GUID_SUM)
221789Sahrens return (ZPOOL_STATUS_BAD_GUID_SUM);
2221544Seschrock
2231544Seschrock /*
2247754SJeff.Bonwick@Sun.COM * Check whether the pool has suspended due to failed I/O.
2256523Sek110237 */
2267754SJeff.Bonwick@Sun.COM if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED,
2277754SJeff.Bonwick@Sun.COM &suspended) == 0) {
2287754SJeff.Bonwick@Sun.COM if (suspended == ZIO_FAILURE_MODE_CONTINUE)
2296523Sek110237 return (ZPOOL_STATUS_IO_FAILURE_CONTINUE);
2307754SJeff.Bonwick@Sun.COM return (ZPOOL_STATUS_IO_FAILURE_WAIT);
2316523Sek110237 }
2326523Sek110237
2336523Sek110237 /*
2347294Sperrin * Could not read a log.
2357294Sperrin */
2367294Sperrin if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
2377294Sperrin vs->vs_aux == VDEV_AUX_BAD_LOG) {
2387294Sperrin return (ZPOOL_STATUS_BAD_LOG);
2397294Sperrin }
2407294Sperrin
2417294Sperrin /*
2424451Seschrock * Bad devices in non-replicated config.
2431544Seschrock */
2441544Seschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
2454451Seschrock find_vdev_problem(nvroot, vdev_faulted))
2464451Seschrock return (ZPOOL_STATUS_FAULTED_DEV_NR);
2474451Seschrock
2484451Seschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
2491544Seschrock find_vdev_problem(nvroot, vdev_missing))
2501544Seschrock return (ZPOOL_STATUS_MISSING_DEV_NR);
2511544Seschrock
2521544Seschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
2531544Seschrock find_vdev_problem(nvroot, vdev_broken))
2541544Seschrock return (ZPOOL_STATUS_CORRUPT_LABEL_NR);
2551544Seschrock
2561544Seschrock /*
2571544Seschrock * Corrupted pool metadata
2581544Seschrock */
2591544Seschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
2601544Seschrock vs->vs_aux == VDEV_AUX_CORRUPT_DATA)
2611544Seschrock return (ZPOOL_STATUS_CORRUPT_POOL);
2621544Seschrock
2631544Seschrock /*
2641544Seschrock * Persistent data errors.
2651544Seschrock */
2661544Seschrock if (!isimport) {
2671544Seschrock if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT,
2681544Seschrock &nerr) == 0 && nerr != 0)
2691544Seschrock return (ZPOOL_STATUS_CORRUPT_DATA);
270789Sahrens }
271789Sahrens
272789Sahrens /*
2731544Seschrock * Missing devices in a replicated config.
274789Sahrens */
2754451Seschrock if (find_vdev_problem(nvroot, vdev_faulted))
2764451Seschrock return (ZPOOL_STATUS_FAULTED_DEV_R);
2771544Seschrock if (find_vdev_problem(nvroot, vdev_missing))
2781544Seschrock return (ZPOOL_STATUS_MISSING_DEV_R);
2791544Seschrock if (find_vdev_problem(nvroot, vdev_broken))
2801544Seschrock return (ZPOOL_STATUS_CORRUPT_LABEL_R);
281789Sahrens
282789Sahrens /*
283789Sahrens * Devices with errors
284789Sahrens */
285789Sahrens if (!isimport && find_vdev_problem(nvroot, vdev_errors))
286789Sahrens return (ZPOOL_STATUS_FAILING_DEV);
287789Sahrens
288789Sahrens /*
289789Sahrens * Offlined devices
290789Sahrens */
291789Sahrens if (find_vdev_problem(nvroot, vdev_offlined))
292789Sahrens return (ZPOOL_STATUS_OFFLINE_DEV);
293789Sahrens
294789Sahrens /*
29510151SGeorge.Wilson@Sun.COM * Removed device
29610151SGeorge.Wilson@Sun.COM */
29710151SGeorge.Wilson@Sun.COM if (find_vdev_problem(nvroot, vdev_removed))
29810151SGeorge.Wilson@Sun.COM return (ZPOOL_STATUS_REMOVED_DEV);
29910151SGeorge.Wilson@Sun.COM
30010151SGeorge.Wilson@Sun.COM /*
3011760Seschrock * Outdated, but usable, version
302789Sahrens */
3034577Sahrens if (version < SPA_VERSION)
3041760Seschrock return (ZPOOL_STATUS_VERSION_OLDER);
305789Sahrens
306789Sahrens return (ZPOOL_STATUS_OK);
307789Sahrens }
308789Sahrens
309789Sahrens zpool_status_t
zpool_get_status(zpool_handle_t * zhp,char ** msgid)310789Sahrens zpool_get_status(zpool_handle_t *zhp, char **msgid)
311789Sahrens {
3127754SJeff.Bonwick@Sun.COM zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE);
313789Sahrens
314789Sahrens if (ret >= NMSGID)
315789Sahrens *msgid = NULL;
316789Sahrens else
3174451Seschrock *msgid = zfs_msgid_table[ret];
318789Sahrens
319789Sahrens return (ret);
320789Sahrens }
321789Sahrens
322789Sahrens zpool_status_t
zpool_import_status(nvlist_t * config,char ** msgid)323789Sahrens zpool_import_status(nvlist_t *config, char **msgid)
324789Sahrens {
3257754SJeff.Bonwick@Sun.COM zpool_status_t ret = check_status(config, B_TRUE);
326789Sahrens
327789Sahrens if (ret >= NMSGID)
328789Sahrens *msgid = NULL;
329789Sahrens else
3303975Sek110237 *msgid = zfs_msgid_table[ret];
331789Sahrens
332789Sahrens return (ret);
333789Sahrens }
33411149SGeorge.Wilson@Sun.COM
33511149SGeorge.Wilson@Sun.COM static void
dump_ddt_stat(const ddt_stat_t * dds,int h)33611149SGeorge.Wilson@Sun.COM dump_ddt_stat(const ddt_stat_t *dds, int h)
33711149SGeorge.Wilson@Sun.COM {
33811149SGeorge.Wilson@Sun.COM char refcnt[6];
33911149SGeorge.Wilson@Sun.COM char blocks[6], lsize[6], psize[6], dsize[6];
34011149SGeorge.Wilson@Sun.COM char ref_blocks[6], ref_lsize[6], ref_psize[6], ref_dsize[6];
34111149SGeorge.Wilson@Sun.COM
34211149SGeorge.Wilson@Sun.COM if (dds == NULL || dds->dds_blocks == 0)
34311149SGeorge.Wilson@Sun.COM return;
34411149SGeorge.Wilson@Sun.COM
34511149SGeorge.Wilson@Sun.COM if (h == -1)
34611149SGeorge.Wilson@Sun.COM (void) strcpy(refcnt, "Total");
34711149SGeorge.Wilson@Sun.COM else
34811149SGeorge.Wilson@Sun.COM zfs_nicenum(1ULL << h, refcnt, sizeof (refcnt));
34911149SGeorge.Wilson@Sun.COM
35011149SGeorge.Wilson@Sun.COM zfs_nicenum(dds->dds_blocks, blocks, sizeof (blocks));
35111149SGeorge.Wilson@Sun.COM zfs_nicenum(dds->dds_lsize, lsize, sizeof (lsize));
35211149SGeorge.Wilson@Sun.COM zfs_nicenum(dds->dds_psize, psize, sizeof (psize));
35311149SGeorge.Wilson@Sun.COM zfs_nicenum(dds->dds_dsize, dsize, sizeof (dsize));
35411149SGeorge.Wilson@Sun.COM zfs_nicenum(dds->dds_ref_blocks, ref_blocks, sizeof (ref_blocks));
35511149SGeorge.Wilson@Sun.COM zfs_nicenum(dds->dds_ref_lsize, ref_lsize, sizeof (ref_lsize));
35611149SGeorge.Wilson@Sun.COM zfs_nicenum(dds->dds_ref_psize, ref_psize, sizeof (ref_psize));
35711149SGeorge.Wilson@Sun.COM zfs_nicenum(dds->dds_ref_dsize, ref_dsize, sizeof (ref_dsize));
35811149SGeorge.Wilson@Sun.COM
35911149SGeorge.Wilson@Sun.COM (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n",
36011149SGeorge.Wilson@Sun.COM refcnt,
36111149SGeorge.Wilson@Sun.COM blocks, lsize, psize, dsize,
36211149SGeorge.Wilson@Sun.COM ref_blocks, ref_lsize, ref_psize, ref_dsize);
36311149SGeorge.Wilson@Sun.COM }
36411149SGeorge.Wilson@Sun.COM
36511149SGeorge.Wilson@Sun.COM /*
36611149SGeorge.Wilson@Sun.COM * Print the DDT histogram and the column totals.
36711149SGeorge.Wilson@Sun.COM */
36811149SGeorge.Wilson@Sun.COM void
zpool_dump_ddt(const ddt_stat_t * dds_total,const ddt_histogram_t * ddh)36911149SGeorge.Wilson@Sun.COM zpool_dump_ddt(const ddt_stat_t *dds_total, const ddt_histogram_t *ddh)
37011149SGeorge.Wilson@Sun.COM {
37111149SGeorge.Wilson@Sun.COM int h;
37211149SGeorge.Wilson@Sun.COM
37311149SGeorge.Wilson@Sun.COM (void) printf("\n");
37411149SGeorge.Wilson@Sun.COM
37511149SGeorge.Wilson@Sun.COM (void) printf("bucket "
37611149SGeorge.Wilson@Sun.COM " allocated "
37711149SGeorge.Wilson@Sun.COM " referenced \n");
37811149SGeorge.Wilson@Sun.COM (void) printf("______ "
37911149SGeorge.Wilson@Sun.COM "______________________________ "
38011149SGeorge.Wilson@Sun.COM "______________________________\n");
38111149SGeorge.Wilson@Sun.COM
38211149SGeorge.Wilson@Sun.COM (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n",
38311149SGeorge.Wilson@Sun.COM "refcnt",
38411149SGeorge.Wilson@Sun.COM "blocks", "LSIZE", "PSIZE", "DSIZE",
38511149SGeorge.Wilson@Sun.COM "blocks", "LSIZE", "PSIZE", "DSIZE");
38611149SGeorge.Wilson@Sun.COM
38711149SGeorge.Wilson@Sun.COM (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n",
38811149SGeorge.Wilson@Sun.COM "------",
38911149SGeorge.Wilson@Sun.COM "------", "-----", "-----", "-----",
39011149SGeorge.Wilson@Sun.COM "------", "-----", "-----", "-----");
39111149SGeorge.Wilson@Sun.COM
39211149SGeorge.Wilson@Sun.COM for (h = 0; h < 64; h++)
39311149SGeorge.Wilson@Sun.COM dump_ddt_stat(&ddh->ddh_stat[h], h);
39411149SGeorge.Wilson@Sun.COM
39511149SGeorge.Wilson@Sun.COM dump_ddt_stat(dds_total, -1);
39611149SGeorge.Wilson@Sun.COM
39711149SGeorge.Wilson@Sun.COM (void) printf("\n");
39811149SGeorge.Wilson@Sun.COM }
399