1789Sahrens /* 2789Sahrens * CDDL HEADER START 3789Sahrens * 4789Sahrens * The contents of this file are subject to the terms of the 51544Seschrock * Common Development and Distribution License (the "License"). 61544Seschrock * You may not use this file except in compliance with the License. 7789Sahrens * 8789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9789Sahrens * or http://www.opensolaris.org/os/licensing. 10789Sahrens * See the License for the specific language governing permissions 11789Sahrens * and limitations under the License. 12789Sahrens * 13789Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15789Sahrens * If applicable, add the following below this CDDL HEADER, with the 16789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18789Sahrens * 19789Sahrens * CDDL HEADER END 20789Sahrens */ 21789Sahrens /* 226523Sek110237 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23789Sahrens * Use is subject to license terms. 24789Sahrens */ 25789Sahrens 26789Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 27789Sahrens 28789Sahrens /* 29789Sahrens * This file contains the functions which analyze the status of a pool. This 30789Sahrens * include both the status of an active pool, as well as the status exported 31789Sahrens * pools. Returns one of the ZPOOL_STATUS_* defines describing the status of 32789Sahrens * the pool. This status is independent (to a certain degree) from the state of 334451Seschrock * the pool. A pool's state describes only whether or not it is capable of 34789Sahrens * providing the necessary fault tolerance for data. The status describes the 35789Sahrens * overall status of devices. A pool that is online can still have a device 36789Sahrens * that is experiencing errors. 37789Sahrens * 38789Sahrens * Only a subset of the possible faults can be detected using 'zpool status', 39789Sahrens * and not all possible errors correspond to a FMA message ID. The explanation 40789Sahrens * is left up to the caller, depending on whether it is a live pool or an 41789Sahrens * import. 42789Sahrens */ 43789Sahrens 44789Sahrens #include <libzfs.h> 45789Sahrens #include <string.h> 463975Sek110237 #include <unistd.h> 47789Sahrens #include "libzfs_impl.h" 48789Sahrens 49789Sahrens /* 504451Seschrock * Message ID table. This must be kept in sync with the ZPOOL_STATUS_* defines 51789Sahrens * in libzfs.h. Note that there are some status results which go past the end 52789Sahrens * of this table, and hence have no associated message ID. 53789Sahrens */ 543975Sek110237 static char *zfs_msgid_table[] = { 55789Sahrens "ZFS-8000-14", 56789Sahrens "ZFS-8000-2Q", 57789Sahrens "ZFS-8000-3C", 58789Sahrens "ZFS-8000-4J", 59789Sahrens "ZFS-8000-5E", 60789Sahrens "ZFS-8000-6X", 61789Sahrens "ZFS-8000-72", 62789Sahrens "ZFS-8000-8A", 63789Sahrens "ZFS-8000-9P", 643975Sek110237 "ZFS-8000-A5", 656523Sek110237 "ZFS-8000-EY", 666523Sek110237 "ZFS-8000-HC", 67*7294Sperrin "ZFS-8000-JQ", 68*7294Sperrin "ZFS-8000-K4", 69789Sahrens }; 70789Sahrens 713975Sek110237 #define NMSGID (sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0])) 72789Sahrens 73789Sahrens /* ARGSUSED */ 74789Sahrens static int 75789Sahrens vdev_missing(uint64_t state, uint64_t aux, uint64_t errs) 76789Sahrens { 77789Sahrens return (state == VDEV_STATE_CANT_OPEN && 78789Sahrens aux == VDEV_AUX_OPEN_FAILED); 79789Sahrens } 80789Sahrens 81789Sahrens /* ARGSUSED */ 82789Sahrens static int 834451Seschrock vdev_faulted(uint64_t state, uint64_t aux, uint64_t errs) 844451Seschrock { 854451Seschrock return (state == VDEV_STATE_FAULTED); 864451Seschrock } 874451Seschrock 884451Seschrock /* ARGSUSED */ 894451Seschrock static int 90789Sahrens vdev_errors(uint64_t state, uint64_t aux, uint64_t errs) 91789Sahrens { 924451Seschrock return (state == VDEV_STATE_DEGRADED || errs != 0); 93789Sahrens } 94789Sahrens 95789Sahrens /* ARGSUSED */ 96789Sahrens static int 97789Sahrens vdev_broken(uint64_t state, uint64_t aux, uint64_t errs) 98789Sahrens { 99789Sahrens return (state == VDEV_STATE_CANT_OPEN); 100789Sahrens } 101789Sahrens 102789Sahrens /* ARGSUSED */ 103789Sahrens static int 104789Sahrens vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs) 105789Sahrens { 106789Sahrens return (state == VDEV_STATE_OFFLINE); 107789Sahrens } 108789Sahrens 109789Sahrens /* 110789Sahrens * Detect if any leaf devices that have seen errors or could not be opened. 111789Sahrens */ 1122082Seschrock static boolean_t 113789Sahrens find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t)) 114789Sahrens { 115789Sahrens nvlist_t **child; 116789Sahrens vdev_stat_t *vs; 117789Sahrens uint_t c, children; 118789Sahrens char *type; 119789Sahrens 120789Sahrens /* 121789Sahrens * Ignore problems within a 'replacing' vdev, since we're presumably in 122789Sahrens * the process of repairing any such errors, and don't want to call them 123789Sahrens * out again. We'll pick up the fact that a resilver is happening 124789Sahrens * later. 125789Sahrens */ 126789Sahrens verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0); 127789Sahrens if (strcmp(type, VDEV_TYPE_REPLACING) == 0) 1282082Seschrock return (B_FALSE); 129789Sahrens 130789Sahrens if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child, 131789Sahrens &children) == 0) { 132789Sahrens for (c = 0; c < children; c++) 133789Sahrens if (find_vdev_problem(child[c], func)) 1342082Seschrock return (B_TRUE); 135789Sahrens } else { 136789Sahrens verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS, 137789Sahrens (uint64_t **)&vs, &c) == 0); 138789Sahrens 139789Sahrens if (func(vs->vs_state, vs->vs_aux, 140789Sahrens vs->vs_read_errors + 141789Sahrens vs->vs_write_errors + 142789Sahrens vs->vs_checksum_errors)) 1432082Seschrock return (B_TRUE); 144789Sahrens } 145789Sahrens 1462082Seschrock return (B_FALSE); 147789Sahrens } 148789Sahrens 149789Sahrens /* 150789Sahrens * Active pool health status. 151789Sahrens * 152789Sahrens * To determine the status for a pool, we make several passes over the config, 153789Sahrens * picking the most egregious error we find. In order of importance, we do the 154789Sahrens * following: 155789Sahrens * 156789Sahrens * - Check for a complete and valid configuration 1574451Seschrock * - Look for any faulted or missing devices in a non-replicated config 1581544Seschrock * - Check for any data errors 1594451Seschrock * - Check for any faulted or missing devices in a replicated config 160789Sahrens * - Look for any devices showing errors 161789Sahrens * - Check for any resilvering devices 162789Sahrens * 163789Sahrens * There can obviously be multiple errors within a single pool, so this routine 164789Sahrens * only picks the most damaging of all the current errors to report. 165789Sahrens */ 166789Sahrens static zpool_status_t 1676523Sek110237 check_status(zpool_handle_t *zhp, nvlist_t *config, boolean_t isimport) 168789Sahrens { 169789Sahrens nvlist_t *nvroot; 170789Sahrens vdev_stat_t *vs; 171789Sahrens uint_t vsc; 1721544Seschrock uint64_t nerr; 1731760Seschrock uint64_t version; 1743975Sek110237 uint64_t stateval; 1753975Sek110237 uint64_t hostid = 0; 176789Sahrens 1771760Seschrock verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, 1781760Seschrock &version) == 0); 179789Sahrens verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 180789Sahrens &nvroot) == 0); 181789Sahrens verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS, 182789Sahrens (uint64_t **)&vs, &vsc) == 0); 1833975Sek110237 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, 1843975Sek110237 &stateval) == 0); 1853975Sek110237 (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid); 1863975Sek110237 1873975Sek110237 /* 1883975Sek110237 * Pool last accessed by another system. 1893975Sek110237 */ 1903975Sek110237 if (hostid != 0 && (unsigned long)hostid != gethostid() && 1913975Sek110237 stateval == POOL_STATE_ACTIVE) 1923975Sek110237 return (ZPOOL_STATUS_HOSTID_MISMATCH); 193789Sahrens 194789Sahrens /* 1951760Seschrock * Newer on-disk version. 1961760Seschrock */ 1971760Seschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 1981760Seschrock vs->vs_aux == VDEV_AUX_VERSION_NEWER) 1991760Seschrock return (ZPOOL_STATUS_VERSION_NEWER); 2001760Seschrock 2011760Seschrock /* 202789Sahrens * Check that the config is complete. 203789Sahrens */ 204789Sahrens if (vs->vs_state == VDEV_STATE_CANT_OPEN && 2051544Seschrock vs->vs_aux == VDEV_AUX_BAD_GUID_SUM) 206789Sahrens return (ZPOOL_STATUS_BAD_GUID_SUM); 2071544Seschrock 2081544Seschrock /* 2096523Sek110237 * Pool has experienced failed I/O. 2106523Sek110237 */ 2116523Sek110237 if (stateval == POOL_STATE_IO_FAILURE) { 2126523Sek110237 zpool_handle_t *tmp_zhp = NULL; 2136523Sek110237 libzfs_handle_t *hdl = NULL; 2146523Sek110237 char property[ZPOOL_MAXPROPLEN]; 2156523Sek110237 char *failmode = NULL; 2166523Sek110237 2176523Sek110237 if (zhp == NULL) { 2186523Sek110237 char *poolname; 2196523Sek110237 2206523Sek110237 verify(nvlist_lookup_string(config, 2216523Sek110237 ZPOOL_CONFIG_POOL_NAME, &poolname) == 0); 2226523Sek110237 if ((hdl = libzfs_init()) == NULL) 2236523Sek110237 return (ZPOOL_STATUS_IO_FAILURE_WAIT); 2246523Sek110237 tmp_zhp = zpool_open_canfail(hdl, poolname); 2256523Sek110237 if (tmp_zhp == NULL) { 2266523Sek110237 libzfs_fini(hdl); 2276523Sek110237 return (ZPOOL_STATUS_IO_FAILURE_WAIT); 2286523Sek110237 } 2296523Sek110237 } 2306523Sek110237 if (zpool_get_prop(zhp ? zhp : tmp_zhp, ZPOOL_PROP_FAILUREMODE, 2316523Sek110237 property, sizeof (property), NULL) == 0) 2326523Sek110237 failmode = property; 2336523Sek110237 if (tmp_zhp != NULL) 2346523Sek110237 zpool_close(tmp_zhp); 2356523Sek110237 if (hdl != NULL) 2366523Sek110237 libzfs_fini(hdl); 2376523Sek110237 if (failmode == NULL) 2386523Sek110237 return (ZPOOL_STATUS_IO_FAILURE_WAIT); 2396523Sek110237 2406523Sek110237 if (strncmp(failmode, "continue", strlen("continue")) == 0) 2416523Sek110237 return (ZPOOL_STATUS_IO_FAILURE_CONTINUE); 2426523Sek110237 else 2436523Sek110237 return (ZPOOL_STATUS_IO_FAILURE_WAIT); 2446523Sek110237 } 2456523Sek110237 2466523Sek110237 /* 247*7294Sperrin * Could not read a log. 248*7294Sperrin */ 249*7294Sperrin if (vs->vs_state == VDEV_STATE_CANT_OPEN && 250*7294Sperrin vs->vs_aux == VDEV_AUX_BAD_LOG) { 251*7294Sperrin return (ZPOOL_STATUS_BAD_LOG); 252*7294Sperrin } 253*7294Sperrin 254*7294Sperrin /* 2554451Seschrock * Bad devices in non-replicated config. 2561544Seschrock */ 2571544Seschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 2584451Seschrock find_vdev_problem(nvroot, vdev_faulted)) 2594451Seschrock return (ZPOOL_STATUS_FAULTED_DEV_NR); 2604451Seschrock 2614451Seschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 2621544Seschrock find_vdev_problem(nvroot, vdev_missing)) 2631544Seschrock return (ZPOOL_STATUS_MISSING_DEV_NR); 2641544Seschrock 2651544Seschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 2661544Seschrock find_vdev_problem(nvroot, vdev_broken)) 2671544Seschrock return (ZPOOL_STATUS_CORRUPT_LABEL_NR); 2681544Seschrock 2691544Seschrock /* 2701544Seschrock * Corrupted pool metadata 2711544Seschrock */ 2721544Seschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 2731544Seschrock vs->vs_aux == VDEV_AUX_CORRUPT_DATA) 2741544Seschrock return (ZPOOL_STATUS_CORRUPT_POOL); 2751544Seschrock 2761544Seschrock /* 2771544Seschrock * Persistent data errors. 2781544Seschrock */ 2791544Seschrock if (!isimport) { 2801544Seschrock if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT, 2811544Seschrock &nerr) == 0 && nerr != 0) 2821544Seschrock return (ZPOOL_STATUS_CORRUPT_DATA); 283789Sahrens } 284789Sahrens 285789Sahrens /* 2861544Seschrock * Missing devices in a replicated config. 287789Sahrens */ 2884451Seschrock if (find_vdev_problem(nvroot, vdev_faulted)) 2894451Seschrock return (ZPOOL_STATUS_FAULTED_DEV_R); 2901544Seschrock if (find_vdev_problem(nvroot, vdev_missing)) 2911544Seschrock return (ZPOOL_STATUS_MISSING_DEV_R); 2921544Seschrock if (find_vdev_problem(nvroot, vdev_broken)) 2931544Seschrock return (ZPOOL_STATUS_CORRUPT_LABEL_R); 294789Sahrens 295789Sahrens /* 296789Sahrens * Devices with errors 297789Sahrens */ 298789Sahrens if (!isimport && find_vdev_problem(nvroot, vdev_errors)) 299789Sahrens return (ZPOOL_STATUS_FAILING_DEV); 300789Sahrens 301789Sahrens /* 302789Sahrens * Offlined devices 303789Sahrens */ 304789Sahrens if (find_vdev_problem(nvroot, vdev_offlined)) 305789Sahrens return (ZPOOL_STATUS_OFFLINE_DEV); 306789Sahrens 307789Sahrens /* 308789Sahrens * Currently resilvering 309789Sahrens */ 310789Sahrens if (!vs->vs_scrub_complete && vs->vs_scrub_type == POOL_SCRUB_RESILVER) 311789Sahrens return (ZPOOL_STATUS_RESILVERING); 312789Sahrens 313789Sahrens /* 3141760Seschrock * Outdated, but usable, version 315789Sahrens */ 3164577Sahrens if (version < SPA_VERSION) 3171760Seschrock return (ZPOOL_STATUS_VERSION_OLDER); 318789Sahrens 319789Sahrens return (ZPOOL_STATUS_OK); 320789Sahrens } 321789Sahrens 322789Sahrens zpool_status_t 323789Sahrens zpool_get_status(zpool_handle_t *zhp, char **msgid) 324789Sahrens { 3256523Sek110237 zpool_status_t ret = check_status(zhp, zhp->zpool_config, B_FALSE); 326789Sahrens 327789Sahrens if (ret >= NMSGID) 328789Sahrens *msgid = NULL; 329789Sahrens else 3304451Seschrock *msgid = zfs_msgid_table[ret]; 331789Sahrens 332789Sahrens return (ret); 333789Sahrens } 334789Sahrens 335789Sahrens zpool_status_t 336789Sahrens zpool_import_status(nvlist_t *config, char **msgid) 337789Sahrens { 3386523Sek110237 zpool_status_t ret = check_status(NULL, config, B_TRUE); 339789Sahrens 340789Sahrens if (ret >= NMSGID) 341789Sahrens *msgid = NULL; 342789Sahrens else 3433975Sek110237 *msgid = zfs_msgid_table[ret]; 344789Sahrens 345789Sahrens return (ret); 346789Sahrens } 347