1789Sahrens /* 2789Sahrens * CDDL HEADER START 3789Sahrens * 4789Sahrens * The contents of this file are subject to the terms of the 51544Seschrock * Common Development and Distribution License (the "License"). 61544Seschrock * You may not use this file except in compliance with the License. 7789Sahrens * 8789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9789Sahrens * or http://www.opensolaris.org/os/licensing. 10789Sahrens * See the License for the specific language governing permissions 11789Sahrens * and limitations under the License. 12789Sahrens * 13789Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15789Sahrens * If applicable, add the following below this CDDL HEADER, with the 16789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18789Sahrens * 19789Sahrens * CDDL HEADER END 20789Sahrens */ 21789Sahrens /* 226523Sek110237 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23789Sahrens * Use is subject to license terms. 24789Sahrens */ 25789Sahrens 26789Sahrens /* 27789Sahrens * This file contains the functions which analyze the status of a pool. This 28789Sahrens * include both the status of an active pool, as well as the status exported 29789Sahrens * pools. Returns one of the ZPOOL_STATUS_* defines describing the status of 30789Sahrens * the pool. This status is independent (to a certain degree) from the state of 314451Seschrock * the pool. A pool's state describes only whether or not it is capable of 32789Sahrens * providing the necessary fault tolerance for data. The status describes the 33789Sahrens * overall status of devices. A pool that is online can still have a device 34789Sahrens * that is experiencing errors. 35789Sahrens * 36789Sahrens * Only a subset of the possible faults can be detected using 'zpool status', 37789Sahrens * and not all possible errors correspond to a FMA message ID. The explanation 38789Sahrens * is left up to the caller, depending on whether it is a live pool or an 39789Sahrens * import. 40789Sahrens */ 41789Sahrens 42789Sahrens #include <libzfs.h> 43789Sahrens #include <string.h> 443975Sek110237 #include <unistd.h> 45789Sahrens #include "libzfs_impl.h" 46789Sahrens 47789Sahrens /* 484451Seschrock * Message ID table. This must be kept in sync with the ZPOOL_STATUS_* defines 49789Sahrens * in libzfs.h. Note that there are some status results which go past the end 50789Sahrens * of this table, and hence have no associated message ID. 51789Sahrens */ 523975Sek110237 static char *zfs_msgid_table[] = { 53789Sahrens "ZFS-8000-14", 54789Sahrens "ZFS-8000-2Q", 55789Sahrens "ZFS-8000-3C", 56789Sahrens "ZFS-8000-4J", 57789Sahrens "ZFS-8000-5E", 58789Sahrens "ZFS-8000-6X", 59789Sahrens "ZFS-8000-72", 60789Sahrens "ZFS-8000-8A", 61789Sahrens "ZFS-8000-9P", 623975Sek110237 "ZFS-8000-A5", 636523Sek110237 "ZFS-8000-EY", 646523Sek110237 "ZFS-8000-HC", 657294Sperrin "ZFS-8000-JQ", 667294Sperrin "ZFS-8000-K4", 67789Sahrens }; 68789Sahrens 693975Sek110237 #define NMSGID (sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0])) 70789Sahrens 71789Sahrens /* ARGSUSED */ 72789Sahrens static int 73789Sahrens vdev_missing(uint64_t state, uint64_t aux, uint64_t errs) 74789Sahrens { 75789Sahrens return (state == VDEV_STATE_CANT_OPEN && 76789Sahrens aux == VDEV_AUX_OPEN_FAILED); 77789Sahrens } 78789Sahrens 79789Sahrens /* ARGSUSED */ 80789Sahrens static int 814451Seschrock vdev_faulted(uint64_t state, uint64_t aux, uint64_t errs) 824451Seschrock { 834451Seschrock return (state == VDEV_STATE_FAULTED); 844451Seschrock } 854451Seschrock 864451Seschrock /* ARGSUSED */ 874451Seschrock static int 88789Sahrens vdev_errors(uint64_t state, uint64_t aux, uint64_t errs) 89789Sahrens { 904451Seschrock return (state == VDEV_STATE_DEGRADED || errs != 0); 91789Sahrens } 92789Sahrens 93789Sahrens /* ARGSUSED */ 94789Sahrens static int 95789Sahrens vdev_broken(uint64_t state, uint64_t aux, uint64_t errs) 96789Sahrens { 97789Sahrens return (state == VDEV_STATE_CANT_OPEN); 98789Sahrens } 99789Sahrens 100789Sahrens /* ARGSUSED */ 101789Sahrens static int 102789Sahrens vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs) 103789Sahrens { 104789Sahrens return (state == VDEV_STATE_OFFLINE); 105789Sahrens } 106789Sahrens 107789Sahrens /* 108789Sahrens * Detect if any leaf devices that have seen errors or could not be opened. 109789Sahrens */ 1102082Seschrock static boolean_t 111789Sahrens find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t)) 112789Sahrens { 113789Sahrens nvlist_t **child; 114789Sahrens vdev_stat_t *vs; 115789Sahrens uint_t c, children; 116789Sahrens char *type; 117789Sahrens 118789Sahrens /* 119789Sahrens * Ignore problems within a 'replacing' vdev, since we're presumably in 120789Sahrens * the process of repairing any such errors, and don't want to call them 121789Sahrens * out again. We'll pick up the fact that a resilver is happening 122789Sahrens * later. 123789Sahrens */ 124789Sahrens verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0); 125789Sahrens if (strcmp(type, VDEV_TYPE_REPLACING) == 0) 1262082Seschrock return (B_FALSE); 127789Sahrens 128789Sahrens if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child, 129789Sahrens &children) == 0) { 130789Sahrens for (c = 0; c < children; c++) 131789Sahrens if (find_vdev_problem(child[c], func)) 1322082Seschrock return (B_TRUE); 133789Sahrens } else { 134789Sahrens verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS, 135789Sahrens (uint64_t **)&vs, &c) == 0); 136789Sahrens 137789Sahrens if (func(vs->vs_state, vs->vs_aux, 138789Sahrens vs->vs_read_errors + 139789Sahrens vs->vs_write_errors + 140789Sahrens vs->vs_checksum_errors)) 1412082Seschrock return (B_TRUE); 142789Sahrens } 143789Sahrens 1442082Seschrock return (B_FALSE); 145789Sahrens } 146789Sahrens 147789Sahrens /* 148789Sahrens * Active pool health status. 149789Sahrens * 150789Sahrens * To determine the status for a pool, we make several passes over the config, 151789Sahrens * picking the most egregious error we find. In order of importance, we do the 152789Sahrens * following: 153789Sahrens * 154789Sahrens * - Check for a complete and valid configuration 1554451Seschrock * - Look for any faulted or missing devices in a non-replicated config 1561544Seschrock * - Check for any data errors 1574451Seschrock * - Check for any faulted or missing devices in a replicated config 158789Sahrens * - Look for any devices showing errors 159789Sahrens * - Check for any resilvering devices 160789Sahrens * 161789Sahrens * There can obviously be multiple errors within a single pool, so this routine 162789Sahrens * only picks the most damaging of all the current errors to report. 163789Sahrens */ 164789Sahrens static zpool_status_t 165*7754SJeff.Bonwick@Sun.COM check_status(nvlist_t *config, boolean_t isimport) 166789Sahrens { 167789Sahrens nvlist_t *nvroot; 168789Sahrens vdev_stat_t *vs; 169789Sahrens uint_t vsc; 1701544Seschrock uint64_t nerr; 1711760Seschrock uint64_t version; 1723975Sek110237 uint64_t stateval; 173*7754SJeff.Bonwick@Sun.COM uint64_t suspended; 1743975Sek110237 uint64_t hostid = 0; 175789Sahrens 1761760Seschrock verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, 1771760Seschrock &version) == 0); 178789Sahrens verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 179789Sahrens &nvroot) == 0); 180789Sahrens verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS, 181789Sahrens (uint64_t **)&vs, &vsc) == 0); 1823975Sek110237 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, 1833975Sek110237 &stateval) == 0); 1843975Sek110237 (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid); 1853975Sek110237 1863975Sek110237 /* 1873975Sek110237 * Pool last accessed by another system. 1883975Sek110237 */ 1893975Sek110237 if (hostid != 0 && (unsigned long)hostid != gethostid() && 1903975Sek110237 stateval == POOL_STATE_ACTIVE) 1913975Sek110237 return (ZPOOL_STATUS_HOSTID_MISMATCH); 192789Sahrens 193789Sahrens /* 1941760Seschrock * Newer on-disk version. 1951760Seschrock */ 1961760Seschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 1971760Seschrock vs->vs_aux == VDEV_AUX_VERSION_NEWER) 1981760Seschrock return (ZPOOL_STATUS_VERSION_NEWER); 1991760Seschrock 2001760Seschrock /* 201789Sahrens * Check that the config is complete. 202789Sahrens */ 203789Sahrens if (vs->vs_state == VDEV_STATE_CANT_OPEN && 2041544Seschrock vs->vs_aux == VDEV_AUX_BAD_GUID_SUM) 205789Sahrens return (ZPOOL_STATUS_BAD_GUID_SUM); 2061544Seschrock 2071544Seschrock /* 208*7754SJeff.Bonwick@Sun.COM * Check whether the pool has suspended due to failed I/O. 2096523Sek110237 */ 210*7754SJeff.Bonwick@Sun.COM if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED, 211*7754SJeff.Bonwick@Sun.COM &suspended) == 0) { 212*7754SJeff.Bonwick@Sun.COM if (suspended == ZIO_FAILURE_MODE_CONTINUE) 2136523Sek110237 return (ZPOOL_STATUS_IO_FAILURE_CONTINUE); 214*7754SJeff.Bonwick@Sun.COM return (ZPOOL_STATUS_IO_FAILURE_WAIT); 2156523Sek110237 } 2166523Sek110237 2176523Sek110237 /* 2187294Sperrin * Could not read a log. 2197294Sperrin */ 2207294Sperrin if (vs->vs_state == VDEV_STATE_CANT_OPEN && 2217294Sperrin vs->vs_aux == VDEV_AUX_BAD_LOG) { 2227294Sperrin return (ZPOOL_STATUS_BAD_LOG); 2237294Sperrin } 2247294Sperrin 2257294Sperrin /* 2264451Seschrock * Bad devices in non-replicated config. 2271544Seschrock */ 2281544Seschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 2294451Seschrock find_vdev_problem(nvroot, vdev_faulted)) 2304451Seschrock return (ZPOOL_STATUS_FAULTED_DEV_NR); 2314451Seschrock 2324451Seschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 2331544Seschrock find_vdev_problem(nvroot, vdev_missing)) 2341544Seschrock return (ZPOOL_STATUS_MISSING_DEV_NR); 2351544Seschrock 2361544Seschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 2371544Seschrock find_vdev_problem(nvroot, vdev_broken)) 2381544Seschrock return (ZPOOL_STATUS_CORRUPT_LABEL_NR); 2391544Seschrock 2401544Seschrock /* 2411544Seschrock * Corrupted pool metadata 2421544Seschrock */ 2431544Seschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 2441544Seschrock vs->vs_aux == VDEV_AUX_CORRUPT_DATA) 2451544Seschrock return (ZPOOL_STATUS_CORRUPT_POOL); 2461544Seschrock 2471544Seschrock /* 2481544Seschrock * Persistent data errors. 2491544Seschrock */ 2501544Seschrock if (!isimport) { 2511544Seschrock if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT, 2521544Seschrock &nerr) == 0 && nerr != 0) 2531544Seschrock return (ZPOOL_STATUS_CORRUPT_DATA); 254789Sahrens } 255789Sahrens 256789Sahrens /* 2571544Seschrock * Missing devices in a replicated config. 258789Sahrens */ 2594451Seschrock if (find_vdev_problem(nvroot, vdev_faulted)) 2604451Seschrock return (ZPOOL_STATUS_FAULTED_DEV_R); 2611544Seschrock if (find_vdev_problem(nvroot, vdev_missing)) 2621544Seschrock return (ZPOOL_STATUS_MISSING_DEV_R); 2631544Seschrock if (find_vdev_problem(nvroot, vdev_broken)) 2641544Seschrock return (ZPOOL_STATUS_CORRUPT_LABEL_R); 265789Sahrens 266789Sahrens /* 267789Sahrens * Devices with errors 268789Sahrens */ 269789Sahrens if (!isimport && find_vdev_problem(nvroot, vdev_errors)) 270789Sahrens return (ZPOOL_STATUS_FAILING_DEV); 271789Sahrens 272789Sahrens /* 273789Sahrens * Offlined devices 274789Sahrens */ 275789Sahrens if (find_vdev_problem(nvroot, vdev_offlined)) 276789Sahrens return (ZPOOL_STATUS_OFFLINE_DEV); 277789Sahrens 278789Sahrens /* 279789Sahrens * Currently resilvering 280789Sahrens */ 281789Sahrens if (!vs->vs_scrub_complete && vs->vs_scrub_type == POOL_SCRUB_RESILVER) 282789Sahrens return (ZPOOL_STATUS_RESILVERING); 283789Sahrens 284789Sahrens /* 2851760Seschrock * Outdated, but usable, version 286789Sahrens */ 2874577Sahrens if (version < SPA_VERSION) 2881760Seschrock return (ZPOOL_STATUS_VERSION_OLDER); 289789Sahrens 290789Sahrens return (ZPOOL_STATUS_OK); 291789Sahrens } 292789Sahrens 293789Sahrens zpool_status_t 294789Sahrens zpool_get_status(zpool_handle_t *zhp, char **msgid) 295789Sahrens { 296*7754SJeff.Bonwick@Sun.COM zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE); 297789Sahrens 298789Sahrens if (ret >= NMSGID) 299789Sahrens *msgid = NULL; 300789Sahrens else 3014451Seschrock *msgid = zfs_msgid_table[ret]; 302789Sahrens 303789Sahrens return (ret); 304789Sahrens } 305789Sahrens 306789Sahrens zpool_status_t 307789Sahrens zpool_import_status(nvlist_t *config, char **msgid) 308789Sahrens { 309*7754SJeff.Bonwick@Sun.COM zpool_status_t ret = check_status(config, B_TRUE); 310789Sahrens 311789Sahrens if (ret >= NMSGID) 312789Sahrens *msgid = NULL; 313789Sahrens else 3143975Sek110237 *msgid = zfs_msgid_table[ret]; 315789Sahrens 316789Sahrens return (ret); 317789Sahrens } 318