1789Sahrens /* 2789Sahrens * CDDL HEADER START 3789Sahrens * 4789Sahrens * The contents of this file are subject to the terms of the 51544Seschrock * Common Development and Distribution License (the "License"). 61544Seschrock * You may not use this file except in compliance with the License. 7789Sahrens * 8789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9789Sahrens * or http://www.opensolaris.org/os/licensing. 10789Sahrens * See the License for the specific language governing permissions 11789Sahrens * and limitations under the License. 12789Sahrens * 13789Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15789Sahrens * If applicable, add the following below this CDDL HEADER, with the 16789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18789Sahrens * 19789Sahrens * CDDL HEADER END 20789Sahrens */ 21789Sahrens /* 22*3975Sek110237 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23789Sahrens * Use is subject to license terms. 24789Sahrens */ 25789Sahrens 26789Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 27789Sahrens 28789Sahrens /* 29789Sahrens * This file contains the functions which analyze the status of a pool. This 30789Sahrens * include both the status of an active pool, as well as the status exported 31789Sahrens * pools. Returns one of the ZPOOL_STATUS_* defines describing the status of 32789Sahrens * the pool. This status is independent (to a certain degree) from the state of 33789Sahrens * the pool. A pool's state descsribes only whether or not it is capable of 34789Sahrens * providing the necessary fault tolerance for data. The status describes the 35789Sahrens * overall status of devices. A pool that is online can still have a device 36789Sahrens * that is experiencing errors. 37789Sahrens * 38789Sahrens * Only a subset of the possible faults can be detected using 'zpool status', 39789Sahrens * and not all possible errors correspond to a FMA message ID. The explanation 40789Sahrens * is left up to the caller, depending on whether it is a live pool or an 41789Sahrens * import. 42789Sahrens */ 43789Sahrens 44789Sahrens #include <libzfs.h> 45789Sahrens #include <string.h> 46*3975Sek110237 #include <unistd.h> 47789Sahrens #include "libzfs_impl.h" 48789Sahrens 49789Sahrens /* 50789Sahrens * Message ID table. This must be kep in sync with the ZPOOL_STATUS_* defines 51789Sahrens * in libzfs.h. Note that there are some status results which go past the end 52789Sahrens * of this table, and hence have no associated message ID. 53789Sahrens */ 54*3975Sek110237 static char *zfs_msgid_table[] = { 55789Sahrens "ZFS-8000-14", 56789Sahrens "ZFS-8000-2Q", 57789Sahrens "ZFS-8000-3C", 58789Sahrens "ZFS-8000-4J", 59789Sahrens "ZFS-8000-5E", 60789Sahrens "ZFS-8000-6X", 61789Sahrens "ZFS-8000-72", 62789Sahrens "ZFS-8000-8A", 63789Sahrens "ZFS-8000-9P", 64*3975Sek110237 "ZFS-8000-A5", 65*3975Sek110237 "ZFS-8000-EY" 66789Sahrens }; 67789Sahrens 681544Seschrock /* 691544Seschrock * If the pool is active, a certain class of static errors is overridden by the 701544Seschrock * faults as analayzed by FMA. These faults have separate knowledge articles, 711544Seschrock * and the article referred to by 'zpool status' must match that indicated by 721544Seschrock * the syslog error message. We override missing data as well as corrupt pool. 731544Seschrock */ 74*3975Sek110237 static char *zfs_msgid_table_active[] = { 751544Seschrock "ZFS-8000-14", 761544Seschrock "ZFS-8000-D3", /* overridden */ 771544Seschrock "ZFS-8000-D3", /* overridden */ 781544Seschrock "ZFS-8000-4J", 791544Seschrock "ZFS-8000-5E", 801544Seschrock "ZFS-8000-6X", 811544Seschrock "ZFS-8000-CS", /* overridden */ 821544Seschrock "ZFS-8000-8A", 831544Seschrock "ZFS-8000-9P", 841544Seschrock "ZFS-8000-CS", /* overridden */ 851544Seschrock }; 861544Seschrock 87*3975Sek110237 #define NMSGID (sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0])) 88789Sahrens 89789Sahrens /* ARGSUSED */ 90789Sahrens static int 91789Sahrens vdev_missing(uint64_t state, uint64_t aux, uint64_t errs) 92789Sahrens { 93789Sahrens return (state == VDEV_STATE_CANT_OPEN && 94789Sahrens aux == VDEV_AUX_OPEN_FAILED); 95789Sahrens } 96789Sahrens 97789Sahrens /* ARGSUSED */ 98789Sahrens static int 99789Sahrens vdev_errors(uint64_t state, uint64_t aux, uint64_t errs) 100789Sahrens { 101789Sahrens return (errs != 0); 102789Sahrens } 103789Sahrens 104789Sahrens /* ARGSUSED */ 105789Sahrens static int 106789Sahrens vdev_broken(uint64_t state, uint64_t aux, uint64_t errs) 107789Sahrens { 108789Sahrens return (state == VDEV_STATE_CANT_OPEN); 109789Sahrens } 110789Sahrens 111789Sahrens /* ARGSUSED */ 112789Sahrens static int 113789Sahrens vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs) 114789Sahrens { 115789Sahrens return (state == VDEV_STATE_OFFLINE); 116789Sahrens } 117789Sahrens 118789Sahrens /* 119789Sahrens * Detect if any leaf devices that have seen errors or could not be opened. 120789Sahrens */ 1212082Seschrock static boolean_t 122789Sahrens find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t)) 123789Sahrens { 124789Sahrens nvlist_t **child; 125789Sahrens vdev_stat_t *vs; 126789Sahrens uint_t c, children; 127789Sahrens char *type; 128789Sahrens 129789Sahrens /* 130789Sahrens * Ignore problems within a 'replacing' vdev, since we're presumably in 131789Sahrens * the process of repairing any such errors, and don't want to call them 132789Sahrens * out again. We'll pick up the fact that a resilver is happening 133789Sahrens * later. 134789Sahrens */ 135789Sahrens verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0); 136789Sahrens if (strcmp(type, VDEV_TYPE_REPLACING) == 0) 1372082Seschrock return (B_FALSE); 138789Sahrens 139789Sahrens if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child, 140789Sahrens &children) == 0) { 141789Sahrens for (c = 0; c < children; c++) 142789Sahrens if (find_vdev_problem(child[c], func)) 1432082Seschrock return (B_TRUE); 144789Sahrens } else { 145789Sahrens verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS, 146789Sahrens (uint64_t **)&vs, &c) == 0); 147789Sahrens 148789Sahrens if (func(vs->vs_state, vs->vs_aux, 149789Sahrens vs->vs_read_errors + 150789Sahrens vs->vs_write_errors + 151789Sahrens vs->vs_checksum_errors)) 1522082Seschrock return (B_TRUE); 153789Sahrens } 154789Sahrens 1552082Seschrock return (B_FALSE); 156789Sahrens } 157789Sahrens 158789Sahrens /* 159789Sahrens * Active pool health status. 160789Sahrens * 161789Sahrens * To determine the status for a pool, we make several passes over the config, 162789Sahrens * picking the most egregious error we find. In order of importance, we do the 163789Sahrens * following: 164789Sahrens * 165789Sahrens * - Check for a complete and valid configuration 1661544Seschrock * - Look for any missing devices in a non-replicated config 1671544Seschrock * - Check for any data errors 1681544Seschrock * - Check for any missing devices in a replicated config 169789Sahrens * - Look for any devices showing errors 170789Sahrens * - Check for any resilvering devices 171789Sahrens * 172789Sahrens * There can obviously be multiple errors within a single pool, so this routine 173789Sahrens * only picks the most damaging of all the current errors to report. 174789Sahrens */ 175789Sahrens static zpool_status_t 1762082Seschrock check_status(nvlist_t *config, boolean_t isimport) 177789Sahrens { 178789Sahrens nvlist_t *nvroot; 179789Sahrens vdev_stat_t *vs; 180789Sahrens uint_t vsc; 1811544Seschrock uint64_t nerr; 1821760Seschrock uint64_t version; 183*3975Sek110237 uint64_t stateval; 184*3975Sek110237 uint64_t hostid = 0; 185789Sahrens 1861760Seschrock verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, 1871760Seschrock &version) == 0); 188789Sahrens verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 189789Sahrens &nvroot) == 0); 190789Sahrens verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS, 191789Sahrens (uint64_t **)&vs, &vsc) == 0); 192*3975Sek110237 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, 193*3975Sek110237 &stateval) == 0); 194*3975Sek110237 (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid); 195*3975Sek110237 196*3975Sek110237 /* 197*3975Sek110237 * Pool last accessed by another system. 198*3975Sek110237 */ 199*3975Sek110237 if (hostid != 0 && (unsigned long)hostid != gethostid() && 200*3975Sek110237 stateval == POOL_STATE_ACTIVE) 201*3975Sek110237 return (ZPOOL_STATUS_HOSTID_MISMATCH); 202789Sahrens 203789Sahrens /* 2041760Seschrock * Newer on-disk version. 2051760Seschrock */ 2061760Seschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 2071760Seschrock vs->vs_aux == VDEV_AUX_VERSION_NEWER) 2081760Seschrock return (ZPOOL_STATUS_VERSION_NEWER); 2091760Seschrock 2101760Seschrock /* 211789Sahrens * Check that the config is complete. 212789Sahrens */ 213789Sahrens if (vs->vs_state == VDEV_STATE_CANT_OPEN && 2141544Seschrock vs->vs_aux == VDEV_AUX_BAD_GUID_SUM) 215789Sahrens return (ZPOOL_STATUS_BAD_GUID_SUM); 2161544Seschrock 2171544Seschrock /* 2181544Seschrock * Missing devices in non-replicated config. 2191544Seschrock */ 2201544Seschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 2211544Seschrock find_vdev_problem(nvroot, vdev_missing)) 2221544Seschrock return (ZPOOL_STATUS_MISSING_DEV_NR); 2231544Seschrock 2241544Seschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 2251544Seschrock find_vdev_problem(nvroot, vdev_broken)) 2261544Seschrock return (ZPOOL_STATUS_CORRUPT_LABEL_NR); 2271544Seschrock 2281544Seschrock /* 2291544Seschrock * Corrupted pool metadata 2301544Seschrock */ 2311544Seschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 2321544Seschrock vs->vs_aux == VDEV_AUX_CORRUPT_DATA) 2331544Seschrock return (ZPOOL_STATUS_CORRUPT_POOL); 2341544Seschrock 2351544Seschrock /* 2361544Seschrock * Persistent data errors. 2371544Seschrock */ 2381544Seschrock if (!isimport) { 2391544Seschrock if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT, 2401544Seschrock &nerr) == 0 && nerr != 0) 2411544Seschrock return (ZPOOL_STATUS_CORRUPT_DATA); 242789Sahrens } 243789Sahrens 244789Sahrens /* 2451544Seschrock * Missing devices in a replicated config. 246789Sahrens */ 2471544Seschrock if (find_vdev_problem(nvroot, vdev_missing)) 2481544Seschrock return (ZPOOL_STATUS_MISSING_DEV_R); 2491544Seschrock if (find_vdev_problem(nvroot, vdev_broken)) 2501544Seschrock return (ZPOOL_STATUS_CORRUPT_LABEL_R); 251789Sahrens 252789Sahrens /* 253789Sahrens * Devices with errors 254789Sahrens */ 255789Sahrens if (!isimport && find_vdev_problem(nvroot, vdev_errors)) 256789Sahrens return (ZPOOL_STATUS_FAILING_DEV); 257789Sahrens 258789Sahrens /* 259789Sahrens * Offlined devices 260789Sahrens */ 261789Sahrens if (find_vdev_problem(nvroot, vdev_offlined)) 262789Sahrens return (ZPOOL_STATUS_OFFLINE_DEV); 263789Sahrens 264789Sahrens /* 265789Sahrens * Currently resilvering 266789Sahrens */ 267789Sahrens if (!vs->vs_scrub_complete && vs->vs_scrub_type == POOL_SCRUB_RESILVER) 268789Sahrens return (ZPOOL_STATUS_RESILVERING); 269789Sahrens 270789Sahrens /* 2711760Seschrock * Outdated, but usable, version 272789Sahrens */ 2731760Seschrock if (version < ZFS_VERSION) 2741760Seschrock return (ZPOOL_STATUS_VERSION_OLDER); 275789Sahrens 276789Sahrens return (ZPOOL_STATUS_OK); 277789Sahrens } 278789Sahrens 279789Sahrens zpool_status_t 280789Sahrens zpool_get_status(zpool_handle_t *zhp, char **msgid) 281789Sahrens { 2822082Seschrock zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE); 283789Sahrens 284789Sahrens if (ret >= NMSGID) 285789Sahrens *msgid = NULL; 286789Sahrens else 287*3975Sek110237 *msgid = zfs_msgid_table_active[ret]; 288789Sahrens 289789Sahrens return (ret); 290789Sahrens } 291789Sahrens 292789Sahrens zpool_status_t 293789Sahrens zpool_import_status(nvlist_t *config, char **msgid) 294789Sahrens { 2952082Seschrock zpool_status_t ret = check_status(config, B_TRUE); 296789Sahrens 297789Sahrens if (ret >= NMSGID) 298789Sahrens *msgid = NULL; 299789Sahrens else 300*3975Sek110237 *msgid = zfs_msgid_table[ret]; 301789Sahrens 302789Sahrens return (ret); 303789Sahrens } 304