1789Sahrens /* 2789Sahrens * CDDL HEADER START 3789Sahrens * 4789Sahrens * The contents of this file are subject to the terms of the 51544Seschrock * Common Development and Distribution License (the "License"). 61544Seschrock * You may not use this file except in compliance with the License. 7789Sahrens * 8789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9789Sahrens * or http://www.opensolaris.org/os/licensing. 10789Sahrens * See the License for the specific language governing permissions 11789Sahrens * and limitations under the License. 12789Sahrens * 13789Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15789Sahrens * If applicable, add the following below this CDDL HEADER, with the 16789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18789Sahrens * 19789Sahrens * CDDL HEADER END 20789Sahrens */ 21789Sahrens /* 223975Sek110237 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23789Sahrens * Use is subject to license terms. 24789Sahrens */ 25789Sahrens 26789Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 27789Sahrens 28789Sahrens /* 29789Sahrens * This file contains the functions which analyze the status of a pool. This 30789Sahrens * include both the status of an active pool, as well as the status exported 31789Sahrens * pools. Returns one of the ZPOOL_STATUS_* defines describing the status of 32789Sahrens * the pool. This status is independent (to a certain degree) from the state of 33*4451Seschrock * the pool. A pool's state describes only whether or not it is capable of 34789Sahrens * providing the necessary fault tolerance for data. The status describes the 35789Sahrens * overall status of devices. A pool that is online can still have a device 36789Sahrens * that is experiencing errors. 37789Sahrens * 38789Sahrens * Only a subset of the possible faults can be detected using 'zpool status', 39789Sahrens * and not all possible errors correspond to a FMA message ID. The explanation 40789Sahrens * is left up to the caller, depending on whether it is a live pool or an 41789Sahrens * import. 42789Sahrens */ 43789Sahrens 44789Sahrens #include <libzfs.h> 45789Sahrens #include <string.h> 463975Sek110237 #include <unistd.h> 47789Sahrens #include "libzfs_impl.h" 48789Sahrens 49789Sahrens /* 50*4451Seschrock * Message ID table. This must be kept in sync with the ZPOOL_STATUS_* defines 51789Sahrens * in libzfs.h. Note that there are some status results which go past the end 52789Sahrens * of this table, and hence have no associated message ID. 53789Sahrens */ 543975Sek110237 static char *zfs_msgid_table[] = { 55789Sahrens "ZFS-8000-14", 56789Sahrens "ZFS-8000-2Q", 57789Sahrens "ZFS-8000-3C", 58789Sahrens "ZFS-8000-4J", 59789Sahrens "ZFS-8000-5E", 60789Sahrens "ZFS-8000-6X", 61789Sahrens "ZFS-8000-72", 62789Sahrens "ZFS-8000-8A", 63789Sahrens "ZFS-8000-9P", 643975Sek110237 "ZFS-8000-A5", 653975Sek110237 "ZFS-8000-EY" 66789Sahrens }; 67789Sahrens 683975Sek110237 #define NMSGID (sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0])) 69789Sahrens 70789Sahrens /* ARGSUSED */ 71789Sahrens static int 72789Sahrens vdev_missing(uint64_t state, uint64_t aux, uint64_t errs) 73789Sahrens { 74789Sahrens return (state == VDEV_STATE_CANT_OPEN && 75789Sahrens aux == VDEV_AUX_OPEN_FAILED); 76789Sahrens } 77789Sahrens 78789Sahrens /* ARGSUSED */ 79789Sahrens static int 80*4451Seschrock vdev_faulted(uint64_t state, uint64_t aux, uint64_t errs) 81*4451Seschrock { 82*4451Seschrock return (state == VDEV_STATE_FAULTED); 83*4451Seschrock } 84*4451Seschrock 85*4451Seschrock /* ARGSUSED */ 86*4451Seschrock static int 87789Sahrens vdev_errors(uint64_t state, uint64_t aux, uint64_t errs) 88789Sahrens { 89*4451Seschrock return (state == VDEV_STATE_DEGRADED || errs != 0); 90789Sahrens } 91789Sahrens 92789Sahrens /* ARGSUSED */ 93789Sahrens static int 94789Sahrens vdev_broken(uint64_t state, uint64_t aux, uint64_t errs) 95789Sahrens { 96789Sahrens return (state == VDEV_STATE_CANT_OPEN); 97789Sahrens } 98789Sahrens 99789Sahrens /* ARGSUSED */ 100789Sahrens static int 101789Sahrens vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs) 102789Sahrens { 103789Sahrens return (state == VDEV_STATE_OFFLINE); 104789Sahrens } 105789Sahrens 106789Sahrens /* 107789Sahrens * Detect if any leaf devices that have seen errors or could not be opened. 108789Sahrens */ 1092082Seschrock static boolean_t 110789Sahrens find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t)) 111789Sahrens { 112789Sahrens nvlist_t **child; 113789Sahrens vdev_stat_t *vs; 114789Sahrens uint_t c, children; 115789Sahrens char *type; 116789Sahrens 117789Sahrens /* 118789Sahrens * Ignore problems within a 'replacing' vdev, since we're presumably in 119789Sahrens * the process of repairing any such errors, and don't want to call them 120789Sahrens * out again. We'll pick up the fact that a resilver is happening 121789Sahrens * later. 122789Sahrens */ 123789Sahrens verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0); 124789Sahrens if (strcmp(type, VDEV_TYPE_REPLACING) == 0) 1252082Seschrock return (B_FALSE); 126789Sahrens 127789Sahrens if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child, 128789Sahrens &children) == 0) { 129789Sahrens for (c = 0; c < children; c++) 130789Sahrens if (find_vdev_problem(child[c], func)) 1312082Seschrock return (B_TRUE); 132789Sahrens } else { 133789Sahrens verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS, 134789Sahrens (uint64_t **)&vs, &c) == 0); 135789Sahrens 136789Sahrens if (func(vs->vs_state, vs->vs_aux, 137789Sahrens vs->vs_read_errors + 138789Sahrens vs->vs_write_errors + 139789Sahrens vs->vs_checksum_errors)) 1402082Seschrock return (B_TRUE); 141789Sahrens } 142789Sahrens 1432082Seschrock return (B_FALSE); 144789Sahrens } 145789Sahrens 146789Sahrens /* 147789Sahrens * Active pool health status. 148789Sahrens * 149789Sahrens * To determine the status for a pool, we make several passes over the config, 150789Sahrens * picking the most egregious error we find. In order of importance, we do the 151789Sahrens * following: 152789Sahrens * 153789Sahrens * - Check for a complete and valid configuration 154*4451Seschrock * - Look for any faulted or missing devices in a non-replicated config 1551544Seschrock * - Check for any data errors 156*4451Seschrock * - Check for any faulted or missing devices in a replicated config 157789Sahrens * - Look for any devices showing errors 158789Sahrens * - Check for any resilvering devices 159789Sahrens * 160789Sahrens * There can obviously be multiple errors within a single pool, so this routine 161789Sahrens * only picks the most damaging of all the current errors to report. 162789Sahrens */ 163789Sahrens static zpool_status_t 1642082Seschrock check_status(nvlist_t *config, boolean_t isimport) 165789Sahrens { 166789Sahrens nvlist_t *nvroot; 167789Sahrens vdev_stat_t *vs; 168789Sahrens uint_t vsc; 1691544Seschrock uint64_t nerr; 1701760Seschrock uint64_t version; 1713975Sek110237 uint64_t stateval; 1723975Sek110237 uint64_t hostid = 0; 173789Sahrens 1741760Seschrock verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, 1751760Seschrock &version) == 0); 176789Sahrens verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 177789Sahrens &nvroot) == 0); 178789Sahrens verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS, 179789Sahrens (uint64_t **)&vs, &vsc) == 0); 1803975Sek110237 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, 1813975Sek110237 &stateval) == 0); 1823975Sek110237 (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid); 1833975Sek110237 1843975Sek110237 /* 1853975Sek110237 * Pool last accessed by another system. 1863975Sek110237 */ 1873975Sek110237 if (hostid != 0 && (unsigned long)hostid != gethostid() && 1883975Sek110237 stateval == POOL_STATE_ACTIVE) 1893975Sek110237 return (ZPOOL_STATUS_HOSTID_MISMATCH); 190789Sahrens 191789Sahrens /* 1921760Seschrock * Newer on-disk version. 1931760Seschrock */ 1941760Seschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 1951760Seschrock vs->vs_aux == VDEV_AUX_VERSION_NEWER) 1961760Seschrock return (ZPOOL_STATUS_VERSION_NEWER); 1971760Seschrock 1981760Seschrock /* 199789Sahrens * Check that the config is complete. 200789Sahrens */ 201789Sahrens if (vs->vs_state == VDEV_STATE_CANT_OPEN && 2021544Seschrock vs->vs_aux == VDEV_AUX_BAD_GUID_SUM) 203789Sahrens return (ZPOOL_STATUS_BAD_GUID_SUM); 2041544Seschrock 2051544Seschrock /* 206*4451Seschrock * Bad devices in non-replicated config. 2071544Seschrock */ 2081544Seschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 209*4451Seschrock find_vdev_problem(nvroot, vdev_faulted)) 210*4451Seschrock return (ZPOOL_STATUS_FAULTED_DEV_NR); 211*4451Seschrock 212*4451Seschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 2131544Seschrock find_vdev_problem(nvroot, vdev_missing)) 2141544Seschrock return (ZPOOL_STATUS_MISSING_DEV_NR); 2151544Seschrock 2161544Seschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 2171544Seschrock find_vdev_problem(nvroot, vdev_broken)) 2181544Seschrock return (ZPOOL_STATUS_CORRUPT_LABEL_NR); 2191544Seschrock 2201544Seschrock /* 2211544Seschrock * Corrupted pool metadata 2221544Seschrock */ 2231544Seschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 2241544Seschrock vs->vs_aux == VDEV_AUX_CORRUPT_DATA) 2251544Seschrock return (ZPOOL_STATUS_CORRUPT_POOL); 2261544Seschrock 2271544Seschrock /* 2281544Seschrock * Persistent data errors. 2291544Seschrock */ 2301544Seschrock if (!isimport) { 2311544Seschrock if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT, 2321544Seschrock &nerr) == 0 && nerr != 0) 2331544Seschrock return (ZPOOL_STATUS_CORRUPT_DATA); 234789Sahrens } 235789Sahrens 236789Sahrens /* 2371544Seschrock * Missing devices in a replicated config. 238789Sahrens */ 239*4451Seschrock if (find_vdev_problem(nvroot, vdev_faulted)) 240*4451Seschrock return (ZPOOL_STATUS_FAULTED_DEV_R); 2411544Seschrock if (find_vdev_problem(nvroot, vdev_missing)) 2421544Seschrock return (ZPOOL_STATUS_MISSING_DEV_R); 2431544Seschrock if (find_vdev_problem(nvroot, vdev_broken)) 2441544Seschrock return (ZPOOL_STATUS_CORRUPT_LABEL_R); 245789Sahrens 246789Sahrens /* 247789Sahrens * Devices with errors 248789Sahrens */ 249789Sahrens if (!isimport && find_vdev_problem(nvroot, vdev_errors)) 250789Sahrens return (ZPOOL_STATUS_FAILING_DEV); 251789Sahrens 252789Sahrens /* 253789Sahrens * Offlined devices 254789Sahrens */ 255789Sahrens if (find_vdev_problem(nvroot, vdev_offlined)) 256789Sahrens return (ZPOOL_STATUS_OFFLINE_DEV); 257789Sahrens 258789Sahrens /* 259789Sahrens * Currently resilvering 260789Sahrens */ 261789Sahrens if (!vs->vs_scrub_complete && vs->vs_scrub_type == POOL_SCRUB_RESILVER) 262789Sahrens return (ZPOOL_STATUS_RESILVERING); 263789Sahrens 264789Sahrens /* 2651760Seschrock * Outdated, but usable, version 266789Sahrens */ 2671760Seschrock if (version < ZFS_VERSION) 2681760Seschrock return (ZPOOL_STATUS_VERSION_OLDER); 269789Sahrens 270789Sahrens return (ZPOOL_STATUS_OK); 271789Sahrens } 272789Sahrens 273789Sahrens zpool_status_t 274789Sahrens zpool_get_status(zpool_handle_t *zhp, char **msgid) 275789Sahrens { 2762082Seschrock zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE); 277789Sahrens 278789Sahrens if (ret >= NMSGID) 279789Sahrens *msgid = NULL; 280789Sahrens else 281*4451Seschrock *msgid = zfs_msgid_table[ret]; 282789Sahrens 283789Sahrens return (ret); 284789Sahrens } 285789Sahrens 286789Sahrens zpool_status_t 287789Sahrens zpool_import_status(nvlist_t *config, char **msgid) 288789Sahrens { 2892082Seschrock zpool_status_t ret = check_status(config, B_TRUE); 290789Sahrens 291789Sahrens if (ret >= NMSGID) 292789Sahrens *msgid = NULL; 293789Sahrens else 2943975Sek110237 *msgid = zfs_msgid_table[ret]; 295789Sahrens 296789Sahrens return (ret); 297789Sahrens } 298