1789Sahrens /* 2789Sahrens * CDDL HEADER START 3789Sahrens * 4789Sahrens * The contents of this file are subject to the terms of the 51544Seschrock * Common Development and Distribution License (the "License"). 61544Seschrock * You may not use this file except in compliance with the License. 7789Sahrens * 8789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9789Sahrens * or http://www.opensolaris.org/os/licensing. 10789Sahrens * See the License for the specific language governing permissions 11789Sahrens * and limitations under the License. 12789Sahrens * 13789Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15789Sahrens * If applicable, add the following below this CDDL HEADER, with the 16789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18789Sahrens * 19789Sahrens * CDDL HEADER END 20789Sahrens */ 21789Sahrens /* 221544Seschrock * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23789Sahrens * Use is subject to license terms. 24789Sahrens */ 25789Sahrens 26789Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 27789Sahrens 28789Sahrens /* 29789Sahrens * This file contains the functions which analyze the status of a pool. This 30789Sahrens * include both the status of an active pool, as well as the status exported 31789Sahrens * pools. Returns one of the ZPOOL_STATUS_* defines describing the status of 32789Sahrens * the pool. This status is independent (to a certain degree) from the state of 33789Sahrens * the pool. A pool's state descsribes only whether or not it is capable of 34789Sahrens * providing the necessary fault tolerance for data. The status describes the 35789Sahrens * overall status of devices. A pool that is online can still have a device 36789Sahrens * that is experiencing errors. 37789Sahrens * 38789Sahrens * Only a subset of the possible faults can be detected using 'zpool status', 39789Sahrens * and not all possible errors correspond to a FMA message ID. The explanation 40789Sahrens * is left up to the caller, depending on whether it is a live pool or an 41789Sahrens * import. 42789Sahrens */ 43789Sahrens 44789Sahrens #include <libzfs.h> 45789Sahrens #include <string.h> 46789Sahrens #include "libzfs_impl.h" 47789Sahrens 48789Sahrens /* 49789Sahrens * Message ID table. This must be kep in sync with the ZPOOL_STATUS_* defines 50789Sahrens * in libzfs.h. Note that there are some status results which go past the end 51789Sahrens * of this table, and hence have no associated message ID. 52789Sahrens */ 53789Sahrens static char *msgid_table[] = { 54789Sahrens "ZFS-8000-14", 55789Sahrens "ZFS-8000-2Q", 56789Sahrens "ZFS-8000-3C", 57789Sahrens "ZFS-8000-4J", 58789Sahrens "ZFS-8000-5E", 59789Sahrens "ZFS-8000-6X", 60789Sahrens "ZFS-8000-72", 61789Sahrens "ZFS-8000-8A", 62789Sahrens "ZFS-8000-9P", 63789Sahrens "ZFS-8000-A5" 64789Sahrens }; 65789Sahrens 661544Seschrock /* 671544Seschrock * If the pool is active, a certain class of static errors is overridden by the 681544Seschrock * faults as analayzed by FMA. These faults have separate knowledge articles, 691544Seschrock * and the article referred to by 'zpool status' must match that indicated by 701544Seschrock * the syslog error message. We override missing data as well as corrupt pool. 711544Seschrock */ 721544Seschrock static char *msgid_table_active[] = { 731544Seschrock "ZFS-8000-14", 741544Seschrock "ZFS-8000-D3", /* overridden */ 751544Seschrock "ZFS-8000-D3", /* overridden */ 761544Seschrock "ZFS-8000-4J", 771544Seschrock "ZFS-8000-5E", 781544Seschrock "ZFS-8000-6X", 791544Seschrock "ZFS-8000-CS", /* overridden */ 801544Seschrock "ZFS-8000-8A", 811544Seschrock "ZFS-8000-9P", 821544Seschrock "ZFS-8000-CS", /* overridden */ 831544Seschrock }; 841544Seschrock 85789Sahrens #define NMSGID (sizeof (msgid_table) / sizeof (msgid_table[0])) 86789Sahrens 87789Sahrens /* ARGSUSED */ 88789Sahrens static int 89789Sahrens vdev_missing(uint64_t state, uint64_t aux, uint64_t errs) 90789Sahrens { 91789Sahrens return (state == VDEV_STATE_CANT_OPEN && 92789Sahrens aux == VDEV_AUX_OPEN_FAILED); 93789Sahrens } 94789Sahrens 95789Sahrens /* ARGSUSED */ 96789Sahrens static int 97789Sahrens vdev_errors(uint64_t state, uint64_t aux, uint64_t errs) 98789Sahrens { 99789Sahrens return (errs != 0); 100789Sahrens } 101789Sahrens 102789Sahrens /* ARGSUSED */ 103789Sahrens static int 104789Sahrens vdev_broken(uint64_t state, uint64_t aux, uint64_t errs) 105789Sahrens { 106789Sahrens return (state == VDEV_STATE_CANT_OPEN); 107789Sahrens } 108789Sahrens 109789Sahrens /* ARGSUSED */ 110789Sahrens static int 111789Sahrens vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs) 112789Sahrens { 113789Sahrens return (state == VDEV_STATE_OFFLINE); 114789Sahrens } 115789Sahrens 116789Sahrens /* 117789Sahrens * Detect if any leaf devices that have seen errors or could not be opened. 118789Sahrens */ 119789Sahrens static int 120789Sahrens find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t)) 121789Sahrens { 122789Sahrens nvlist_t **child; 123789Sahrens vdev_stat_t *vs; 124789Sahrens uint_t c, children; 125789Sahrens char *type; 126789Sahrens 127789Sahrens /* 128789Sahrens * Ignore problems within a 'replacing' vdev, since we're presumably in 129789Sahrens * the process of repairing any such errors, and don't want to call them 130789Sahrens * out again. We'll pick up the fact that a resilver is happening 131789Sahrens * later. 132789Sahrens */ 133789Sahrens verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0); 134789Sahrens if (strcmp(type, VDEV_TYPE_REPLACING) == 0) 135789Sahrens return (FALSE); 136789Sahrens 137789Sahrens if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child, 138789Sahrens &children) == 0) { 139789Sahrens for (c = 0; c < children; c++) 140789Sahrens if (find_vdev_problem(child[c], func)) 141789Sahrens return (TRUE); 142789Sahrens } else { 143789Sahrens verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS, 144789Sahrens (uint64_t **)&vs, &c) == 0); 145789Sahrens 146789Sahrens if (func(vs->vs_state, vs->vs_aux, 147789Sahrens vs->vs_read_errors + 148789Sahrens vs->vs_write_errors + 149789Sahrens vs->vs_checksum_errors)) 150789Sahrens return (TRUE); 151789Sahrens } 152789Sahrens 153789Sahrens return (FALSE); 154789Sahrens } 155789Sahrens 156789Sahrens /* 157789Sahrens * Active pool health status. 158789Sahrens * 159789Sahrens * To determine the status for a pool, we make several passes over the config, 160789Sahrens * picking the most egregious error we find. In order of importance, we do the 161789Sahrens * following: 162789Sahrens * 163789Sahrens * - Check for a complete and valid configuration 1641544Seschrock * - Look for any missing devices in a non-replicated config 1651544Seschrock * - Check for any data errors 1661544Seschrock * - Check for any missing devices in a replicated config 167789Sahrens * - Look for any devices showing errors 168789Sahrens * - Check for any resilvering devices 169789Sahrens * 170789Sahrens * There can obviously be multiple errors within a single pool, so this routine 171789Sahrens * only picks the most damaging of all the current errors to report. 172789Sahrens */ 173789Sahrens static zpool_status_t 174789Sahrens check_status(nvlist_t *config, int isimport) 175789Sahrens { 176789Sahrens nvlist_t *nvroot; 177789Sahrens vdev_stat_t *vs; 178789Sahrens uint_t vsc; 1791544Seschrock uint64_t nerr; 180*1760Seschrock uint64_t version; 181789Sahrens 182*1760Seschrock verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, 183*1760Seschrock &version) == 0); 184789Sahrens verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 185789Sahrens &nvroot) == 0); 186789Sahrens verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS, 187789Sahrens (uint64_t **)&vs, &vsc) == 0); 188789Sahrens 189789Sahrens /* 190*1760Seschrock * Newer on-disk version. 191*1760Seschrock */ 192*1760Seschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 193*1760Seschrock vs->vs_aux == VDEV_AUX_VERSION_NEWER) 194*1760Seschrock return (ZPOOL_STATUS_VERSION_NEWER); 195*1760Seschrock 196*1760Seschrock /* 197789Sahrens * Check that the config is complete. 198789Sahrens */ 199789Sahrens if (vs->vs_state == VDEV_STATE_CANT_OPEN && 2001544Seschrock vs->vs_aux == VDEV_AUX_BAD_GUID_SUM) 201789Sahrens return (ZPOOL_STATUS_BAD_GUID_SUM); 2021544Seschrock 2031544Seschrock /* 2041544Seschrock * Missing devices in non-replicated config. 2051544Seschrock */ 2061544Seschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 2071544Seschrock find_vdev_problem(nvroot, vdev_missing)) 2081544Seschrock return (ZPOOL_STATUS_MISSING_DEV_NR); 2091544Seschrock 2101544Seschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 2111544Seschrock find_vdev_problem(nvroot, vdev_broken)) 2121544Seschrock return (ZPOOL_STATUS_CORRUPT_LABEL_NR); 2131544Seschrock 2141544Seschrock /* 2151544Seschrock * Corrupted pool metadata 2161544Seschrock */ 2171544Seschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 2181544Seschrock vs->vs_aux == VDEV_AUX_CORRUPT_DATA) 2191544Seschrock return (ZPOOL_STATUS_CORRUPT_POOL); 2201544Seschrock 2211544Seschrock /* 2221544Seschrock * Persistent data errors. 2231544Seschrock */ 2241544Seschrock if (!isimport) { 2251544Seschrock if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT, 2261544Seschrock &nerr) == 0 && nerr != 0) 2271544Seschrock return (ZPOOL_STATUS_CORRUPT_DATA); 228789Sahrens } 229789Sahrens 230789Sahrens /* 2311544Seschrock * Missing devices in a replicated config. 232789Sahrens */ 2331544Seschrock if (find_vdev_problem(nvroot, vdev_missing)) 2341544Seschrock return (ZPOOL_STATUS_MISSING_DEV_R); 2351544Seschrock if (find_vdev_problem(nvroot, vdev_broken)) 2361544Seschrock return (ZPOOL_STATUS_CORRUPT_LABEL_R); 237789Sahrens 238789Sahrens /* 239789Sahrens * Devices with errors 240789Sahrens */ 241789Sahrens if (!isimport && find_vdev_problem(nvroot, vdev_errors)) 242789Sahrens return (ZPOOL_STATUS_FAILING_DEV); 243789Sahrens 244789Sahrens /* 245789Sahrens * Offlined devices 246789Sahrens */ 247789Sahrens if (find_vdev_problem(nvroot, vdev_offlined)) 248789Sahrens return (ZPOOL_STATUS_OFFLINE_DEV); 249789Sahrens 250789Sahrens /* 251789Sahrens * Currently resilvering 252789Sahrens */ 253789Sahrens if (!vs->vs_scrub_complete && vs->vs_scrub_type == POOL_SCRUB_RESILVER) 254789Sahrens return (ZPOOL_STATUS_RESILVERING); 255789Sahrens 256789Sahrens /* 257*1760Seschrock * Outdated, but usable, version 258789Sahrens */ 259*1760Seschrock if (version < ZFS_VERSION) 260*1760Seschrock return (ZPOOL_STATUS_VERSION_OLDER); 261789Sahrens 262789Sahrens return (ZPOOL_STATUS_OK); 263789Sahrens } 264789Sahrens 265789Sahrens zpool_status_t 266789Sahrens zpool_get_status(zpool_handle_t *zhp, char **msgid) 267789Sahrens { 268789Sahrens zpool_status_t ret = check_status(zhp->zpool_config, FALSE); 269789Sahrens 270789Sahrens if (ret >= NMSGID) 271789Sahrens *msgid = NULL; 272789Sahrens else 2731544Seschrock *msgid = msgid_table_active[ret]; 274789Sahrens 275789Sahrens return (ret); 276789Sahrens } 277789Sahrens 278789Sahrens zpool_status_t 279789Sahrens zpool_import_status(nvlist_t *config, char **msgid) 280789Sahrens { 281789Sahrens zpool_status_t ret = check_status(config, TRUE); 282789Sahrens 283789Sahrens if (ret >= NMSGID) 284789Sahrens *msgid = NULL; 285789Sahrens else 286789Sahrens *msgid = msgid_table[ret]; 287789Sahrens 288789Sahrens return (ret); 289789Sahrens } 290