1*789Sahrens /* 2*789Sahrens * CDDL HEADER START 3*789Sahrens * 4*789Sahrens * The contents of this file are subject to the terms of the 5*789Sahrens * Common Development and Distribution License, Version 1.0 only 6*789Sahrens * (the "License"). You may not use this file except in compliance 7*789Sahrens * with the License. 8*789Sahrens * 9*789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*789Sahrens * or http://www.opensolaris.org/os/licensing. 11*789Sahrens * See the License for the specific language governing permissions 12*789Sahrens * and limitations under the License. 13*789Sahrens * 14*789Sahrens * When distributing Covered Code, include this CDDL HEADER in each 15*789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*789Sahrens * If applicable, add the following below this CDDL HEADER, with the 17*789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 18*789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 19*789Sahrens * 20*789Sahrens * CDDL HEADER END 21*789Sahrens */ 22*789Sahrens /* 23*789Sahrens * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24*789Sahrens * Use is subject to license terms. 25*789Sahrens */ 26*789Sahrens 27*789Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 28*789Sahrens 29*789Sahrens /* 30*789Sahrens * This file contains the functions which analyze the status of a pool. This 31*789Sahrens * include both the status of an active pool, as well as the status exported 32*789Sahrens * pools. Returns one of the ZPOOL_STATUS_* defines describing the status of 33*789Sahrens * the pool. This status is independent (to a certain degree) from the state of 34*789Sahrens * the pool. A pool's state descsribes only whether or not it is capable of 35*789Sahrens * providing the necessary fault tolerance for data. The status describes the 36*789Sahrens * overall status of devices. A pool that is online can still have a device 37*789Sahrens * that is experiencing errors. 38*789Sahrens * 39*789Sahrens * Only a subset of the possible faults can be detected using 'zpool status', 40*789Sahrens * and not all possible errors correspond to a FMA message ID. The explanation 41*789Sahrens * is left up to the caller, depending on whether it is a live pool or an 42*789Sahrens * import. 43*789Sahrens */ 44*789Sahrens 45*789Sahrens #include <libzfs.h> 46*789Sahrens #include <string.h> 47*789Sahrens #include "libzfs_impl.h" 48*789Sahrens 49*789Sahrens /* 50*789Sahrens * Message ID table. This must be kep in sync with the ZPOOL_STATUS_* defines 51*789Sahrens * in libzfs.h. Note that there are some status results which go past the end 52*789Sahrens * of this table, and hence have no associated message ID. 53*789Sahrens */ 54*789Sahrens static char *msgid_table[] = { 55*789Sahrens "ZFS-8000-14", 56*789Sahrens "ZFS-8000-2Q", 57*789Sahrens "ZFS-8000-3C", 58*789Sahrens "ZFS-8000-4J", 59*789Sahrens "ZFS-8000-5E", 60*789Sahrens "ZFS-8000-6X", 61*789Sahrens "ZFS-8000-72", 62*789Sahrens "ZFS-8000-8A", 63*789Sahrens "ZFS-8000-9P", 64*789Sahrens "ZFS-8000-A5" 65*789Sahrens }; 66*789Sahrens 67*789Sahrens #define NMSGID (sizeof (msgid_table) / sizeof (msgid_table[0])) 68*789Sahrens 69*789Sahrens /* ARGSUSED */ 70*789Sahrens static int 71*789Sahrens vdev_missing(uint64_t state, uint64_t aux, uint64_t errs) 72*789Sahrens { 73*789Sahrens return (state == VDEV_STATE_CANT_OPEN && 74*789Sahrens aux == VDEV_AUX_OPEN_FAILED); 75*789Sahrens } 76*789Sahrens 77*789Sahrens /* ARGSUSED */ 78*789Sahrens static int 79*789Sahrens vdev_errors(uint64_t state, uint64_t aux, uint64_t errs) 80*789Sahrens { 81*789Sahrens return (errs != 0); 82*789Sahrens } 83*789Sahrens 84*789Sahrens /* ARGSUSED */ 85*789Sahrens static int 86*789Sahrens vdev_broken(uint64_t state, uint64_t aux, uint64_t errs) 87*789Sahrens { 88*789Sahrens return (state == VDEV_STATE_CANT_OPEN); 89*789Sahrens } 90*789Sahrens 91*789Sahrens /* ARGSUSED */ 92*789Sahrens static int 93*789Sahrens vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs) 94*789Sahrens { 95*789Sahrens return (state == VDEV_STATE_OFFLINE); 96*789Sahrens } 97*789Sahrens 98*789Sahrens /* 99*789Sahrens * Detect if any leaf devices that have seen errors or could not be opened. 100*789Sahrens */ 101*789Sahrens static int 102*789Sahrens find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t)) 103*789Sahrens { 104*789Sahrens nvlist_t **child; 105*789Sahrens vdev_stat_t *vs; 106*789Sahrens uint_t c, children; 107*789Sahrens char *type; 108*789Sahrens 109*789Sahrens /* 110*789Sahrens * Ignore problems within a 'replacing' vdev, since we're presumably in 111*789Sahrens * the process of repairing any such errors, and don't want to call them 112*789Sahrens * out again. We'll pick up the fact that a resilver is happening 113*789Sahrens * later. 114*789Sahrens */ 115*789Sahrens verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0); 116*789Sahrens if (strcmp(type, VDEV_TYPE_REPLACING) == 0) 117*789Sahrens return (FALSE); 118*789Sahrens 119*789Sahrens if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child, 120*789Sahrens &children) == 0) { 121*789Sahrens for (c = 0; c < children; c++) 122*789Sahrens if (find_vdev_problem(child[c], func)) 123*789Sahrens return (TRUE); 124*789Sahrens } else { 125*789Sahrens verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS, 126*789Sahrens (uint64_t **)&vs, &c) == 0); 127*789Sahrens 128*789Sahrens if (func(vs->vs_state, vs->vs_aux, 129*789Sahrens vs->vs_read_errors + 130*789Sahrens vs->vs_write_errors + 131*789Sahrens vs->vs_checksum_errors)) 132*789Sahrens return (TRUE); 133*789Sahrens } 134*789Sahrens 135*789Sahrens return (FALSE); 136*789Sahrens } 137*789Sahrens 138*789Sahrens /* 139*789Sahrens * Active pool health status. 140*789Sahrens * 141*789Sahrens * To determine the status for a pool, we make several passes over the config, 142*789Sahrens * picking the most egregious error we find. In order of importance, we do the 143*789Sahrens * following: 144*789Sahrens * 145*789Sahrens * - Check for a complete and valid configuration 146*789Sahrens * - Look for any missing devices 147*789Sahrens * - Look for any devices showing errors 148*789Sahrens * - Check for any data errors 149*789Sahrens * - Check for any resilvering devices 150*789Sahrens * 151*789Sahrens * There can obviously be multiple errors within a single pool, so this routine 152*789Sahrens * only picks the most damaging of all the current errors to report. 153*789Sahrens */ 154*789Sahrens static zpool_status_t 155*789Sahrens check_status(nvlist_t *config, int isimport) 156*789Sahrens { 157*789Sahrens nvlist_t *nvroot; 158*789Sahrens vdev_stat_t *vs; 159*789Sahrens uint_t vsc; 160*789Sahrens 161*789Sahrens verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 162*789Sahrens &nvroot) == 0); 163*789Sahrens verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS, 164*789Sahrens (uint64_t **)&vs, &vsc) == 0); 165*789Sahrens 166*789Sahrens /* 167*789Sahrens * Check that the config is complete. 168*789Sahrens */ 169*789Sahrens if (vs->vs_state == VDEV_STATE_CANT_OPEN && 170*789Sahrens vs->vs_aux == VDEV_AUX_BAD_GUID_SUM) { 171*789Sahrens return (ZPOOL_STATUS_BAD_GUID_SUM); 172*789Sahrens } 173*789Sahrens 174*789Sahrens /* 175*789Sahrens * Missing devices 176*789Sahrens */ 177*789Sahrens if (find_vdev_problem(nvroot, vdev_missing)) { 178*789Sahrens if (vs->vs_state == VDEV_STATE_CANT_OPEN) 179*789Sahrens return (ZPOOL_STATUS_MISSING_DEV_NR); 180*789Sahrens else 181*789Sahrens return (ZPOOL_STATUS_MISSING_DEV_R); 182*789Sahrens } 183*789Sahrens 184*789Sahrens /* 185*789Sahrens * Devices with corrupted labels. 186*789Sahrens */ 187*789Sahrens if (find_vdev_problem(nvroot, vdev_broken)) { 188*789Sahrens if (vs->vs_state == VDEV_STATE_CANT_OPEN) 189*789Sahrens return (ZPOOL_STATUS_CORRUPT_LABEL_NR); 190*789Sahrens else 191*789Sahrens return (ZPOOL_STATUS_CORRUPT_LABEL_R); 192*789Sahrens } 193*789Sahrens 194*789Sahrens /* 195*789Sahrens * Devices with errors 196*789Sahrens */ 197*789Sahrens if (!isimport && find_vdev_problem(nvroot, vdev_errors)) 198*789Sahrens return (ZPOOL_STATUS_FAILING_DEV); 199*789Sahrens 200*789Sahrens /* 201*789Sahrens * Offlined devices 202*789Sahrens */ 203*789Sahrens if (find_vdev_problem(nvroot, vdev_offlined)) 204*789Sahrens return (ZPOOL_STATUS_OFFLINE_DEV); 205*789Sahrens 206*789Sahrens /* 207*789Sahrens * Currently resilvering 208*789Sahrens */ 209*789Sahrens if (!vs->vs_scrub_complete && vs->vs_scrub_type == POOL_SCRUB_RESILVER) 210*789Sahrens return (ZPOOL_STATUS_RESILVERING); 211*789Sahrens 212*789Sahrens /* 213*789Sahrens * We currently have no way to detect the following errors: 214*789Sahrens * 215*789Sahrens * CORRUPT_CACHE 216*789Sahrens * VERSION_MISMATCH 217*789Sahrens * CORRUPT_POOL 218*789Sahrens * CORRUPT_DATA 219*789Sahrens */ 220*789Sahrens 221*789Sahrens return (ZPOOL_STATUS_OK); 222*789Sahrens } 223*789Sahrens 224*789Sahrens zpool_status_t 225*789Sahrens zpool_get_status(zpool_handle_t *zhp, char **msgid) 226*789Sahrens { 227*789Sahrens zpool_status_t ret = check_status(zhp->zpool_config, FALSE); 228*789Sahrens 229*789Sahrens if (ret >= NMSGID) 230*789Sahrens *msgid = NULL; 231*789Sahrens else 232*789Sahrens *msgid = msgid_table[ret]; 233*789Sahrens 234*789Sahrens return (ret); 235*789Sahrens } 236*789Sahrens 237*789Sahrens zpool_status_t 238*789Sahrens zpool_import_status(nvlist_t *config, char **msgid) 239*789Sahrens { 240*789Sahrens zpool_status_t ret = check_status(config, TRUE); 241*789Sahrens 242*789Sahrens if (ret >= NMSGID) 243*789Sahrens *msgid = NULL; 244*789Sahrens else 245*789Sahrens *msgid = msgid_table[ret]; 246*789Sahrens 247*789Sahrens return (ret); 248*789Sahrens } 249