1*eda14cbcSMatt Macy /* 2*eda14cbcSMatt Macy * CDDL HEADER START 3*eda14cbcSMatt Macy * 4*eda14cbcSMatt Macy * The contents of this file are subject to the terms of the 5*eda14cbcSMatt Macy * Common Development and Distribution License (the "License"). 6*eda14cbcSMatt Macy * You may not use this file except in compliance with the License. 7*eda14cbcSMatt Macy * 8*eda14cbcSMatt Macy * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*eda14cbcSMatt Macy * or http://www.opensolaris.org/os/licensing. 10*eda14cbcSMatt Macy * See the License for the specific language governing permissions 11*eda14cbcSMatt Macy * and limitations under the License. 12*eda14cbcSMatt Macy * 13*eda14cbcSMatt Macy * When distributing Covered Code, include this CDDL HEADER in each 14*eda14cbcSMatt Macy * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*eda14cbcSMatt Macy * If applicable, add the following below this CDDL HEADER, with the 16*eda14cbcSMatt Macy * fields enclosed by brackets "[]" replaced with your own identifying 17*eda14cbcSMatt Macy * information: Portions Copyright [yyyy] [name of copyright owner] 18*eda14cbcSMatt Macy * 19*eda14cbcSMatt Macy * CDDL HEADER END 20*eda14cbcSMatt Macy */ 21*eda14cbcSMatt Macy /* 22*eda14cbcSMatt Macy * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23*eda14cbcSMatt Macy * Use is subject to license terms. 24*eda14cbcSMatt Macy */ 25*eda14cbcSMatt Macy 26*eda14cbcSMatt Macy /* 27*eda14cbcSMatt Macy * Copyright (c) 2012, 2016 by Delphix. All rights reserved. 28*eda14cbcSMatt Macy */ 29*eda14cbcSMatt Macy 30*eda14cbcSMatt Macy #include <sys/zfs_context.h> 31*eda14cbcSMatt Macy #include <sys/spa.h> 32*eda14cbcSMatt Macy #include <sys/vdev_impl.h> 33*eda14cbcSMatt Macy #include <sys/zio.h> 34*eda14cbcSMatt Macy #include <sys/fs/zfs.h> 35*eda14cbcSMatt Macy 36*eda14cbcSMatt Macy /* 37*eda14cbcSMatt Macy * Virtual device vector for the pool's root vdev. 38*eda14cbcSMatt Macy */ 39*eda14cbcSMatt Macy 40*eda14cbcSMatt Macy static uint64_t 41*eda14cbcSMatt Macy vdev_root_core_tvds(vdev_t *vd) 42*eda14cbcSMatt Macy { 43*eda14cbcSMatt Macy uint64_t tvds = 0; 44*eda14cbcSMatt Macy 45*eda14cbcSMatt Macy for (uint64_t c = 0; c < vd->vdev_children; c++) { 46*eda14cbcSMatt Macy vdev_t *cvd = vd->vdev_child[c]; 47*eda14cbcSMatt Macy 48*eda14cbcSMatt Macy if (!cvd->vdev_ishole && !cvd->vdev_islog && 49*eda14cbcSMatt Macy cvd->vdev_ops != &vdev_indirect_ops) { 50*eda14cbcSMatt Macy tvds++; 51*eda14cbcSMatt Macy } 52*eda14cbcSMatt Macy } 53*eda14cbcSMatt Macy 54*eda14cbcSMatt Macy return (tvds); 55*eda14cbcSMatt Macy } 56*eda14cbcSMatt Macy 57*eda14cbcSMatt Macy /* 58*eda14cbcSMatt Macy * We should be able to tolerate one failure with absolutely no damage 59*eda14cbcSMatt Macy * to our metadata. Two failures will take out space maps, a bunch of 60*eda14cbcSMatt Macy * indirect block trees, meta dnodes, dnodes, etc. Probably not a happy 61*eda14cbcSMatt Macy * place to live. When we get smarter, we can liberalize this policy. 62*eda14cbcSMatt Macy * e.g. If we haven't lost two consecutive top-level vdevs, then we are 63*eda14cbcSMatt Macy * probably fine. Adding bean counters during alloc/free can make this 64*eda14cbcSMatt Macy * future guesswork more accurate. 65*eda14cbcSMatt Macy */ 66*eda14cbcSMatt Macy static boolean_t 67*eda14cbcSMatt Macy too_many_errors(vdev_t *vd, uint64_t numerrors) 68*eda14cbcSMatt Macy { 69*eda14cbcSMatt Macy uint64_t tvds; 70*eda14cbcSMatt Macy 71*eda14cbcSMatt Macy if (numerrors == 0) 72*eda14cbcSMatt Macy return (B_FALSE); 73*eda14cbcSMatt Macy 74*eda14cbcSMatt Macy tvds = vdev_root_core_tvds(vd); 75*eda14cbcSMatt Macy ASSERT3U(numerrors, <=, tvds); 76*eda14cbcSMatt Macy 77*eda14cbcSMatt Macy if (numerrors == tvds) 78*eda14cbcSMatt Macy return (B_TRUE); 79*eda14cbcSMatt Macy 80*eda14cbcSMatt Macy return (numerrors > spa_missing_tvds_allowed(vd->vdev_spa)); 81*eda14cbcSMatt Macy } 82*eda14cbcSMatt Macy 83*eda14cbcSMatt Macy static int 84*eda14cbcSMatt Macy vdev_root_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, 85*eda14cbcSMatt Macy uint64_t *ashift, uint64_t *pshift) 86*eda14cbcSMatt Macy { 87*eda14cbcSMatt Macy spa_t *spa = vd->vdev_spa; 88*eda14cbcSMatt Macy int lasterror = 0; 89*eda14cbcSMatt Macy int numerrors = 0; 90*eda14cbcSMatt Macy 91*eda14cbcSMatt Macy if (vd->vdev_children == 0) { 92*eda14cbcSMatt Macy vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; 93*eda14cbcSMatt Macy return (SET_ERROR(EINVAL)); 94*eda14cbcSMatt Macy } 95*eda14cbcSMatt Macy 96*eda14cbcSMatt Macy vdev_open_children(vd); 97*eda14cbcSMatt Macy 98*eda14cbcSMatt Macy for (int c = 0; c < vd->vdev_children; c++) { 99*eda14cbcSMatt Macy vdev_t *cvd = vd->vdev_child[c]; 100*eda14cbcSMatt Macy 101*eda14cbcSMatt Macy if (cvd->vdev_open_error && !cvd->vdev_islog && 102*eda14cbcSMatt Macy cvd->vdev_ops != &vdev_indirect_ops) { 103*eda14cbcSMatt Macy lasterror = cvd->vdev_open_error; 104*eda14cbcSMatt Macy numerrors++; 105*eda14cbcSMatt Macy } 106*eda14cbcSMatt Macy } 107*eda14cbcSMatt Macy 108*eda14cbcSMatt Macy if (spa_load_state(spa) != SPA_LOAD_NONE) 109*eda14cbcSMatt Macy spa_set_missing_tvds(spa, numerrors); 110*eda14cbcSMatt Macy 111*eda14cbcSMatt Macy if (too_many_errors(vd, numerrors)) { 112*eda14cbcSMatt Macy vd->vdev_stat.vs_aux = VDEV_AUX_NO_REPLICAS; 113*eda14cbcSMatt Macy return (lasterror); 114*eda14cbcSMatt Macy } 115*eda14cbcSMatt Macy 116*eda14cbcSMatt Macy *asize = 0; 117*eda14cbcSMatt Macy *max_asize = 0; 118*eda14cbcSMatt Macy *ashift = 0; 119*eda14cbcSMatt Macy *pshift = 0; 120*eda14cbcSMatt Macy 121*eda14cbcSMatt Macy return (0); 122*eda14cbcSMatt Macy } 123*eda14cbcSMatt Macy 124*eda14cbcSMatt Macy static void 125*eda14cbcSMatt Macy vdev_root_close(vdev_t *vd) 126*eda14cbcSMatt Macy { 127*eda14cbcSMatt Macy for (int c = 0; c < vd->vdev_children; c++) 128*eda14cbcSMatt Macy vdev_close(vd->vdev_child[c]); 129*eda14cbcSMatt Macy } 130*eda14cbcSMatt Macy 131*eda14cbcSMatt Macy static void 132*eda14cbcSMatt Macy vdev_root_state_change(vdev_t *vd, int faulted, int degraded) 133*eda14cbcSMatt Macy { 134*eda14cbcSMatt Macy if (too_many_errors(vd, faulted)) { 135*eda14cbcSMatt Macy vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, 136*eda14cbcSMatt Macy VDEV_AUX_NO_REPLICAS); 137*eda14cbcSMatt Macy } else if (degraded || faulted) { 138*eda14cbcSMatt Macy vdev_set_state(vd, B_FALSE, VDEV_STATE_DEGRADED, VDEV_AUX_NONE); 139*eda14cbcSMatt Macy } else { 140*eda14cbcSMatt Macy vdev_set_state(vd, B_FALSE, VDEV_STATE_HEALTHY, VDEV_AUX_NONE); 141*eda14cbcSMatt Macy } 142*eda14cbcSMatt Macy } 143*eda14cbcSMatt Macy 144*eda14cbcSMatt Macy vdev_ops_t vdev_root_ops = { 145*eda14cbcSMatt Macy .vdev_op_open = vdev_root_open, 146*eda14cbcSMatt Macy .vdev_op_close = vdev_root_close, 147*eda14cbcSMatt Macy .vdev_op_asize = vdev_default_asize, 148*eda14cbcSMatt Macy .vdev_op_io_start = NULL, /* not applicable to the root */ 149*eda14cbcSMatt Macy .vdev_op_io_done = NULL, /* not applicable to the root */ 150*eda14cbcSMatt Macy .vdev_op_state_change = vdev_root_state_change, 151*eda14cbcSMatt Macy .vdev_op_need_resilver = NULL, 152*eda14cbcSMatt Macy .vdev_op_hold = NULL, 153*eda14cbcSMatt Macy .vdev_op_rele = NULL, 154*eda14cbcSMatt Macy .vdev_op_remap = NULL, 155*eda14cbcSMatt Macy .vdev_op_xlate = NULL, 156*eda14cbcSMatt Macy .vdev_op_type = VDEV_TYPE_ROOT, /* name of this vdev type */ 157*eda14cbcSMatt Macy .vdev_op_leaf = B_FALSE /* not a leaf vdev */ 158*eda14cbcSMatt Macy }; 159