1eda14cbcSMatt Macy /* 2eda14cbcSMatt Macy * CDDL HEADER START 3eda14cbcSMatt Macy * 4eda14cbcSMatt Macy * The contents of this file are subject to the terms of the 5eda14cbcSMatt Macy * Common Development and Distribution License (the "License"). 6eda14cbcSMatt Macy * You may not use this file except in compliance with the License. 7eda14cbcSMatt Macy * 8eda14cbcSMatt Macy * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9eda14cbcSMatt Macy * or http://www.opensolaris.org/os/licensing. 10eda14cbcSMatt Macy * See the License for the specific language governing permissions 11eda14cbcSMatt Macy * and limitations under the License. 12eda14cbcSMatt Macy * 13eda14cbcSMatt Macy * When distributing Covered Code, include this CDDL HEADER in each 14eda14cbcSMatt Macy * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15eda14cbcSMatt Macy * If applicable, add the following below this CDDL HEADER, with the 16eda14cbcSMatt Macy * fields enclosed by brackets "[]" replaced with your own identifying 17eda14cbcSMatt Macy * information: Portions Copyright [yyyy] [name of copyright owner] 18eda14cbcSMatt Macy * 19eda14cbcSMatt Macy * CDDL HEADER END 20eda14cbcSMatt Macy */ 21eda14cbcSMatt Macy /* 22eda14cbcSMatt Macy * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23eda14cbcSMatt Macy * Use is subject to license terms. 24eda14cbcSMatt Macy */ 25eda14cbcSMatt Macy 26eda14cbcSMatt Macy /* 27eda14cbcSMatt Macy * Copyright (c) 2012, 2016 by Delphix. All rights reserved. 28eda14cbcSMatt Macy */ 29eda14cbcSMatt Macy 30eda14cbcSMatt Macy #include <sys/zfs_context.h> 31eda14cbcSMatt Macy #include <sys/spa.h> 32eda14cbcSMatt Macy #include <sys/vdev_impl.h> 33eda14cbcSMatt Macy #include <sys/zio.h> 34eda14cbcSMatt Macy #include <sys/fs/zfs.h> 35eda14cbcSMatt Macy 36eda14cbcSMatt Macy /* 37eda14cbcSMatt Macy * Virtual device vector for the pool's root vdev. 38eda14cbcSMatt Macy */ 39eda14cbcSMatt Macy 40eda14cbcSMatt Macy static uint64_t 41eda14cbcSMatt Macy vdev_root_core_tvds(vdev_t *vd) 42eda14cbcSMatt Macy { 43eda14cbcSMatt Macy uint64_t tvds = 0; 44eda14cbcSMatt Macy 45eda14cbcSMatt Macy for (uint64_t c = 0; c < vd->vdev_children; c++) { 46eda14cbcSMatt Macy vdev_t *cvd = vd->vdev_child[c]; 47eda14cbcSMatt Macy 48eda14cbcSMatt Macy if (!cvd->vdev_ishole && !cvd->vdev_islog && 49eda14cbcSMatt Macy cvd->vdev_ops != &vdev_indirect_ops) { 50eda14cbcSMatt Macy tvds++; 51eda14cbcSMatt Macy } 52eda14cbcSMatt Macy } 53eda14cbcSMatt Macy 54eda14cbcSMatt Macy return (tvds); 55eda14cbcSMatt Macy } 56eda14cbcSMatt Macy 57eda14cbcSMatt Macy /* 58eda14cbcSMatt Macy * We should be able to tolerate one failure with absolutely no damage 59eda14cbcSMatt Macy * to our metadata. Two failures will take out space maps, a bunch of 60eda14cbcSMatt Macy * indirect block trees, meta dnodes, dnodes, etc. Probably not a happy 61eda14cbcSMatt Macy * place to live. When we get smarter, we can liberalize this policy. 62eda14cbcSMatt Macy * e.g. If we haven't lost two consecutive top-level vdevs, then we are 63eda14cbcSMatt Macy * probably fine. Adding bean counters during alloc/free can make this 64eda14cbcSMatt Macy * future guesswork more accurate. 65eda14cbcSMatt Macy */ 66eda14cbcSMatt Macy static boolean_t 67eda14cbcSMatt Macy too_many_errors(vdev_t *vd, uint64_t numerrors) 68eda14cbcSMatt Macy { 69eda14cbcSMatt Macy uint64_t tvds; 70eda14cbcSMatt Macy 71eda14cbcSMatt Macy if (numerrors == 0) 72eda14cbcSMatt Macy return (B_FALSE); 73eda14cbcSMatt Macy 74eda14cbcSMatt Macy tvds = vdev_root_core_tvds(vd); 75eda14cbcSMatt Macy ASSERT3U(numerrors, <=, tvds); 76eda14cbcSMatt Macy 77eda14cbcSMatt Macy if (numerrors == tvds) 78eda14cbcSMatt Macy return (B_TRUE); 79eda14cbcSMatt Macy 80eda14cbcSMatt Macy return (numerrors > spa_missing_tvds_allowed(vd->vdev_spa)); 81eda14cbcSMatt Macy } 82eda14cbcSMatt Macy 83eda14cbcSMatt Macy static int 84eda14cbcSMatt Macy vdev_root_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, 85eda14cbcSMatt Macy uint64_t *ashift, uint64_t *pshift) 86eda14cbcSMatt Macy { 87eda14cbcSMatt Macy spa_t *spa = vd->vdev_spa; 88eda14cbcSMatt Macy int lasterror = 0; 89eda14cbcSMatt Macy int numerrors = 0; 90eda14cbcSMatt Macy 91eda14cbcSMatt Macy if (vd->vdev_children == 0) { 92eda14cbcSMatt Macy vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; 93eda14cbcSMatt Macy return (SET_ERROR(EINVAL)); 94eda14cbcSMatt Macy } 95eda14cbcSMatt Macy 96eda14cbcSMatt Macy vdev_open_children(vd); 97eda14cbcSMatt Macy 98eda14cbcSMatt Macy for (int c = 0; c < vd->vdev_children; c++) { 99eda14cbcSMatt Macy vdev_t *cvd = vd->vdev_child[c]; 100eda14cbcSMatt Macy 101eda14cbcSMatt Macy if (cvd->vdev_open_error && !cvd->vdev_islog && 102eda14cbcSMatt Macy cvd->vdev_ops != &vdev_indirect_ops) { 103eda14cbcSMatt Macy lasterror = cvd->vdev_open_error; 104eda14cbcSMatt Macy numerrors++; 105eda14cbcSMatt Macy } 106eda14cbcSMatt Macy } 107eda14cbcSMatt Macy 108eda14cbcSMatt Macy if (spa_load_state(spa) != SPA_LOAD_NONE) 109eda14cbcSMatt Macy spa_set_missing_tvds(spa, numerrors); 110eda14cbcSMatt Macy 111eda14cbcSMatt Macy if (too_many_errors(vd, numerrors)) { 112eda14cbcSMatt Macy vd->vdev_stat.vs_aux = VDEV_AUX_NO_REPLICAS; 113eda14cbcSMatt Macy return (lasterror); 114eda14cbcSMatt Macy } 115eda14cbcSMatt Macy 116eda14cbcSMatt Macy *asize = 0; 117eda14cbcSMatt Macy *max_asize = 0; 118eda14cbcSMatt Macy *ashift = 0; 119eda14cbcSMatt Macy *pshift = 0; 120eda14cbcSMatt Macy 121eda14cbcSMatt Macy return (0); 122eda14cbcSMatt Macy } 123eda14cbcSMatt Macy 124eda14cbcSMatt Macy static void 125eda14cbcSMatt Macy vdev_root_close(vdev_t *vd) 126eda14cbcSMatt Macy { 127eda14cbcSMatt Macy for (int c = 0; c < vd->vdev_children; c++) 128eda14cbcSMatt Macy vdev_close(vd->vdev_child[c]); 129eda14cbcSMatt Macy } 130eda14cbcSMatt Macy 131eda14cbcSMatt Macy static void 132eda14cbcSMatt Macy vdev_root_state_change(vdev_t *vd, int faulted, int degraded) 133eda14cbcSMatt Macy { 134eda14cbcSMatt Macy if (too_many_errors(vd, faulted)) { 135eda14cbcSMatt Macy vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, 136eda14cbcSMatt Macy VDEV_AUX_NO_REPLICAS); 137eda14cbcSMatt Macy } else if (degraded || faulted) { 138eda14cbcSMatt Macy vdev_set_state(vd, B_FALSE, VDEV_STATE_DEGRADED, VDEV_AUX_NONE); 139eda14cbcSMatt Macy } else { 140eda14cbcSMatt Macy vdev_set_state(vd, B_FALSE, VDEV_STATE_HEALTHY, VDEV_AUX_NONE); 141eda14cbcSMatt Macy } 142eda14cbcSMatt Macy } 143eda14cbcSMatt Macy 144eda14cbcSMatt Macy vdev_ops_t vdev_root_ops = { 145*7877fdebSMatt Macy .vdev_op_init = NULL, 146*7877fdebSMatt Macy .vdev_op_fini = NULL, 147eda14cbcSMatt Macy .vdev_op_open = vdev_root_open, 148eda14cbcSMatt Macy .vdev_op_close = vdev_root_close, 149eda14cbcSMatt Macy .vdev_op_asize = vdev_default_asize, 150*7877fdebSMatt Macy .vdev_op_min_asize = vdev_default_min_asize, 151*7877fdebSMatt Macy .vdev_op_min_alloc = NULL, 152eda14cbcSMatt Macy .vdev_op_io_start = NULL, /* not applicable to the root */ 153eda14cbcSMatt Macy .vdev_op_io_done = NULL, /* not applicable to the root */ 154eda14cbcSMatt Macy .vdev_op_state_change = vdev_root_state_change, 155eda14cbcSMatt Macy .vdev_op_need_resilver = NULL, 156eda14cbcSMatt Macy .vdev_op_hold = NULL, 157eda14cbcSMatt Macy .vdev_op_rele = NULL, 158eda14cbcSMatt Macy .vdev_op_remap = NULL, 159eda14cbcSMatt Macy .vdev_op_xlate = NULL, 160*7877fdebSMatt Macy .vdev_op_rebuild_asize = NULL, 161*7877fdebSMatt Macy .vdev_op_metaslab_init = NULL, 162*7877fdebSMatt Macy .vdev_op_config_generate = NULL, 163*7877fdebSMatt Macy .vdev_op_nparity = NULL, 164*7877fdebSMatt Macy .vdev_op_ndisks = NULL, 165eda14cbcSMatt Macy .vdev_op_type = VDEV_TYPE_ROOT, /* name of this vdev type */ 166eda14cbcSMatt Macy .vdev_op_leaf = B_FALSE /* not a leaf vdev */ 167eda14cbcSMatt Macy }; 168