xref: /freebsd-src/sys/contrib/openzfs/module/zfs/vdev_root.c (revision eda14cbc264d6969b02f2b1994cef11148e914f1)
1*eda14cbcSMatt Macy /*
2*eda14cbcSMatt Macy  * CDDL HEADER START
3*eda14cbcSMatt Macy  *
4*eda14cbcSMatt Macy  * The contents of this file are subject to the terms of the
5*eda14cbcSMatt Macy  * Common Development and Distribution License (the "License").
6*eda14cbcSMatt Macy  * You may not use this file except in compliance with the License.
7*eda14cbcSMatt Macy  *
8*eda14cbcSMatt Macy  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*eda14cbcSMatt Macy  * or http://www.opensolaris.org/os/licensing.
10*eda14cbcSMatt Macy  * See the License for the specific language governing permissions
11*eda14cbcSMatt Macy  * and limitations under the License.
12*eda14cbcSMatt Macy  *
13*eda14cbcSMatt Macy  * When distributing Covered Code, include this CDDL HEADER in each
14*eda14cbcSMatt Macy  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*eda14cbcSMatt Macy  * If applicable, add the following below this CDDL HEADER, with the
16*eda14cbcSMatt Macy  * fields enclosed by brackets "[]" replaced with your own identifying
17*eda14cbcSMatt Macy  * information: Portions Copyright [yyyy] [name of copyright owner]
18*eda14cbcSMatt Macy  *
19*eda14cbcSMatt Macy  * CDDL HEADER END
20*eda14cbcSMatt Macy  */
21*eda14cbcSMatt Macy /*
22*eda14cbcSMatt Macy  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23*eda14cbcSMatt Macy  * Use is subject to license terms.
24*eda14cbcSMatt Macy  */
25*eda14cbcSMatt Macy 
26*eda14cbcSMatt Macy /*
27*eda14cbcSMatt Macy  * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
28*eda14cbcSMatt Macy  */
29*eda14cbcSMatt Macy 
30*eda14cbcSMatt Macy #include <sys/zfs_context.h>
31*eda14cbcSMatt Macy #include <sys/spa.h>
32*eda14cbcSMatt Macy #include <sys/vdev_impl.h>
33*eda14cbcSMatt Macy #include <sys/zio.h>
34*eda14cbcSMatt Macy #include <sys/fs/zfs.h>
35*eda14cbcSMatt Macy 
36*eda14cbcSMatt Macy /*
37*eda14cbcSMatt Macy  * Virtual device vector for the pool's root vdev.
38*eda14cbcSMatt Macy  */
39*eda14cbcSMatt Macy 
40*eda14cbcSMatt Macy static uint64_t
41*eda14cbcSMatt Macy vdev_root_core_tvds(vdev_t *vd)
42*eda14cbcSMatt Macy {
43*eda14cbcSMatt Macy 	uint64_t tvds = 0;
44*eda14cbcSMatt Macy 
45*eda14cbcSMatt Macy 	for (uint64_t c = 0; c < vd->vdev_children; c++) {
46*eda14cbcSMatt Macy 		vdev_t *cvd = vd->vdev_child[c];
47*eda14cbcSMatt Macy 
48*eda14cbcSMatt Macy 		if (!cvd->vdev_ishole && !cvd->vdev_islog &&
49*eda14cbcSMatt Macy 		    cvd->vdev_ops != &vdev_indirect_ops) {
50*eda14cbcSMatt Macy 			tvds++;
51*eda14cbcSMatt Macy 		}
52*eda14cbcSMatt Macy 	}
53*eda14cbcSMatt Macy 
54*eda14cbcSMatt Macy 	return (tvds);
55*eda14cbcSMatt Macy }
56*eda14cbcSMatt Macy 
57*eda14cbcSMatt Macy /*
58*eda14cbcSMatt Macy  * We should be able to tolerate one failure with absolutely no damage
59*eda14cbcSMatt Macy  * to our metadata.  Two failures will take out space maps, a bunch of
60*eda14cbcSMatt Macy  * indirect block trees, meta dnodes, dnodes, etc.  Probably not a happy
61*eda14cbcSMatt Macy  * place to live.  When we get smarter, we can liberalize this policy.
62*eda14cbcSMatt Macy  * e.g. If we haven't lost two consecutive top-level vdevs, then we are
63*eda14cbcSMatt Macy  * probably fine.  Adding bean counters during alloc/free can make this
64*eda14cbcSMatt Macy  * future guesswork more accurate.
65*eda14cbcSMatt Macy  */
66*eda14cbcSMatt Macy static boolean_t
67*eda14cbcSMatt Macy too_many_errors(vdev_t *vd, uint64_t numerrors)
68*eda14cbcSMatt Macy {
69*eda14cbcSMatt Macy 	uint64_t tvds;
70*eda14cbcSMatt Macy 
71*eda14cbcSMatt Macy 	if (numerrors == 0)
72*eda14cbcSMatt Macy 		return (B_FALSE);
73*eda14cbcSMatt Macy 
74*eda14cbcSMatt Macy 	tvds = vdev_root_core_tvds(vd);
75*eda14cbcSMatt Macy 	ASSERT3U(numerrors, <=, tvds);
76*eda14cbcSMatt Macy 
77*eda14cbcSMatt Macy 	if (numerrors == tvds)
78*eda14cbcSMatt Macy 		return (B_TRUE);
79*eda14cbcSMatt Macy 
80*eda14cbcSMatt Macy 	return (numerrors > spa_missing_tvds_allowed(vd->vdev_spa));
81*eda14cbcSMatt Macy }
82*eda14cbcSMatt Macy 
83*eda14cbcSMatt Macy static int
84*eda14cbcSMatt Macy vdev_root_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
85*eda14cbcSMatt Macy     uint64_t *ashift, uint64_t *pshift)
86*eda14cbcSMatt Macy {
87*eda14cbcSMatt Macy 	spa_t *spa = vd->vdev_spa;
88*eda14cbcSMatt Macy 	int lasterror = 0;
89*eda14cbcSMatt Macy 	int numerrors = 0;
90*eda14cbcSMatt Macy 
91*eda14cbcSMatt Macy 	if (vd->vdev_children == 0) {
92*eda14cbcSMatt Macy 		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
93*eda14cbcSMatt Macy 		return (SET_ERROR(EINVAL));
94*eda14cbcSMatt Macy 	}
95*eda14cbcSMatt Macy 
96*eda14cbcSMatt Macy 	vdev_open_children(vd);
97*eda14cbcSMatt Macy 
98*eda14cbcSMatt Macy 	for (int c = 0; c < vd->vdev_children; c++) {
99*eda14cbcSMatt Macy 		vdev_t *cvd = vd->vdev_child[c];
100*eda14cbcSMatt Macy 
101*eda14cbcSMatt Macy 		if (cvd->vdev_open_error && !cvd->vdev_islog &&
102*eda14cbcSMatt Macy 		    cvd->vdev_ops != &vdev_indirect_ops) {
103*eda14cbcSMatt Macy 			lasterror = cvd->vdev_open_error;
104*eda14cbcSMatt Macy 			numerrors++;
105*eda14cbcSMatt Macy 		}
106*eda14cbcSMatt Macy 	}
107*eda14cbcSMatt Macy 
108*eda14cbcSMatt Macy 	if (spa_load_state(spa) != SPA_LOAD_NONE)
109*eda14cbcSMatt Macy 		spa_set_missing_tvds(spa, numerrors);
110*eda14cbcSMatt Macy 
111*eda14cbcSMatt Macy 	if (too_many_errors(vd, numerrors)) {
112*eda14cbcSMatt Macy 		vd->vdev_stat.vs_aux = VDEV_AUX_NO_REPLICAS;
113*eda14cbcSMatt Macy 		return (lasterror);
114*eda14cbcSMatt Macy 	}
115*eda14cbcSMatt Macy 
116*eda14cbcSMatt Macy 	*asize = 0;
117*eda14cbcSMatt Macy 	*max_asize = 0;
118*eda14cbcSMatt Macy 	*ashift = 0;
119*eda14cbcSMatt Macy 	*pshift = 0;
120*eda14cbcSMatt Macy 
121*eda14cbcSMatt Macy 	return (0);
122*eda14cbcSMatt Macy }
123*eda14cbcSMatt Macy 
124*eda14cbcSMatt Macy static void
125*eda14cbcSMatt Macy vdev_root_close(vdev_t *vd)
126*eda14cbcSMatt Macy {
127*eda14cbcSMatt Macy 	for (int c = 0; c < vd->vdev_children; c++)
128*eda14cbcSMatt Macy 		vdev_close(vd->vdev_child[c]);
129*eda14cbcSMatt Macy }
130*eda14cbcSMatt Macy 
131*eda14cbcSMatt Macy static void
132*eda14cbcSMatt Macy vdev_root_state_change(vdev_t *vd, int faulted, int degraded)
133*eda14cbcSMatt Macy {
134*eda14cbcSMatt Macy 	if (too_many_errors(vd, faulted)) {
135*eda14cbcSMatt Macy 		vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
136*eda14cbcSMatt Macy 		    VDEV_AUX_NO_REPLICAS);
137*eda14cbcSMatt Macy 	} else if (degraded || faulted) {
138*eda14cbcSMatt Macy 		vdev_set_state(vd, B_FALSE, VDEV_STATE_DEGRADED, VDEV_AUX_NONE);
139*eda14cbcSMatt Macy 	} else {
140*eda14cbcSMatt Macy 		vdev_set_state(vd, B_FALSE, VDEV_STATE_HEALTHY, VDEV_AUX_NONE);
141*eda14cbcSMatt Macy 	}
142*eda14cbcSMatt Macy }
143*eda14cbcSMatt Macy 
144*eda14cbcSMatt Macy vdev_ops_t vdev_root_ops = {
145*eda14cbcSMatt Macy 	.vdev_op_open = vdev_root_open,
146*eda14cbcSMatt Macy 	.vdev_op_close = vdev_root_close,
147*eda14cbcSMatt Macy 	.vdev_op_asize = vdev_default_asize,
148*eda14cbcSMatt Macy 	.vdev_op_io_start = NULL,	/* not applicable to the root */
149*eda14cbcSMatt Macy 	.vdev_op_io_done = NULL,	/* not applicable to the root */
150*eda14cbcSMatt Macy 	.vdev_op_state_change = vdev_root_state_change,
151*eda14cbcSMatt Macy 	.vdev_op_need_resilver = NULL,
152*eda14cbcSMatt Macy 	.vdev_op_hold = NULL,
153*eda14cbcSMatt Macy 	.vdev_op_rele = NULL,
154*eda14cbcSMatt Macy 	.vdev_op_remap = NULL,
155*eda14cbcSMatt Macy 	.vdev_op_xlate = NULL,
156*eda14cbcSMatt Macy 	.vdev_op_type = VDEV_TYPE_ROOT,	/* name of this vdev type */
157*eda14cbcSMatt Macy 	.vdev_op_leaf = B_FALSE		/* not a leaf vdev */
158*eda14cbcSMatt Macy };
159