1094e47e9SGeorge Wilson /*
2094e47e9SGeorge Wilson * CDDL HEADER START
3094e47e9SGeorge Wilson *
4094e47e9SGeorge Wilson * The contents of this file are subject to the terms of the
5094e47e9SGeorge Wilson * Common Development and Distribution License (the "License").
6094e47e9SGeorge Wilson * You may not use this file except in compliance with the License.
7094e47e9SGeorge Wilson *
8094e47e9SGeorge Wilson * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9094e47e9SGeorge Wilson * or http://www.opensolaris.org/os/licensing.
10094e47e9SGeorge Wilson * See the License for the specific language governing permissions
11094e47e9SGeorge Wilson * and limitations under the License.
12094e47e9SGeorge Wilson *
13094e47e9SGeorge Wilson * When distributing Covered Code, include this CDDL HEADER in each
14094e47e9SGeorge Wilson * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15094e47e9SGeorge Wilson * If applicable, add the following below this CDDL HEADER, with the
16094e47e9SGeorge Wilson * fields enclosed by brackets "[]" replaced with your own identifying
17094e47e9SGeorge Wilson * information: Portions Copyright [yyyy] [name of copyright owner]
18094e47e9SGeorge Wilson *
19094e47e9SGeorge Wilson * CDDL HEADER END
20094e47e9SGeorge Wilson */
21094e47e9SGeorge Wilson
22094e47e9SGeorge Wilson /*
23af1d63abSPaul Dagnelie * Copyright (c) 2016, 2019 by Delphix. All rights reserved.
24094e47e9SGeorge Wilson */
25094e47e9SGeorge Wilson
26094e47e9SGeorge Wilson #include <sys/spa.h>
27094e47e9SGeorge Wilson #include <sys/spa_impl.h>
28094e47e9SGeorge Wilson #include <sys/txg.h>
29094e47e9SGeorge Wilson #include <sys/vdev_impl.h>
30094e47e9SGeorge Wilson #include <sys/refcount.h>
31094e47e9SGeorge Wilson #include <sys/metaslab_impl.h>
32094e47e9SGeorge Wilson #include <sys/dsl_synctask.h>
33094e47e9SGeorge Wilson #include <sys/zap.h>
34094e47e9SGeorge Wilson #include <sys/dmu_tx.h>
35094e47e9SGeorge Wilson
36094e47e9SGeorge Wilson /*
37094e47e9SGeorge Wilson * Value that is written to disk during initialization.
38094e47e9SGeorge Wilson */
39094e47e9SGeorge Wilson uint64_t zfs_initialize_value = 0xdeadbeefdeadbeefULL;
40094e47e9SGeorge Wilson
41094e47e9SGeorge Wilson /* maximum number of I/Os outstanding per leaf vdev */
42094e47e9SGeorge Wilson int zfs_initialize_limit = 1;
43094e47e9SGeorge Wilson
44094e47e9SGeorge Wilson /* size of initializing writes; default 1MiB, see zfs_remove_max_segment */
45094e47e9SGeorge Wilson uint64_t zfs_initialize_chunk_size = 1024 * 1024;
46094e47e9SGeorge Wilson
47094e47e9SGeorge Wilson static boolean_t
vdev_initialize_should_stop(vdev_t * vd)48094e47e9SGeorge Wilson vdev_initialize_should_stop(vdev_t *vd)
49094e47e9SGeorge Wilson {
50094e47e9SGeorge Wilson return (vd->vdev_initialize_exit_wanted || !vdev_writeable(vd) ||
51094e47e9SGeorge Wilson vd->vdev_detached || vd->vdev_top->vdev_removing);
52094e47e9SGeorge Wilson }
53094e47e9SGeorge Wilson
54094e47e9SGeorge Wilson static void
vdev_initialize_zap_update_sync(void * arg,dmu_tx_t * tx)55094e47e9SGeorge Wilson vdev_initialize_zap_update_sync(void *arg, dmu_tx_t *tx)
56094e47e9SGeorge Wilson {
57094e47e9SGeorge Wilson /*
58094e47e9SGeorge Wilson * We pass in the guid instead of the vdev_t since the vdev may
59094e47e9SGeorge Wilson * have been freed prior to the sync task being processed. This
60094e47e9SGeorge Wilson * happens when a vdev is detached as we call spa_config_vdev_exit(),
61084fd14fSBrian Behlendorf * stop the initializing thread, schedule the sync task, and free
62094e47e9SGeorge Wilson * the vdev. Later when the scheduled sync task is invoked, it would
63094e47e9SGeorge Wilson * find that the vdev has been freed.
64094e47e9SGeorge Wilson */
65094e47e9SGeorge Wilson uint64_t guid = *(uint64_t *)arg;
66094e47e9SGeorge Wilson uint64_t txg = dmu_tx_get_txg(tx);
67094e47e9SGeorge Wilson kmem_free(arg, sizeof (uint64_t));
68094e47e9SGeorge Wilson
69094e47e9SGeorge Wilson vdev_t *vd = spa_lookup_by_guid(tx->tx_pool->dp_spa, guid, B_FALSE);
70094e47e9SGeorge Wilson if (vd == NULL || vd->vdev_top->vdev_removing || !vdev_is_concrete(vd))
71094e47e9SGeorge Wilson return;
72094e47e9SGeorge Wilson
73094e47e9SGeorge Wilson uint64_t last_offset = vd->vdev_initialize_offset[txg & TXG_MASK];
74094e47e9SGeorge Wilson vd->vdev_initialize_offset[txg & TXG_MASK] = 0;
75094e47e9SGeorge Wilson
76094e47e9SGeorge Wilson VERIFY(vd->vdev_leaf_zap != 0);
77094e47e9SGeorge Wilson
78094e47e9SGeorge Wilson objset_t *mos = vd->vdev_spa->spa_meta_objset;
79094e47e9SGeorge Wilson
80094e47e9SGeorge Wilson if (last_offset > 0) {
81094e47e9SGeorge Wilson vd->vdev_initialize_last_offset = last_offset;
82094e47e9SGeorge Wilson VERIFY0(zap_update(mos, vd->vdev_leaf_zap,
83094e47e9SGeorge Wilson VDEV_LEAF_ZAP_INITIALIZE_LAST_OFFSET,
84094e47e9SGeorge Wilson sizeof (last_offset), 1, &last_offset, tx));
85094e47e9SGeorge Wilson }
86094e47e9SGeorge Wilson if (vd->vdev_initialize_action_time > 0) {
87094e47e9SGeorge Wilson uint64_t val = (uint64_t)vd->vdev_initialize_action_time;
88094e47e9SGeorge Wilson VERIFY0(zap_update(mos, vd->vdev_leaf_zap,
89094e47e9SGeorge Wilson VDEV_LEAF_ZAP_INITIALIZE_ACTION_TIME, sizeof (val),
90094e47e9SGeorge Wilson 1, &val, tx));
91094e47e9SGeorge Wilson }
92094e47e9SGeorge Wilson
93094e47e9SGeorge Wilson uint64_t initialize_state = vd->vdev_initialize_state;
94094e47e9SGeorge Wilson VERIFY0(zap_update(mos, vd->vdev_leaf_zap,
95094e47e9SGeorge Wilson VDEV_LEAF_ZAP_INITIALIZE_STATE, sizeof (initialize_state), 1,
96094e47e9SGeorge Wilson &initialize_state, tx));
97094e47e9SGeorge Wilson }
98094e47e9SGeorge Wilson
99094e47e9SGeorge Wilson static void
vdev_initialize_change_state(vdev_t * vd,vdev_initializing_state_t new_state)100094e47e9SGeorge Wilson vdev_initialize_change_state(vdev_t *vd, vdev_initializing_state_t new_state)
101094e47e9SGeorge Wilson {
102094e47e9SGeorge Wilson ASSERT(MUTEX_HELD(&vd->vdev_initialize_lock));
103094e47e9SGeorge Wilson spa_t *spa = vd->vdev_spa;
104094e47e9SGeorge Wilson
105094e47e9SGeorge Wilson if (new_state == vd->vdev_initialize_state)
106094e47e9SGeorge Wilson return;
107094e47e9SGeorge Wilson
108094e47e9SGeorge Wilson /*
109094e47e9SGeorge Wilson * Copy the vd's guid, this will be freed by the sync task.
110094e47e9SGeorge Wilson */
111094e47e9SGeorge Wilson uint64_t *guid = kmem_zalloc(sizeof (uint64_t), KM_SLEEP);
112094e47e9SGeorge Wilson *guid = vd->vdev_guid;
113094e47e9SGeorge Wilson
114094e47e9SGeorge Wilson /*
115094e47e9SGeorge Wilson * If we're suspending, then preserving the original start time.
116094e47e9SGeorge Wilson */
117094e47e9SGeorge Wilson if (vd->vdev_initialize_state != VDEV_INITIALIZE_SUSPENDED) {
118094e47e9SGeorge Wilson vd->vdev_initialize_action_time = gethrestime_sec();
119094e47e9SGeorge Wilson }
120094e47e9SGeorge Wilson vd->vdev_initialize_state = new_state;
121094e47e9SGeorge Wilson
122094e47e9SGeorge Wilson dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
123094e47e9SGeorge Wilson VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
124094e47e9SGeorge Wilson dsl_sync_task_nowait(spa_get_dsl(spa), vdev_initialize_zap_update_sync,
125084fd14fSBrian Behlendorf guid, 2, ZFS_SPACE_CHECK_NONE, tx);
126094e47e9SGeorge Wilson
127094e47e9SGeorge Wilson switch (new_state) {
128094e47e9SGeorge Wilson case VDEV_INITIALIZE_ACTIVE:
129094e47e9SGeorge Wilson spa_history_log_internal(spa, "initialize", tx,
130094e47e9SGeorge Wilson "vdev=%s activated", vd->vdev_path);
131094e47e9SGeorge Wilson break;
132094e47e9SGeorge Wilson case VDEV_INITIALIZE_SUSPENDED:
133094e47e9SGeorge Wilson spa_history_log_internal(spa, "initialize", tx,
134094e47e9SGeorge Wilson "vdev=%s suspended", vd->vdev_path);
135094e47e9SGeorge Wilson break;
136094e47e9SGeorge Wilson case VDEV_INITIALIZE_CANCELED:
137094e47e9SGeorge Wilson spa_history_log_internal(spa, "initialize", tx,
138094e47e9SGeorge Wilson "vdev=%s canceled", vd->vdev_path);
139094e47e9SGeorge Wilson break;
140094e47e9SGeorge Wilson case VDEV_INITIALIZE_COMPLETE:
141094e47e9SGeorge Wilson spa_history_log_internal(spa, "initialize", tx,
142094e47e9SGeorge Wilson "vdev=%s complete", vd->vdev_path);
143094e47e9SGeorge Wilson break;
144094e47e9SGeorge Wilson default:
145094e47e9SGeorge Wilson panic("invalid state %llu", (unsigned long long)new_state);
146094e47e9SGeorge Wilson }
147094e47e9SGeorge Wilson
148094e47e9SGeorge Wilson dmu_tx_commit(tx);
149094e47e9SGeorge Wilson }
150094e47e9SGeorge Wilson
151094e47e9SGeorge Wilson static void
vdev_initialize_cb(zio_t * zio)152094e47e9SGeorge Wilson vdev_initialize_cb(zio_t *zio)
153094e47e9SGeorge Wilson {
154094e47e9SGeorge Wilson vdev_t *vd = zio->io_vd;
155094e47e9SGeorge Wilson mutex_enter(&vd->vdev_initialize_io_lock);
156094e47e9SGeorge Wilson if (zio->io_error == ENXIO && !vdev_writeable(vd)) {
157094e47e9SGeorge Wilson /*
158094e47e9SGeorge Wilson * The I/O failed because the vdev was unavailable; roll the
159094e47e9SGeorge Wilson * last offset back. (This works because spa_sync waits on
160094e47e9SGeorge Wilson * spa_txg_zio before it runs sync tasks.)
161094e47e9SGeorge Wilson */
162094e47e9SGeorge Wilson uint64_t *off =
163094e47e9SGeorge Wilson &vd->vdev_initialize_offset[zio->io_txg & TXG_MASK];
164094e47e9SGeorge Wilson *off = MIN(*off, zio->io_offset);
165094e47e9SGeorge Wilson } else {
166094e47e9SGeorge Wilson /*
167094e47e9SGeorge Wilson * Since initializing is best-effort, we ignore I/O errors and
168094e47e9SGeorge Wilson * rely on vdev_probe to determine if the errors are more
169094e47e9SGeorge Wilson * critical.
170094e47e9SGeorge Wilson */
171094e47e9SGeorge Wilson if (zio->io_error != 0)
172094e47e9SGeorge Wilson vd->vdev_stat.vs_initialize_errors++;
173094e47e9SGeorge Wilson
174094e47e9SGeorge Wilson vd->vdev_initialize_bytes_done += zio->io_orig_size;
175094e47e9SGeorge Wilson }
176094e47e9SGeorge Wilson ASSERT3U(vd->vdev_initialize_inflight, >, 0);
177094e47e9SGeorge Wilson vd->vdev_initialize_inflight--;
178094e47e9SGeorge Wilson cv_broadcast(&vd->vdev_initialize_io_cv);
179094e47e9SGeorge Wilson mutex_exit(&vd->vdev_initialize_io_lock);
180094e47e9SGeorge Wilson
181094e47e9SGeorge Wilson spa_config_exit(vd->vdev_spa, SCL_STATE_ALL, vd);
182094e47e9SGeorge Wilson }
183094e47e9SGeorge Wilson
184094e47e9SGeorge Wilson /* Takes care of physical writing and limiting # of concurrent ZIOs. */
185094e47e9SGeorge Wilson static int
vdev_initialize_write(vdev_t * vd,uint64_t start,uint64_t size,abd_t * data)186094e47e9SGeorge Wilson vdev_initialize_write(vdev_t *vd, uint64_t start, uint64_t size, abd_t *data)
187094e47e9SGeorge Wilson {
188094e47e9SGeorge Wilson spa_t *spa = vd->vdev_spa;
189094e47e9SGeorge Wilson
190094e47e9SGeorge Wilson /* Limit inflight initializing I/Os */
191094e47e9SGeorge Wilson mutex_enter(&vd->vdev_initialize_io_lock);
192094e47e9SGeorge Wilson while (vd->vdev_initialize_inflight >= zfs_initialize_limit) {
193094e47e9SGeorge Wilson cv_wait(&vd->vdev_initialize_io_cv,
194094e47e9SGeorge Wilson &vd->vdev_initialize_io_lock);
195094e47e9SGeorge Wilson }
196094e47e9SGeorge Wilson vd->vdev_initialize_inflight++;
197094e47e9SGeorge Wilson mutex_exit(&vd->vdev_initialize_io_lock);
198094e47e9SGeorge Wilson
199094e47e9SGeorge Wilson dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
200094e47e9SGeorge Wilson VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
201094e47e9SGeorge Wilson uint64_t txg = dmu_tx_get_txg(tx);
202094e47e9SGeorge Wilson
203094e47e9SGeorge Wilson spa_config_enter(spa, SCL_STATE_ALL, vd, RW_READER);
204094e47e9SGeorge Wilson mutex_enter(&vd->vdev_initialize_lock);
205094e47e9SGeorge Wilson
206094e47e9SGeorge Wilson if (vd->vdev_initialize_offset[txg & TXG_MASK] == 0) {
207094e47e9SGeorge Wilson uint64_t *guid = kmem_zalloc(sizeof (uint64_t), KM_SLEEP);
208094e47e9SGeorge Wilson *guid = vd->vdev_guid;
209094e47e9SGeorge Wilson
210094e47e9SGeorge Wilson /* This is the first write of this txg. */
211094e47e9SGeorge Wilson dsl_sync_task_nowait(spa_get_dsl(spa),
212094e47e9SGeorge Wilson vdev_initialize_zap_update_sync, guid, 2,
213094e47e9SGeorge Wilson ZFS_SPACE_CHECK_RESERVED, tx);
214094e47e9SGeorge Wilson }
215094e47e9SGeorge Wilson
216094e47e9SGeorge Wilson /*
217094e47e9SGeorge Wilson * We know the vdev struct will still be around since all
218094e47e9SGeorge Wilson * consumers of vdev_free must stop the initialization first.
219094e47e9SGeorge Wilson */
220094e47e9SGeorge Wilson if (vdev_initialize_should_stop(vd)) {
221094e47e9SGeorge Wilson mutex_enter(&vd->vdev_initialize_io_lock);
222094e47e9SGeorge Wilson ASSERT3U(vd->vdev_initialize_inflight, >, 0);
223094e47e9SGeorge Wilson vd->vdev_initialize_inflight--;
224094e47e9SGeorge Wilson mutex_exit(&vd->vdev_initialize_io_lock);
225094e47e9SGeorge Wilson spa_config_exit(vd->vdev_spa, SCL_STATE_ALL, vd);
226094e47e9SGeorge Wilson mutex_exit(&vd->vdev_initialize_lock);
227094e47e9SGeorge Wilson dmu_tx_commit(tx);
228094e47e9SGeorge Wilson return (SET_ERROR(EINTR));
229094e47e9SGeorge Wilson }
230094e47e9SGeorge Wilson mutex_exit(&vd->vdev_initialize_lock);
231094e47e9SGeorge Wilson
232094e47e9SGeorge Wilson vd->vdev_initialize_offset[txg & TXG_MASK] = start + size;
233094e47e9SGeorge Wilson zio_nowait(zio_write_phys(spa->spa_txg_zio[txg & TXG_MASK], vd, start,
234094e47e9SGeorge Wilson size, data, ZIO_CHECKSUM_OFF, vdev_initialize_cb, NULL,
235094e47e9SGeorge Wilson ZIO_PRIORITY_INITIALIZING, ZIO_FLAG_CANFAIL, B_FALSE));
236094e47e9SGeorge Wilson /* vdev_initialize_cb releases SCL_STATE_ALL */
237094e47e9SGeorge Wilson
238094e47e9SGeorge Wilson dmu_tx_commit(tx);
239094e47e9SGeorge Wilson
240094e47e9SGeorge Wilson return (0);
241094e47e9SGeorge Wilson }
242094e47e9SGeorge Wilson
243094e47e9SGeorge Wilson /*
244094e47e9SGeorge Wilson * Callback to fill each ABD chunk with zfs_initialize_value. len must be
245094e47e9SGeorge Wilson * divisible by sizeof (uint64_t), and buf must be 8-byte aligned. The ABD
246094e47e9SGeorge Wilson * allocation will guarantee these for us.
247094e47e9SGeorge Wilson */
248094e47e9SGeorge Wilson /* ARGSUSED */
249094e47e9SGeorge Wilson static int
vdev_initialize_block_fill(void * buf,size_t len,void * unused)250094e47e9SGeorge Wilson vdev_initialize_block_fill(void *buf, size_t len, void *unused)
251094e47e9SGeorge Wilson {
252094e47e9SGeorge Wilson ASSERT0(len % sizeof (uint64_t));
253094e47e9SGeorge Wilson for (uint64_t i = 0; i < len; i += sizeof (uint64_t)) {
254094e47e9SGeorge Wilson *(uint64_t *)((char *)(buf) + i) = zfs_initialize_value;
255094e47e9SGeorge Wilson }
256094e47e9SGeorge Wilson return (0);
257094e47e9SGeorge Wilson }
258094e47e9SGeorge Wilson
259094e47e9SGeorge Wilson static abd_t *
vdev_initialize_block_alloc()260094e47e9SGeorge Wilson vdev_initialize_block_alloc()
261094e47e9SGeorge Wilson {
262094e47e9SGeorge Wilson /* Allocate ABD for filler data */
263094e47e9SGeorge Wilson abd_t *data = abd_alloc_for_io(zfs_initialize_chunk_size, B_FALSE);
264094e47e9SGeorge Wilson
265094e47e9SGeorge Wilson ASSERT0(zfs_initialize_chunk_size % sizeof (uint64_t));
266094e47e9SGeorge Wilson (void) abd_iterate_func(data, 0, zfs_initialize_chunk_size,
267094e47e9SGeorge Wilson vdev_initialize_block_fill, NULL);
268094e47e9SGeorge Wilson
269094e47e9SGeorge Wilson return (data);
270094e47e9SGeorge Wilson }
271094e47e9SGeorge Wilson
272094e47e9SGeorge Wilson static void
vdev_initialize_block_free(abd_t * data)273094e47e9SGeorge Wilson vdev_initialize_block_free(abd_t *data)
274094e47e9SGeorge Wilson {
275094e47e9SGeorge Wilson abd_free(data);
276094e47e9SGeorge Wilson }
277094e47e9SGeorge Wilson
278094e47e9SGeorge Wilson static int
vdev_initialize_ranges(vdev_t * vd,abd_t * data)279094e47e9SGeorge Wilson vdev_initialize_ranges(vdev_t *vd, abd_t *data)
280094e47e9SGeorge Wilson {
281*4d7988d6SPaul Dagnelie range_tree_t *rt = vd->vdev_initialize_tree;
282*4d7988d6SPaul Dagnelie zfs_btree_t *bt = &rt->rt_root;
283*4d7988d6SPaul Dagnelie zfs_btree_index_t where;
284094e47e9SGeorge Wilson
285*4d7988d6SPaul Dagnelie for (range_seg_t *rs = zfs_btree_first(bt, &where); rs != NULL;
286*4d7988d6SPaul Dagnelie rs = zfs_btree_next(bt, &where, &where)) {
287*4d7988d6SPaul Dagnelie uint64_t size = rs_get_end(rs, rt) - rs_get_start(rs, rt);
288094e47e9SGeorge Wilson
289094e47e9SGeorge Wilson /* Split range into legally-sized physical chunks */
290094e47e9SGeorge Wilson uint64_t writes_required =
291094e47e9SGeorge Wilson ((size - 1) / zfs_initialize_chunk_size) + 1;
292094e47e9SGeorge Wilson
293094e47e9SGeorge Wilson for (uint64_t w = 0; w < writes_required; w++) {
294094e47e9SGeorge Wilson int error;
295094e47e9SGeorge Wilson
296094e47e9SGeorge Wilson error = vdev_initialize_write(vd,
297*4d7988d6SPaul Dagnelie VDEV_LABEL_START_SIZE + rs_get_start(rs, rt) +
298094e47e9SGeorge Wilson (w * zfs_initialize_chunk_size),
299094e47e9SGeorge Wilson MIN(size - (w * zfs_initialize_chunk_size),
300094e47e9SGeorge Wilson zfs_initialize_chunk_size), data);
301094e47e9SGeorge Wilson if (error != 0)
302094e47e9SGeorge Wilson return (error);
303094e47e9SGeorge Wilson }
304094e47e9SGeorge Wilson }
305094e47e9SGeorge Wilson return (0);
306094e47e9SGeorge Wilson }
307094e47e9SGeorge Wilson
308094e47e9SGeorge Wilson static void
vdev_initialize_calculate_progress(vdev_t * vd)309094e47e9SGeorge Wilson vdev_initialize_calculate_progress(vdev_t *vd)
310094e47e9SGeorge Wilson {
311094e47e9SGeorge Wilson ASSERT(spa_config_held(vd->vdev_spa, SCL_CONFIG, RW_READER) ||
312094e47e9SGeorge Wilson spa_config_held(vd->vdev_spa, SCL_CONFIG, RW_WRITER));
313094e47e9SGeorge Wilson ASSERT(vd->vdev_leaf_zap != 0);
314094e47e9SGeorge Wilson
315094e47e9SGeorge Wilson vd->vdev_initialize_bytes_est = 0;
316094e47e9SGeorge Wilson vd->vdev_initialize_bytes_done = 0;
317094e47e9SGeorge Wilson
318094e47e9SGeorge Wilson for (uint64_t i = 0; i < vd->vdev_top->vdev_ms_count; i++) {
319094e47e9SGeorge Wilson metaslab_t *msp = vd->vdev_top->vdev_ms[i];
320094e47e9SGeorge Wilson mutex_enter(&msp->ms_lock);
321094e47e9SGeorge Wilson
322094e47e9SGeorge Wilson uint64_t ms_free = msp->ms_size -
323555d674dSSerapheim Dimitropoulos metaslab_allocated_space(msp);
324094e47e9SGeorge Wilson
325094e47e9SGeorge Wilson if (vd->vdev_top->vdev_ops == &vdev_raidz_ops)
326094e47e9SGeorge Wilson ms_free /= vd->vdev_top->vdev_children;
327094e47e9SGeorge Wilson
328094e47e9SGeorge Wilson /*
329094e47e9SGeorge Wilson * Convert the metaslab range to a physical range
330094e47e9SGeorge Wilson * on our vdev. We use this to determine if we are
331094e47e9SGeorge Wilson * in the middle of this metaslab range.
332094e47e9SGeorge Wilson */
333*4d7988d6SPaul Dagnelie range_seg64_t logical_rs, physical_rs;
334094e47e9SGeorge Wilson logical_rs.rs_start = msp->ms_start;
335094e47e9SGeorge Wilson logical_rs.rs_end = msp->ms_start + msp->ms_size;
336094e47e9SGeorge Wilson vdev_xlate(vd, &logical_rs, &physical_rs);
337094e47e9SGeorge Wilson
338094e47e9SGeorge Wilson if (vd->vdev_initialize_last_offset <= physical_rs.rs_start) {
339094e47e9SGeorge Wilson vd->vdev_initialize_bytes_est += ms_free;
340094e47e9SGeorge Wilson mutex_exit(&msp->ms_lock);
341094e47e9SGeorge Wilson continue;
342094e47e9SGeorge Wilson } else if (vd->vdev_initialize_last_offset >
343094e47e9SGeorge Wilson physical_rs.rs_end) {
344094e47e9SGeorge Wilson vd->vdev_initialize_bytes_done += ms_free;
345094e47e9SGeorge Wilson vd->vdev_initialize_bytes_est += ms_free;
346094e47e9SGeorge Wilson mutex_exit(&msp->ms_lock);
347094e47e9SGeorge Wilson continue;
348094e47e9SGeorge Wilson }
349094e47e9SGeorge Wilson
350094e47e9SGeorge Wilson /*
351094e47e9SGeorge Wilson * If we get here, we're in the middle of initializing this
352094e47e9SGeorge Wilson * metaslab. Load it and walk the free tree for more accurate
353094e47e9SGeorge Wilson * progress estimation.
354094e47e9SGeorge Wilson */
355a0b03b16SSerapheim Dimitropoulos VERIFY0(metaslab_load(msp));
356094e47e9SGeorge Wilson
357*4d7988d6SPaul Dagnelie zfs_btree_index_t where;
358*4d7988d6SPaul Dagnelie range_tree_t *rt = msp->ms_allocatable;
359*4d7988d6SPaul Dagnelie for (range_seg_t *rs =
360*4d7988d6SPaul Dagnelie zfs_btree_first(&rt->rt_root, &where); rs;
361*4d7988d6SPaul Dagnelie rs = zfs_btree_next(&rt->rt_root, &where,
362*4d7988d6SPaul Dagnelie &where)) {
363*4d7988d6SPaul Dagnelie logical_rs.rs_start = rs_get_start(rs, rt);
364*4d7988d6SPaul Dagnelie logical_rs.rs_end = rs_get_end(rs, rt);
365094e47e9SGeorge Wilson vdev_xlate(vd, &logical_rs, &physical_rs);
366094e47e9SGeorge Wilson
367094e47e9SGeorge Wilson uint64_t size = physical_rs.rs_end -
368094e47e9SGeorge Wilson physical_rs.rs_start;
369094e47e9SGeorge Wilson vd->vdev_initialize_bytes_est += size;
370094e47e9SGeorge Wilson if (vd->vdev_initialize_last_offset >
371094e47e9SGeorge Wilson physical_rs.rs_end) {
372094e47e9SGeorge Wilson vd->vdev_initialize_bytes_done += size;
373094e47e9SGeorge Wilson } else if (vd->vdev_initialize_last_offset >
374094e47e9SGeorge Wilson physical_rs.rs_start &&
375094e47e9SGeorge Wilson vd->vdev_initialize_last_offset <
376094e47e9SGeorge Wilson physical_rs.rs_end) {
377094e47e9SGeorge Wilson vd->vdev_initialize_bytes_done +=
378094e47e9SGeorge Wilson vd->vdev_initialize_last_offset -
379094e47e9SGeorge Wilson physical_rs.rs_start;
380094e47e9SGeorge Wilson }
381094e47e9SGeorge Wilson }
382094e47e9SGeorge Wilson mutex_exit(&msp->ms_lock);
383094e47e9SGeorge Wilson }
384094e47e9SGeorge Wilson }
385094e47e9SGeorge Wilson
386084fd14fSBrian Behlendorf static int
vdev_initialize_load(vdev_t * vd)387094e47e9SGeorge Wilson vdev_initialize_load(vdev_t *vd)
388094e47e9SGeorge Wilson {
389084fd14fSBrian Behlendorf int err = 0;
390094e47e9SGeorge Wilson ASSERT(spa_config_held(vd->vdev_spa, SCL_CONFIG, RW_READER) ||
391094e47e9SGeorge Wilson spa_config_held(vd->vdev_spa, SCL_CONFIG, RW_WRITER));
392094e47e9SGeorge Wilson ASSERT(vd->vdev_leaf_zap != 0);
393094e47e9SGeorge Wilson
394094e47e9SGeorge Wilson if (vd->vdev_initialize_state == VDEV_INITIALIZE_ACTIVE ||
395094e47e9SGeorge Wilson vd->vdev_initialize_state == VDEV_INITIALIZE_SUSPENDED) {
396084fd14fSBrian Behlendorf err = zap_lookup(vd->vdev_spa->spa_meta_objset,
397094e47e9SGeorge Wilson vd->vdev_leaf_zap, VDEV_LEAF_ZAP_INITIALIZE_LAST_OFFSET,
398094e47e9SGeorge Wilson sizeof (vd->vdev_initialize_last_offset), 1,
399094e47e9SGeorge Wilson &vd->vdev_initialize_last_offset);
400084fd14fSBrian Behlendorf if (err == ENOENT) {
401084fd14fSBrian Behlendorf vd->vdev_initialize_last_offset = 0;
402084fd14fSBrian Behlendorf err = 0;
403084fd14fSBrian Behlendorf }
404094e47e9SGeorge Wilson }
405094e47e9SGeorge Wilson
406094e47e9SGeorge Wilson vdev_initialize_calculate_progress(vd);
407084fd14fSBrian Behlendorf return (err);
408094e47e9SGeorge Wilson }
409094e47e9SGeorge Wilson
410094e47e9SGeorge Wilson
411094e47e9SGeorge Wilson /*
412084fd14fSBrian Behlendorf * Convert the logical range into a physical range and add it to our
413094e47e9SGeorge Wilson * avl tree.
414094e47e9SGeorge Wilson */
415094e47e9SGeorge Wilson void
vdev_initialize_range_add(void * arg,uint64_t start,uint64_t size)416094e47e9SGeorge Wilson vdev_initialize_range_add(void *arg, uint64_t start, uint64_t size)
417094e47e9SGeorge Wilson {
418094e47e9SGeorge Wilson vdev_t *vd = arg;
419*4d7988d6SPaul Dagnelie range_seg64_t logical_rs, physical_rs;
420094e47e9SGeorge Wilson logical_rs.rs_start = start;
421094e47e9SGeorge Wilson logical_rs.rs_end = start + size;
422094e47e9SGeorge Wilson
423094e47e9SGeorge Wilson ASSERT(vd->vdev_ops->vdev_op_leaf);
424094e47e9SGeorge Wilson vdev_xlate(vd, &logical_rs, &physical_rs);
425094e47e9SGeorge Wilson
426094e47e9SGeorge Wilson IMPLY(vd->vdev_top == vd,
427094e47e9SGeorge Wilson logical_rs.rs_start == physical_rs.rs_start);
428094e47e9SGeorge Wilson IMPLY(vd->vdev_top == vd,
429094e47e9SGeorge Wilson logical_rs.rs_end == physical_rs.rs_end);
430094e47e9SGeorge Wilson
431094e47e9SGeorge Wilson /* Only add segments that we have not visited yet */
432094e47e9SGeorge Wilson if (physical_rs.rs_end <= vd->vdev_initialize_last_offset)
433094e47e9SGeorge Wilson return;
434094e47e9SGeorge Wilson
435094e47e9SGeorge Wilson /* Pick up where we left off mid-range. */
436094e47e9SGeorge Wilson if (vd->vdev_initialize_last_offset > physical_rs.rs_start) {
437094e47e9SGeorge Wilson zfs_dbgmsg("range write: vd %s changed (%llu, %llu) to "
438094e47e9SGeorge Wilson "(%llu, %llu)", vd->vdev_path,
439094e47e9SGeorge Wilson (u_longlong_t)physical_rs.rs_start,
440094e47e9SGeorge Wilson (u_longlong_t)physical_rs.rs_end,
441094e47e9SGeorge Wilson (u_longlong_t)vd->vdev_initialize_last_offset,
442094e47e9SGeorge Wilson (u_longlong_t)physical_rs.rs_end);
443094e47e9SGeorge Wilson ASSERT3U(physical_rs.rs_end, >,
444094e47e9SGeorge Wilson vd->vdev_initialize_last_offset);
445094e47e9SGeorge Wilson physical_rs.rs_start = vd->vdev_initialize_last_offset;
446094e47e9SGeorge Wilson }
447094e47e9SGeorge Wilson ASSERT3U(physical_rs.rs_end, >=, physical_rs.rs_start);
448094e47e9SGeorge Wilson
449094e47e9SGeorge Wilson /*
450094e47e9SGeorge Wilson * With raidz, it's possible that the logical range does not live on
451094e47e9SGeorge Wilson * this leaf vdev. We only add the physical range to this vdev's if it
452094e47e9SGeorge Wilson * has a length greater than 0.
453094e47e9SGeorge Wilson */
454094e47e9SGeorge Wilson if (physical_rs.rs_end > physical_rs.rs_start) {
455094e47e9SGeorge Wilson range_tree_add(vd->vdev_initialize_tree, physical_rs.rs_start,
456094e47e9SGeorge Wilson physical_rs.rs_end - physical_rs.rs_start);
457094e47e9SGeorge Wilson } else {
458094e47e9SGeorge Wilson ASSERT3U(physical_rs.rs_end, ==, physical_rs.rs_start);
459094e47e9SGeorge Wilson }
460094e47e9SGeorge Wilson }
461094e47e9SGeorge Wilson
462094e47e9SGeorge Wilson static void
vdev_initialize_thread(void * arg)463094e47e9SGeorge Wilson vdev_initialize_thread(void *arg)
464094e47e9SGeorge Wilson {
465094e47e9SGeorge Wilson vdev_t *vd = arg;
466094e47e9SGeorge Wilson spa_t *spa = vd->vdev_spa;
467094e47e9SGeorge Wilson int error = 0;
468094e47e9SGeorge Wilson uint64_t ms_count = 0;
469094e47e9SGeorge Wilson
470094e47e9SGeorge Wilson ASSERT(vdev_is_concrete(vd));
471094e47e9SGeorge Wilson spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
472094e47e9SGeorge Wilson
473094e47e9SGeorge Wilson vd->vdev_initialize_last_offset = 0;
474084fd14fSBrian Behlendorf VERIFY0(vdev_initialize_load(vd));
475094e47e9SGeorge Wilson
476094e47e9SGeorge Wilson abd_t *deadbeef = vdev_initialize_block_alloc();
477094e47e9SGeorge Wilson
478*4d7988d6SPaul Dagnelie vd->vdev_initialize_tree = range_tree_create(NULL, RANGE_SEG64, NULL,
479*4d7988d6SPaul Dagnelie 0, 0);
480094e47e9SGeorge Wilson
481094e47e9SGeorge Wilson for (uint64_t i = 0; !vd->vdev_detached &&
482094e47e9SGeorge Wilson i < vd->vdev_top->vdev_ms_count; i++) {
483094e47e9SGeorge Wilson metaslab_t *msp = vd->vdev_top->vdev_ms[i];
484af1d63abSPaul Dagnelie boolean_t unload_when_done = B_FALSE;
485094e47e9SGeorge Wilson
486094e47e9SGeorge Wilson /*
487094e47e9SGeorge Wilson * If we've expanded the top-level vdev or it's our
488094e47e9SGeorge Wilson * first pass, calculate our progress.
489094e47e9SGeorge Wilson */
490094e47e9SGeorge Wilson if (vd->vdev_top->vdev_ms_count != ms_count) {
491094e47e9SGeorge Wilson vdev_initialize_calculate_progress(vd);
492094e47e9SGeorge Wilson ms_count = vd->vdev_top->vdev_ms_count;
493094e47e9SGeorge Wilson }
494094e47e9SGeorge Wilson
495084fd14fSBrian Behlendorf spa_config_exit(spa, SCL_CONFIG, FTAG);
496084fd14fSBrian Behlendorf metaslab_disable(msp);
497094e47e9SGeorge Wilson mutex_enter(&msp->ms_lock);
498af1d63abSPaul Dagnelie if (!msp->ms_loaded && !msp->ms_loading)
499af1d63abSPaul Dagnelie unload_when_done = B_TRUE;
500a0b03b16SSerapheim Dimitropoulos VERIFY0(metaslab_load(msp));
501094e47e9SGeorge Wilson
502094e47e9SGeorge Wilson range_tree_walk(msp->ms_allocatable, vdev_initialize_range_add,
503094e47e9SGeorge Wilson vd);
504094e47e9SGeorge Wilson mutex_exit(&msp->ms_lock);
505094e47e9SGeorge Wilson
506094e47e9SGeorge Wilson error = vdev_initialize_ranges(vd, deadbeef);
507af1d63abSPaul Dagnelie metaslab_enable(msp, B_TRUE, unload_when_done);
508094e47e9SGeorge Wilson spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
509094e47e9SGeorge Wilson
510094e47e9SGeorge Wilson range_tree_vacate(vd->vdev_initialize_tree, NULL, NULL);
511094e47e9SGeorge Wilson if (error != 0)
512094e47e9SGeorge Wilson break;
513094e47e9SGeorge Wilson }
514094e47e9SGeorge Wilson
515094e47e9SGeorge Wilson spa_config_exit(spa, SCL_CONFIG, FTAG);
516094e47e9SGeorge Wilson mutex_enter(&vd->vdev_initialize_io_lock);
517094e47e9SGeorge Wilson while (vd->vdev_initialize_inflight > 0) {
518094e47e9SGeorge Wilson cv_wait(&vd->vdev_initialize_io_cv,
519094e47e9SGeorge Wilson &vd->vdev_initialize_io_lock);
520094e47e9SGeorge Wilson }
521094e47e9SGeorge Wilson mutex_exit(&vd->vdev_initialize_io_lock);
522094e47e9SGeorge Wilson
523094e47e9SGeorge Wilson range_tree_destroy(vd->vdev_initialize_tree);
524094e47e9SGeorge Wilson vdev_initialize_block_free(deadbeef);
525094e47e9SGeorge Wilson vd->vdev_initialize_tree = NULL;
526094e47e9SGeorge Wilson
527094e47e9SGeorge Wilson mutex_enter(&vd->vdev_initialize_lock);
528094e47e9SGeorge Wilson if (!vd->vdev_initialize_exit_wanted && vdev_writeable(vd)) {
529094e47e9SGeorge Wilson vdev_initialize_change_state(vd, VDEV_INITIALIZE_COMPLETE);
530094e47e9SGeorge Wilson }
531094e47e9SGeorge Wilson ASSERT(vd->vdev_initialize_thread != NULL ||
532094e47e9SGeorge Wilson vd->vdev_initialize_inflight == 0);
533094e47e9SGeorge Wilson
534094e47e9SGeorge Wilson /*
535094e47e9SGeorge Wilson * Drop the vdev_initialize_lock while we sync out the
536094e47e9SGeorge Wilson * txg since it's possible that a device might be trying to
537094e47e9SGeorge Wilson * come online and must check to see if it needs to restart an
538094e47e9SGeorge Wilson * initialization. That thread will be holding the spa_config_lock
539094e47e9SGeorge Wilson * which would prevent the txg_wait_synced from completing.
540094e47e9SGeorge Wilson */
541094e47e9SGeorge Wilson mutex_exit(&vd->vdev_initialize_lock);
542094e47e9SGeorge Wilson txg_wait_synced(spa_get_dsl(spa), 0);
543094e47e9SGeorge Wilson mutex_enter(&vd->vdev_initialize_lock);
544094e47e9SGeorge Wilson
545094e47e9SGeorge Wilson vd->vdev_initialize_thread = NULL;
546094e47e9SGeorge Wilson cv_broadcast(&vd->vdev_initialize_cv);
547094e47e9SGeorge Wilson mutex_exit(&vd->vdev_initialize_lock);
548094e47e9SGeorge Wilson }
549094e47e9SGeorge Wilson
550094e47e9SGeorge Wilson /*
551094e47e9SGeorge Wilson * Initiates a device. Caller must hold vdev_initialize_lock.
552094e47e9SGeorge Wilson * Device must be a leaf and not already be initializing.
553094e47e9SGeorge Wilson */
554094e47e9SGeorge Wilson void
vdev_initialize(vdev_t * vd)555094e47e9SGeorge Wilson vdev_initialize(vdev_t *vd)
556094e47e9SGeorge Wilson {
557094e47e9SGeorge Wilson ASSERT(MUTEX_HELD(&vd->vdev_initialize_lock));
558094e47e9SGeorge Wilson ASSERT(vd->vdev_ops->vdev_op_leaf);
559094e47e9SGeorge Wilson ASSERT(vdev_is_concrete(vd));
560094e47e9SGeorge Wilson ASSERT3P(vd->vdev_initialize_thread, ==, NULL);
561094e47e9SGeorge Wilson ASSERT(!vd->vdev_detached);
562094e47e9SGeorge Wilson ASSERT(!vd->vdev_initialize_exit_wanted);
563094e47e9SGeorge Wilson ASSERT(!vd->vdev_top->vdev_removing);
564094e47e9SGeorge Wilson
565094e47e9SGeorge Wilson vdev_initialize_change_state(vd, VDEV_INITIALIZE_ACTIVE);
566094e47e9SGeorge Wilson vd->vdev_initialize_thread = thread_create(NULL, 0,
567094e47e9SGeorge Wilson vdev_initialize_thread, vd, 0, &p0, TS_RUN, maxclsyspri);
568094e47e9SGeorge Wilson }
569094e47e9SGeorge Wilson
570094e47e9SGeorge Wilson /*
571084fd14fSBrian Behlendorf * Wait for the initialize thread to be terminated (cancelled or stopped).
572084fd14fSBrian Behlendorf */
573084fd14fSBrian Behlendorf static void
vdev_initialize_stop_wait_impl(vdev_t * vd)574084fd14fSBrian Behlendorf vdev_initialize_stop_wait_impl(vdev_t *vd)
575084fd14fSBrian Behlendorf {
576084fd14fSBrian Behlendorf ASSERT(MUTEX_HELD(&vd->vdev_initialize_lock));
577084fd14fSBrian Behlendorf
578084fd14fSBrian Behlendorf while (vd->vdev_initialize_thread != NULL)
579084fd14fSBrian Behlendorf cv_wait(&vd->vdev_initialize_cv, &vd->vdev_initialize_lock);
580084fd14fSBrian Behlendorf
581084fd14fSBrian Behlendorf ASSERT3P(vd->vdev_initialize_thread, ==, NULL);
582084fd14fSBrian Behlendorf vd->vdev_initialize_exit_wanted = B_FALSE;
583084fd14fSBrian Behlendorf }
584084fd14fSBrian Behlendorf
585084fd14fSBrian Behlendorf /*
 * Wait for the vdev initialize threads which were listed to cleanly exit.
587094e47e9SGeorge Wilson */
588094e47e9SGeorge Wilson void
vdev_initialize_stop_wait(spa_t * spa,list_t * vd_list)589084fd14fSBrian Behlendorf vdev_initialize_stop_wait(spa_t *spa, list_t *vd_list)
590094e47e9SGeorge Wilson {
591084fd14fSBrian Behlendorf vdev_t *vd;
592094e47e9SGeorge Wilson
593084fd14fSBrian Behlendorf ASSERT(MUTEX_HELD(&spa_namespace_lock));
594084fd14fSBrian Behlendorf
595084fd14fSBrian Behlendorf while ((vd = list_remove_head(vd_list)) != NULL) {
596084fd14fSBrian Behlendorf mutex_enter(&vd->vdev_initialize_lock);
597084fd14fSBrian Behlendorf vdev_initialize_stop_wait_impl(vd);
598084fd14fSBrian Behlendorf mutex_exit(&vd->vdev_initialize_lock);
599084fd14fSBrian Behlendorf }
600084fd14fSBrian Behlendorf }
601084fd14fSBrian Behlendorf
602084fd14fSBrian Behlendorf /*
603084fd14fSBrian Behlendorf * Stop initializing a device, with the resultant initializing state being
604084fd14fSBrian Behlendorf * tgt_state. For blocking behavior pass NULL for vd_list. Otherwise, when
605084fd14fSBrian Behlendorf * a list_t is provided the stopping vdev is inserted in to the list. Callers
606084fd14fSBrian Behlendorf * are then required to call vdev_initialize_stop_wait() to block for all the
607084fd14fSBrian Behlendorf * initialization threads to exit. The caller must hold vdev_initialize_lock
608084fd14fSBrian Behlendorf * and must not be writing to the spa config, as the initializing thread may
609084fd14fSBrian Behlendorf * try to enter the config as a reader before exiting.
610084fd14fSBrian Behlendorf */
void
vdev_initialize_stop(vdev_t *vd, vdev_initializing_state_t tgt_state,
    list_t *vd_list)
{
	/*
	 * The initialize thread may take the config lock as reader on its
	 * way out; holding it as writer here could deadlock.
	 */
	ASSERT(!spa_config_held(vd->vdev_spa, SCL_CONFIG|SCL_STATE, RW_WRITER));
	ASSERT(MUTEX_HELD(&vd->vdev_initialize_lock));
	ASSERT(vd->vdev_ops->vdev_op_leaf);
	ASSERT(vdev_is_concrete(vd));

	/*
	 * Allow cancel requests to proceed even if the initialize thread
	 * has stopped.
	 */
	if (vd->vdev_initialize_thread == NULL &&
	    tgt_state != VDEV_INITIALIZE_CANCELED) {
		return;
	}

	/* Persist the target state, then ask the worker thread to exit. */
	vdev_initialize_change_state(vd, tgt_state);
	vd->vdev_initialize_exit_wanted = B_TRUE;

	/*
	 * Blocking mode (vd_list == NULL): wait here for the thread to
	 * exit.  Batched mode: queue the vdev; the caller must later call
	 * vdev_initialize_stop_wait() to reap all queued threads.
	 */
	if (vd_list == NULL) {
		vdev_initialize_stop_wait_impl(vd);
	} else {
		ASSERT(MUTEX_HELD(&spa_namespace_lock));
		list_insert_tail(vd_list, vd);
	}
}
639094e47e9SGeorge Wilson
640094e47e9SGeorge Wilson static void
vdev_initialize_stop_all_impl(vdev_t * vd,vdev_initializing_state_t tgt_state,list_t * vd_list)641084fd14fSBrian Behlendorf vdev_initialize_stop_all_impl(vdev_t *vd, vdev_initializing_state_t tgt_state,
642084fd14fSBrian Behlendorf list_t *vd_list)
643094e47e9SGeorge Wilson {
644094e47e9SGeorge Wilson if (vd->vdev_ops->vdev_op_leaf && vdev_is_concrete(vd)) {
645094e47e9SGeorge Wilson mutex_enter(&vd->vdev_initialize_lock);
646084fd14fSBrian Behlendorf vdev_initialize_stop(vd, tgt_state, vd_list);
647094e47e9SGeorge Wilson mutex_exit(&vd->vdev_initialize_lock);
648094e47e9SGeorge Wilson return;
649094e47e9SGeorge Wilson }
650094e47e9SGeorge Wilson
651094e47e9SGeorge Wilson for (uint64_t i = 0; i < vd->vdev_children; i++) {
652084fd14fSBrian Behlendorf vdev_initialize_stop_all_impl(vd->vdev_child[i], tgt_state,
653084fd14fSBrian Behlendorf vd_list);
654094e47e9SGeorge Wilson }
655094e47e9SGeorge Wilson }
656094e47e9SGeorge Wilson
657094e47e9SGeorge Wilson /*
658094e47e9SGeorge Wilson * Convenience function to stop initializing of a vdev tree and set all
659094e47e9SGeorge Wilson * initialize thread pointers to NULL.
660094e47e9SGeorge Wilson */
661094e47e9SGeorge Wilson void
vdev_initialize_stop_all(vdev_t * vd,vdev_initializing_state_t tgt_state)662094e47e9SGeorge Wilson vdev_initialize_stop_all(vdev_t *vd, vdev_initializing_state_t tgt_state)
663094e47e9SGeorge Wilson {
664084fd14fSBrian Behlendorf spa_t *spa = vd->vdev_spa;
665084fd14fSBrian Behlendorf list_t vd_list;
666084fd14fSBrian Behlendorf
667084fd14fSBrian Behlendorf ASSERT(MUTEX_HELD(&spa_namespace_lock));
668084fd14fSBrian Behlendorf
669084fd14fSBrian Behlendorf list_create(&vd_list, sizeof (vdev_t),
670084fd14fSBrian Behlendorf offsetof(vdev_t, vdev_initialize_node));
671084fd14fSBrian Behlendorf
672084fd14fSBrian Behlendorf vdev_initialize_stop_all_impl(vd, tgt_state, &vd_list);
673084fd14fSBrian Behlendorf vdev_initialize_stop_wait(spa, &vd_list);
674094e47e9SGeorge Wilson
675094e47e9SGeorge Wilson if (vd->vdev_spa->spa_sync_on) {
676094e47e9SGeorge Wilson /* Make sure that our state has been synced to disk */
677094e47e9SGeorge Wilson txg_wait_synced(spa_get_dsl(vd->vdev_spa), 0);
678094e47e9SGeorge Wilson }
679084fd14fSBrian Behlendorf
680084fd14fSBrian Behlendorf list_destroy(&vd_list);
681094e47e9SGeorge Wilson }
682094e47e9SGeorge Wilson
683094e47e9SGeorge Wilson void
vdev_initialize_restart(vdev_t * vd)684094e47e9SGeorge Wilson vdev_initialize_restart(vdev_t *vd)
685094e47e9SGeorge Wilson {
686094e47e9SGeorge Wilson ASSERT(MUTEX_HELD(&spa_namespace_lock));
687094e47e9SGeorge Wilson ASSERT(!spa_config_held(vd->vdev_spa, SCL_ALL, RW_WRITER));
688094e47e9SGeorge Wilson
689094e47e9SGeorge Wilson if (vd->vdev_leaf_zap != 0) {
690094e47e9SGeorge Wilson mutex_enter(&vd->vdev_initialize_lock);
691094e47e9SGeorge Wilson uint64_t initialize_state = VDEV_INITIALIZE_NONE;
692094e47e9SGeorge Wilson int err = zap_lookup(vd->vdev_spa->spa_meta_objset,
693094e47e9SGeorge Wilson vd->vdev_leaf_zap, VDEV_LEAF_ZAP_INITIALIZE_STATE,
694094e47e9SGeorge Wilson sizeof (initialize_state), 1, &initialize_state);
695094e47e9SGeorge Wilson ASSERT(err == 0 || err == ENOENT);
696094e47e9SGeorge Wilson vd->vdev_initialize_state = initialize_state;
697094e47e9SGeorge Wilson
698094e47e9SGeorge Wilson uint64_t timestamp = 0;
699094e47e9SGeorge Wilson err = zap_lookup(vd->vdev_spa->spa_meta_objset,
700094e47e9SGeorge Wilson vd->vdev_leaf_zap, VDEV_LEAF_ZAP_INITIALIZE_ACTION_TIME,
701094e47e9SGeorge Wilson sizeof (timestamp), 1, ×tamp);
702094e47e9SGeorge Wilson ASSERT(err == 0 || err == ENOENT);
703094e47e9SGeorge Wilson vd->vdev_initialize_action_time = (time_t)timestamp;
704094e47e9SGeorge Wilson
705094e47e9SGeorge Wilson if (vd->vdev_initialize_state == VDEV_INITIALIZE_SUSPENDED ||
706094e47e9SGeorge Wilson vd->vdev_offline) {
707094e47e9SGeorge Wilson /* load progress for reporting, but don't resume */
708084fd14fSBrian Behlendorf VERIFY0(vdev_initialize_load(vd));
709094e47e9SGeorge Wilson } else if (vd->vdev_initialize_state ==
710084fd14fSBrian Behlendorf VDEV_INITIALIZE_ACTIVE && vdev_writeable(vd) &&
711084fd14fSBrian Behlendorf !vd->vdev_top->vdev_removing &&
712084fd14fSBrian Behlendorf vd->vdev_initialize_thread == NULL) {
713094e47e9SGeorge Wilson vdev_initialize(vd);
714094e47e9SGeorge Wilson }
715094e47e9SGeorge Wilson
716094e47e9SGeorge Wilson mutex_exit(&vd->vdev_initialize_lock);
717094e47e9SGeorge Wilson }
718094e47e9SGeorge Wilson
719094e47e9SGeorge Wilson for (uint64_t i = 0; i < vd->vdev_children; i++) {
720094e47e9SGeorge Wilson vdev_initialize_restart(vd->vdev_child[i]);
721094e47e9SGeorge Wilson }
722094e47e9SGeorge Wilson }
723