/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * This file contains all the routines used when modifying on-disk SPA state.
 * This includes opening, importing, destroying, exporting a pool, and syncing
 * a pool.
 */

#include <sys/zfs_context.h>
#include <sys/fm/fs/zfs.h>
#include <sys/spa_impl.h>
#include <sys/zio.h>
#include <sys/zio_checksum.h>
#include <sys/zio_compress.h>
#include <sys/dmu.h>
#include <sys/dmu_tx.h>
#include <sys/zap.h>
#include <sys/zil.h>
#include <sys/vdev_impl.h>
#include <sys/metaslab.h>
#include <sys/uberblock_impl.h>
#include <sys/txg.h>
#include <sys/avl.h>
#include <sys/dmu_traverse.h>
#include <sys/unique.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_prop.h>
#include <sys/fs/zfs.h>
#include <sys/callb.h>

int zio_taskq_threads = 8;

/*
 * ==========================================================================
 * SPA state manipulation (open/create/destroy/import/export)
 * ==========================================================================
 */

static int
spa_error_entry_compare(const void *a, const void *b)
{
	spa_error_entry_t *sa = (spa_error_entry_t *)a;
	spa_error_entry_t *sb = (spa_error_entry_t *)b;
	int ret;

	ret = bcmp(&sa->se_bookmark, &sb->se_bookmark,
	    sizeof (zbookmark_t));

	if (ret < 0)
		return (-1);
	else if (ret > 0)
		return (1);
	else
		return (0);
}
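/*
 * A note on the comparator above: avl_create() only requires a consistent
 * total order returning -1, 0, or 1; here it comes from a byte-wise
 * comparison of the bookmark (assuming bcmp() returns memcmp()-style
 * signs), so the ordering carries no semantic meaning.  The trees are used
 * to deduplicate error bookmarks, roughly like this hypothetical lookup
 * (the real consumers live in spa_errlog.c):
 *
 *	spa_error_entry_t search, *found;
 *	search.se_bookmark = *zb;
 *	found = avl_find(&spa->spa_errlist_last, &search, NULL);
 */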
/*
 * Utility function which retrieves copies of the current logs and
 * re-initializes them in the process.
 */
void
spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub)
{
	ASSERT(MUTEX_HELD(&spa->spa_errlist_lock));

	bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t));
	bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t));

	avl_create(&spa->spa_errlist_scrub,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
	avl_create(&spa->spa_errlist_last,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
}

/*
 * Activate an uninitialized pool.
 */
static void
spa_activate(spa_t *spa)
{
	int t;

	ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);

	spa->spa_state = POOL_STATE_ACTIVE;

	spa->spa_normal_class = metaslab_class_create();

	for (t = 0; t < ZIO_TYPES; t++) {
		spa->spa_zio_issue_taskq[t] = taskq_create("spa_zio_issue",
		    zio_taskq_threads, maxclsyspri, 50, INT_MAX,
		    TASKQ_PREPOPULATE);
		spa->spa_zio_intr_taskq[t] = taskq_create("spa_zio_intr",
		    zio_taskq_threads, maxclsyspri, 50, INT_MAX,
		    TASKQ_PREPOPULATE);
	}

	rw_init(&spa->spa_traverse_lock, NULL, RW_DEFAULT, NULL);

	mutex_init(&spa->spa_async_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spa->spa_config_cache_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spa->spa_scrub_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spa->spa_errlog_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spa->spa_errlist_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spa->spa_config_lock.scl_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spa->spa_sync_bplist.bpl_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spa->spa_history_lock, NULL, MUTEX_DEFAULT, NULL);

	list_create(&spa->spa_dirty_list, sizeof (vdev_t),
	    offsetof(vdev_t, vdev_dirty_node));

	txg_list_create(&spa->spa_vdev_txg_list,
	    offsetof(struct vdev, vdev_txg_node));

	avl_create(&spa->spa_errlist_scrub,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
	avl_create(&spa->spa_errlist_last,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
}
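/*
 * Illustrative sketch: the per-type issue/intr task queues created above
 * are consumed by the ZIO pipeline (zio.c), which hands work off roughly
 * like this (zio_worker is a hypothetical stage function, not a real
 * symbol):
 *
 *	(void) taskq_dispatch(spa->spa_zio_issue_taskq[zio->io_type],
 *	    (task_func_t *)zio_worker, zio, TQ_SLEEP);
 *
 * Separate queues per ZIO type keep, e.g., reads from starving writes.
 */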
/*
 * Opposite of spa_activate().
 */
static void
spa_deactivate(spa_t *spa)
{
	int t;

	ASSERT(spa->spa_sync_on == B_FALSE);
	ASSERT(spa->spa_dsl_pool == NULL);
	ASSERT(spa->spa_root_vdev == NULL);

	ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED);

	txg_list_destroy(&spa->spa_vdev_txg_list);

	list_destroy(&spa->spa_dirty_list);

	rw_destroy(&spa->spa_traverse_lock);

	for (t = 0; t < ZIO_TYPES; t++) {
		taskq_destroy(spa->spa_zio_issue_taskq[t]);
		taskq_destroy(spa->spa_zio_intr_taskq[t]);
		spa->spa_zio_issue_taskq[t] = NULL;
		spa->spa_zio_intr_taskq[t] = NULL;
	}

	metaslab_class_destroy(spa->spa_normal_class);
	spa->spa_normal_class = NULL;

	/*
	 * If this was part of an import or the open otherwise failed, we may
	 * still have errors left in the queues.  Empty them just in case.
	 */
	spa_errlog_drain(spa);

	avl_destroy(&spa->spa_errlist_scrub);
	avl_destroy(&spa->spa_errlist_last);

	spa->spa_state = POOL_STATE_UNINITIALIZED;
}

/*
 * Verify a pool configuration, and construct the vdev tree appropriately. This
 * will create all the necessary vdevs in the appropriate layout, with each vdev
 * in the CLOSED state.  This will prep the pool before open/creation/import.
 * All vdev validation is done by the vdev_alloc() routine.
 */
static int
spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent,
    uint_t id, int atype)
{
	nvlist_t **child;
	uint_t c, children;
	int error;

	if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0)
		return (error);

	if ((*vdp)->vdev_ops->vdev_op_leaf)
		return (0);

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0) {
		vdev_free(*vdp);
		*vdp = NULL;
		return (EINVAL);
	}

	for (c = 0; c < children; c++) {
		vdev_t *vd;
		if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c,
		    atype)) != 0) {
			vdev_free(*vdp);
			*vdp = NULL;
			return (error);
		}
	}

	ASSERT(*vdp != NULL);

	return (0);
}
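/*
 * For reference, the nvlist parsed above has roughly this shape (a sketch;
 * the device path is hypothetical):
 *
 *	ZPOOL_CONFIG_TYPE	"root"
 *	ZPOOL_CONFIG_CHILDREN	[
 *		{ ZPOOL_CONFIG_TYPE	"mirror",
 *		  ZPOOL_CONFIG_CHILDREN	[
 *			{ ZPOOL_CONFIG_TYPE	"disk",
 *			  ZPOOL_CONFIG_PATH	"/dev/dsk/c1t0d0s0" },
 *			...
 *		  ] },
 *		...
 *	]
 *
 * spa_config_parse() recurses over ZPOOL_CONFIG_CHILDREN, allocating one
 * vdev_t per nvlist node.
 */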
/*
 * Opposite of spa_load().
 */
static void
spa_unload(spa_t *spa)
{
	int i;

	/*
	 * Stop async tasks.
	 */
	spa_async_suspend(spa);

	/*
	 * Stop syncing.
	 */
	if (spa->spa_sync_on) {
		txg_sync_stop(spa->spa_dsl_pool);
		spa->spa_sync_on = B_FALSE;
	}

	/*
	 * Wait for any outstanding prefetch I/O to complete.
	 */
	spa_config_enter(spa, RW_WRITER, FTAG);
	spa_config_exit(spa, FTAG);

	/*
	 * Close the dsl pool.
	 */
	if (spa->spa_dsl_pool) {
		dsl_pool_close(spa->spa_dsl_pool);
		spa->spa_dsl_pool = NULL;
	}

	/*
	 * Close all vdevs.
	 */
	if (spa->spa_root_vdev)
		vdev_free(spa->spa_root_vdev);
	ASSERT(spa->spa_root_vdev == NULL);

	for (i = 0; i < spa->spa_nspares; i++)
		vdev_free(spa->spa_spares[i]);
	if (spa->spa_spares) {
		kmem_free(spa->spa_spares, spa->spa_nspares * sizeof (void *));
		spa->spa_spares = NULL;
	}
	if (spa->spa_sparelist) {
		nvlist_free(spa->spa_sparelist);
		spa->spa_sparelist = NULL;
	}

	spa->spa_async_suspended = 0;
}
/*
 * Load (or re-load) the current list of vdevs describing the active spares for
 * this pool.  When this is called, we have some form of basic information in
 * 'spa_sparelist'.  We parse this into vdevs, try to open them, and then
 * re-generate a more complete list including status information.
 */
static void
spa_load_spares(spa_t *spa)
{
	nvlist_t **spares;
	uint_t nspares;
	int i;

	/*
	 * First, close and free any existing spare vdevs.
	 */
	for (i = 0; i < spa->spa_nspares; i++) {
		vdev_close(spa->spa_spares[i]);
		vdev_free(spa->spa_spares[i]);
	}
	if (spa->spa_spares)
		kmem_free(spa->spa_spares, spa->spa_nspares * sizeof (void *));

	if (spa->spa_sparelist == NULL)
		nspares = 0;
	else
		VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist,
		    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);

	spa->spa_nspares = (int)nspares;
	spa->spa_spares = NULL;

	if (nspares == 0)
		return;

	/*
	 * Construct the array of vdevs, opening them to get status in the
	 * process.
	 */
	spa->spa_spares = kmem_alloc(nspares * sizeof (void *), KM_SLEEP);
	for (i = 0; i < spa->spa_nspares; i++) {
		vdev_t *vd;

		VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0,
		    VDEV_ALLOC_SPARE) == 0);
		ASSERT(vd != NULL);

		spa->spa_spares[i] = vd;

		if (vdev_open(vd) != 0)
			continue;

		vd->vdev_top = vd;
		(void) vdev_validate_spare(vd);
	}

	/*
	 * Recompute the stashed list of spares, with status information
	 * this time.
	 */
	VERIFY(nvlist_remove(spa->spa_sparelist, ZPOOL_CONFIG_SPARES,
	    DATA_TYPE_NVLIST_ARRAY) == 0);

	spares = kmem_alloc(spa->spa_nspares * sizeof (void *), KM_SLEEP);
	for (i = 0; i < spa->spa_nspares; i++)
		spares[i] = vdev_config_generate(spa, spa->spa_spares[i],
		    B_TRUE, B_TRUE);
	VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist, ZPOOL_CONFIG_SPARES,
	    spares, spa->spa_nspares) == 0);
	for (i = 0; i < spa->spa_nspares; i++)
		nvlist_free(spares[i]);
	kmem_free(spares, spa->spa_nspares * sizeof (void *));
}

static int
load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value)
{
	dmu_buf_t *db;
	char *packed = NULL;
	size_t nvsize = 0;
	int error;
	*value = NULL;

	VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
	nvsize = *(uint64_t *)db->db_data;
	dmu_buf_rele(db, FTAG);

	packed = kmem_alloc(nvsize, KM_SLEEP);
	error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed);
	if (error == 0)
		error = nvlist_unpack(packed, nvsize, value, 0);
	kmem_free(packed, nvsize);

	return (error);
}
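/*
 * On-disk layout consumed by load_nvlist() (as read above):
 *
 *	bonus buffer:	uint64_t	size of the packed nvlist
 *	object data:	byte[size]	nvlist packed by nvlist_pack()
 *
 * The write side, which produces this layout, runs in the sync path when
 * the config or spare list is dirtied.
 */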
/*
 * Load an existing storage pool, using the pool's builtin spa_config as a
 * source of configuration information.
 */
static int
spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
{
	int error = 0;
	nvlist_t *nvroot = NULL;
	vdev_t *rvd;
	uberblock_t *ub = &spa->spa_uberblock;
	uint64_t config_cache_txg = spa->spa_config_txg;
	uint64_t pool_guid;
	uint64_t version;
	zio_t *zio;

	spa->spa_load_state = state;

	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) ||
	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) {
		error = EINVAL;
		goto out;
	}

	/*
	 * Versioning wasn't explicitly added to the label until later, so if
	 * it's not present treat it as the initial version.
	 */
	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) != 0)
		version = ZFS_VERSION_INITIAL;

	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
	    &spa->spa_config_txg);

	if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) &&
	    spa_guid_exists(pool_guid, 0)) {
		error = EEXIST;
		goto out;
	}

	spa->spa_load_guid = pool_guid;

	/*
	 * Parse the configuration into a vdev tree.  We explicitly set the
	 * value that will be returned by spa_version() since parsing the
	 * configuration requires knowing the version number.
	 */
	spa_config_enter(spa, RW_WRITER, FTAG);
	spa->spa_ubsync.ub_version = version;
	error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_LOAD);
	spa_config_exit(spa, FTAG);

	if (error != 0)
		goto out;

	ASSERT(spa->spa_root_vdev == rvd);
	ASSERT(spa_guid(spa) == pool_guid);

	/*
	 * Try to open all vdevs, loading each label in the process.
	 */
	if (vdev_open(rvd) != 0) {
		error = ENXIO;
		goto out;
	}

	/*
	 * Validate the labels for all leaf vdevs.  We need to grab the config
	 * lock because all label I/O is done with the ZIO_FLAG_CONFIG_HELD
	 * flag.
	 */
	spa_config_enter(spa, RW_READER, FTAG);
	error = vdev_validate(rvd);
	spa_config_exit(spa, FTAG);

	if (error != 0) {
		error = EBADF;
		goto out;
	}

	if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) {
		error = ENXIO;
		goto out;
	}

	/*
	 * Find the best uberblock.
	 */
	bzero(ub, sizeof (uberblock_t));

	zio = zio_root(spa, NULL, NULL,
	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE);
	vdev_uberblock_load(zio, rvd, ub);
	error = zio_wait(zio);

	/*
	 * If we weren't able to find a single valid uberblock, return failure.
	 */
	if (ub->ub_txg == 0) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = ENXIO;
		goto out;
	}

	/*
	 * If the pool is newer than the code, we can't open it.
	 */
	if (ub->ub_version > ZFS_VERSION) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_VERSION_NEWER);
		error = ENOTSUP;
		goto out;
	}
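	/*
	 * "Best" above means the uberblock with the highest txg found in
	 * any label, ties broken by timestamp (see vdev_uberblock_load()
	 * in vdev_label.c); a zeroed txg therefore means no label carried
	 * a valid uberblock at all.
	 */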
	/*
	 * If the vdev guid sum doesn't match the uberblock, we have an
	 * incomplete configuration.
	 */
	if (rvd->vdev_guid_sum != ub->ub_guid_sum && mosconfig) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_BAD_GUID_SUM);
		error = ENXIO;
		goto out;
	}

	/*
	 * Initialize internal SPA structures.
	 */
	spa->spa_state = POOL_STATE_ACTIVE;
	spa->spa_ubsync = spa->spa_uberblock;
	spa->spa_first_txg = spa_last_synced_txg(spa) + 1;
	error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool);
	if (error) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		goto out;
	}
	spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset;

	if (zap_lookup(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG,
	    sizeof (uint64_t), 1, &spa->spa_config_object) != 0) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = EIO;
		goto out;
	}

	if (!mosconfig) {
		nvlist_t *newconfig;

		if (load_nvlist(spa, spa->spa_config_object, &newconfig) != 0) {
			vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
			    VDEV_AUX_CORRUPT_DATA);
			error = EIO;
			goto out;
		}

		spa_config_set(spa, newconfig);
		spa_unload(spa);
		spa_deactivate(spa);
		spa_activate(spa);

		return (spa_load(spa, newconfig, state, B_TRUE));
	}

	if (zap_lookup(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST,
	    sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj) != 0) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = EIO;
		goto out;
	}

	/*
	 * Load the bit that tells us to use the new accounting function
	 * (raid-z deflation).  If we have an older pool, this will not
	 * be present.
	 */
	error = zap_lookup(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
	    sizeof (uint64_t), 1, &spa->spa_deflate);
	if (error != 0 && error != ENOENT) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = EIO;
		goto out;
	}
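	/*
	 * Each zap_lookup() against DMU_POOL_DIRECTORY_OBJECT above and
	 * below consults the MOS pool directory: a ZAP object at a
	 * well-known object number that maps names such as "config" and
	 * "deflate" (the DMU_POOL_* constants) to the object numbers
	 * holding the corresponding data.
	 */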
	/*
	 * Load the persistent error log.  If we have an older pool, this will
	 * not be present.
	 */
	error = zap_lookup(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_LAST,
	    sizeof (uint64_t), 1, &spa->spa_errlog_last);
	if (error != 0 && error != ENOENT) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = EIO;
		goto out;
	}

	error = zap_lookup(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_SCRUB,
	    sizeof (uint64_t), 1, &spa->spa_errlog_scrub);
	if (error != 0 && error != ENOENT) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = EIO;
		goto out;
	}

	/*
	 * Load the history object.  If we have an older pool, this
	 * will not be present.
	 */
	error = zap_lookup(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_HISTORY,
	    sizeof (uint64_t), 1, &spa->spa_history);
	if (error != 0 && error != ENOENT) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = EIO;
		goto out;
	}

	/*
	 * Load any hot spares for this pool.
	 */
	error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
	    DMU_POOL_SPARES, sizeof (uint64_t), 1, &spa->spa_spares_object);
	if (error != 0 && error != ENOENT) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = EIO;
		goto out;
	}
	if (error == 0) {
		ASSERT(spa_version(spa) >= ZFS_VERSION_SPARES);
		if (load_nvlist(spa, spa->spa_spares_object,
		    &spa->spa_sparelist) != 0) {
			vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
			    VDEV_AUX_CORRUPT_DATA);
			error = EIO;
			goto out;
		}

		spa_config_enter(spa, RW_WRITER, FTAG);
		spa_load_spares(spa);
		spa_config_exit(spa, FTAG);
	}

	/*
	 * Load the vdev state for all toplevel vdevs.
	 */
	vdev_load(rvd);

	/*
	 * Propagate the leaf DTLs we just loaded all the way up the tree.
	 */
	spa_config_enter(spa, RW_WRITER, FTAG);
	vdev_dtl_reassess(rvd, 0, 0, B_FALSE);
	spa_config_exit(spa, FTAG);
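	/*
	 * A DTL ("dirty time log") records the txgs in which a vdev was
	 * missing or failing writes; reassessing them bottom-up here lets
	 * the resilver logic decide which vdevs still need repair.
	 */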
	/*
	 * Check the state of the root vdev.  If it can't be opened, it
	 * indicates one or more toplevel vdevs are faulted.
	 */
	if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) {
		error = ENXIO;
		goto out;
	}

	if ((spa_mode & FWRITE) && state != SPA_LOAD_TRYIMPORT) {
		dmu_tx_t *tx;
		int need_update = B_FALSE;
		int c;

		/*
		 * Claim log blocks that haven't been committed yet.
		 * This must all happen in a single txg.
		 */
		tx = dmu_tx_create_assigned(spa_get_dsl(spa),
		    spa_first_txg(spa));
		(void) dmu_objset_find(spa->spa_name,
		    zil_claim, tx, DS_FIND_CHILDREN);
		dmu_tx_commit(tx);

		spa->spa_sync_on = B_TRUE;
		txg_sync_start(spa->spa_dsl_pool);

		/*
		 * Wait for all claims to sync.
		 */
		txg_wait_synced(spa->spa_dsl_pool, 0);

		/*
		 * If the config cache is stale, or we have uninitialized
		 * metaslabs (see spa_vdev_add()), then update the config.
		 */
		if (config_cache_txg != spa->spa_config_txg ||
		    state == SPA_LOAD_IMPORT)
			need_update = B_TRUE;

		for (c = 0; c < rvd->vdev_children; c++)
			if (rvd->vdev_child[c]->vdev_ms_array == 0)
				need_update = B_TRUE;

		/*
		 * Update the config cache asynchronously in case we're the
		 * root pool, in which case the config cache isn't writable yet.
		 */
		if (need_update)
			spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
	}

	error = 0;
out:
	if (error && error != EBADF)
		zfs_ereport_post(FM_EREPORT_ZFS_POOL, spa, NULL, NULL, 0, 0);
	spa->spa_load_state = SPA_LOAD_NONE;
	spa->spa_ena = 0;

	return (error);
}
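/*
 * A note on the log-claim pass above: zil_claim() runs once per dataset
 * (via dmu_objset_find()) and takes ownership of intent-log blocks written
 * before the last shutdown so they can't be freed before log replay.
 * Issuing the claims with dmu_tx_create_assigned() in spa_first_txg()
 * lands them all in the pool's first new txg, as the comment requires.
 */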
/*
 * Pool Open/Import
 *
 * The import case is identical to an open except that the configuration is
 * sent down from userland, instead of grabbed from the configuration cache.
 * For the case of an open, the pool configuration will exist in the
 * POOL_STATE_UNINITIALIZED state.
 *
 * The stats information (gen/count/ustats) is used to gather vdev statistics
 * at the same time we open the pool, without having to keep around the spa_t
 * in some ambiguous state.
 */
static int
spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t **config)
{
	spa_t *spa;
	int error;
	int loaded = B_FALSE;
	int locked = B_FALSE;

	*spapp = NULL;

	/*
	 * As disgusting as this is, we need to support recursive calls to this
	 * function because dsl_dir_open() is called during spa_load(), and ends
	 * up calling spa_open() again.  The real fix is to figure out how to
	 * avoid dsl_dir_open() calling this in the first place.
	 */
	if (mutex_owner(&spa_namespace_lock) != curthread) {
		mutex_enter(&spa_namespace_lock);
		locked = B_TRUE;
	}

	if ((spa = spa_lookup(pool)) == NULL) {
		if (locked)
			mutex_exit(&spa_namespace_lock);
		return (ENOENT);
	}
	if (spa->spa_state == POOL_STATE_UNINITIALIZED) {

		spa_activate(spa);

		error = spa_load(spa, spa->spa_config, SPA_LOAD_OPEN, B_FALSE);

		if (error == EBADF) {
			/*
			 * If vdev_validate() returns failure (indicated by
			 * EBADF), it means that one of the vdevs indicates
			 * that the pool has been exported or destroyed.  If
			 * this is the case, the config cache is out of sync and
			 * we should remove the pool from the namespace.
			 */
			zfs_post_ok(spa, NULL);
			spa_unload(spa);
			spa_deactivate(spa);
			spa_remove(spa);
			spa_config_sync();
			if (locked)
				mutex_exit(&spa_namespace_lock);
			return (ENOENT);
		}

		if (error) {
			/*
			 * We can't open the pool, but we still have useful
			 * information: the state of each vdev after the
			 * attempted vdev_open().  Return this to the user.
			 */
			if (config != NULL && spa->spa_root_vdev != NULL) {
				spa_config_enter(spa, RW_READER, FTAG);
				*config = spa_config_generate(spa, NULL, -1ULL,
				    B_TRUE);
				spa_config_exit(spa, FTAG);
			}
			spa_unload(spa);
			spa_deactivate(spa);
			spa->spa_last_open_failed = B_TRUE;
			if (locked)
				mutex_exit(&spa_namespace_lock);
			*spapp = NULL;
			return (error);
		} else {
			zfs_post_ok(spa, NULL);
			spa->spa_last_open_failed = B_FALSE;
		}

		loaded = B_TRUE;
	}

	spa_open_ref(spa, tag);
	if (locked)
		mutex_exit(&spa_namespace_lock);

	*spapp = spa;

	if (config != NULL) {
		spa_config_enter(spa, RW_READER, FTAG);
		*config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
		spa_config_exit(spa, FTAG);
	}

	/*
	 * If we just loaded the pool, resilver anything that's out of date.
	 */
	if (loaded && (spa_mode & FWRITE))
		VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0);

	return (0);
}

int
spa_open(const char *name, spa_t **spapp, void *tag)
{
	return (spa_open_common(name, spapp, tag, NULL));
}
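/*
 * Typical caller pattern for the open interfaces (a sketch):
 *
 *	spa_t *spa;
 *	int error;
 *
 *	if ((error = spa_open(name, &spa, FTAG)) != 0)
 *		return (error);
 *	...
 *	spa_close(spa, FTAG);
 *
 * The tag passed to spa_open() must match the tag later passed to
 * spa_close(); it exists to catch reference-counting bugs.
 */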
/*
 * Lookup the given spa_t, incrementing the inject count in the process,
 * preventing it from being exported or destroyed.
 */
spa_t *
spa_inject_addref(char *name)
{
	spa_t *spa;

	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(name)) == NULL) {
		mutex_exit(&spa_namespace_lock);
		return (NULL);
	}
	spa->spa_inject_ref++;
	mutex_exit(&spa_namespace_lock);

	return (spa);
}

void
spa_inject_delref(spa_t *spa)
{
	mutex_enter(&spa_namespace_lock);
	spa->spa_inject_ref--;
	mutex_exit(&spa_namespace_lock);
}
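/*
 * These two routines exist for the fault-injection framework (zinject):
 * while an injection handler holds a reference via spa_inject_addref(),
 * spa_export_common() below will refuse to tear the pool down.
 */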
static void
spa_add_spares(spa_t *spa, nvlist_t *config)
{
	nvlist_t **spares;
	uint_t i, nspares;
	nvlist_t *nvroot;
	uint64_t guid;
	vdev_stat_t *vs;
	uint_t vsc;

	if (spa->spa_nspares == 0)
		return;

	VERIFY(nvlist_lookup_nvlist(config,
	    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
	VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist,
	    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
	if (nspares != 0) {
		VERIFY(nvlist_add_nvlist_array(nvroot,
		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
		VERIFY(nvlist_lookup_nvlist_array(nvroot,
		    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);

		/*
		 * Go through and find any spares which have since been
		 * repurposed as an active spare.  If this is the case, update
		 * their status appropriately.
		 */
		for (i = 0; i < nspares; i++) {
			VERIFY(nvlist_lookup_uint64(spares[i],
			    ZPOOL_CONFIG_GUID, &guid) == 0);
			if (spa_spare_inuse(guid)) {
				VERIFY(nvlist_lookup_uint64_array(
				    spares[i], ZPOOL_CONFIG_STATS,
				    (uint64_t **)&vs, &vsc) == 0);
				vs->vs_state = VDEV_STATE_CANT_OPEN;
				vs->vs_aux = VDEV_AUX_SPARED;
			}
		}
	}
}

int
spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen)
{
	int error;
	spa_t *spa;

	*config = NULL;
	error = spa_open_common(name, &spa, FTAG, config);

	if (spa && *config != NULL) {
		VERIFY(nvlist_add_uint64(*config, ZPOOL_CONFIG_ERRCOUNT,
		    spa_get_errlog_size(spa)) == 0);

		spa_add_spares(spa, *config);
	}

	/*
	 * We want to get the alternate root even for faulted pools, so we cheat
	 * and call spa_lookup() directly.
	 */
	if (altroot) {
		if (spa == NULL) {
			mutex_enter(&spa_namespace_lock);
			spa = spa_lookup(name);
			if (spa)
				spa_altroot(spa, altroot, buflen);
			else
				altroot[0] = '\0';
			spa = NULL;
			mutex_exit(&spa_namespace_lock);
		} else {
			spa_altroot(spa, altroot, buflen);
		}
	}

	if (spa != NULL)
		spa_close(spa, FTAG);

	return (error);
}

/*
 * Validate that the 'spares' array is well formed.  We must have an array of
 * nvlists, each of which describes a valid leaf vdev.
 */
static int
spa_validate_spares(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode)
{
	nvlist_t **spares;
	uint_t i, nspares;
	vdev_t *vd;
	int error;

	/*
	 * It's acceptable to have no spares specified.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) != 0)
		return (0);

	if (nspares == 0)
		return (EINVAL);

	/*
	 * Make sure the pool is formatted with a version that supports hot
	 * spares.
	 */
	if (spa_version(spa) < ZFS_VERSION_SPARES)
		return (ENOTSUP);

	for (i = 0; i < nspares; i++) {
		if ((error = spa_config_parse(spa, &vd, spares[i], NULL, 0,
		    mode)) != 0)
			return (error);

		if (!vd->vdev_ops->vdev_op_leaf) {
			vdev_free(vd);
			return (EINVAL);
		}

		if ((error = vdev_open(vd)) != 0) {
			vdev_free(vd);
			return (error);
		}

		vd->vdev_top = vd;
		if ((error = vdev_label_spare(vd, crtxg)) != 0) {
			vdev_free(vd);
			return (error);
		}

		VERIFY(nvlist_add_uint64(spares[i], ZPOOL_CONFIG_GUID,
		    vd->vdev_guid) == 0);

		vdev_free(vd);
	}

	return (0);
}
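/*
 * Note that vdev_label_spare() above writes an on-disk label identifying
 * the device as a hot spare (see vdev_label.c), which is what later allows
 * the device to be recognized as an available spare when a pool is loaded.
 */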
/*
 * Pool Creation
 */
int
spa_create(const char *pool, nvlist_t *nvroot, const char *altroot)
{
	spa_t *spa;
	vdev_t *rvd;
	dsl_pool_t *dp;
	dmu_tx_t *tx;
	int c, error = 0;
	uint64_t txg = TXG_INITIAL;
	nvlist_t **spares;
	uint_t nspares;

	/*
	 * If this pool already exists, return failure.
	 */
	mutex_enter(&spa_namespace_lock);
	if (spa_lookup(pool) != NULL) {
		mutex_exit(&spa_namespace_lock);
		return (EEXIST);
	}

	/*
	 * Allocate a new spa_t structure.
	 */
	spa = spa_add(pool, altroot);
	spa_activate(spa);

	spa->spa_uberblock.ub_txg = txg - 1;
	spa->spa_uberblock.ub_version = ZFS_VERSION;
	spa->spa_ubsync = spa->spa_uberblock;

	/*
	 * Create the root vdev.
	 */
	spa_config_enter(spa, RW_WRITER, FTAG);

	error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD);

	ASSERT(error != 0 || rvd != NULL);
	ASSERT(error != 0 || spa->spa_root_vdev == rvd);

	if (error == 0 && rvd->vdev_children == 0)
		error = EINVAL;

	if (error == 0 &&
	    (error = vdev_create(rvd, txg, B_FALSE)) == 0 &&
	    (error = spa_validate_spares(spa, nvroot, txg,
	    VDEV_ALLOC_ADD)) == 0) {
		for (c = 0; c < rvd->vdev_children; c++)
			vdev_init(rvd->vdev_child[c], txg);
		vdev_config_dirty(rvd);
	}

	spa_config_exit(spa, FTAG);

	if (error != 0) {
		spa_unload(spa);
		spa_deactivate(spa);
		spa_remove(spa);
		mutex_exit(&spa_namespace_lock);
		return (error);
	}

	/*
	 * Get the list of spares, if specified.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) == 0) {
		VERIFY(nvlist_alloc(&spa->spa_sparelist, NV_UNIQUE_NAME,
		    KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist,
		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
		spa_config_enter(spa, RW_WRITER, FTAG);
		spa_load_spares(spa);
		spa_config_exit(spa, FTAG);
		spa->spa_sync_spares = B_TRUE;
	}

	spa->spa_dsl_pool = dp = dsl_pool_create(spa, txg);
	spa->spa_meta_objset = dp->dp_meta_objset;

	tx = dmu_tx_create_assigned(dp, txg);

	/*
	 * Create the pool config object.
	 */
	spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset,
	    DMU_OT_PACKED_NVLIST, 1 << 14,
	    DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx);

	if (zap_add(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG,
	    sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) {
		cmn_err(CE_PANIC, "failed to add pool config");
	}
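	/*
	 * The object just created is the write-side counterpart of
	 * load_nvlist(): a DMU_OT_PACKED_NVLIST object with 16K (1 << 14)
	 * data blocks whose bonus buffer (DMU_OT_PACKED_NVLIST_SIZE) holds
	 * the packed size as a single uint64_t.
	 */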
	/* Newly created pools are always deflated. */
	spa->spa_deflate = TRUE;
	if (zap_add(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
	    sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) {
		cmn_err(CE_PANIC, "failed to add deflate");
	}

	/*
	 * Create the deferred-free bplist object.  Turn off compression
	 * because sync-to-convergence takes longer if the blocksize
	 * keeps changing.
	 */
	spa->spa_sync_bplist_obj = bplist_create(spa->spa_meta_objset,
	    1 << 14, tx);
	dmu_object_set_compress(spa->spa_meta_objset, spa->spa_sync_bplist_obj,
	    ZIO_COMPRESS_OFF, tx);

	if (zap_add(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST,
	    sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj, tx) != 0) {
		cmn_err(CE_PANIC, "failed to add bplist");
	}

	/*
	 * Create the pool's history object.
	 */
	spa_history_create_obj(spa, tx);

	dmu_tx_commit(tx);

	spa->spa_sync_on = B_TRUE;
	txg_sync_start(spa->spa_dsl_pool);

	/*
	 * We explicitly wait for the first transaction to complete so that our
	 * bean counters are appropriately updated.
	 */
	txg_wait_synced(spa->spa_dsl_pool, txg);

	spa_config_sync();

	mutex_exit(&spa_namespace_lock);

	return (0);
}
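/*
 * Hypothetical caller sketch (the real caller is the zfs ioctl path, which
 * unpacks nvroot from a userland nvlist):
 *
 *	nvlist_t *nvroot = ...;		(shaped like the tree shown earlier)
 *
 *	if ((error = spa_create("tank", nvroot, NULL)) != 0)
 *		return (error);
 *
 * A NULL altroot means the pool's datasets mount at their normal paths.
 */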
/*
 * Import the given pool into the system.  We set up the necessary spa_t and
 * then call spa_load() to do the dirty work.
 */
int
spa_import(const char *pool, nvlist_t *config, const char *altroot)
{
	spa_t *spa;
	int error;
	nvlist_t *nvroot;
	nvlist_t **spares;
	uint_t nspares;

	if (!(spa_mode & FWRITE))
		return (EROFS);

	/*
	 * If a pool with this name exists, return failure.
	 */
	mutex_enter(&spa_namespace_lock);
	if (spa_lookup(pool) != NULL) {
		mutex_exit(&spa_namespace_lock);
		return (EEXIST);
	}

	/*
	 * Create and initialize the spa structure.
	 */
	spa = spa_add(pool, altroot);
	spa_activate(spa);

	/*
	 * Pass off the heavy lifting to spa_load().
	 * Pass TRUE for mosconfig because the user-supplied config
	 * is actually the one to trust when doing an import.
	 */
	error = spa_load(spa, config, SPA_LOAD_IMPORT, B_TRUE);

	spa_config_enter(spa, RW_WRITER, FTAG);
	/*
	 * Toss any existing sparelist, as it doesn't have any validity anymore,
	 * and conflicts with spa_has_spare().
	 */
	if (spa->spa_sparelist) {
		nvlist_free(spa->spa_sparelist);
		spa->spa_sparelist = NULL;
		spa_load_spares(spa);
	}

	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) == 0);
	if (error == 0)
		error = spa_validate_spares(spa, nvroot, -1ULL,
		    VDEV_ALLOC_SPARE);
	spa_config_exit(spa, FTAG);

	if (error != 0) {
		spa_unload(spa);
		spa_deactivate(spa);
		spa_remove(spa);
		mutex_exit(&spa_namespace_lock);
		return (error);
	}

	/*
	 * Override any spares as specified by the user, as these may have
	 * correct device names/devids, etc.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) == 0) {
		if (spa->spa_sparelist)
			VERIFY(nvlist_remove(spa->spa_sparelist,
			    ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0);
		else
			VERIFY(nvlist_alloc(&spa->spa_sparelist,
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist,
		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
		spa_config_enter(spa, RW_WRITER, FTAG);
		spa_load_spares(spa);
		spa_config_exit(spa, FTAG);
		spa->spa_sync_spares = B_TRUE;
	}

	/*
	 * Update the config cache to include the newly-imported pool.
	 */
	spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);

	mutex_exit(&spa_namespace_lock);

	/*
	 * Resilver anything that's out of date.
	 */
	if (spa_mode & FWRITE)
		VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0);

	return (0);
}
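/*
 * Typical import flow (a sketch): userland scans device labels to assemble
 * a candidate config, calls spa_tryimport() below to preview the pool, and
 * then calls spa_import() with the chosen config to make it live.
 */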
/*
 * This (illegal) pool name is used when temporarily importing a spa_t in order
 * to get the vdev stats associated with the imported devices.
 */
#define	TRYIMPORT_NAME	"$import"

nvlist_t *
spa_tryimport(nvlist_t *tryconfig)
{
	nvlist_t *config = NULL;
	char *poolname;
	spa_t *spa;
	uint64_t state;

	if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname))
		return (NULL);

	if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state))
		return (NULL);

	/*
	 * Create and initialize the spa structure.
	 */
	mutex_enter(&spa_namespace_lock);
	spa = spa_add(TRYIMPORT_NAME, NULL);
	spa_activate(spa);

	/*
	 * Pass off the heavy lifting to spa_load().
	 * Pass TRUE for mosconfig because the user-supplied config
	 * is actually the one to trust when doing an import.
	 */
	(void) spa_load(spa, tryconfig, SPA_LOAD_TRYIMPORT, B_TRUE);

	/*
	 * If 'tryconfig' was at least parsable, return the current config.
	 */
	if (spa->spa_root_vdev != NULL) {
		spa_config_enter(spa, RW_READER, FTAG);
		config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
		spa_config_exit(spa, FTAG);
		VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
		    poolname) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
		    state) == 0);

		/*
		 * Add the list of hot spares.
		 */
		spa_add_spares(spa, config);
	}

	spa_unload(spa);
	spa_deactivate(spa);
	spa_remove(spa);
	mutex_exit(&spa_namespace_lock);

	return (config);
}
/*
 * Pool export/destroy
 *
 * The act of destroying or exporting a pool is very simple.  We make sure there
 * is no more pending I/O and any references to the pool are gone.  Then, we
 * update the pool state and sync all the labels to disk, removing the
 * configuration from the cache afterwards.
 */
static int
spa_export_common(char *pool, int new_state, nvlist_t **oldconfig)
{
	spa_t *spa;

	if (oldconfig)
		*oldconfig = NULL;

	if (!(spa_mode & FWRITE))
		return (EROFS);

	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(pool)) == NULL) {
		mutex_exit(&spa_namespace_lock);
		return (ENOENT);
	}

	/*
	 * Put a hold on the pool, drop the namespace lock, stop async tasks,
	 * reacquire the namespace lock, and see if we can export.
	 */
	spa_open_ref(spa, FTAG);
	mutex_exit(&spa_namespace_lock);
	spa_async_suspend(spa);
	mutex_enter(&spa_namespace_lock);
	spa_close(spa, FTAG);

	/*
	 * The pool will be in core if it's openable,
	 * in which case we can modify its state.
	 */
	if (spa->spa_state != POOL_STATE_UNINITIALIZED && spa->spa_sync_on) {
		/*
		 * Objsets may be open only because they're dirty, so we
		 * have to force it to sync before checking spa_refcnt.
		 */
		spa_scrub_suspend(spa);
		txg_wait_synced(spa->spa_dsl_pool, 0);

		/*
		 * A pool cannot be exported or destroyed if there are active
		 * references.  If we are resetting a pool, allow references by
		 * fault injection handlers.
		 */
		if (!spa_refcount_zero(spa) ||
		    (spa->spa_inject_ref != 0 &&
		    new_state != POOL_STATE_UNINITIALIZED)) {
			spa_scrub_resume(spa);
			spa_async_resume(spa);
			mutex_exit(&spa_namespace_lock);
			return (EBUSY);
		}

		spa_scrub_resume(spa);
		VERIFY(spa_scrub(spa, POOL_SCRUB_NONE, B_TRUE) == 0);

		/*
		 * We want this to be reflected on every label,
		 * so mark them all dirty.  spa_unload() will do the
		 * final sync that pushes these changes out.
		 */
		if (new_state != POOL_STATE_UNINITIALIZED) {
			spa_config_enter(spa, RW_WRITER, FTAG);
			spa->spa_state = new_state;
			spa->spa_final_txg = spa_last_synced_txg(spa) + 1;
			vdev_config_dirty(spa->spa_root_vdev);
			spa_config_exit(spa, FTAG);
		}
	}

	if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
		spa_unload(spa);
		spa_deactivate(spa);
	}

	if (oldconfig && spa->spa_config)
		VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0);

	if (new_state != POOL_STATE_UNINITIALIZED) {
		spa_remove(spa);
		spa_config_sync();
	}
	mutex_exit(&spa_namespace_lock);

	return (0);
}

/*
 * Destroy a storage pool.
 */
int
spa_destroy(char *pool)
{
	return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL));
}

/*
 * Export a storage pool.
 */
int
spa_export(char *pool, nvlist_t **oldconfig)
{
	return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig));
}

/*
 * Similar to spa_export(), this unloads the spa_t without actually removing it
 * from the namespace in any way.
 */
int
spa_reset(char *pool)
{
	return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL));
}
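/*
 * The three wrappers above differ only in the final state: spa_destroy()
 * marks the labels destroyed, spa_export() marks them exported (so the
 * pool can be imported elsewhere), and spa_reset() unloads the pool while
 * leaving it in the namespace.
 */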
/*
 * ==========================================================================
 * Device manipulation
 * ==========================================================================
 */

/*
 * Add capacity to a storage pool.
 */
int
spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
{
	uint64_t txg;
	int c, error;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *vd, *tvd;
	nvlist_t **spares;
	uint_t i, nspares;

	txg = spa_vdev_enter(spa);

	if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0,
	    VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, NULL, txg, error));

	if ((error = spa_validate_spares(spa, nvroot, txg,
	    VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, vd, txg, error));

	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) != 0)
		nspares = 0;

	if (vd->vdev_children == 0 && nspares == 0)
		return (spa_vdev_exit(spa, vd, txg, EINVAL));

	if (vd->vdev_children != 0) {
		if ((error = vdev_create(vd, txg, B_FALSE)) != 0)
			return (spa_vdev_exit(spa, vd, txg, error));

		/*
		 * Transfer each new top-level vdev from vd to rvd.
		 */
		for (c = 0; c < vd->vdev_children; c++) {
			tvd = vd->vdev_child[c];
			vdev_remove_child(vd, tvd);
			tvd->vdev_id = rvd->vdev_children;
			vdev_add_child(rvd, tvd);
			vdev_config_dirty(tvd);
		}
	}

	if (nspares != 0) {
		if (spa->spa_sparelist != NULL) {
			nvlist_t **oldspares;
			uint_t oldnspares;
			nvlist_t **newspares;

			VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist,
			    ZPOOL_CONFIG_SPARES, &oldspares, &oldnspares) == 0);

			newspares = kmem_alloc(sizeof (void *) *
			    (nspares + oldnspares), KM_SLEEP);
			for (i = 0; i < oldnspares; i++)
				VERIFY(nvlist_dup(oldspares[i],
				    &newspares[i], KM_SLEEP) == 0);
			for (i = 0; i < nspares; i++)
				VERIFY(nvlist_dup(spares[i],
				    &newspares[i + oldnspares],
				    KM_SLEEP) == 0);

			VERIFY(nvlist_remove(spa->spa_sparelist,
			    ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0);

			VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist,
			    ZPOOL_CONFIG_SPARES, newspares,
			    nspares + oldnspares) == 0);
			for (i = 0; i < oldnspares + nspares; i++)
				nvlist_free(newspares[i]);
			kmem_free(newspares, (oldnspares + nspares) *
			    sizeof (void *));
		} else {
			VERIFY(nvlist_alloc(&spa->spa_sparelist,
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
			VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist,
			    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
		}

		spa_load_spares(spa);
		spa->spa_sync_spares = B_TRUE;
	}
1525789Sahrens 	/*
15261585Sbonwick 	 * We have to be careful when adding new vdevs to an existing pool.
15271585Sbonwick 	 * If other threads start allocating from these vdevs before we
15281585Sbonwick 	 * sync the config cache, and we lose power, then upon reboot we may
15291585Sbonwick 	 * fail to open the pool because there are DVAs that the config cache
15301585Sbonwick 	 * can't translate. Therefore, we first add the vdevs without
15311585Sbonwick 	 * initializing metaslabs; sync the config cache (via spa_vdev_exit());
15321635Sbonwick 	 * and then let spa_config_update() initialize the new metaslabs.
15331585Sbonwick 	 *
15341585Sbonwick 	 * spa_load() checks for added-but-not-initialized vdevs, so that
15351585Sbonwick 	 * if we lose power at any point in this sequence, the remaining
15361585Sbonwick 	 * steps will be completed the next time we load the pool.
1537789Sahrens 	 */
15381635Sbonwick 	(void) spa_vdev_exit(spa, vd, txg, 0);
15391585Sbonwick 
15401635Sbonwick 	mutex_enter(&spa_namespace_lock);
15411635Sbonwick 	spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
15421635Sbonwick 	mutex_exit(&spa_namespace_lock);
1543789Sahrens 
15441635Sbonwick 	return (0);
1545789Sahrens }
1546789Sahrens 
1547789Sahrens /*
1548789Sahrens  * Attach a device to a mirror. The arguments are the path to any device
1549789Sahrens  * in the mirror, and the nvroot for the new device. If the path specifies
1550789Sahrens  * a device that is not mirrored, we automatically insert the mirror vdev.
1551789Sahrens  *
1552789Sahrens  * If 'replacing' is specified, the new device is intended to replace the
1553789Sahrens  * existing device; in this case the two devices are made into their own
1554789Sahrens  * mirror using the 'replacing' vdev, which is functionally identical to
1555789Sahrens  * the mirror vdev (it actually reuses all the same ops) but has a few
1556789Sahrens  * extra rules: you can't attach to it after it's been created, and upon
1557789Sahrens  * completion of resilvering, the first disk (the one being replaced)
1558789Sahrens  * is automatically detached.
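 *
 * Illustrative sketch (added for exposition; the shapes follow from the
 * rules above): attaching N to a plain disk D inserts a mirror, while
 * replacing D with N inserts a 'replacing' vdev that collapses once the
 * resilver finishes:
 *
 *	attach:		D  =>  mirror(D, N)
 *	replace:	D  =>  replacing(D, N)  =>  N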
1559789Sahrens */ 1560789Sahrens int 15611544Seschrock spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) 1562789Sahrens { 1563789Sahrens uint64_t txg, open_txg; 1564789Sahrens int error; 1565789Sahrens vdev_t *rvd = spa->spa_root_vdev; 1566789Sahrens vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd; 15672082Seschrock vdev_ops_t *pvops; 1568789Sahrens 1569789Sahrens txg = spa_vdev_enter(spa); 1570789Sahrens 15711544Seschrock oldvd = vdev_lookup_by_guid(rvd, guid); 1572789Sahrens 1573789Sahrens if (oldvd == NULL) 1574789Sahrens return (spa_vdev_exit(spa, NULL, txg, ENODEV)); 1575789Sahrens 15761585Sbonwick if (!oldvd->vdev_ops->vdev_op_leaf) 15771585Sbonwick return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 15781585Sbonwick 1579789Sahrens pvd = oldvd->vdev_parent; 1580789Sahrens 15812082Seschrock if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0, 15822082Seschrock VDEV_ALLOC_ADD)) != 0 || newrootvd->vdev_children != 1) 1583789Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); 1584789Sahrens 1585789Sahrens newvd = newrootvd->vdev_child[0]; 1586789Sahrens 1587789Sahrens if (!newvd->vdev_ops->vdev_op_leaf) 1588789Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); 1589789Sahrens 15902082Seschrock if ((error = vdev_create(newrootvd, txg, replacing)) != 0) 1591789Sahrens return (spa_vdev_exit(spa, newrootvd, txg, error)); 1592789Sahrens 15932082Seschrock if (!replacing) { 15942082Seschrock /* 15952082Seschrock * For attach, the only allowable parent is a mirror or the root 15962082Seschrock * vdev. 15972082Seschrock */ 15982082Seschrock if (pvd->vdev_ops != &vdev_mirror_ops && 15992082Seschrock pvd->vdev_ops != &vdev_root_ops) 16002082Seschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 16012082Seschrock 16022082Seschrock pvops = &vdev_mirror_ops; 16032082Seschrock } else { 16042082Seschrock /* 16052082Seschrock * Active hot spares can only be replaced by inactive hot 16062082Seschrock * spares. 16072082Seschrock */ 16082082Seschrock if (pvd->vdev_ops == &vdev_spare_ops && 16092082Seschrock pvd->vdev_child[1] == oldvd && 16102082Seschrock !spa_has_spare(spa, newvd->vdev_guid)) 16112082Seschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 16122082Seschrock 16132082Seschrock /* 16142082Seschrock * If the source is a hot spare, and the parent isn't already a 16152082Seschrock * spare, then we want to create a new hot spare. Otherwise, we 16162082Seschrock * want to create a replacing vdev. 16172082Seschrock */ 16182082Seschrock if (pvd->vdev_ops == &vdev_replacing_ops) 16192082Seschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 16202082Seschrock else if (pvd->vdev_ops != &vdev_spare_ops && 16212082Seschrock newvd->vdev_isspare) 16222082Seschrock pvops = &vdev_spare_ops; 16232082Seschrock else 16242082Seschrock pvops = &vdev_replacing_ops; 16252082Seschrock } 16262082Seschrock 16271175Slling /* 16281175Slling * Compare the new device size with the replaceable/attachable 16291175Slling * device size. 16301175Slling */ 16311175Slling if (newvd->vdev_psize < vdev_get_rsize(oldvd)) 1632789Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW)); 1633789Sahrens 16341732Sbonwick /* 16351732Sbonwick * The new device cannot have a higher alignment requirement 16361732Sbonwick * than the top-level vdev. 
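 *
 * For example (illustrative values): a device with 4K sectors
 * (ashift 12) cannot be attached beneath a top-level vdev built from
 * 512-byte-sector disks (ashift 9), since existing allocations could
 * no longer be aligned; the check below fails such an attach with EDOM.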
16371732Sbonwick */ 16381732Sbonwick if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift) 1639789Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EDOM)); 1640789Sahrens 1641789Sahrens /* 1642789Sahrens * If this is an in-place replacement, update oldvd's path and devid 1643789Sahrens * to make it distinguishable from newvd, and unopenable from now on. 1644789Sahrens */ 1645789Sahrens if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) { 1646789Sahrens spa_strfree(oldvd->vdev_path); 1647789Sahrens oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5, 1648789Sahrens KM_SLEEP); 1649789Sahrens (void) sprintf(oldvd->vdev_path, "%s/%s", 1650789Sahrens newvd->vdev_path, "old"); 1651789Sahrens if (oldvd->vdev_devid != NULL) { 1652789Sahrens spa_strfree(oldvd->vdev_devid); 1653789Sahrens oldvd->vdev_devid = NULL; 1654789Sahrens } 1655789Sahrens } 1656789Sahrens 1657789Sahrens /* 16582082Seschrock * If the parent is not a mirror, or if we're replacing, insert the new 16592082Seschrock * mirror/replacing/spare vdev above oldvd. 1660789Sahrens */ 1661789Sahrens if (pvd->vdev_ops != pvops) 1662789Sahrens pvd = vdev_add_parent(oldvd, pvops); 1663789Sahrens 1664789Sahrens ASSERT(pvd->vdev_top->vdev_parent == rvd); 1665789Sahrens ASSERT(pvd->vdev_ops == pvops); 1666789Sahrens ASSERT(oldvd->vdev_parent == pvd); 1667789Sahrens 1668789Sahrens /* 1669789Sahrens * Extract the new device from its root and add it to pvd. 1670789Sahrens */ 1671789Sahrens vdev_remove_child(newrootvd, newvd); 1672789Sahrens newvd->vdev_id = pvd->vdev_children; 1673789Sahrens vdev_add_child(pvd, newvd); 1674789Sahrens 16751544Seschrock /* 16761544Seschrock * If newvd is smaller than oldvd, but larger than its rsize, 16771544Seschrock * the addition of newvd may have decreased our parent's asize. 16781544Seschrock */ 16791544Seschrock pvd->vdev_asize = MIN(pvd->vdev_asize, newvd->vdev_asize); 16801544Seschrock 1681789Sahrens tvd = newvd->vdev_top; 1682789Sahrens ASSERT(pvd->vdev_top == tvd); 1683789Sahrens ASSERT(tvd->vdev_parent == rvd); 1684789Sahrens 1685789Sahrens vdev_config_dirty(tvd); 1686789Sahrens 1687789Sahrens /* 1688789Sahrens * Set newvd's DTL to [TXG_INITIAL, open_txg]. It will propagate 1689789Sahrens * upward when spa_vdev_exit() calls vdev_dtl_reassess(). 1690789Sahrens */ 1691789Sahrens open_txg = txg + TXG_CONCURRENT_STATES - 1; 1692789Sahrens 1693789Sahrens mutex_enter(&newvd->vdev_dtl_lock); 1694789Sahrens space_map_add(&newvd->vdev_dtl_map, TXG_INITIAL, 1695789Sahrens open_txg - TXG_INITIAL + 1); 1696789Sahrens mutex_exit(&newvd->vdev_dtl_lock); 1697789Sahrens 16981544Seschrock dprintf("attached %s in txg %llu\n", newvd->vdev_path, txg); 16991544Seschrock 1700789Sahrens /* 1701789Sahrens * Mark newvd's DTL dirty in this txg. 1702789Sahrens */ 17031732Sbonwick vdev_dirty(tvd, VDD_DTL, newvd, txg); 1704789Sahrens 1705789Sahrens (void) spa_vdev_exit(spa, newrootvd, open_txg, 0); 1706789Sahrens 1707789Sahrens /* 1708789Sahrens * Kick off a resilver to update newvd. 1709789Sahrens */ 1710789Sahrens VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0); 1711789Sahrens 1712789Sahrens return (0); 1713789Sahrens } 1714789Sahrens 1715789Sahrens /* 1716789Sahrens * Detach a device from a mirror or replacing vdev. 1717789Sahrens * If 'replace_done' is specified, only detach if the parent 1718789Sahrens * is a replacing vdev. 
1719789Sahrens */ 1720789Sahrens int 17211544Seschrock spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done) 1722789Sahrens { 1723789Sahrens uint64_t txg; 1724789Sahrens int c, t, error; 1725789Sahrens vdev_t *rvd = spa->spa_root_vdev; 1726789Sahrens vdev_t *vd, *pvd, *cvd, *tvd; 17272082Seschrock boolean_t unspare = B_FALSE; 17282082Seschrock uint64_t unspare_guid; 1729789Sahrens 1730789Sahrens txg = spa_vdev_enter(spa); 1731789Sahrens 17321544Seschrock vd = vdev_lookup_by_guid(rvd, guid); 1733789Sahrens 1734789Sahrens if (vd == NULL) 1735789Sahrens return (spa_vdev_exit(spa, NULL, txg, ENODEV)); 1736789Sahrens 17371585Sbonwick if (!vd->vdev_ops->vdev_op_leaf) 17381585Sbonwick return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 17391585Sbonwick 1740789Sahrens pvd = vd->vdev_parent; 1741789Sahrens 1742789Sahrens /* 1743789Sahrens * If replace_done is specified, only remove this device if it's 17442082Seschrock * the first child of a replacing vdev. For the 'spare' vdev, either 17452082Seschrock * disk can be removed. 1746789Sahrens */ 17472082Seschrock if (replace_done) { 17482082Seschrock if (pvd->vdev_ops == &vdev_replacing_ops) { 17492082Seschrock if (vd->vdev_id != 0) 17502082Seschrock return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 17512082Seschrock } else if (pvd->vdev_ops != &vdev_spare_ops) { 17522082Seschrock return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 17532082Seschrock } 17542082Seschrock } 17552082Seschrock 17562082Seschrock ASSERT(pvd->vdev_ops != &vdev_spare_ops || 17572082Seschrock spa_version(spa) >= ZFS_VERSION_SPARES); 1758789Sahrens 1759789Sahrens /* 17602082Seschrock * Only mirror, replacing, and spare vdevs support detach. 1761789Sahrens */ 1762789Sahrens if (pvd->vdev_ops != &vdev_replacing_ops && 17632082Seschrock pvd->vdev_ops != &vdev_mirror_ops && 17642082Seschrock pvd->vdev_ops != &vdev_spare_ops) 1765789Sahrens return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 1766789Sahrens 1767789Sahrens /* 1768789Sahrens * If there's only one replica, you can't detach it. 1769789Sahrens */ 1770789Sahrens if (pvd->vdev_children <= 1) 1771789Sahrens return (spa_vdev_exit(spa, NULL, txg, EBUSY)); 1772789Sahrens 1773789Sahrens /* 1774789Sahrens * If all siblings have non-empty DTLs, this device may have the only 1775789Sahrens * valid copy of the data, which means we cannot safely detach it. 1776789Sahrens * 1777789Sahrens * XXX -- as in the vdev_offline() case, we really want a more 1778789Sahrens * precise DTL check. 1779789Sahrens */ 1780789Sahrens for (c = 0; c < pvd->vdev_children; c++) { 1781789Sahrens uint64_t dirty; 1782789Sahrens 1783789Sahrens cvd = pvd->vdev_child[c]; 1784789Sahrens if (cvd == vd) 1785789Sahrens continue; 1786789Sahrens if (vdev_is_dead(cvd)) 1787789Sahrens continue; 1788789Sahrens mutex_enter(&cvd->vdev_dtl_lock); 1789789Sahrens dirty = cvd->vdev_dtl_map.sm_space | 1790789Sahrens cvd->vdev_dtl_scrub.sm_space; 1791789Sahrens mutex_exit(&cvd->vdev_dtl_lock); 1792789Sahrens if (!dirty) 1793789Sahrens break; 1794789Sahrens } 17952082Seschrock 17962082Seschrock /* 17972082Seschrock * If we are a replacing or spare vdev, then we can always detach the 17982082Seschrock * latter child, as that is how one cancels the operation. 
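 *
 * Worked example (exposition only): in replacing(old, new), detaching
 * child 1 (new) cancels the replacement, so it bypasses the DTL check
 * below; detaching child 0 (old) is refused with EBUSY whenever every
 * live sibling still has a non-empty DTL, since 'old' may then hold
 * the only valid copy of some blocks.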
17992082Seschrock */ 18002082Seschrock if ((pvd->vdev_ops == &vdev_mirror_ops || vd->vdev_id != 1) && 18012082Seschrock c == pvd->vdev_children) 1802789Sahrens return (spa_vdev_exit(spa, NULL, txg, EBUSY)); 1803789Sahrens 1804789Sahrens /* 18052082Seschrock * If we are detaching the original disk from a spare, then it implies 18062082Seschrock * that the spare should become a real disk, and be removed from the 18072082Seschrock * active spare list for the pool. 18082082Seschrock */ 18092082Seschrock if (pvd->vdev_ops == &vdev_spare_ops && 18102082Seschrock vd->vdev_id == 0) 18112082Seschrock unspare = B_TRUE; 18122082Seschrock 18132082Seschrock /* 1814789Sahrens * Erase the disk labels so the disk can be used for other things. 1815789Sahrens * This must be done after all other error cases are handled, 1816789Sahrens * but before we disembowel vd (so we can still do I/O to it). 1817789Sahrens * But if we can't do it, don't treat the error as fatal -- 1818789Sahrens * it may be that the unwritability of the disk is the reason 1819789Sahrens * it's being detached! 1820789Sahrens */ 18212082Seschrock error = vdev_label_init(vd, 0, B_FALSE); 1822789Sahrens if (error) 1823789Sahrens dprintf("unable to erase labels on %s\n", vdev_description(vd)); 1824789Sahrens 1825789Sahrens /* 1826789Sahrens * Remove vd from its parent and compact the parent's children. 1827789Sahrens */ 1828789Sahrens vdev_remove_child(pvd, vd); 1829789Sahrens vdev_compact_children(pvd); 1830789Sahrens 1831789Sahrens /* 1832789Sahrens * Remember one of the remaining children so we can get tvd below. 1833789Sahrens */ 1834789Sahrens cvd = pvd->vdev_child[0]; 1835789Sahrens 1836789Sahrens /* 18372082Seschrock * If we need to remove the remaining child from the list of hot spares, 18382082Seschrock * do it now, marking the vdev as no longer a spare in the process. We 18392082Seschrock * must do this before vdev_remove_parent(), because that can change the 18402082Seschrock * GUID if it creates a new toplevel GUID. 18412082Seschrock */ 18422082Seschrock if (unspare) { 18432082Seschrock ASSERT(cvd->vdev_isspare); 18442082Seschrock spa_spare_remove(cvd->vdev_guid); 18452082Seschrock cvd->vdev_isspare = B_FALSE; 18462082Seschrock unspare_guid = cvd->vdev_guid; 18472082Seschrock } 18482082Seschrock 18492082Seschrock /* 1850789Sahrens * If the parent mirror/replacing vdev only has one child, 1851789Sahrens * the parent is no longer needed. Remove it from the tree. 1852789Sahrens */ 1853789Sahrens if (pvd->vdev_children == 1) 1854789Sahrens vdev_remove_parent(cvd); 1855789Sahrens 1856789Sahrens /* 1857789Sahrens * We don't set tvd until now because the parent we just removed 1858789Sahrens * may have been the previous top-level vdev. 1859789Sahrens */ 1860789Sahrens tvd = cvd->vdev_top; 1861789Sahrens ASSERT(tvd->vdev_parent == rvd); 1862789Sahrens 1863789Sahrens /* 1864789Sahrens * Reopen this top-level vdev to reassess health after detach. 1865789Sahrens */ 18661544Seschrock vdev_reopen(tvd); 1867789Sahrens 1868789Sahrens /* 1869789Sahrens * If the device we just detached was smaller than the others, 18701732Sbonwick * it may be possible to add metaslabs (i.e. grow the pool). 18711732Sbonwick * vdev_metaslab_init() can't fail because the existing metaslabs 18721732Sbonwick * are already in core, so there's nothing to read from disk. 
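 *
 * Illustrative numbers (an assumption for exposition): detaching the
 * 100GB half of a mirror whose other half is 200GB raises the usable
 * size of the top-level vdev, and the call below simply appends
 * metaslabs that cover the newly usable range.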
1873789Sahrens */ 18741732Sbonwick VERIFY(vdev_metaslab_init(tvd, txg) == 0); 1875789Sahrens 1876789Sahrens vdev_config_dirty(tvd); 1877789Sahrens 1878789Sahrens /* 1879789Sahrens * Mark vd's DTL as dirty in this txg. 1880789Sahrens * vdev_dtl_sync() will see that vd->vdev_detached is set 1881789Sahrens * and free vd's DTL object in syncing context. 1882789Sahrens * But first make sure we're not on any *other* txg's DTL list, 1883789Sahrens * to prevent vd from being accessed after it's freed. 1884789Sahrens */ 1885789Sahrens for (t = 0; t < TXG_SIZE; t++) 1886789Sahrens (void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t); 18871732Sbonwick vd->vdev_detached = B_TRUE; 18881732Sbonwick vdev_dirty(tvd, VDD_DTL, vd, txg); 1889789Sahrens 18901544Seschrock dprintf("detached %s in txg %llu\n", vd->vdev_path, txg); 1891789Sahrens 18922082Seschrock error = spa_vdev_exit(spa, vd, txg, 0); 18932082Seschrock 18942082Seschrock /* 18952082Seschrock * If we are supposed to remove the given vdev from the list of spares, 18962082Seschrock * iterate over all pools in the system and replace it if it's present. 18972082Seschrock */ 18982082Seschrock if (unspare) { 18992082Seschrock spa = NULL; 19002082Seschrock mutex_enter(&spa_namespace_lock); 19012082Seschrock while ((spa = spa_next(spa)) != NULL) { 19022082Seschrock if (spa->spa_state != POOL_STATE_ACTIVE) 19032082Seschrock continue; 19042082Seschrock 19052082Seschrock (void) spa_vdev_remove(spa, unspare_guid, B_TRUE); 19062082Seschrock } 19072082Seschrock mutex_exit(&spa_namespace_lock); 19082082Seschrock } 19092082Seschrock 19102082Seschrock return (error); 19112082Seschrock } 19122082Seschrock 19132082Seschrock /* 19142082Seschrock * Remove a device from the pool. Currently, this supports removing only hot 19152082Seschrock * spares. 19162082Seschrock */ 19172082Seschrock int 19182082Seschrock spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) 19192082Seschrock { 19202082Seschrock vdev_t *vd; 19212082Seschrock nvlist_t **spares, *nv, **newspares; 19222082Seschrock uint_t i, j, nspares; 19232082Seschrock int ret = 0; 19242082Seschrock 19252082Seschrock spa_config_enter(spa, RW_WRITER, FTAG); 19262082Seschrock 19272082Seschrock vd = spa_lookup_by_guid(spa, guid); 19282082Seschrock 19292082Seschrock nv = NULL; 19302082Seschrock if (spa->spa_spares != NULL && 19312082Seschrock nvlist_lookup_nvlist_array(spa->spa_sparelist, ZPOOL_CONFIG_SPARES, 19322082Seschrock &spares, &nspares) == 0) { 19332082Seschrock for (i = 0; i < nspares; i++) { 19342082Seschrock uint64_t theguid; 19352082Seschrock 19362082Seschrock VERIFY(nvlist_lookup_uint64(spares[i], 19372082Seschrock ZPOOL_CONFIG_GUID, &theguid) == 0); 19382082Seschrock if (theguid == guid) { 19392082Seschrock nv = spares[i]; 19402082Seschrock break; 19412082Seschrock } 19422082Seschrock } 19432082Seschrock } 19442082Seschrock 19452082Seschrock /* 19462082Seschrock * We only support removing a hot spare, and only if it's not currently 19472082Seschrock * in use in this pool. 
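 *
 * The case analysis below, tabulated (nv: listed in the spare config;
 * vd: present as a vdev in this pool):
 *
 *	nv == NULL, vd == NULL:	ENOENT	(no such guid at all)
 *	nv == NULL, vd != NULL:	ENOTSUP	(an ordinary vdev, not a spare)
 *	nv != NULL, vd != NULL:	EBUSY	(spare in use, unless 'unspare')
 *	nv != NULL, vd == NULL:	0	(inactive spare; remove it)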
19482082Seschrock */ 19492082Seschrock if (nv == NULL && vd == NULL) { 19502082Seschrock ret = ENOENT; 19512082Seschrock goto out; 19522082Seschrock } 19532082Seschrock 19542082Seschrock if (nv == NULL && vd != NULL) { 19552082Seschrock ret = ENOTSUP; 19562082Seschrock goto out; 19572082Seschrock } 19582082Seschrock 19592082Seschrock if (!unspare && nv != NULL && vd != NULL) { 19602082Seschrock ret = EBUSY; 19612082Seschrock goto out; 19622082Seschrock } 19632082Seschrock 19642082Seschrock if (nspares == 1) { 19652082Seschrock newspares = NULL; 19662082Seschrock } else { 19672082Seschrock newspares = kmem_alloc((nspares - 1) * sizeof (void *), 19682082Seschrock KM_SLEEP); 19692082Seschrock for (i = 0, j = 0; i < nspares; i++) { 19702082Seschrock if (spares[i] != nv) 19712082Seschrock VERIFY(nvlist_dup(spares[i], 19722082Seschrock &newspares[j++], KM_SLEEP) == 0); 19732082Seschrock } 19742082Seschrock } 19752082Seschrock 19762082Seschrock VERIFY(nvlist_remove(spa->spa_sparelist, ZPOOL_CONFIG_SPARES, 19772082Seschrock DATA_TYPE_NVLIST_ARRAY) == 0); 19782082Seschrock VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist, ZPOOL_CONFIG_SPARES, 19792082Seschrock newspares, nspares - 1) == 0); 19802082Seschrock for (i = 0; i < nspares - 1; i++) 19812082Seschrock nvlist_free(newspares[i]); 19822082Seschrock kmem_free(newspares, (nspares - 1) * sizeof (void *)); 19832082Seschrock spa_load_spares(spa); 19842082Seschrock spa->spa_sync_spares = B_TRUE; 19852082Seschrock 19862082Seschrock out: 19872082Seschrock spa_config_exit(spa, FTAG); 19882082Seschrock 19892082Seschrock return (ret); 1990789Sahrens } 1991789Sahrens 1992789Sahrens /* 19931544Seschrock * Find any device that's done replacing, so we can detach it. 1994789Sahrens */ 19951544Seschrock static vdev_t * 19961544Seschrock spa_vdev_replace_done_hunt(vdev_t *vd) 1997789Sahrens { 19981544Seschrock vdev_t *newvd, *oldvd; 1999789Sahrens int c; 2000789Sahrens 20011544Seschrock for (c = 0; c < vd->vdev_children; c++) { 20021544Seschrock oldvd = spa_vdev_replace_done_hunt(vd->vdev_child[c]); 20031544Seschrock if (oldvd != NULL) 20041544Seschrock return (oldvd); 20051544Seschrock } 2006789Sahrens 2007789Sahrens if (vd->vdev_ops == &vdev_replacing_ops && vd->vdev_children == 2) { 20081544Seschrock oldvd = vd->vdev_child[0]; 20091544Seschrock newvd = vd->vdev_child[1]; 2010789Sahrens 20111544Seschrock mutex_enter(&newvd->vdev_dtl_lock); 20121544Seschrock if (newvd->vdev_dtl_map.sm_space == 0 && 20131544Seschrock newvd->vdev_dtl_scrub.sm_space == 0) { 20141544Seschrock mutex_exit(&newvd->vdev_dtl_lock); 20151544Seschrock return (oldvd); 20161544Seschrock } 20171544Seschrock mutex_exit(&newvd->vdev_dtl_lock); 20181544Seschrock } 2019789Sahrens 20201544Seschrock return (NULL); 2021789Sahrens } 2022789Sahrens 20231544Seschrock static void 2024789Sahrens spa_vdev_replace_done(spa_t *spa) 2025789Sahrens { 20261544Seschrock vdev_t *vd; 20272082Seschrock vdev_t *pvd; 20281544Seschrock uint64_t guid; 20292082Seschrock uint64_t pguid = 0; 2030789Sahrens 20311544Seschrock spa_config_enter(spa, RW_READER, FTAG); 2032789Sahrens 20331544Seschrock while ((vd = spa_vdev_replace_done_hunt(spa->spa_root_vdev)) != NULL) { 20341544Seschrock guid = vd->vdev_guid; 20352082Seschrock /* 20362082Seschrock * If we have just finished replacing a hot spared device, then 20372082Seschrock * we need to detach the parent's first child (the original hot 20382082Seschrock * spare) as well. 
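 *
 * Illustrative tree (exposition only): after a hot spare has kicked in
 * and the failed disk is then replaced, the layout is
 *
 *	          spare
 *	         /     \
 *	   replacing    sparedisk	<- pguid, detached second
 *	    /     \
 *	oldvd     newvd			<- guid is oldvd's, detached first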
20392082Seschrock */ 20402082Seschrock pvd = vd->vdev_parent; 20412082Seschrock if (pvd->vdev_parent->vdev_ops == &vdev_spare_ops && 20422082Seschrock pvd->vdev_id == 0) { 20432082Seschrock ASSERT(pvd->vdev_ops == &vdev_replacing_ops); 20442082Seschrock ASSERT(pvd->vdev_parent->vdev_children == 2); 20452082Seschrock pguid = pvd->vdev_parent->vdev_child[1]->vdev_guid; 20462082Seschrock } 20471544Seschrock spa_config_exit(spa, FTAG); 20481544Seschrock if (spa_vdev_detach(spa, guid, B_TRUE) != 0) 20491544Seschrock return; 20502082Seschrock if (pguid != 0 && spa_vdev_detach(spa, pguid, B_TRUE) != 0) 20512082Seschrock return; 20521544Seschrock spa_config_enter(spa, RW_READER, FTAG); 2053789Sahrens } 2054789Sahrens 20551544Seschrock spa_config_exit(spa, FTAG); 2056789Sahrens } 2057789Sahrens 2058789Sahrens /* 20591354Seschrock * Update the stored path for this vdev. Dirty the vdev configuration, relying 20601354Seschrock * on spa_vdev_enter/exit() to synchronize the labels and cache. 20611354Seschrock */ 20621354Seschrock int 20631354Seschrock spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath) 20641354Seschrock { 20651354Seschrock vdev_t *rvd, *vd; 20661354Seschrock uint64_t txg; 20671354Seschrock 20681354Seschrock rvd = spa->spa_root_vdev; 20691354Seschrock 20701354Seschrock txg = spa_vdev_enter(spa); 20711354Seschrock 20722082Seschrock if ((vd = vdev_lookup_by_guid(rvd, guid)) == NULL) { 20732082Seschrock /* 20742082Seschrock * Determine if this is a reference to a hot spare. In that 20752082Seschrock * case, update the path as stored in the spare list. 20762082Seschrock */ 20772082Seschrock nvlist_t **spares; 20782082Seschrock uint_t i, nspares; 20792082Seschrock if (spa->spa_sparelist != NULL) { 20802082Seschrock VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist, 20812082Seschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 20822082Seschrock for (i = 0; i < nspares; i++) { 20832082Seschrock uint64_t theguid; 20842082Seschrock VERIFY(nvlist_lookup_uint64(spares[i], 20852082Seschrock ZPOOL_CONFIG_GUID, &theguid) == 0); 20862082Seschrock if (theguid == guid) 20872082Seschrock break; 20882082Seschrock } 20892082Seschrock 20902082Seschrock if (i == nspares) 20912082Seschrock return (spa_vdev_exit(spa, NULL, txg, ENOENT)); 20922082Seschrock 20932082Seschrock VERIFY(nvlist_add_string(spares[i], 20942082Seschrock ZPOOL_CONFIG_PATH, newpath) == 0); 20952082Seschrock spa_load_spares(spa); 20962082Seschrock spa->spa_sync_spares = B_TRUE; 20972082Seschrock return (spa_vdev_exit(spa, NULL, txg, 0)); 20982082Seschrock } else { 20992082Seschrock return (spa_vdev_exit(spa, NULL, txg, ENOENT)); 21002082Seschrock } 21012082Seschrock } 21021354Seschrock 21031585Sbonwick if (!vd->vdev_ops->vdev_op_leaf) 21041585Sbonwick return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 21051585Sbonwick 21061354Seschrock spa_strfree(vd->vdev_path); 21071354Seschrock vd->vdev_path = spa_strdup(newpath); 21081354Seschrock 21091354Seschrock vdev_config_dirty(vd->vdev_top); 21101354Seschrock 21111354Seschrock return (spa_vdev_exit(spa, NULL, txg, 0)); 21121354Seschrock } 21131354Seschrock 21141354Seschrock /* 2115789Sahrens * ========================================================================== 2116789Sahrens * SPA Scrubbing 2117789Sahrens * ========================================================================== 2118789Sahrens */ 2119789Sahrens 21201544Seschrock void 21211544Seschrock spa_scrub_throttle(spa_t *spa, int direction) 21221544Seschrock { 21231544Seschrock mutex_enter(&spa->spa_scrub_lock); 
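	/*
	 * Descriptive note (added): 'direction' is +1 when the I/O
	 * pipeline asks the scrub to back off and -1 when the pressure
	 * is released; spa_scrub_thread() waits on spa_scrub_io_cv
	 * until the count drains back to zero.
	 */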
21241544Seschrock spa->spa_scrub_throttled += direction; 21251544Seschrock ASSERT(spa->spa_scrub_throttled >= 0); 21261544Seschrock if (spa->spa_scrub_throttled == 0) 21271544Seschrock cv_broadcast(&spa->spa_scrub_io_cv); 21281544Seschrock mutex_exit(&spa->spa_scrub_lock); 21291544Seschrock } 2130789Sahrens 2131789Sahrens static void 2132789Sahrens spa_scrub_io_done(zio_t *zio) 2133789Sahrens { 2134789Sahrens spa_t *spa = zio->io_spa; 2135789Sahrens 2136*3290Sjohansen zio_data_buf_free(zio->io_data, zio->io_size); 2137789Sahrens 2138789Sahrens mutex_enter(&spa->spa_scrub_lock); 21391544Seschrock if (zio->io_error && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { 21401775Sbillm vdev_t *vd = zio->io_vd ? zio->io_vd : spa->spa_root_vdev; 2141789Sahrens spa->spa_scrub_errors++; 2142789Sahrens mutex_enter(&vd->vdev_stat_lock); 2143789Sahrens vd->vdev_stat.vs_scrub_errors++; 2144789Sahrens mutex_exit(&vd->vdev_stat_lock); 2145789Sahrens } 21461544Seschrock if (--spa->spa_scrub_inflight == 0) { 21471544Seschrock cv_broadcast(&spa->spa_scrub_io_cv); 21481544Seschrock ASSERT(spa->spa_scrub_throttled == 0); 21491544Seschrock } 21501544Seschrock mutex_exit(&spa->spa_scrub_lock); 2151789Sahrens } 2152789Sahrens 2153789Sahrens static void 21541544Seschrock spa_scrub_io_start(spa_t *spa, blkptr_t *bp, int priority, int flags, 21551544Seschrock zbookmark_t *zb) 2156789Sahrens { 2157789Sahrens size_t size = BP_GET_LSIZE(bp); 2158*3290Sjohansen void *data = zio_data_buf_alloc(size); 2159789Sahrens 2160789Sahrens mutex_enter(&spa->spa_scrub_lock); 2161789Sahrens spa->spa_scrub_inflight++; 2162789Sahrens mutex_exit(&spa->spa_scrub_lock); 2163789Sahrens 21641544Seschrock if (zb->zb_level == -1 && BP_GET_TYPE(bp) != DMU_OT_OBJSET) 21651544Seschrock flags |= ZIO_FLAG_SPECULATIVE; /* intent log block */ 21661544Seschrock 21671807Sbonwick flags |= ZIO_FLAG_SCRUB_THREAD | ZIO_FLAG_CANFAIL; 21681544Seschrock 2169789Sahrens zio_nowait(zio_read(NULL, spa, bp, data, size, 21701544Seschrock spa_scrub_io_done, NULL, priority, flags, zb)); 2171789Sahrens } 2172789Sahrens 2173789Sahrens /* ARGSUSED */ 2174789Sahrens static int 2175789Sahrens spa_scrub_cb(traverse_blk_cache_t *bc, spa_t *spa, void *a) 2176789Sahrens { 2177789Sahrens blkptr_t *bp = &bc->bc_blkptr; 21781775Sbillm vdev_t *vd = spa->spa_root_vdev; 21791775Sbillm dva_t *dva = bp->blk_dva; 21801775Sbillm int needs_resilver = B_FALSE; 21811775Sbillm int d; 2182789Sahrens 21831775Sbillm if (bc->bc_errno) { 2184789Sahrens /* 2185789Sahrens * We can't scrub this block, but we can continue to scrub 2186789Sahrens * the rest of the pool. Note the error and move along. 2187789Sahrens */ 2188789Sahrens mutex_enter(&spa->spa_scrub_lock); 2189789Sahrens spa->spa_scrub_errors++; 2190789Sahrens mutex_exit(&spa->spa_scrub_lock); 2191789Sahrens 21921775Sbillm mutex_enter(&vd->vdev_stat_lock); 21931775Sbillm vd->vdev_stat.vs_scrub_errors++; 21941775Sbillm mutex_exit(&vd->vdev_stat_lock); 2195789Sahrens 2196789Sahrens return (ERESTART); 2197789Sahrens } 2198789Sahrens 2199789Sahrens ASSERT(bp->blk_birth < spa->spa_scrub_maxtxg); 2200789Sahrens 22011775Sbillm for (d = 0; d < BP_GET_NDVAS(bp); d++) { 22021775Sbillm vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d])); 22031775Sbillm 22041775Sbillm ASSERT(vd != NULL); 22051775Sbillm 22061775Sbillm /* 22071775Sbillm * Keep track of how much data we've examined so that 22081775Sbillm * zpool(1M) status can make useful progress reports. 
22091775Sbillm */ 22101775Sbillm mutex_enter(&vd->vdev_stat_lock); 22111775Sbillm vd->vdev_stat.vs_scrub_examined += DVA_GET_ASIZE(&dva[d]); 22121775Sbillm mutex_exit(&vd->vdev_stat_lock); 2213789Sahrens 22141775Sbillm if (spa->spa_scrub_type == POOL_SCRUB_RESILVER) { 22151775Sbillm if (DVA_GET_GANG(&dva[d])) { 22161775Sbillm /* 22171775Sbillm * Gang members may be spread across multiple 22181775Sbillm * vdevs, so the best we can do is look at the 22191775Sbillm * pool-wide DTL. 22201775Sbillm * XXX -- it would be better to change our 22211775Sbillm * allocation policy to ensure that this can't 22221775Sbillm * happen. 22231775Sbillm */ 22241775Sbillm vd = spa->spa_root_vdev; 22251775Sbillm } 22261775Sbillm if (vdev_dtl_contains(&vd->vdev_dtl_map, 22271775Sbillm bp->blk_birth, 1)) 22281775Sbillm needs_resilver = B_TRUE; 2229789Sahrens } 22301775Sbillm } 22311775Sbillm 22321775Sbillm if (spa->spa_scrub_type == POOL_SCRUB_EVERYTHING) 2233789Sahrens spa_scrub_io_start(spa, bp, ZIO_PRIORITY_SCRUB, 22341544Seschrock ZIO_FLAG_SCRUB, &bc->bc_bookmark); 22351775Sbillm else if (needs_resilver) 22361775Sbillm spa_scrub_io_start(spa, bp, ZIO_PRIORITY_RESILVER, 22371775Sbillm ZIO_FLAG_RESILVER, &bc->bc_bookmark); 2238789Sahrens 2239789Sahrens return (0); 2240789Sahrens } 2241789Sahrens 2242789Sahrens static void 2243789Sahrens spa_scrub_thread(spa_t *spa) 2244789Sahrens { 2245789Sahrens callb_cpr_t cprinfo; 2246789Sahrens traverse_handle_t *th = spa->spa_scrub_th; 2247789Sahrens vdev_t *rvd = spa->spa_root_vdev; 2248789Sahrens pool_scrub_type_t scrub_type = spa->spa_scrub_type; 2249789Sahrens int error = 0; 2250789Sahrens boolean_t complete; 2251789Sahrens 2252789Sahrens CALLB_CPR_INIT(&cprinfo, &spa->spa_scrub_lock, callb_generic_cpr, FTAG); 2253789Sahrens 2254797Sbonwick /* 2255797Sbonwick * If we're restarting due to a snapshot create/delete, 2256797Sbonwick * wait for that to complete. 2257797Sbonwick */ 2258797Sbonwick txg_wait_synced(spa_get_dsl(spa), 0); 2259797Sbonwick 22601544Seschrock dprintf("start %s mintxg=%llu maxtxg=%llu\n", 22611544Seschrock scrub_type == POOL_SCRUB_RESILVER ? 
"resilver" : "scrub", 22621544Seschrock spa->spa_scrub_mintxg, spa->spa_scrub_maxtxg); 22631544Seschrock 22641544Seschrock spa_config_enter(spa, RW_WRITER, FTAG); 22651544Seschrock vdev_reopen(rvd); /* purge all vdev caches */ 2266789Sahrens vdev_config_dirty(rvd); /* rewrite all disk labels */ 2267789Sahrens vdev_scrub_stat_update(rvd, scrub_type, B_FALSE); 22681544Seschrock spa_config_exit(spa, FTAG); 2269789Sahrens 2270789Sahrens mutex_enter(&spa->spa_scrub_lock); 2271789Sahrens spa->spa_scrub_errors = 0; 2272789Sahrens spa->spa_scrub_active = 1; 22731544Seschrock ASSERT(spa->spa_scrub_inflight == 0); 22741544Seschrock ASSERT(spa->spa_scrub_throttled == 0); 2275789Sahrens 2276789Sahrens while (!spa->spa_scrub_stop) { 2277789Sahrens CALLB_CPR_SAFE_BEGIN(&cprinfo); 22781544Seschrock while (spa->spa_scrub_suspended) { 2279789Sahrens spa->spa_scrub_active = 0; 2280789Sahrens cv_broadcast(&spa->spa_scrub_cv); 2281789Sahrens cv_wait(&spa->spa_scrub_cv, &spa->spa_scrub_lock); 2282789Sahrens spa->spa_scrub_active = 1; 2283789Sahrens } 2284789Sahrens CALLB_CPR_SAFE_END(&cprinfo, &spa->spa_scrub_lock); 2285789Sahrens 2286789Sahrens if (spa->spa_scrub_restart_txg != 0) 2287789Sahrens break; 2288789Sahrens 2289789Sahrens mutex_exit(&spa->spa_scrub_lock); 2290789Sahrens error = traverse_more(th); 2291789Sahrens mutex_enter(&spa->spa_scrub_lock); 2292789Sahrens if (error != EAGAIN) 2293789Sahrens break; 22941544Seschrock 22951544Seschrock while (spa->spa_scrub_throttled > 0) 22961544Seschrock cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); 2297789Sahrens } 2298789Sahrens 2299789Sahrens while (spa->spa_scrub_inflight) 2300789Sahrens cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); 2301789Sahrens 23021601Sbonwick spa->spa_scrub_active = 0; 23031601Sbonwick cv_broadcast(&spa->spa_scrub_cv); 23041601Sbonwick 23051601Sbonwick mutex_exit(&spa->spa_scrub_lock); 23061601Sbonwick 23071601Sbonwick spa_config_enter(spa, RW_WRITER, FTAG); 23081601Sbonwick 23091601Sbonwick mutex_enter(&spa->spa_scrub_lock); 23101601Sbonwick 23111601Sbonwick /* 23121601Sbonwick * Note: we check spa_scrub_restart_txg under both spa_scrub_lock 23131601Sbonwick * AND the spa config lock to synchronize with any config changes 23141601Sbonwick * that revise the DTLs under spa_vdev_enter() / spa_vdev_exit(). 23151601Sbonwick */ 2316789Sahrens if (spa->spa_scrub_restart_txg != 0) 2317789Sahrens error = ERESTART; 2318789Sahrens 23191544Seschrock if (spa->spa_scrub_stop) 23201544Seschrock error = EINTR; 23211544Seschrock 2322789Sahrens /* 23231544Seschrock * Even if there were uncorrectable errors, we consider the scrub 23241544Seschrock * completed. The downside is that if there is a transient error during 23251544Seschrock * a resilver, we won't resilver the data properly to the target. But 23261544Seschrock * if the damage is permanent (more likely) we will resilver forever, 23271544Seschrock * which isn't really acceptable. Since there is enough information for 23281544Seschrock * the user to know what has failed and why, this seems like a more 23291544Seschrock * tractable approach. 2330789Sahrens */ 23311544Seschrock complete = (error == 0); 2332789Sahrens 23331544Seschrock dprintf("end %s to maxtxg=%llu %s, traverse=%d, %llu errors, stop=%u\n", 23341544Seschrock scrub_type == POOL_SCRUB_RESILVER ? "resilver" : "scrub", 2335789Sahrens spa->spa_scrub_maxtxg, complete ? 
"done" : "FAILED", 2336789Sahrens error, spa->spa_scrub_errors, spa->spa_scrub_stop); 2337789Sahrens 2338789Sahrens mutex_exit(&spa->spa_scrub_lock); 2339789Sahrens 2340789Sahrens /* 2341789Sahrens * If the scrub/resilver completed, update all DTLs to reflect this. 2342789Sahrens * Whether it succeeded or not, vacate all temporary scrub DTLs. 2343789Sahrens */ 2344789Sahrens vdev_dtl_reassess(rvd, spa_last_synced_txg(spa) + 1, 2345789Sahrens complete ? spa->spa_scrub_maxtxg : 0, B_TRUE); 2346789Sahrens vdev_scrub_stat_update(rvd, POOL_SCRUB_NONE, complete); 23471544Seschrock spa_errlog_rotate(spa); 23481601Sbonwick 23491544Seschrock spa_config_exit(spa, FTAG); 2350789Sahrens 2351789Sahrens mutex_enter(&spa->spa_scrub_lock); 2352789Sahrens 23531544Seschrock /* 23541544Seschrock * We may have finished replacing a device. 23551544Seschrock * Let the async thread assess this and handle the detach. 23561544Seschrock */ 23571544Seschrock spa_async_request(spa, SPA_ASYNC_REPLACE_DONE); 2358789Sahrens 2359789Sahrens /* 2360789Sahrens * If we were told to restart, our final act is to start a new scrub. 2361789Sahrens */ 2362789Sahrens if (error == ERESTART) 23631544Seschrock spa_async_request(spa, scrub_type == POOL_SCRUB_RESILVER ? 23641544Seschrock SPA_ASYNC_RESILVER : SPA_ASYNC_SCRUB); 2365789Sahrens 23661544Seschrock spa->spa_scrub_type = POOL_SCRUB_NONE; 23671544Seschrock spa->spa_scrub_active = 0; 23681544Seschrock spa->spa_scrub_thread = NULL; 23691544Seschrock cv_broadcast(&spa->spa_scrub_cv); 2370789Sahrens CALLB_CPR_EXIT(&cprinfo); /* drops &spa->spa_scrub_lock */ 2371789Sahrens thread_exit(); 2372789Sahrens } 2373789Sahrens 2374789Sahrens void 2375789Sahrens spa_scrub_suspend(spa_t *spa) 2376789Sahrens { 2377789Sahrens mutex_enter(&spa->spa_scrub_lock); 23781544Seschrock spa->spa_scrub_suspended++; 2379789Sahrens while (spa->spa_scrub_active) { 2380789Sahrens cv_broadcast(&spa->spa_scrub_cv); 2381789Sahrens cv_wait(&spa->spa_scrub_cv, &spa->spa_scrub_lock); 2382789Sahrens } 2383789Sahrens while (spa->spa_scrub_inflight) 2384789Sahrens cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); 2385789Sahrens mutex_exit(&spa->spa_scrub_lock); 2386789Sahrens } 2387789Sahrens 2388789Sahrens void 2389789Sahrens spa_scrub_resume(spa_t *spa) 2390789Sahrens { 2391789Sahrens mutex_enter(&spa->spa_scrub_lock); 23921544Seschrock ASSERT(spa->spa_scrub_suspended != 0); 23931544Seschrock if (--spa->spa_scrub_suspended == 0) 2394789Sahrens cv_broadcast(&spa->spa_scrub_cv); 2395789Sahrens mutex_exit(&spa->spa_scrub_lock); 2396789Sahrens } 2397789Sahrens 2398789Sahrens void 2399789Sahrens spa_scrub_restart(spa_t *spa, uint64_t txg) 2400789Sahrens { 2401789Sahrens /* 2402789Sahrens * Something happened (e.g. snapshot create/delete) that means 2403789Sahrens * we must restart any in-progress scrubs. The itinerary will 2404789Sahrens * fix this properly. 
2405789Sahrens */ 2406789Sahrens mutex_enter(&spa->spa_scrub_lock); 2407789Sahrens spa->spa_scrub_restart_txg = txg; 2408789Sahrens mutex_exit(&spa->spa_scrub_lock); 2409789Sahrens } 2410789Sahrens 24111544Seschrock int 24121544Seschrock spa_scrub(spa_t *spa, pool_scrub_type_t type, boolean_t force) 2413789Sahrens { 2414789Sahrens space_seg_t *ss; 2415789Sahrens uint64_t mintxg, maxtxg; 2416789Sahrens vdev_t *rvd = spa->spa_root_vdev; 2417789Sahrens 2418789Sahrens if ((uint_t)type >= POOL_SCRUB_TYPES) 2419789Sahrens return (ENOTSUP); 2420789Sahrens 24211544Seschrock mutex_enter(&spa->spa_scrub_lock); 24221544Seschrock 2423789Sahrens /* 2424789Sahrens * If there's a scrub or resilver already in progress, stop it. 2425789Sahrens */ 2426789Sahrens while (spa->spa_scrub_thread != NULL) { 2427789Sahrens /* 2428789Sahrens * Don't stop a resilver unless forced. 2429789Sahrens */ 24301544Seschrock if (spa->spa_scrub_type == POOL_SCRUB_RESILVER && !force) { 24311544Seschrock mutex_exit(&spa->spa_scrub_lock); 2432789Sahrens return (EBUSY); 24331544Seschrock } 2434789Sahrens spa->spa_scrub_stop = 1; 2435789Sahrens cv_broadcast(&spa->spa_scrub_cv); 2436789Sahrens cv_wait(&spa->spa_scrub_cv, &spa->spa_scrub_lock); 2437789Sahrens } 2438789Sahrens 2439789Sahrens /* 2440789Sahrens * Terminate the previous traverse. 2441789Sahrens */ 2442789Sahrens if (spa->spa_scrub_th != NULL) { 2443789Sahrens traverse_fini(spa->spa_scrub_th); 2444789Sahrens spa->spa_scrub_th = NULL; 2445789Sahrens } 2446789Sahrens 24471544Seschrock if (rvd == NULL) { 24481544Seschrock ASSERT(spa->spa_scrub_stop == 0); 24491544Seschrock ASSERT(spa->spa_scrub_type == type); 24501544Seschrock ASSERT(spa->spa_scrub_restart_txg == 0); 24511544Seschrock mutex_exit(&spa->spa_scrub_lock); 24521544Seschrock return (0); 24531544Seschrock } 2454789Sahrens 2455789Sahrens mintxg = TXG_INITIAL - 1; 2456789Sahrens maxtxg = spa_last_synced_txg(spa) + 1; 2457789Sahrens 24581544Seschrock mutex_enter(&rvd->vdev_dtl_lock); 2459789Sahrens 24601544Seschrock if (rvd->vdev_dtl_map.sm_space == 0) { 24611544Seschrock /* 24621544Seschrock * The pool-wide DTL is empty. 24631732Sbonwick * If this is a resilver, there's nothing to do except 24641732Sbonwick * check whether any in-progress replacements have completed. 24651544Seschrock */ 24661732Sbonwick if (type == POOL_SCRUB_RESILVER) { 24671544Seschrock type = POOL_SCRUB_NONE; 24681732Sbonwick spa_async_request(spa, SPA_ASYNC_REPLACE_DONE); 24691732Sbonwick } 24701544Seschrock } else { 24711544Seschrock /* 24721544Seschrock * The pool-wide DTL is non-empty. 24731544Seschrock * If this is a normal scrub, upgrade to a resilver instead. 24741544Seschrock */ 24751544Seschrock if (type == POOL_SCRUB_EVERYTHING) 24761544Seschrock type = POOL_SCRUB_RESILVER; 24771544Seschrock } 2478789Sahrens 24791544Seschrock if (type == POOL_SCRUB_RESILVER) { 2480789Sahrens /* 2481789Sahrens * Determine the resilvering boundaries. 2482789Sahrens * 2483789Sahrens * Note: (mintxg, maxtxg) is an open interval, 2484789Sahrens * i.e. mintxg and maxtxg themselves are not included. 2485789Sahrens * 2486789Sahrens * Note: for maxtxg, we MIN with spa_last_synced_txg(spa) + 1 2487789Sahrens * so we don't claim to resilver a txg that's still changing. 
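 *
 * Worked example (illustrative numbers): if the pool-wide DTL's first
 * segment starts at txg 8, its last segment ends at txg 12, and
 * spa_last_synced_txg() is 10, then mintxg = 7 and
 * maxtxg = MIN(12, 11) = 11, so we traverse the open interval (7, 11).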
2488789Sahrens */ 2489789Sahrens ss = avl_first(&rvd->vdev_dtl_map.sm_root); 24901544Seschrock mintxg = ss->ss_start - 1; 2491789Sahrens ss = avl_last(&rvd->vdev_dtl_map.sm_root); 24921544Seschrock maxtxg = MIN(ss->ss_end, maxtxg); 2493789Sahrens } 2494789Sahrens 24951544Seschrock mutex_exit(&rvd->vdev_dtl_lock); 24961544Seschrock 24971544Seschrock spa->spa_scrub_stop = 0; 24981544Seschrock spa->spa_scrub_type = type; 24991544Seschrock spa->spa_scrub_restart_txg = 0; 25001544Seschrock 25011544Seschrock if (type != POOL_SCRUB_NONE) { 25021544Seschrock spa->spa_scrub_mintxg = mintxg; 2503789Sahrens spa->spa_scrub_maxtxg = maxtxg; 2504789Sahrens spa->spa_scrub_th = traverse_init(spa, spa_scrub_cb, NULL, 25051635Sbonwick ADVANCE_PRE | ADVANCE_PRUNE | ADVANCE_ZIL, 25061635Sbonwick ZIO_FLAG_CANFAIL); 2507789Sahrens traverse_add_pool(spa->spa_scrub_th, mintxg, maxtxg); 2508789Sahrens spa->spa_scrub_thread = thread_create(NULL, 0, 2509789Sahrens spa_scrub_thread, spa, 0, &p0, TS_RUN, minclsyspri); 2510789Sahrens } 2511789Sahrens 25121544Seschrock mutex_exit(&spa->spa_scrub_lock); 25131544Seschrock 2514789Sahrens return (0); 2515789Sahrens } 2516789Sahrens 25171544Seschrock /* 25181544Seschrock * ========================================================================== 25191544Seschrock * SPA async task processing 25201544Seschrock * ========================================================================== 25211544Seschrock */ 25221544Seschrock 25231544Seschrock static void 25241544Seschrock spa_async_reopen(spa_t *spa) 2525789Sahrens { 25261544Seschrock vdev_t *rvd = spa->spa_root_vdev; 25271544Seschrock vdev_t *tvd; 25281544Seschrock int c; 25291544Seschrock 25301544Seschrock spa_config_enter(spa, RW_WRITER, FTAG); 25311544Seschrock 25321544Seschrock for (c = 0; c < rvd->vdev_children; c++) { 25331544Seschrock tvd = rvd->vdev_child[c]; 25341544Seschrock if (tvd->vdev_reopen_wanted) { 25351544Seschrock tvd->vdev_reopen_wanted = 0; 25361544Seschrock vdev_reopen(tvd); 25371544Seschrock } 25381544Seschrock } 2539789Sahrens 25401544Seschrock spa_config_exit(spa, FTAG); 25411544Seschrock } 25421544Seschrock 25431544Seschrock static void 25441544Seschrock spa_async_thread(spa_t *spa) 25451544Seschrock { 25461544Seschrock int tasks; 25471544Seschrock 25481544Seschrock ASSERT(spa->spa_sync_on); 2549789Sahrens 25501544Seschrock mutex_enter(&spa->spa_async_lock); 25511544Seschrock tasks = spa->spa_async_tasks; 25521544Seschrock spa->spa_async_tasks = 0; 25531544Seschrock mutex_exit(&spa->spa_async_lock); 25541544Seschrock 25551544Seschrock /* 25561635Sbonwick * See if the config needs to be updated. 25571635Sbonwick */ 25581635Sbonwick if (tasks & SPA_ASYNC_CONFIG_UPDATE) { 25591635Sbonwick mutex_enter(&spa_namespace_lock); 25601635Sbonwick spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); 25611635Sbonwick mutex_exit(&spa_namespace_lock); 25621635Sbonwick } 25631635Sbonwick 25641635Sbonwick /* 25651544Seschrock * See if any devices need to be reopened. 25661544Seschrock */ 25671544Seschrock if (tasks & SPA_ASYNC_REOPEN) 25681544Seschrock spa_async_reopen(spa); 25691544Seschrock 25701544Seschrock /* 25711544Seschrock * If any devices are done replacing, detach them. 25721544Seschrock */ 25731544Seschrock if (tasks & SPA_ASYNC_REPLACE_DONE) 2574789Sahrens spa_vdev_replace_done(spa); 2575789Sahrens 25761544Seschrock /* 25771544Seschrock * Kick off a scrub. 
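 * (Exposition note: 'tasks' is a bitmask, so a single pass of this
 * thread may already have updated the config, reopened devices, and
 * detached completed replacements before reaching this point.)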
25781544Seschrock */ 25791544Seschrock if (tasks & SPA_ASYNC_SCRUB) 25801544Seschrock VERIFY(spa_scrub(spa, POOL_SCRUB_EVERYTHING, B_TRUE) == 0); 25811544Seschrock 25821544Seschrock /* 25831544Seschrock * Kick off a resilver. 25841544Seschrock */ 25851544Seschrock if (tasks & SPA_ASYNC_RESILVER) 25861544Seschrock VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0); 25871544Seschrock 25881544Seschrock /* 25891544Seschrock * Let the world know that we're done. 25901544Seschrock */ 25911544Seschrock mutex_enter(&spa->spa_async_lock); 25921544Seschrock spa->spa_async_thread = NULL; 25931544Seschrock cv_broadcast(&spa->spa_async_cv); 25941544Seschrock mutex_exit(&spa->spa_async_lock); 25951544Seschrock thread_exit(); 25961544Seschrock } 25971544Seschrock 25981544Seschrock void 25991544Seschrock spa_async_suspend(spa_t *spa) 26001544Seschrock { 26011544Seschrock mutex_enter(&spa->spa_async_lock); 26021544Seschrock spa->spa_async_suspended++; 26031544Seschrock while (spa->spa_async_thread != NULL) 26041544Seschrock cv_wait(&spa->spa_async_cv, &spa->spa_async_lock); 26051544Seschrock mutex_exit(&spa->spa_async_lock); 26061544Seschrock } 26071544Seschrock 26081544Seschrock void 26091544Seschrock spa_async_resume(spa_t *spa) 26101544Seschrock { 26111544Seschrock mutex_enter(&spa->spa_async_lock); 26121544Seschrock ASSERT(spa->spa_async_suspended != 0); 26131544Seschrock spa->spa_async_suspended--; 26141544Seschrock mutex_exit(&spa->spa_async_lock); 26151544Seschrock } 26161544Seschrock 26171544Seschrock static void 26181544Seschrock spa_async_dispatch(spa_t *spa) 26191544Seschrock { 26201544Seschrock mutex_enter(&spa->spa_async_lock); 26211544Seschrock if (spa->spa_async_tasks && !spa->spa_async_suspended && 26221635Sbonwick spa->spa_async_thread == NULL && 26231635Sbonwick rootdir != NULL && !vn_is_readonly(rootdir)) 26241544Seschrock spa->spa_async_thread = thread_create(NULL, 0, 26251544Seschrock spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri); 26261544Seschrock mutex_exit(&spa->spa_async_lock); 26271544Seschrock } 26281544Seschrock 26291544Seschrock void 26301544Seschrock spa_async_request(spa_t *spa, int task) 26311544Seschrock { 26321544Seschrock mutex_enter(&spa->spa_async_lock); 26331544Seschrock spa->spa_async_tasks |= task; 26341544Seschrock mutex_exit(&spa->spa_async_lock); 2635789Sahrens } 2636789Sahrens 2637789Sahrens /* 2638789Sahrens * ========================================================================== 2639789Sahrens * SPA syncing routines 2640789Sahrens * ========================================================================== 2641789Sahrens */ 2642789Sahrens 2643789Sahrens static void 2644789Sahrens spa_sync_deferred_frees(spa_t *spa, uint64_t txg) 2645789Sahrens { 2646789Sahrens bplist_t *bpl = &spa->spa_sync_bplist; 2647789Sahrens dmu_tx_t *tx; 2648789Sahrens blkptr_t blk; 2649789Sahrens uint64_t itor = 0; 2650789Sahrens zio_t *zio; 2651789Sahrens int error; 2652789Sahrens uint8_t c = 1; 2653789Sahrens 2654789Sahrens zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CONFIG_HELD); 2655789Sahrens 2656789Sahrens while (bplist_iterate(bpl, &itor, &blk) == 0) 2657789Sahrens zio_nowait(zio_free(zio, spa, txg, &blk, NULL, NULL)); 2658789Sahrens 2659789Sahrens error = zio_wait(zio); 2660789Sahrens ASSERT3U(error, ==, 0); 2661789Sahrens 2662789Sahrens tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); 2663789Sahrens bplist_vacate(bpl, tx); 2664789Sahrens 2665789Sahrens /* 2666789Sahrens * Pre-dirty the first block so we sync to convergence faster. 
2667789Sahrens * (Usually only the first block is needed.) 2668789Sahrens */ 2669789Sahrens dmu_write(spa->spa_meta_objset, spa->spa_sync_bplist_obj, 0, 1, &c, tx); 2670789Sahrens dmu_tx_commit(tx); 2671789Sahrens } 2672789Sahrens 2673789Sahrens static void 26742082Seschrock spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx) 26752082Seschrock { 26762082Seschrock char *packed = NULL; 26772082Seschrock size_t nvsize = 0; 26782082Seschrock dmu_buf_t *db; 26792082Seschrock 26802082Seschrock VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0); 26812082Seschrock 26822082Seschrock packed = kmem_alloc(nvsize, KM_SLEEP); 26832082Seschrock 26842082Seschrock VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR, 26852082Seschrock KM_SLEEP) == 0); 26862082Seschrock 26872082Seschrock dmu_write(spa->spa_meta_objset, obj, 0, nvsize, packed, tx); 26882082Seschrock 26892082Seschrock kmem_free(packed, nvsize); 26902082Seschrock 26912082Seschrock VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); 26922082Seschrock dmu_buf_will_dirty(db, tx); 26932082Seschrock *(uint64_t *)db->db_data = nvsize; 26942082Seschrock dmu_buf_rele(db, FTAG); 26952082Seschrock } 26962082Seschrock 26972082Seschrock static void 26982082Seschrock spa_sync_spares(spa_t *spa, dmu_tx_t *tx) 26992082Seschrock { 27002082Seschrock nvlist_t *nvroot; 27012082Seschrock nvlist_t **spares; 27022082Seschrock int i; 27032082Seschrock 27042082Seschrock if (!spa->spa_sync_spares) 27052082Seschrock return; 27062082Seschrock 27072082Seschrock /* 27082082Seschrock * Update the MOS nvlist describing the list of available spares. 27092082Seschrock * spa_validate_spares() will have already made sure this nvlist is 27102082Seschrock * valid and the vdevs are labelled appropriately. 
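 *
 * On-disk shape (as implemented by spa_sync_nvlist() above): a
 * DMU_OT_PACKED_NVLIST object holding the XDR-encoded nvlist, with
 * the packed size recorded as a uint64_t in the object's bonus buffer.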
27112082Seschrock */ 27122082Seschrock if (spa->spa_spares_object == 0) { 27132082Seschrock spa->spa_spares_object = dmu_object_alloc(spa->spa_meta_objset, 27142082Seschrock DMU_OT_PACKED_NVLIST, 1 << 14, 27152082Seschrock DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx); 27162082Seschrock VERIFY(zap_update(spa->spa_meta_objset, 27172082Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SPARES, 27182082Seschrock sizeof (uint64_t), 1, &spa->spa_spares_object, tx) == 0); 27192082Seschrock } 27202082Seschrock 27212082Seschrock VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); 27222082Seschrock if (spa->spa_nspares == 0) { 27232082Seschrock VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 27242082Seschrock NULL, 0) == 0); 27252082Seschrock } else { 27262082Seschrock spares = kmem_alloc(spa->spa_nspares * sizeof (void *), 27272082Seschrock KM_SLEEP); 27282082Seschrock for (i = 0; i < spa->spa_nspares; i++) 27292082Seschrock spares[i] = vdev_config_generate(spa, 27302082Seschrock spa->spa_spares[i], B_FALSE, B_TRUE); 27312082Seschrock VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 27322082Seschrock spares, spa->spa_nspares) == 0); 27332082Seschrock for (i = 0; i < spa->spa_nspares; i++) 27342082Seschrock nvlist_free(spares[i]); 27352082Seschrock kmem_free(spares, spa->spa_nspares * sizeof (void *)); 27362082Seschrock } 27372082Seschrock 27382082Seschrock spa_sync_nvlist(spa, spa->spa_spares_object, nvroot, tx); 27392926Sek110237 nvlist_free(nvroot); 27402082Seschrock 27412082Seschrock spa->spa_sync_spares = B_FALSE; 27422082Seschrock } 27432082Seschrock 27442082Seschrock static void 2745789Sahrens spa_sync_config_object(spa_t *spa, dmu_tx_t *tx) 2746789Sahrens { 2747789Sahrens nvlist_t *config; 2748789Sahrens 2749789Sahrens if (list_is_empty(&spa->spa_dirty_list)) 2750789Sahrens return; 2751789Sahrens 2752789Sahrens config = spa_config_generate(spa, NULL, dmu_tx_get_txg(tx), B_FALSE); 2753789Sahrens 27541635Sbonwick if (spa->spa_config_syncing) 27551635Sbonwick nvlist_free(spa->spa_config_syncing); 27561635Sbonwick spa->spa_config_syncing = config; 2757789Sahrens 27582082Seschrock spa_sync_nvlist(spa, spa->spa_config_object, config, tx); 2759789Sahrens } 2760789Sahrens 2761789Sahrens /* 2762789Sahrens * Sync the specified transaction group. New blocks may be dirtied as 2763789Sahrens * part of the process, so we iterate until it converges. 2764789Sahrens */ 2765789Sahrens void 2766789Sahrens spa_sync(spa_t *spa, uint64_t txg) 2767789Sahrens { 2768789Sahrens dsl_pool_t *dp = spa->spa_dsl_pool; 2769789Sahrens objset_t *mos = spa->spa_meta_objset; 2770789Sahrens bplist_t *bpl = &spa->spa_sync_bplist; 27711635Sbonwick vdev_t *rvd = spa->spa_root_vdev; 2772789Sahrens vdev_t *vd; 2773789Sahrens dmu_tx_t *tx; 2774789Sahrens int dirty_vdevs; 2775789Sahrens 2776789Sahrens /* 2777789Sahrens * Lock out configuration changes. 2778789Sahrens */ 27791544Seschrock spa_config_enter(spa, RW_READER, FTAG); 2780789Sahrens 2781789Sahrens spa->spa_syncing_txg = txg; 2782789Sahrens spa->spa_sync_pass = 0; 2783789Sahrens 27841544Seschrock VERIFY(0 == bplist_open(bpl, mos, spa->spa_sync_bplist_obj)); 2785789Sahrens 27862082Seschrock tx = dmu_tx_create_assigned(dp, txg); 27872082Seschrock 27882082Seschrock /* 27892082Seschrock * If we are upgrading to ZFS_VERSION_RAIDZ_DEFLATE this txg, 27902082Seschrock * set spa_deflate if we have no raid-z vdevs. 
27912082Seschrock 	 */
27922082Seschrock 	if (spa->spa_ubsync.ub_version < ZFS_VERSION_RAIDZ_DEFLATE &&
27932082Seschrock 	    spa->spa_uberblock.ub_version >= ZFS_VERSION_RAIDZ_DEFLATE) {
27942082Seschrock 		int i;
27952082Seschrock 
27962082Seschrock 		for (i = 0; i < rvd->vdev_children; i++) {
27972082Seschrock 			vd = rvd->vdev_child[i];
27982082Seschrock 			if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE)
27992082Seschrock 				break;
28002082Seschrock 		}
28012082Seschrock 		if (i == rvd->vdev_children) {
28022082Seschrock 			spa->spa_deflate = TRUE;
28032082Seschrock 			VERIFY(0 == zap_add(spa->spa_meta_objset,
28042082Seschrock 			    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
28052082Seschrock 			    sizeof (uint64_t), 1, &spa->spa_deflate, tx));
28062082Seschrock 		}
28072082Seschrock 	}
28082082Seschrock 
2809789Sahrens 	/*
2810789Sahrens 	 * If anything has changed in this txg, push the deferred frees
2811789Sahrens 	 * from the previous txg. If not, leave them alone so that we
2812789Sahrens 	 * don't generate work on an otherwise idle system.
2813789Sahrens 	 */
2814789Sahrens 	if (!txg_list_empty(&dp->dp_dirty_datasets, txg) ||
28152329Sek110237 	    !txg_list_empty(&dp->dp_dirty_dirs, txg) ||
28162329Sek110237 	    !txg_list_empty(&dp->dp_sync_tasks, txg))
2817789Sahrens 		spa_sync_deferred_frees(spa, txg);
2818789Sahrens 
2819789Sahrens 	/*
2820789Sahrens 	 * Iterate to convergence.
2821789Sahrens 	 */
2822789Sahrens 	do {
2823789Sahrens 		spa->spa_sync_pass++;
2824789Sahrens 
2825789Sahrens 		spa_sync_config_object(spa, tx);
28262082Seschrock 		spa_sync_spares(spa, tx);
28271544Seschrock 		spa_errlog_sync(spa, txg);
2828789Sahrens 		dsl_pool_sync(dp, txg);
2829789Sahrens 
2830789Sahrens 		dirty_vdevs = 0;
2831789Sahrens 		while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg)) {
2832789Sahrens 			vdev_sync(vd, txg);
2833789Sahrens 			dirty_vdevs++;
2834789Sahrens 		}
2835789Sahrens 
2836789Sahrens 		bplist_sync(bpl, tx);
2837789Sahrens 	} while (dirty_vdevs);
2838789Sahrens 
2839789Sahrens 	bplist_close(bpl);
2840789Sahrens 
2841789Sahrens 	dprintf("txg %llu passes %d\n", txg, spa->spa_sync_pass);
2842789Sahrens 
2843789Sahrens 	/*
2844789Sahrens 	 * Rewrite the vdev configuration (which includes the uberblock)
2845789Sahrens 	 * to commit the transaction group.
28461635Sbonwick 	 *
28471635Sbonwick 	 * If there are any dirty vdevs, sync the uberblock to all vdevs.
28481635Sbonwick 	 * Otherwise, pick a random top-level vdev that's known to be
28491635Sbonwick 	 * visible in the config cache (see spa_vdev_add() for details).
28501635Sbonwick 	 * If the write fails, try the next vdev until we've tried them all.
2851789Sahrens 	 */
28521635Sbonwick 	if (!list_is_empty(&spa->spa_dirty_list)) {
28531635Sbonwick 		VERIFY(vdev_config_sync(rvd, txg) == 0);
28541635Sbonwick 	} else {
28551635Sbonwick 		int children = rvd->vdev_children;
28561635Sbonwick 		int c0 = spa_get_random(children);
28571635Sbonwick 		int c;
28581635Sbonwick 
28591635Sbonwick 		for (c = 0; c < children; c++) {
28601635Sbonwick 			vd = rvd->vdev_child[(c0 + c) % children];
28611635Sbonwick 			if (vd->vdev_ms_array == 0)
28621635Sbonwick 				continue;
28631635Sbonwick 			if (vdev_config_sync(vd, txg) == 0)
28641635Sbonwick 				break;
28651635Sbonwick 		}
28661635Sbonwick 		if (c == children)
28671635Sbonwick 			VERIFY(vdev_config_sync(rvd, txg) == 0);
28681635Sbonwick 	}
28691635Sbonwick 
28702082Seschrock 	dmu_tx_commit(tx);
28712082Seschrock 
28721635Sbonwick 	/*
28731635Sbonwick 	 * Clear the dirty config list.
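 *
 * (Every vdev placed on spa_dirty_list by vdev_config_dirty() has had
 * its label rewritten by the config sync above, so the list can now
 * be emptied.)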
28741635Sbonwick */ 28751635Sbonwick while ((vd = list_head(&spa->spa_dirty_list)) != NULL) 28761635Sbonwick vdev_config_clean(vd); 28771635Sbonwick 28781635Sbonwick /* 28791635Sbonwick * Now that the new config has synced transactionally, 28801635Sbonwick * let it become visible to the config cache. 28811635Sbonwick */ 28821635Sbonwick if (spa->spa_config_syncing != NULL) { 28831635Sbonwick spa_config_set(spa, spa->spa_config_syncing); 28841635Sbonwick spa->spa_config_txg = txg; 28851635Sbonwick spa->spa_config_syncing = NULL; 28861635Sbonwick } 2887789Sahrens 2888789Sahrens /* 2889789Sahrens * Make a stable copy of the fully synced uberblock. 2890789Sahrens * We use this as the root for pool traversals. 2891789Sahrens */ 2892789Sahrens spa->spa_traverse_wanted = 1; /* tells traverse_more() to stop */ 2893789Sahrens 2894789Sahrens spa_scrub_suspend(spa); /* stop scrubbing and finish I/Os */ 2895789Sahrens 2896789Sahrens rw_enter(&spa->spa_traverse_lock, RW_WRITER); 2897789Sahrens spa->spa_traverse_wanted = 0; 2898789Sahrens spa->spa_ubsync = spa->spa_uberblock; 2899789Sahrens rw_exit(&spa->spa_traverse_lock); 2900789Sahrens 2901789Sahrens spa_scrub_resume(spa); /* resume scrub with new ubsync */ 2902789Sahrens 2903789Sahrens /* 2904789Sahrens * Clean up the ZIL records for the synced txg. 2905789Sahrens */ 2906789Sahrens dsl_pool_zil_clean(dp); 2907789Sahrens 2908789Sahrens /* 2909789Sahrens * Update usable space statistics. 2910789Sahrens */ 2911789Sahrens while (vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg))) 2912789Sahrens vdev_sync_done(vd, txg); 2913789Sahrens 2914789Sahrens /* 2915789Sahrens * It had better be the case that we didn't dirty anything 29162082Seschrock * since vdev_config_sync(). 2917789Sahrens */ 2918789Sahrens ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg)); 2919789Sahrens ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg)); 2920789Sahrens ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg)); 2921789Sahrens ASSERT(bpl->bpl_queue == NULL); 2922789Sahrens 29231544Seschrock spa_config_exit(spa, FTAG); 29241544Seschrock 29251544Seschrock /* 29261544Seschrock * If any async tasks have been requested, kick them off. 29271544Seschrock */ 29281544Seschrock spa_async_dispatch(spa); 2929789Sahrens } 2930789Sahrens 2931789Sahrens /* 2932789Sahrens * Sync all pools. We don't want to hold the namespace lock across these 2933789Sahrens * operations, so we take a reference on the spa_t and drop the lock during the 2934789Sahrens * sync. 2935789Sahrens */ 2936789Sahrens void 2937789Sahrens spa_sync_allpools(void) 2938789Sahrens { 2939789Sahrens spa_t *spa = NULL; 2940789Sahrens mutex_enter(&spa_namespace_lock); 2941789Sahrens while ((spa = spa_next(spa)) != NULL) { 2942789Sahrens if (spa_state(spa) != POOL_STATE_ACTIVE) 2943789Sahrens continue; 2944789Sahrens spa_open_ref(spa, FTAG); 2945789Sahrens mutex_exit(&spa_namespace_lock); 2946789Sahrens txg_wait_synced(spa_get_dsl(spa), 0); 2947789Sahrens mutex_enter(&spa_namespace_lock); 2948789Sahrens spa_close(spa, FTAG); 2949789Sahrens } 2950789Sahrens mutex_exit(&spa_namespace_lock); 2951789Sahrens } 2952789Sahrens 2953789Sahrens /* 2954789Sahrens * ========================================================================== 2955789Sahrens * Miscellaneous routines 2956789Sahrens * ========================================================================== 2957789Sahrens */ 2958789Sahrens 2959789Sahrens /* 2960789Sahrens * Remove all pools in the system. 
2961789Sahrens */ 2962789Sahrens void 2963789Sahrens spa_evict_all(void) 2964789Sahrens { 2965789Sahrens spa_t *spa; 2966789Sahrens 2967789Sahrens /* 2968789Sahrens * Remove all cached state. All pools should be closed now, 2969789Sahrens * so every spa in the AVL tree should be unreferenced. 2970789Sahrens */ 2971789Sahrens mutex_enter(&spa_namespace_lock); 2972789Sahrens while ((spa = spa_next(NULL)) != NULL) { 2973789Sahrens /* 29741544Seschrock * Stop async tasks. The async thread may need to detach 29751544Seschrock * a device that's been replaced, which requires grabbing 29761544Seschrock * spa_namespace_lock, so we must drop it here. 2977789Sahrens */ 2978789Sahrens spa_open_ref(spa, FTAG); 2979789Sahrens mutex_exit(&spa_namespace_lock); 29801544Seschrock spa_async_suspend(spa); 2981789Sahrens VERIFY(spa_scrub(spa, POOL_SCRUB_NONE, B_TRUE) == 0); 2982789Sahrens mutex_enter(&spa_namespace_lock); 2983789Sahrens spa_close(spa, FTAG); 2984789Sahrens 2985789Sahrens if (spa->spa_state != POOL_STATE_UNINITIALIZED) { 2986789Sahrens spa_unload(spa); 2987789Sahrens spa_deactivate(spa); 2988789Sahrens } 2989789Sahrens spa_remove(spa); 2990789Sahrens } 2991789Sahrens mutex_exit(&spa_namespace_lock); 2992789Sahrens } 29931544Seschrock 29941544Seschrock vdev_t * 29951544Seschrock spa_lookup_by_guid(spa_t *spa, uint64_t guid) 29961544Seschrock { 29971544Seschrock return (vdev_lookup_by_guid(spa->spa_root_vdev, guid)); 29981544Seschrock } 29991760Seschrock 30001760Seschrock void 30011760Seschrock spa_upgrade(spa_t *spa) 30021760Seschrock { 30031760Seschrock spa_config_enter(spa, RW_WRITER, FTAG); 30041760Seschrock 30051760Seschrock /* 30061760Seschrock * This should only be called for a non-faulted pool, and since a 30071760Seschrock * future version would result in an unopenable pool, this shouldn't be 30081760Seschrock * possible. 30091760Seschrock */ 30101760Seschrock ASSERT(spa->spa_uberblock.ub_version <= ZFS_VERSION); 30111760Seschrock 30121760Seschrock spa->spa_uberblock.ub_version = ZFS_VERSION; 30131760Seschrock vdev_config_dirty(spa->spa_root_vdev); 30141760Seschrock 30151760Seschrock spa_config_exit(spa, FTAG); 30162082Seschrock 30172082Seschrock txg_wait_synced(spa_get_dsl(spa), 0); 30181760Seschrock } 30192082Seschrock 30202082Seschrock boolean_t 30212082Seschrock spa_has_spare(spa_t *spa, uint64_t guid) 30222082Seschrock { 30232082Seschrock int i; 30242082Seschrock 30252082Seschrock for (i = 0; i < spa->spa_nspares; i++) 30262082Seschrock if (spa->spa_spares[i]->vdev_guid == guid) 30272082Seschrock return (B_TRUE); 30282082Seschrock 30292082Seschrock return (B_FALSE); 30302082Seschrock } 3031
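
/*
 * Illustrative usage sketch (not part of the original file; the caller,
 * pool name, and error handling are hypothetical). A consumer of
 * spa_export() that wants to keep the pool's final configuration for a
 * later import might do:
 *
 *	nvlist_t *oldconfig = NULL;
 *	int error;
 *
 *	if ((error = spa_export("tank", &oldconfig)) == 0) {
 *		(keep oldconfig around for a later import)
 *		nvlist_free(oldconfig);
 *	} else if (error == EBUSY) {
 *		(the pool still has active references or injected faults)
 *	}
 */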