/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * This file contains all the routines used when modifying on-disk SPA state.
 * This includes opening, importing, destroying, exporting a pool, and syncing
 * a pool.
 */

#include <sys/zfs_context.h>
#include <sys/fm/fs/zfs.h>
#include <sys/spa_impl.h>
#include <sys/zio.h>
#include <sys/zio_checksum.h>
#include <sys/zio_compress.h>
#include <sys/dmu.h>
#include <sys/dmu_tx.h>
#include <sys/zap.h>
#include <sys/zil.h>
#include <sys/vdev_impl.h>
#include <sys/metaslab.h>
#include <sys/uberblock_impl.h>
#include <sys/txg.h>
#include <sys/avl.h>
#include <sys/dmu_traverse.h>
#include <sys/dmu_objset.h>
#include <sys/unique.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_synctask.h>
#include <sys/fs/zfs.h>
#include <sys/callb.h>
#include <sys/systeminfo.h>
#include <sys/sunddi.h>

int zio_taskq_threads = 8;

/*
 * ==========================================================================
 * SPA state manipulation (open/create/destroy/import/export)
 * ==========================================================================
 */

static int
spa_error_entry_compare(const void *a, const void *b)
{
	spa_error_entry_t *sa = (spa_error_entry_t *)a;
	spa_error_entry_t *sb = (spa_error_entry_t *)b;
	int ret;

	ret = bcmp(&sa->se_bookmark, &sb->se_bookmark,
	    sizeof (zbookmark_t));

	if (ret < 0)
		return (-1);
	else if (ret > 0)
		return (1);
	else
		return (0);
}
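/*
 * Illustrative sketch, not part of the original source: the comparator
 * above follows the avl_create() contract of returning strictly -1, 0,
 * or 1, so the error trees can be searched like any other AVL tree.  A
 * hypothetical lookup under the errlist lock might look like ('zb' is
 * an assumed zbookmark_t pointer):
 *
 *	spa_error_entry_t search, *found;
 *	avl_index_t where;
 *
 *	search.se_bookmark = *zb;
 *	mutex_enter(&spa->spa_errlist_lock);
 *	found = avl_find(&spa->spa_errlist_last, &search, &where);
 *	mutex_exit(&spa->spa_errlist_lock);
 */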
/*
 * Utility function which retrieves copies of the current logs and
 * re-initializes them in the process.
 */
void
spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub)
{
	ASSERT(MUTEX_HELD(&spa->spa_errlist_lock));

	bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t));
	bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t));

	avl_create(&spa->spa_errlist_scrub,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
	avl_create(&spa->spa_errlist_last,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
}

/*
 * Activate an uninitialized pool.
 */
static void
spa_activate(spa_t *spa)
{
	int t;

	ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);

	spa->spa_state = POOL_STATE_ACTIVE;

	spa->spa_normal_class = metaslab_class_create();

	for (t = 0; t < ZIO_TYPES; t++) {
		spa->spa_zio_issue_taskq[t] = taskq_create("spa_zio_issue",
		    zio_taskq_threads, maxclsyspri, 50, INT_MAX,
		    TASKQ_PREPOPULATE);
		spa->spa_zio_intr_taskq[t] = taskq_create("spa_zio_intr",
		    zio_taskq_threads, maxclsyspri, 50, INT_MAX,
		    TASKQ_PREPOPULATE);
	}

	rw_init(&spa->spa_traverse_lock, NULL, RW_DEFAULT, NULL);

	mutex_init(&spa->spa_async_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spa->spa_config_cache_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spa->spa_scrub_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spa->spa_errlog_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spa->spa_errlist_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spa->spa_config_lock.scl_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spa->spa_sync_bplist.bpl_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spa->spa_history_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spa->spa_props_lock, NULL, MUTEX_DEFAULT, NULL);

	list_create(&spa->spa_dirty_list, sizeof (vdev_t),
	    offsetof(vdev_t, vdev_dirty_node));

	txg_list_create(&spa->spa_vdev_txg_list,
	    offsetof(struct vdev, vdev_txg_node));

	avl_create(&spa->spa_errlist_scrub,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
	avl_create(&spa->spa_errlist_last,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
}
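/*
 * Illustrative sketch, not part of the original source: spa_activate()
 * and spa_deactivate() always bracket a pool's in-core lifetime, so a
 * failed load unwinds in the opposite order it was set up.  This
 * mirrors the error paths in spa_open_common() and spa_import() below:
 *
 *	spa = spa_add(pool, altroot);
 *	spa_activate(spa);
 *	if (spa_load(spa, config, state, mosconfig) != 0) {
 *		spa_unload(spa);
 *		spa_deactivate(spa);
 *		spa_remove(spa);
 *	}
 */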
/*
 * Opposite of spa_activate().
 */
static void
spa_deactivate(spa_t *spa)
{
	int t;

	ASSERT(spa->spa_sync_on == B_FALSE);
	ASSERT(spa->spa_dsl_pool == NULL);
	ASSERT(spa->spa_root_vdev == NULL);

	ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED);

	txg_list_destroy(&spa->spa_vdev_txg_list);

	list_destroy(&spa->spa_dirty_list);

	rw_destroy(&spa->spa_traverse_lock);

	for (t = 0; t < ZIO_TYPES; t++) {
		taskq_destroy(spa->spa_zio_issue_taskq[t]);
		taskq_destroy(spa->spa_zio_intr_taskq[t]);
		spa->spa_zio_issue_taskq[t] = NULL;
		spa->spa_zio_intr_taskq[t] = NULL;
	}

	metaslab_class_destroy(spa->spa_normal_class);
	spa->spa_normal_class = NULL;

	/*
	 * If this was part of an import or the open otherwise failed, we may
	 * still have errors left in the queues.  Empty them just in case.
	 */
	spa_errlog_drain(spa);

	avl_destroy(&spa->spa_errlist_scrub);
	avl_destroy(&spa->spa_errlist_last);

	spa->spa_state = POOL_STATE_UNINITIALIZED;
}

/*
 * Verify a pool configuration, and construct the vdev tree appropriately.
 * This will create all the necessary vdevs in the appropriate layout, with
 * each vdev in the CLOSED state.  This will prep the pool before
 * open/creation/import.  All vdev validation is done by the vdev_alloc()
 * routine.
 */
static int
spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent,
    uint_t id, int atype)
{
	nvlist_t **child;
	uint_t c, children;
	int error;

	if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0)
		return (error);

	if ((*vdp)->vdev_ops->vdev_op_leaf)
		return (0);

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0) {
		vdev_free(*vdp);
		*vdp = NULL;
		return (EINVAL);
	}

	for (c = 0; c < children; c++) {
		vdev_t *vd;
		if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c,
		    atype)) != 0) {
			vdev_free(*vdp);
			*vdp = NULL;
			return (error);
		}
	}

	ASSERT(*vdp != NULL);

	return (0);
}
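/*
 * Illustrative sketch, not part of the original source: the nvlist that
 * spa_config_parse() consumes is a recursive vdev description.  The
 * exact set of name/value pairs comes from the label or from userland;
 * the shape, as far as this routine is concerned, is roughly:
 *
 *	type = "root"
 *	children[0]
 *		type = "mirror"
 *		children[0]: type = "disk", path = "/dev/dsk/..."
 *		children[1]: type = "disk", path = "/dev/dsk/..."
 *
 * Interior vdevs carry a ZPOOL_CONFIG_CHILDREN array; leaf vdevs do
 * not, which is why the recursion stops at vdev_op_leaf above.
 */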
/*
 * Opposite of spa_load().
 */
static void
spa_unload(spa_t *spa)
{
	int i;

	/*
	 * Stop async tasks.
	 */
	spa_async_suspend(spa);

	/*
	 * Stop syncing.
	 */
	if (spa->spa_sync_on) {
		txg_sync_stop(spa->spa_dsl_pool);
		spa->spa_sync_on = B_FALSE;
	}

	/*
	 * Wait for any outstanding prefetch I/O to complete.
	 */
	spa_config_enter(spa, RW_WRITER, FTAG);
	spa_config_exit(spa, FTAG);

	/*
	 * Close the dsl pool.
	 */
	if (spa->spa_dsl_pool) {
		dsl_pool_close(spa->spa_dsl_pool);
		spa->spa_dsl_pool = NULL;
	}

	/*
	 * Close all vdevs.
	 */
	if (spa->spa_root_vdev)
		vdev_free(spa->spa_root_vdev);
	ASSERT(spa->spa_root_vdev == NULL);

	for (i = 0; i < spa->spa_nspares; i++)
		vdev_free(spa->spa_spares[i]);
	if (spa->spa_spares) {
		kmem_free(spa->spa_spares, spa->spa_nspares * sizeof (void *));
		spa->spa_spares = NULL;
	}
	if (spa->spa_sparelist) {
		nvlist_free(spa->spa_sparelist);
		spa->spa_sparelist = NULL;
	}

	spa->spa_async_suspended = 0;
}

/*
 * Load (or re-load) the current list of vdevs describing the active spares for
 * this pool.  When this is called, we have some form of basic information in
 * 'spa_sparelist'.  We parse this into vdevs, try to open them, and then
 * re-generate a more complete list including status information.
 */
static void
spa_load_spares(spa_t *spa)
{
	nvlist_t **spares;
	uint_t nspares;
	int i;
	vdev_t *vd, *tvd;

	/*
	 * First, close and free any existing spare vdevs.
	 */
	for (i = 0; i < spa->spa_nspares; i++) {
		vd = spa->spa_spares[i];

		/* Undo the call to spa_spare_add() below */
		if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid)) != NULL &&
		    tvd->vdev_isspare)
			spa_spare_remove(tvd);
		vdev_close(vd);
		vdev_free(vd);
	}

	if (spa->spa_spares)
		kmem_free(spa->spa_spares, spa->spa_nspares * sizeof (void *));

	if (spa->spa_sparelist == NULL)
		nspares = 0;
	else
		VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist,
		    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);

	spa->spa_nspares = (int)nspares;
	spa->spa_spares = NULL;

	if (nspares == 0)
		return;
	/*
	 * Construct the array of vdevs, opening them to get status in the
	 * process.  For each spare, there are potentially two different vdev_t
	 * structures associated with it: one in the list of spares (used only
	 * for basic validation purposes) and one in the active vdev
	 * configuration (if it's spared in).  During this phase we open and
	 * validate each vdev on the spare list.  If the vdev also exists in
	 * the active configuration, then we also mark this vdev as an active
	 * spare.
	 */
	spa->spa_spares = kmem_alloc(nspares * sizeof (void *), KM_SLEEP);
	for (i = 0; i < spa->spa_nspares; i++) {
		VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0,
		    VDEV_ALLOC_SPARE) == 0);
		ASSERT(vd != NULL);

		spa->spa_spares[i] = vd;

		if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid)) != NULL) {
			if (!tvd->vdev_isspare)
				spa_spare_add(tvd);

			/*
			 * We only mark the spare active if we were successfully
			 * able to load the vdev.  Otherwise, importing a pool
			 * with a bad active spare would result in strange
			 * behavior, because multiple pools would think the
			 * spare is actively in use.
			 *
			 * There is a vulnerability here to an equally bizarre
			 * circumstance, where a dead active spare is later
			 * brought back to life (onlined or otherwise).  Given
			 * the rarity of this scenario, and the extra complexity
			 * it adds, we ignore the possibility.
			 */
			if (!vdev_is_dead(tvd))
				spa_spare_activate(tvd);
		}

		if (vdev_open(vd) != 0)
			continue;

		vd->vdev_top = vd;
		(void) vdev_validate_spare(vd);
	}
	/*
	 * Recompute the stashed list of spares, with status information
	 * this time.
	 */
	VERIFY(nvlist_remove(spa->spa_sparelist, ZPOOL_CONFIG_SPARES,
	    DATA_TYPE_NVLIST_ARRAY) == 0);

	spares = kmem_alloc(spa->spa_nspares * sizeof (void *), KM_SLEEP);
	for (i = 0; i < spa->spa_nspares; i++)
		spares[i] = vdev_config_generate(spa, spa->spa_spares[i],
		    B_TRUE, B_TRUE);
	VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist, ZPOOL_CONFIG_SPARES,
	    spares, spa->spa_nspares) == 0);
	for (i = 0; i < spa->spa_nspares; i++)
		nvlist_free(spares[i]);
	kmem_free(spares, spa->spa_nspares * sizeof (void *));
}

static int
load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value)
{
	dmu_buf_t *db;
	char *packed = NULL;
	size_t nvsize = 0;
	int error;

	*value = NULL;

	VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
	nvsize = *(uint64_t *)db->db_data;
	dmu_buf_rele(db, FTAG);

	packed = kmem_alloc(nvsize, KM_SLEEP);
	error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed);
	if (error == 0)
		error = nvlist_unpack(packed, nvsize, value, 0);
	kmem_free(packed, nvsize);

	return (error);
}
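/*
 * Illustrative sketch, not part of the original source: objects read by
 * load_nvlist() store their packed length in the dnode's bonus buffer
 * and the packed nvlist itself in the object body.  A caller is
 * expected to free the result, e.g. (error handling elided):
 *
 *	nvlist_t *nv;
 *
 *	if (load_nvlist(spa, spa->spa_spares_object, &nv) == 0) {
 *		... use nv ...
 *		nvlist_free(nv);
 *	}
 */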
/*
 * Checks to see if the given vdev could not be opened, in which case we post a
 * sysevent to notify the autoreplace code that the device has been removed.
 */
static void
spa_check_removed(vdev_t *vd)
{
	int c;

	for (c = 0; c < vd->vdev_children; c++)
		spa_check_removed(vd->vdev_child[c]);

	if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) {
		zfs_post_autoreplace(vd->vdev_spa, vd);
		spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK);
	}
}

/*
 * Load an existing storage pool, using the pool's builtin spa_config as a
 * source of configuration information.
 */
static int
spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
{
	int error = 0;
	nvlist_t *nvroot = NULL;
	vdev_t *rvd;
	uberblock_t *ub = &spa->spa_uberblock;
	uint64_t config_cache_txg = spa->spa_config_txg;
	uint64_t pool_guid;
	uint64_t version;
	zio_t *zio;
	uint64_t autoreplace = 0;

	spa->spa_load_state = state;

	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) ||
	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) {
		error = EINVAL;
		goto out;
	}

	/*
	 * Versioning wasn't explicitly added to the label until later, so if
	 * it's not present treat it as the initial version.
	 */
	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) != 0)
		version = ZFS_VERSION_INITIAL;

	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
	    &spa->spa_config_txg);

	if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) &&
	    spa_guid_exists(pool_guid, 0)) {
		error = EEXIST;
		goto out;
	}

	spa->spa_load_guid = pool_guid;

	/*
	 * Parse the configuration into a vdev tree.  We explicitly set the
	 * value that will be returned by spa_version() since parsing the
	 * configuration requires knowing the version number.
	 */
	spa_config_enter(spa, RW_WRITER, FTAG);
	spa->spa_ubsync.ub_version = version;
	error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_LOAD);
	spa_config_exit(spa, FTAG);

	if (error != 0)
		goto out;

	ASSERT(spa->spa_root_vdev == rvd);
	ASSERT(spa_guid(spa) == pool_guid);

	/*
	 * Try to open all vdevs, loading each label in the process.
	 */
	error = vdev_open(rvd);
	if (error != 0)
		goto out;

	/*
	 * Validate the labels for all leaf vdevs.  We need to grab the config
	 * lock because all label I/O is done with the ZIO_FLAG_CONFIG_HELD
	 * flag.
	 */
	spa_config_enter(spa, RW_READER, FTAG);
	error = vdev_validate(rvd);
	spa_config_exit(spa, FTAG);

	if (error != 0)
		goto out;

	if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) {
		error = ENXIO;
		goto out;
	}
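	/*
	 * Illustrative note, not part of the original source: "best" below
	 * is decided by vdev_uberblock_load(), which (in this era of the
	 * code) prefers the candidate with the highest txg, using the
	 * uberblock timestamp as a tie-breaker, so the pool resumes from
	 * the newest state that was successfully written out.
	 */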
	/*
	 * Find the best uberblock.
	 */
	bzero(ub, sizeof (uberblock_t));

	zio = zio_root(spa, NULL, NULL,
	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE);
	vdev_uberblock_load(zio, rvd, ub);
	error = zio_wait(zio);

	/*
	 * If we weren't able to find a single valid uberblock, return failure.
	 */
	if (ub->ub_txg == 0) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = ENXIO;
		goto out;
	}

	/*
	 * If the pool is newer than the code, we can't open it.
	 */
	if (ub->ub_version > ZFS_VERSION) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_VERSION_NEWER);
		error = ENOTSUP;
		goto out;
	}

	/*
	 * If the vdev guid sum doesn't match the uberblock, we have an
	 * incomplete configuration.
	 */
	if (rvd->vdev_guid_sum != ub->ub_guid_sum && mosconfig) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_BAD_GUID_SUM);
		error = ENXIO;
		goto out;
	}

	/*
	 * Initialize internal SPA structures.
	 */
	spa->spa_state = POOL_STATE_ACTIVE;
	spa->spa_ubsync = spa->spa_uberblock;
	spa->spa_first_txg = spa_last_synced_txg(spa) + 1;
	error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool);
	if (error) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		goto out;
	}
	spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset;

	if (zap_lookup(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG,
	    sizeof (uint64_t), 1, &spa->spa_config_object) != 0) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = EIO;
		goto out;
	}

	if (!mosconfig) {
		nvlist_t *newconfig;
		uint64_t hostid;

		if (load_nvlist(spa, spa->spa_config_object, &newconfig) != 0) {
			vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
			    VDEV_AUX_CORRUPT_DATA);
			error = EIO;
			goto out;
		}

		if (nvlist_lookup_uint64(newconfig, ZPOOL_CONFIG_HOSTID,
		    &hostid) == 0) {
			char *hostname;
			unsigned long myhostid = 0;

			VERIFY(nvlist_lookup_string(newconfig,
			    ZPOOL_CONFIG_HOSTNAME, &hostname) == 0);

			(void) ddi_strtoul(hw_serial, NULL, 10, &myhostid);
			if (hostid != 0 && myhostid != 0 &&
			    (unsigned long)hostid != myhostid) {
" 6163975Sek110237 "See: http://www.sun.com/msg/ZFS-8000-EY", 6173975Sek110237 spa->spa_name, hostname, 6183975Sek110237 (unsigned long)hostid); 6193975Sek110237 error = EBADF; 6203975Sek110237 goto out; 6213975Sek110237 } 6223975Sek110237 } 6233975Sek110237 624789Sahrens spa_config_set(spa, newconfig); 625789Sahrens spa_unload(spa); 626789Sahrens spa_deactivate(spa); 627789Sahrens spa_activate(spa); 628789Sahrens 6291544Seschrock return (spa_load(spa, newconfig, state, B_TRUE)); 6301544Seschrock } 6311544Seschrock 6321544Seschrock if (zap_lookup(spa->spa_meta_objset, 6331544Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, 6341544Seschrock sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj) != 0) { 6351544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 6361544Seschrock VDEV_AUX_CORRUPT_DATA); 6371544Seschrock error = EIO; 6381544Seschrock goto out; 639789Sahrens } 640789Sahrens 6411544Seschrock /* 6422082Seschrock * Load the bit that tells us to use the new accounting function 6432082Seschrock * (raid-z deflation). If we have an older pool, this will not 6442082Seschrock * be present. 6452082Seschrock */ 6462082Seschrock error = zap_lookup(spa->spa_meta_objset, 6472082Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 6482082Seschrock sizeof (uint64_t), 1, &spa->spa_deflate); 6492082Seschrock if (error != 0 && error != ENOENT) { 6502082Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 6512082Seschrock VDEV_AUX_CORRUPT_DATA); 6522082Seschrock error = EIO; 6532082Seschrock goto out; 6542082Seschrock } 6552082Seschrock 6562082Seschrock /* 6571544Seschrock * Load the persistent error log. If we have an older pool, this will 6581544Seschrock * not be present. 6591544Seschrock */ 6601544Seschrock error = zap_lookup(spa->spa_meta_objset, 6611544Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_LAST, 6621544Seschrock sizeof (uint64_t), 1, &spa->spa_errlog_last); 6631807Sbonwick if (error != 0 && error != ENOENT) { 6641544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 6651544Seschrock VDEV_AUX_CORRUPT_DATA); 6661544Seschrock error = EIO; 6671544Seschrock goto out; 6681544Seschrock } 6691544Seschrock 6701544Seschrock error = zap_lookup(spa->spa_meta_objset, 6711544Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_SCRUB, 6721544Seschrock sizeof (uint64_t), 1, &spa->spa_errlog_scrub); 6731544Seschrock if (error != 0 && error != ENOENT) { 6741544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 6751544Seschrock VDEV_AUX_CORRUPT_DATA); 6761544Seschrock error = EIO; 6771544Seschrock goto out; 6781544Seschrock } 679789Sahrens 680789Sahrens /* 6812926Sek110237 * Load the history object. If we have an older pool, this 6822926Sek110237 * will not be present. 6832926Sek110237 */ 6842926Sek110237 error = zap_lookup(spa->spa_meta_objset, 6852926Sek110237 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_HISTORY, 6862926Sek110237 sizeof (uint64_t), 1, &spa->spa_history); 6872926Sek110237 if (error != 0 && error != ENOENT) { 6882926Sek110237 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 6892926Sek110237 VDEV_AUX_CORRUPT_DATA); 6902926Sek110237 error = EIO; 6912926Sek110237 goto out; 6922926Sek110237 } 6932926Sek110237 6942926Sek110237 /* 6952082Seschrock * Load any hot spares for this pool. 
	/*
	 * Load any hot spares for this pool.
	 */
	error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
	    DMU_POOL_SPARES, sizeof (uint64_t), 1, &spa->spa_spares_object);
	if (error != 0 && error != ENOENT) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = EIO;
		goto out;
	}
	if (error == 0) {
		ASSERT(spa_version(spa) >= ZFS_VERSION_SPARES);
		if (load_nvlist(spa, spa->spa_spares_object,
		    &spa->spa_sparelist) != 0) {
			vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
			    VDEV_AUX_CORRUPT_DATA);
			error = EIO;
			goto out;
		}

		spa_config_enter(spa, RW_WRITER, FTAG);
		spa_load_spares(spa);
		spa_config_exit(spa, FTAG);
	}

	error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
	    DMU_POOL_PROPS, sizeof (uint64_t), 1, &spa->spa_pool_props_object);

	if (error && error != ENOENT) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = EIO;
		goto out;
	}

	if (error == 0) {
		(void) zap_lookup(spa->spa_meta_objset,
		    spa->spa_pool_props_object,
		    zpool_prop_to_name(ZPOOL_PROP_BOOTFS),
		    sizeof (uint64_t), 1, &spa->spa_bootfs);
		(void) zap_lookup(spa->spa_meta_objset,
		    spa->spa_pool_props_object,
		    zpool_prop_to_name(ZPOOL_PROP_AUTOREPLACE),
		    sizeof (uint64_t), 1, &autoreplace);
	}

	/*
	 * If the 'autoreplace' property is set, then post a resource notifying
	 * the ZFS DE that it should not issue any faults for unopenable
	 * devices.  We also iterate over the vdevs, and post a sysevent for any
	 * unopenable vdevs so that the normal autoreplace handler can take
	 * over.
	 */
	if (autoreplace)
		spa_check_removed(spa->spa_root_vdev);

	/*
	 * Load the vdev state for all toplevel vdevs.
	 */
	vdev_load(rvd);

	/*
	 * Propagate the leaf DTLs we just loaded all the way up the tree.
	 */
	spa_config_enter(spa, RW_WRITER, FTAG);
	vdev_dtl_reassess(rvd, 0, 0, B_FALSE);
	spa_config_exit(spa, FTAG);

	/*
	 * Check the state of the root vdev.  If it can't be opened, it
	 * indicates one or more toplevel vdevs are faulted.
	 */
	if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) {
		error = ENXIO;
		goto out;
	}
	if ((spa_mode & FWRITE) && state != SPA_LOAD_TRYIMPORT) {
		dmu_tx_t *tx;
		int need_update = B_FALSE;
		int c;

		/*
		 * Claim log blocks that haven't been committed yet.
		 * This must all happen in a single txg.
		 */
		tx = dmu_tx_create_assigned(spa_get_dsl(spa),
		    spa_first_txg(spa));
		(void) dmu_objset_find(spa->spa_name,
		    zil_claim, tx, DS_FIND_CHILDREN);
		dmu_tx_commit(tx);

		spa->spa_sync_on = B_TRUE;
		txg_sync_start(spa->spa_dsl_pool);

		/*
		 * Wait for all claims to sync.
		 */
		txg_wait_synced(spa->spa_dsl_pool, 0);

		/*
		 * If the config cache is stale, or we have uninitialized
		 * metaslabs (see spa_vdev_add()), then update the config.
		 */
		if (config_cache_txg != spa->spa_config_txg ||
		    state == SPA_LOAD_IMPORT)
			need_update = B_TRUE;

		for (c = 0; c < rvd->vdev_children; c++)
			if (rvd->vdev_child[c]->vdev_ms_array == 0)
				need_update = B_TRUE;

		/*
		 * Update the config cache asynchronously in case we're the
		 * root pool, in which case the config cache isn't writable yet.
		 */
		if (need_update)
			spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
	}

	error = 0;
out:
	if (error && error != EBADF)
		zfs_ereport_post(FM_EREPORT_ZFS_POOL, spa, NULL, NULL, 0, 0);
	spa->spa_load_state = SPA_LOAD_NONE;
	spa->spa_ena = 0;

	return (error);
}
/*
 * Pool Open/Import
 *
 * The import case is identical to an open except that the configuration is
 * sent down from userland, instead of grabbed from the configuration cache.
 * For the case of an open, the pool configuration will exist in the
 * POOL_STATE_UNINITIALIZED state.
 *
 * The stats information (gen/count/ustats) is used to gather vdev statistics
 * at the same time we open the pool, without having to keep around the spa_t
 * in some ambiguous state.
 */
static int
spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t **config)
{
	spa_t *spa;
	int error;
	int loaded = B_FALSE;
	int locked = B_FALSE;

	*spapp = NULL;

	/*
	 * As disgusting as this is, we need to support recursive calls to this
	 * function because dsl_dir_open() is called during spa_load(), and ends
	 * up calling spa_open() again.  The real fix is to figure out how to
	 * avoid dsl_dir_open() calling this in the first place.
	 */
	if (mutex_owner(&spa_namespace_lock) != curthread) {
		mutex_enter(&spa_namespace_lock);
		locked = B_TRUE;
	}

	if ((spa = spa_lookup(pool)) == NULL) {
		if (locked)
			mutex_exit(&spa_namespace_lock);
		return (ENOENT);
	}
	if (spa->spa_state == POOL_STATE_UNINITIALIZED) {

		spa_activate(spa);

		error = spa_load(spa, spa->spa_config, SPA_LOAD_OPEN, B_FALSE);

		if (error == EBADF) {
			/*
			 * If vdev_validate() returns failure (indicated by
			 * EBADF), it means that one of the vdevs indicates
			 * that the pool has been exported or destroyed.  If
			 * this is the case, the config cache is out of sync
			 * and we should remove the pool from the namespace.
			 */
			zfs_post_ok(spa, NULL);
			spa_unload(spa);
			spa_deactivate(spa);
			spa_remove(spa);
			spa_config_sync();
			if (locked)
				mutex_exit(&spa_namespace_lock);
			return (ENOENT);
		}

		if (error) {
			/*
			 * We can't open the pool, but we still have useful
			 * information: the state of each vdev after the
			 * attempted vdev_open().  Return this to the user.
			 */
			if (config != NULL && spa->spa_root_vdev != NULL) {
				spa_config_enter(spa, RW_READER, FTAG);
				*config = spa_config_generate(spa, NULL, -1ULL,
				    B_TRUE);
				spa_config_exit(spa, FTAG);
			}
			spa_unload(spa);
			spa_deactivate(spa);
			spa->spa_last_open_failed = B_TRUE;
			if (locked)
				mutex_exit(&spa_namespace_lock);
			*spapp = NULL;
			return (error);
		} else {
			zfs_post_ok(spa, NULL);
			spa->spa_last_open_failed = B_FALSE;
		}

		loaded = B_TRUE;
	}

	spa_open_ref(spa, tag);

	/*
	 * If we just loaded the pool, resilver anything that's out of date.
	 */
	if (loaded && (spa_mode & FWRITE))
		VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0);

	if (locked)
		mutex_exit(&spa_namespace_lock);

	*spapp = spa;

	if (config != NULL) {
		spa_config_enter(spa, RW_READER, FTAG);
		*config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
		spa_config_exit(spa, FTAG);
	}

	return (0);
}

int
spa_open(const char *name, spa_t **spapp, void *tag)
{
	return (spa_open_common(name, spapp, tag, NULL));
}
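/*
 * Illustrative sketch, not part of the original source: spa_open() and
 * spa_close() bracket every consumer's use of a pool.  The tag is used
 * to track the reference for debugging; FTAG (the enclosing function
 * name) is the usual choice:
 *
 *	spa_t *spa;
 *	int error;
 *
 *	if ((error = spa_open("tank", &spa, FTAG)) != 0)
 *		return (error);
 *	... use spa ...
 *	spa_close(spa, FTAG);
 */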
/*
 * Lookup the given spa_t, incrementing the inject count in the process,
 * preventing it from being exported or destroyed.
 */
spa_t *
spa_inject_addref(char *name)
{
	spa_t *spa;

	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(name)) == NULL) {
		mutex_exit(&spa_namespace_lock);
		return (NULL);
	}
	spa->spa_inject_ref++;
	mutex_exit(&spa_namespace_lock);

	return (spa);
}

void
spa_inject_delref(spa_t *spa)
{
	mutex_enter(&spa_namespace_lock);
	spa->spa_inject_ref--;
	mutex_exit(&spa_namespace_lock);
}

static void
spa_add_spares(spa_t *spa, nvlist_t *config)
{
	nvlist_t **spares;
	uint_t i, nspares;
	nvlist_t *nvroot;
	uint64_t guid;
	vdev_stat_t *vs;
	uint_t vsc;
	uint64_t pool;

	if (spa->spa_nspares == 0)
		return;

	VERIFY(nvlist_lookup_nvlist(config,
	    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
	VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist,
	    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
	if (nspares != 0) {
		VERIFY(nvlist_add_nvlist_array(nvroot,
		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
		VERIFY(nvlist_lookup_nvlist_array(nvroot,
		    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);

		/*
		 * Go through and find any spares which have since been
		 * repurposed as an active spare.  If this is the case, update
		 * their status appropriately.
		 */
		for (i = 0; i < nspares; i++) {
			VERIFY(nvlist_lookup_uint64(spares[i],
			    ZPOOL_CONFIG_GUID, &guid) == 0);
			if (spa_spare_exists(guid, &pool) && pool != 0ULL) {
				VERIFY(nvlist_lookup_uint64_array(
				    spares[i], ZPOOL_CONFIG_STATS,
				    (uint64_t **)&vs, &vsc) == 0);
				vs->vs_state = VDEV_STATE_CANT_OPEN;
				vs->vs_aux = VDEV_AUX_SPARED;
			}
		}
	}
}
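/*
 * Illustrative note, not part of the original source: spa_add_spares()
 * is what lets userland tell an available spare from one that has been
 * swapped into service.  spa_spare_exists() reports the guid of the
 * pool using the spare through its second argument; a nonzero value
 * means some pool has it spared in, so its stats are rewritten to
 * VDEV_STATE_CANT_OPEN / VDEV_AUX_SPARED before the config is returned.
 */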
int
spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen)
{
	int error;
	spa_t *spa;

	*config = NULL;
	error = spa_open_common(name, &spa, FTAG, config);

	if (spa && *config != NULL) {
		VERIFY(nvlist_add_uint64(*config, ZPOOL_CONFIG_ERRCOUNT,
		    spa_get_errlog_size(spa)) == 0);

		spa_add_spares(spa, *config);
	}

	/*
	 * We want to get the alternate root even for faulted pools, so we
	 * cheat and call spa_lookup() directly.
	 */
	if (altroot) {
		if (spa == NULL) {
			mutex_enter(&spa_namespace_lock);
			spa = spa_lookup(name);
			if (spa)
				spa_altroot(spa, altroot, buflen);
			else
				altroot[0] = '\0';
			spa = NULL;
			mutex_exit(&spa_namespace_lock);
		} else {
			spa_altroot(spa, altroot, buflen);
		}
	}

	if (spa != NULL)
		spa_close(spa, FTAG);

	return (error);
}
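/*
 * Illustrative sketch, not part of the original source: spa_get_stats()
 * backs "zpool status"-style queries.  It returns a config even when
 * the open fails, so callers typically do:
 *
 *	nvlist_t *config;
 *	char altroot[MAXPATHLEN];
 *
 *	error = spa_get_stats("tank", &config, altroot, sizeof (altroot));
 *	if (config != NULL) {
 *		... inspect vdev tree, error counts, spares ...
 *		nvlist_free(config);
 *	}
 */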
/*
 * Validate that the 'spares' array is well formed.  We must have an array of
 * nvlists, each of which describes a valid leaf vdev.  If this is an import
 * (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be specified,
 * as long as they are well-formed.
 */
static int
spa_validate_spares(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode)
{
	nvlist_t **spares;
	uint_t i, nspares;
	vdev_t *vd;
	int error;

	/*
	 * It's acceptable to have no spares specified.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) != 0)
		return (0);

	if (nspares == 0)
		return (EINVAL);

	/*
	 * Make sure the pool is formatted with a version that supports hot
	 * spares.
	 */
	if (spa_version(spa) < ZFS_VERSION_SPARES)
		return (ENOTSUP);

	/*
	 * Set the pending spare list so we correctly handle device in-use
	 * checking.
	 */
	spa->spa_pending_spares = spares;
	spa->spa_pending_nspares = nspares;

	for (i = 0; i < nspares; i++) {
		if ((error = spa_config_parse(spa, &vd, spares[i], NULL, 0,
		    mode)) != 0)
			goto out;

		if (!vd->vdev_ops->vdev_op_leaf) {
			vdev_free(vd);
			error = EINVAL;
			goto out;
		}

		vd->vdev_top = vd;

		if ((error = vdev_open(vd)) == 0 &&
		    (error = vdev_label_init(vd, crtxg,
		    VDEV_LABEL_SPARE)) == 0) {
			VERIFY(nvlist_add_uint64(spares[i], ZPOOL_CONFIG_GUID,
			    vd->vdev_guid) == 0);
		}

		vdev_free(vd);

		if (error && mode != VDEV_ALLOC_SPARE)
			goto out;
		else
			error = 0;
	}

out:
	spa->spa_pending_spares = NULL;
	spa->spa_pending_nspares = 0;
	return (error);
}
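/*
 * Illustrative sketch, not part of the original source: both pool
 * creation and spa_vdev_add() call this with the current txg and
 * VDEV_ALLOC_ADD, so a bad spare aborts the whole operation; import
 * passes VDEV_ALLOC_SPARE instead, tolerating spares that fail to
 * open.  The caller pattern (cleanup elided) is simply:
 *
 *	if ((error = spa_validate_spares(spa, nvroot, txg,
 *	    VDEV_ALLOC_ADD)) != 0)
 *		return (error);
 */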
/*
 * Pool Creation
 */
int
spa_create(const char *pool, nvlist_t *nvroot, const char *altroot)
{
	spa_t *spa;
	vdev_t *rvd;
	dsl_pool_t *dp;
	dmu_tx_t *tx;
	int c, error = 0;
	uint64_t txg = TXG_INITIAL;
	nvlist_t **spares;
	uint_t nspares;

	/*
	 * If this pool already exists, return failure.
	 */
	mutex_enter(&spa_namespace_lock);
	if (spa_lookup(pool) != NULL) {
		mutex_exit(&spa_namespace_lock);
		return (EEXIST);
	}

	/*
	 * Allocate a new spa_t structure.
	 */
	spa = spa_add(pool, altroot);
	spa_activate(spa);

	spa->spa_uberblock.ub_txg = txg - 1;
	spa->spa_uberblock.ub_version = ZFS_VERSION;
	spa->spa_ubsync = spa->spa_uberblock;

	/*
	 * Create the root vdev.
	 */
	spa_config_enter(spa, RW_WRITER, FTAG);

	error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD);

	ASSERT(error != 0 || rvd != NULL);
	ASSERT(error != 0 || spa->spa_root_vdev == rvd);

	if (error == 0 && rvd->vdev_children == 0)
		error = EINVAL;

	if (error == 0 &&
	    (error = vdev_create(rvd, txg, B_FALSE)) == 0 &&
	    (error = spa_validate_spares(spa, nvroot, txg,
	    VDEV_ALLOC_ADD)) == 0) {
		for (c = 0; c < rvd->vdev_children; c++)
			vdev_init(rvd->vdev_child[c], txg);
		vdev_config_dirty(rvd);
	}

	spa_config_exit(spa, FTAG);

	if (error != 0) {
		spa_unload(spa);
		spa_deactivate(spa);
		spa_remove(spa);
		mutex_exit(&spa_namespace_lock);
		return (error);
	}

	/*
	 * Get the list of spares, if specified.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) == 0) {
		VERIFY(nvlist_alloc(&spa->spa_sparelist, NV_UNIQUE_NAME,
		    KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist,
		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
		spa_config_enter(spa, RW_WRITER, FTAG);
		spa_load_spares(spa);
		spa_config_exit(spa, FTAG);
		spa->spa_sync_spares = B_TRUE;
	}

	spa->spa_dsl_pool = dp = dsl_pool_create(spa, txg);
	spa->spa_meta_objset = dp->dp_meta_objset;

	tx = dmu_tx_create_assigned(dp, txg);

	/*
	 * Create the pool config object.
	 */
	spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset,
	    DMU_OT_PACKED_NVLIST, 1 << 14,
	    DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx);

	if (zap_add(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG,
	    sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) {
		cmn_err(CE_PANIC, "failed to add pool config");
	}

	/* Newly created pools are always deflated. */
	spa->spa_deflate = TRUE;
	if (zap_add(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
	    sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) {
		cmn_err(CE_PANIC, "failed to add deflate");
	}
	/*
	 * Create the deferred-free bplist object.  Turn off compression
	 * because sync-to-convergence takes longer if the blocksize
	 * keeps changing.
	 */
	spa->spa_sync_bplist_obj = bplist_create(spa->spa_meta_objset,
	    1 << 14, tx);
	dmu_object_set_compress(spa->spa_meta_objset, spa->spa_sync_bplist_obj,
	    ZIO_COMPRESS_OFF, tx);

	if (zap_add(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST,
	    sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj, tx) != 0) {
		cmn_err(CE_PANIC, "failed to add bplist");
	}

	/*
	 * Create the pool's history object.
	 */
	spa_history_create_obj(spa, tx);

	dmu_tx_commit(tx);

	spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS);
	spa->spa_sync_on = B_TRUE;
	txg_sync_start(spa->spa_dsl_pool);

	/*
	 * We explicitly wait for the first transaction to complete so that our
	 * bean counters are appropriately updated.
	 */
	txg_wait_synced(spa->spa_dsl_pool, txg);

	spa_config_sync();

	mutex_exit(&spa_namespace_lock);

	return (0);
}
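/*
 * Illustrative sketch, not part of the original source: the nvroot
 * handed to spa_create() is the same recursive vdev description that
 * spa_config_parse() consumes, normally built in userland by the
 * zpool(1M) command from the requested layout, so kernel-side usage
 * reduces to:
 *
 *	error = spa_create("tank", nvroot, NULL);	(NULL: no altroot)
 *
 * By the time this returns 0, the MOS exists, the config and any
 * spares have been synced, and the pool is fully open.
 */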
/*
 * Import the given pool into the system.  We set up the necessary spa_t and
 * then call spa_load() to do the dirty work.
 */
int
spa_import(const char *pool, nvlist_t *config, const char *altroot)
{
	spa_t *spa;
	int error;
	nvlist_t *nvroot;
	nvlist_t **spares;
	uint_t nspares;

	if (!(spa_mode & FWRITE))
		return (EROFS);

	/*
	 * If a pool with this name exists, return failure.
	 */
	mutex_enter(&spa_namespace_lock);
	if (spa_lookup(pool) != NULL) {
		mutex_exit(&spa_namespace_lock);
		return (EEXIST);
	}

	/*
	 * Create and initialize the spa structure.
	 */
	spa = spa_add(pool, altroot);
	spa_activate(spa);

	/*
	 * Pass off the heavy lifting to spa_load().
	 * Pass TRUE for mosconfig because the user-supplied config
	 * is actually the one to trust when doing an import.
	 */
	error = spa_load(spa, config, SPA_LOAD_IMPORT, B_TRUE);

	spa_config_enter(spa, RW_WRITER, FTAG);
	/*
	 * Toss any existing sparelist, as it doesn't have any validity
	 * anymore, and conflicts with spa_has_spare().
	 */
	if (spa->spa_sparelist) {
		nvlist_free(spa->spa_sparelist);
		spa->spa_sparelist = NULL;
		spa_load_spares(spa);
	}

	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) == 0);
	if (error == 0)
		error = spa_validate_spares(spa, nvroot, -1ULL,
		    VDEV_ALLOC_SPARE);
	spa_config_exit(spa, FTAG);

	if (error != 0) {
		spa_unload(spa);
		spa_deactivate(spa);
		spa_remove(spa);
		mutex_exit(&spa_namespace_lock);
		return (error);
	}

	/*
	 * Override any spares as specified by the user, as these may have
	 * correct device names/devids, etc.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) == 0) {
		if (spa->spa_sparelist)
			VERIFY(nvlist_remove(spa->spa_sparelist,
			    ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0);
		else
			VERIFY(nvlist_alloc(&spa->spa_sparelist,
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist,
		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
		spa_config_enter(spa, RW_WRITER, FTAG);
		spa_load_spares(spa);
		spa_config_exit(spa, FTAG);
		spa->spa_sync_spares = B_TRUE;
	}

	/*
	 * Update the config cache to include the newly-imported pool.
	 */
	spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);

	/*
	 * Resilver anything that's out of date.
	 */
	if (spa_mode & FWRITE)
		VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0);

	mutex_exit(&spa_namespace_lock);

	return (0);
}
/*
 * This (illegal) pool name is used when temporarily importing a spa_t in order
 * to get the vdev stats associated with the imported devices.
 */
#define	TRYIMPORT_NAME	"$import"

nvlist_t *
spa_tryimport(nvlist_t *tryconfig)
{
	nvlist_t *config = NULL;
	char *poolname;
	spa_t *spa;
	uint64_t state;

	if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname))
		return (NULL);

	if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state))
		return (NULL);

	/*
	 * Create and initialize the spa structure.
	 */
	mutex_enter(&spa_namespace_lock);
	spa = spa_add(TRYIMPORT_NAME, NULL);
	spa_activate(spa);

	/*
	 * Pass off the heavy lifting to spa_load().
	 * Pass TRUE for mosconfig because the user-supplied config
	 * is actually the one to trust when doing an import.
	 */
	(void) spa_load(spa, tryconfig, SPA_LOAD_TRYIMPORT, B_TRUE);

	/*
	 * If 'tryconfig' was at least parsable, return the current config.
	 */
	if (spa->spa_root_vdev != NULL) {
		spa_config_enter(spa, RW_READER, FTAG);
		config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
		spa_config_exit(spa, FTAG);
		VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
		    poolname) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
		    state) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP,
		    spa->spa_uberblock.ub_timestamp) == 0);

		/*
		 * Add the list of hot spares.
		 */
		spa_add_spares(spa, config);
	}

	spa_unload(spa);
	spa_deactivate(spa);
	spa_remove(spa);
	mutex_exit(&spa_namespace_lock);

	return (config);
}
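/*
 * Illustrative sketch, not part of the original source: spa_tryimport()
 * is the read-only probe behind "zpool import" listings.  Callers hand
 * it a label-derived config and get back either an enriched config or
 * NULL if the input wasn't even parsable:
 *
 *	nvlist_t *config;
 *
 *	if ((config = spa_tryimport(tryconfig)) != NULL) {
 *		... show pool name, state, vdev status, spares ...
 *		nvlist_free(config);
 *	}
 */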
14861544Seschrock */ 14871544Seschrock if (!spa_refcount_zero(spa) || 14881544Seschrock (spa->spa_inject_ref != 0 && 14891544Seschrock new_state != POOL_STATE_UNINITIALIZED)) { 1490789Sahrens spa_scrub_resume(spa); 14911544Seschrock spa_async_resume(spa); 1492789Sahrens mutex_exit(&spa_namespace_lock); 1493789Sahrens return (EBUSY); 1494789Sahrens } 1495789Sahrens 1496789Sahrens spa_scrub_resume(spa); 1497789Sahrens VERIFY(spa_scrub(spa, POOL_SCRUB_NONE, B_TRUE) == 0); 1498789Sahrens 1499789Sahrens /* 1500789Sahrens * We want this to be reflected on every label, 1501789Sahrens * so mark them all dirty. spa_unload() will do the 1502789Sahrens * final sync that pushes these changes out. 1503789Sahrens */ 15041544Seschrock if (new_state != POOL_STATE_UNINITIALIZED) { 15051601Sbonwick spa_config_enter(spa, RW_WRITER, FTAG); 15061544Seschrock spa->spa_state = new_state; 15071635Sbonwick spa->spa_final_txg = spa_last_synced_txg(spa) + 1; 15081544Seschrock vdev_config_dirty(spa->spa_root_vdev); 15091601Sbonwick spa_config_exit(spa, FTAG); 15101544Seschrock } 1511789Sahrens } 1512789Sahrens 1513*4451Seschrock spa_event_notify(spa, NULL, ESC_ZFS_POOL_DESTROY); 1514*4451Seschrock 1515789Sahrens if (spa->spa_state != POOL_STATE_UNINITIALIZED) { 1516789Sahrens spa_unload(spa); 1517789Sahrens spa_deactivate(spa); 1518789Sahrens } 1519789Sahrens 15201775Sbillm if (oldconfig && spa->spa_config) 15211775Sbillm VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0); 15221775Sbillm 15231544Seschrock if (new_state != POOL_STATE_UNINITIALIZED) { 15241544Seschrock spa_remove(spa); 15251544Seschrock spa_config_sync(); 15261544Seschrock } 1527789Sahrens mutex_exit(&spa_namespace_lock); 1528789Sahrens 1529789Sahrens return (0); 1530789Sahrens } 1531789Sahrens 1532789Sahrens /* 1533789Sahrens * Destroy a storage pool. 1534789Sahrens */ 1535789Sahrens int 1536789Sahrens spa_destroy(char *pool) 1537789Sahrens { 15381775Sbillm return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL)); 1539789Sahrens } 1540789Sahrens 1541789Sahrens /* 1542789Sahrens * Export a storage pool. 1543789Sahrens */ 1544789Sahrens int 15451775Sbillm spa_export(char *pool, nvlist_t **oldconfig) 1546789Sahrens { 15471775Sbillm return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig)); 1548789Sahrens } 1549789Sahrens 1550789Sahrens /* 15511544Seschrock * Similar to spa_export(), this unloads the spa_t without actually removing it 15521544Seschrock * from the namespace in any way. 15531544Seschrock */ 15541544Seschrock int 15551544Seschrock spa_reset(char *pool) 15561544Seschrock { 15571775Sbillm return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL)); 15581544Seschrock } 15591544Seschrock 15601544Seschrock 15611544Seschrock /* 1562789Sahrens * ========================================================================== 1563789Sahrens * Device manipulation 1564789Sahrens * ========================================================================== 1565789Sahrens */ 1566789Sahrens 1567789Sahrens /* 1568789Sahrens * Add capacity to a storage pool. 
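 * The nvroot supplied by the caller may describe both new top-level
 * vdevs and hot spares; both cases are handled below under the
 * spa_vdev_enter()/spa_vdev_exit() transaction.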
1569789Sahrens */ 1570789Sahrens int 1571789Sahrens spa_vdev_add(spa_t *spa, nvlist_t *nvroot) 1572789Sahrens { 1573789Sahrens uint64_t txg; 15741635Sbonwick int c, error; 1575789Sahrens vdev_t *rvd = spa->spa_root_vdev; 15761585Sbonwick vdev_t *vd, *tvd; 15772082Seschrock nvlist_t **spares; 15782082Seschrock uint_t i, nspares; 1579789Sahrens 1580789Sahrens txg = spa_vdev_enter(spa); 1581789Sahrens 15822082Seschrock if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0, 15832082Seschrock VDEV_ALLOC_ADD)) != 0) 15842082Seschrock return (spa_vdev_exit(spa, NULL, txg, error)); 15852082Seschrock 15863377Seschrock spa->spa_pending_vdev = vd; 1587789Sahrens 15882082Seschrock if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 15892082Seschrock &spares, &nspares) != 0) 15902082Seschrock nspares = 0; 15912082Seschrock 15923377Seschrock if (vd->vdev_children == 0 && nspares == 0) { 15933377Seschrock spa->spa_pending_vdev = NULL; 15942082Seschrock return (spa_vdev_exit(spa, vd, txg, EINVAL)); 15953377Seschrock } 15962082Seschrock 15972082Seschrock if (vd->vdev_children != 0) { 15983377Seschrock if ((error = vdev_create(vd, txg, B_FALSE)) != 0) { 15993377Seschrock spa->spa_pending_vdev = NULL; 16002082Seschrock return (spa_vdev_exit(spa, vd, txg, error)); 16012082Seschrock } 16022082Seschrock } 16032082Seschrock 16043377Seschrock /* 16053377Seschrock * We must validate the spares after checking the children. Otherwise, 16063377Seschrock * vdev_inuse() will blindly overwrite the spare. 16073377Seschrock */ 16083377Seschrock if ((error = spa_validate_spares(spa, nvroot, txg, 16093377Seschrock VDEV_ALLOC_ADD)) != 0) { 16103377Seschrock spa->spa_pending_vdev = NULL; 16113377Seschrock return (spa_vdev_exit(spa, vd, txg, error)); 16123377Seschrock } 16133377Seschrock 16143377Seschrock spa->spa_pending_vdev = NULL; 16153377Seschrock 16163377Seschrock /* 16173377Seschrock * Transfer each new top-level vdev from vd to rvd. 
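 * Each child takes the next free top-level slot (rvd->vdev_children)
 * and is marked dirty so the updated config reaches its labels.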
16183377Seschrock */ 16193377Seschrock for (c = 0; c < vd->vdev_children; c++) { 16203377Seschrock tvd = vd->vdev_child[c]; 16213377Seschrock vdev_remove_child(vd, tvd); 16223377Seschrock tvd->vdev_id = rvd->vdev_children; 16233377Seschrock vdev_add_child(rvd, tvd); 16243377Seschrock vdev_config_dirty(tvd); 16253377Seschrock } 16263377Seschrock 16272082Seschrock if (nspares != 0) { 16282082Seschrock if (spa->spa_sparelist != NULL) { 16292082Seschrock nvlist_t **oldspares; 16302082Seschrock uint_t oldnspares; 16312082Seschrock nvlist_t **newspares; 16322082Seschrock 16332082Seschrock VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist, 16342082Seschrock ZPOOL_CONFIG_SPARES, &oldspares, &oldnspares) == 0); 16352082Seschrock 16362082Seschrock newspares = kmem_alloc(sizeof (void *) * 16372082Seschrock (nspares + oldnspares), KM_SLEEP); 16382082Seschrock for (i = 0; i < oldnspares; i++) 16392082Seschrock VERIFY(nvlist_dup(oldspares[i], 16402082Seschrock &newspares[i], KM_SLEEP) == 0); 16412082Seschrock for (i = 0; i < nspares; i++) 16422082Seschrock VERIFY(nvlist_dup(spares[i], 16432082Seschrock &newspares[i + oldnspares], 16442082Seschrock KM_SLEEP) == 0); 16452082Seschrock 16462082Seschrock VERIFY(nvlist_remove(spa->spa_sparelist, 16472082Seschrock ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0); 16482082Seschrock 16492082Seschrock VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist, 16502082Seschrock ZPOOL_CONFIG_SPARES, newspares, 16512082Seschrock nspares + oldnspares) == 0); 16522082Seschrock for (i = 0; i < oldnspares + nspares; i++) 16532082Seschrock nvlist_free(newspares[i]); 16542082Seschrock kmem_free(newspares, (oldnspares + nspares) * 16552082Seschrock sizeof (void *)); 16562082Seschrock } else { 16572082Seschrock VERIFY(nvlist_alloc(&spa->spa_sparelist, 16582082Seschrock NV_UNIQUE_NAME, KM_SLEEP) == 0); 16592082Seschrock VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist, 16602082Seschrock ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 16612082Seschrock } 16622082Seschrock 16632082Seschrock spa_load_spares(spa); 16642082Seschrock spa->spa_sync_spares = B_TRUE; 1665789Sahrens } 1666789Sahrens 1667789Sahrens /* 16681585Sbonwick * We have to be careful when adding new vdevs to an existing pool. 16691585Sbonwick * If other threads start allocating from these vdevs before we 16701585Sbonwick * sync the config cache, and we lose power, then upon reboot we may 16711585Sbonwick * fail to open the pool because there are DVAs that the config cache 16721585Sbonwick * can't translate. Therefore, we first add the vdevs without 16731585Sbonwick * initializing metaslabs; sync the config cache (via spa_vdev_exit()); 16741635Sbonwick * and then let spa_config_update() initialize the new metaslabs. 16751585Sbonwick * 16761585Sbonwick * spa_load() checks for added-but-not-initialized vdevs, so that 16771585Sbonwick * if we lose power at any point in this sequence, the remaining 16781585Sbonwick * steps will be completed the next time we load the pool. 1679789Sahrens */ 16801635Sbonwick (void) spa_vdev_exit(spa, vd, txg, 0); 16811585Sbonwick 16821635Sbonwick mutex_enter(&spa_namespace_lock); 16831635Sbonwick spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); 16841635Sbonwick mutex_exit(&spa_namespace_lock); 1685789Sahrens 16861635Sbonwick return (0); 1687789Sahrens } 1688789Sahrens 1689789Sahrens /* 1690789Sahrens * Attach a device to a mirror. The arguments are the path to any device 1691789Sahrens * in the mirror, and the nvroot for the new device. 
If the path specifies 1692789Sahrens * a device that is not mirrored, we automatically insert the mirror vdev. 1693789Sahrens * 1694789Sahrens * If 'replacing' is specified, the new device is intended to replace the 1695789Sahrens * existing device; in this case the two devices are made into their own 1696*4451Seschrock * mirror using the 'replacing' vdev, which is functionally identical to 1697789Sahrens * the mirror vdev (it actually reuses all the same ops) but has a few 1698789Sahrens * extra rules: you can't attach to it after it's been created, and upon 1699789Sahrens * completion of resilvering, the first disk (the one being replaced) 1700789Sahrens * is automatically detached. 1701789Sahrens */ 1702789Sahrens int 17031544Seschrock spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) 1704789Sahrens { 1705789Sahrens uint64_t txg, open_txg; 1706789Sahrens int error; 1707789Sahrens vdev_t *rvd = spa->spa_root_vdev; 1708789Sahrens vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd; 17092082Seschrock vdev_ops_t *pvops; 1710789Sahrens 1711789Sahrens txg = spa_vdev_enter(spa); 1712789Sahrens 17131544Seschrock oldvd = vdev_lookup_by_guid(rvd, guid); 1714789Sahrens 1715789Sahrens if (oldvd == NULL) 1716789Sahrens return (spa_vdev_exit(spa, NULL, txg, ENODEV)); 1717789Sahrens 17181585Sbonwick if (!oldvd->vdev_ops->vdev_op_leaf) 17191585Sbonwick return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 17201585Sbonwick 1721789Sahrens pvd = oldvd->vdev_parent; 1722789Sahrens 17232082Seschrock if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0, 1724*4451Seschrock VDEV_ALLOC_ADD)) != 0) 1725*4451Seschrock return (spa_vdev_exit(spa, NULL, txg, EINVAL)); 1726*4451Seschrock 1727*4451Seschrock if (newrootvd->vdev_children != 1) 1728789Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); 1729789Sahrens 1730789Sahrens newvd = newrootvd->vdev_child[0]; 1731789Sahrens 1732789Sahrens if (!newvd->vdev_ops->vdev_op_leaf) 1733789Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); 1734789Sahrens 17352082Seschrock if ((error = vdev_create(newrootvd, txg, replacing)) != 0) 1736789Sahrens return (spa_vdev_exit(spa, newrootvd, txg, error)); 1737789Sahrens 17382082Seschrock if (!replacing) { 17392082Seschrock /* 17402082Seschrock * For attach, the only allowable parent is a mirror or the root 17412082Seschrock * vdev. 17422082Seschrock */ 17432082Seschrock if (pvd->vdev_ops != &vdev_mirror_ops && 17442082Seschrock pvd->vdev_ops != &vdev_root_ops) 17452082Seschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 17462082Seschrock 17472082Seschrock pvops = &vdev_mirror_ops; 17482082Seschrock } else { 17492082Seschrock /* 17502082Seschrock * Active hot spares can only be replaced by inactive hot 17512082Seschrock * spares. 17522082Seschrock */ 17532082Seschrock if (pvd->vdev_ops == &vdev_spare_ops && 17542082Seschrock pvd->vdev_child[1] == oldvd && 17552082Seschrock !spa_has_spare(spa, newvd->vdev_guid)) 17562082Seschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 17572082Seschrock 17582082Seschrock /* 17592082Seschrock * If the source is a hot spare, and the parent isn't already a 17602082Seschrock * spare, then we want to create a new hot spare. Otherwise, we 17613377Seschrock * want to create a replacing vdev. The user is not allowed to 17623377Seschrock * attach to a spared vdev child unless the 'isspare' state is 17633377Seschrock * the same (spare replaces spare, non-spare replaces 17643377Seschrock * non-spare). 
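 * Concretely, the checks below are: a replacing parent always fails
 * with ENOTSUP; a spare parent fails unless old and new agree on
 * 'isspare'; a spare device under any other parent gets a 'spare'
 * vdev; everything else gets a 'replacing' vdev.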
17652082Seschrock */ 17662082Seschrock if (pvd->vdev_ops == &vdev_replacing_ops) 17672082Seschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 17683377Seschrock else if (pvd->vdev_ops == &vdev_spare_ops && 17693377Seschrock newvd->vdev_isspare != oldvd->vdev_isspare) 17703377Seschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 17712082Seschrock else if (pvd->vdev_ops != &vdev_spare_ops && 17722082Seschrock newvd->vdev_isspare) 17732082Seschrock pvops = &vdev_spare_ops; 17742082Seschrock else 17752082Seschrock pvops = &vdev_replacing_ops; 17762082Seschrock } 17772082Seschrock 17781175Slling /* 17791175Slling * Compare the new device size with the replaceable/attachable 17801175Slling * device size. 17811175Slling */ 17821175Slling if (newvd->vdev_psize < vdev_get_rsize(oldvd)) 1783789Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW)); 1784789Sahrens 17851732Sbonwick /* 17861732Sbonwick * The new device cannot have a higher alignment requirement 17871732Sbonwick * than the top-level vdev. 17881732Sbonwick */ 17891732Sbonwick if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift) 1790789Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EDOM)); 1791789Sahrens 1792789Sahrens /* 1793789Sahrens * If this is an in-place replacement, update oldvd's path and devid 1794789Sahrens * to make it distinguishable from newvd, and unopenable from now on. 1795789Sahrens */ 1796789Sahrens if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) { 1797789Sahrens spa_strfree(oldvd->vdev_path); 1798789Sahrens oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5, 1799789Sahrens KM_SLEEP); 1800789Sahrens (void) sprintf(oldvd->vdev_path, "%s/%s", 1801789Sahrens newvd->vdev_path, "old"); 1802789Sahrens if (oldvd->vdev_devid != NULL) { 1803789Sahrens spa_strfree(oldvd->vdev_devid); 1804789Sahrens oldvd->vdev_devid = NULL; 1805789Sahrens } 1806789Sahrens } 1807789Sahrens 1808789Sahrens /* 18092082Seschrock * If the parent is not a mirror, or if we're replacing, insert the new 18102082Seschrock * mirror/replacing/spare vdev above oldvd. 1811789Sahrens */ 1812789Sahrens if (pvd->vdev_ops != pvops) 1813789Sahrens pvd = vdev_add_parent(oldvd, pvops); 1814789Sahrens 1815789Sahrens ASSERT(pvd->vdev_top->vdev_parent == rvd); 1816789Sahrens ASSERT(pvd->vdev_ops == pvops); 1817789Sahrens ASSERT(oldvd->vdev_parent == pvd); 1818789Sahrens 1819789Sahrens /* 1820789Sahrens * Extract the new device from its root and add it to pvd. 1821789Sahrens */ 1822789Sahrens vdev_remove_child(newrootvd, newvd); 1823789Sahrens newvd->vdev_id = pvd->vdev_children; 1824789Sahrens vdev_add_child(pvd, newvd); 1825789Sahrens 18261544Seschrock /* 18271544Seschrock * If newvd is smaller than oldvd, but larger than its rsize, 18281544Seschrock * the addition of newvd may have decreased our parent's asize. 18291544Seschrock */ 18301544Seschrock pvd->vdev_asize = MIN(pvd->vdev_asize, newvd->vdev_asize); 18311544Seschrock 1832789Sahrens tvd = newvd->vdev_top; 1833789Sahrens ASSERT(pvd->vdev_top == tvd); 1834789Sahrens ASSERT(tvd->vdev_parent == rvd); 1835789Sahrens 1836789Sahrens vdev_config_dirty(tvd); 1837789Sahrens 1838789Sahrens /* 1839789Sahrens * Set newvd's DTL to [TXG_INITIAL, open_txg]. It will propagate 1840789Sahrens * upward when spa_vdev_exit() calls vdev_dtl_reassess(). 
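 * open_txg (txg + TXG_CONCURRENT_STATES - 1) is the last txg that can
 * commit before this attach syncs, so the range added below covers
 * every txg newvd could be missing.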
1841789Sahrens */ 1842789Sahrens open_txg = txg + TXG_CONCURRENT_STATES - 1; 1843789Sahrens 1844789Sahrens mutex_enter(&newvd->vdev_dtl_lock); 1845789Sahrens space_map_add(&newvd->vdev_dtl_map, TXG_INITIAL, 1846789Sahrens open_txg - TXG_INITIAL + 1); 1847789Sahrens mutex_exit(&newvd->vdev_dtl_lock); 1848789Sahrens 18493377Seschrock if (newvd->vdev_isspare) 18503377Seschrock spa_spare_activate(newvd); 18511544Seschrock 1852789Sahrens /* 1853789Sahrens * Mark newvd's DTL dirty in this txg. 1854789Sahrens */ 18551732Sbonwick vdev_dirty(tvd, VDD_DTL, newvd, txg); 1856789Sahrens 1857789Sahrens (void) spa_vdev_exit(spa, newrootvd, open_txg, 0); 1858789Sahrens 1859789Sahrens /* 1860*4451Seschrock * Kick off a resilver to update newvd. We need to grab the namespace 1861*4451Seschrock * lock because spa_scrub() needs to post a sysevent with the pool name. 1862789Sahrens */ 1863*4451Seschrock mutex_enter(&spa_namespace_lock); 1864789Sahrens VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0); 1865*4451Seschrock mutex_exit(&spa_namespace_lock); 1866789Sahrens 1867789Sahrens return (0); 1868789Sahrens } 1869789Sahrens 1870789Sahrens /* 1871789Sahrens * Detach a device from a mirror or replacing vdev. 1872789Sahrens * If 'replace_done' is specified, only detach if the parent 1873789Sahrens * is a replacing vdev. 1874789Sahrens */ 1875789Sahrens int 18761544Seschrock spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done) 1877789Sahrens { 1878789Sahrens uint64_t txg; 1879789Sahrens int c, t, error; 1880789Sahrens vdev_t *rvd = spa->spa_root_vdev; 1881789Sahrens vdev_t *vd, *pvd, *cvd, *tvd; 18822082Seschrock boolean_t unspare = B_FALSE; 18832082Seschrock uint64_t unspare_guid; 1884789Sahrens 1885789Sahrens txg = spa_vdev_enter(spa); 1886789Sahrens 18871544Seschrock vd = vdev_lookup_by_guid(rvd, guid); 1888789Sahrens 1889789Sahrens if (vd == NULL) 1890789Sahrens return (spa_vdev_exit(spa, NULL, txg, ENODEV)); 1891789Sahrens 18921585Sbonwick if (!vd->vdev_ops->vdev_op_leaf) 18931585Sbonwick return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 18941585Sbonwick 1895789Sahrens pvd = vd->vdev_parent; 1896789Sahrens 1897789Sahrens /* 1898789Sahrens * If replace_done is specified, only remove this device if it's 18992082Seschrock * the first child of a replacing vdev. For the 'spare' vdev, either 19002082Seschrock * disk can be removed. 1901789Sahrens */ 19022082Seschrock if (replace_done) { 19032082Seschrock if (pvd->vdev_ops == &vdev_replacing_ops) { 19042082Seschrock if (vd->vdev_id != 0) 19052082Seschrock return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 19062082Seschrock } else if (pvd->vdev_ops != &vdev_spare_ops) { 19072082Seschrock return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 19082082Seschrock } 19092082Seschrock } 19102082Seschrock 19112082Seschrock ASSERT(pvd->vdev_ops != &vdev_spare_ops || 19122082Seschrock spa_version(spa) >= ZFS_VERSION_SPARES); 1913789Sahrens 1914789Sahrens /* 19152082Seschrock * Only mirror, replacing, and spare vdevs support detach. 1916789Sahrens */ 1917789Sahrens if (pvd->vdev_ops != &vdev_replacing_ops && 19182082Seschrock pvd->vdev_ops != &vdev_mirror_ops && 19192082Seschrock pvd->vdev_ops != &vdev_spare_ops) 1920789Sahrens return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 1921789Sahrens 1922789Sahrens /* 1923789Sahrens * If there's only one replica, you can't detach it. 
1924789Sahrens */ 1925789Sahrens if (pvd->vdev_children <= 1) 1926789Sahrens return (spa_vdev_exit(spa, NULL, txg, EBUSY)); 1927789Sahrens 1928789Sahrens /* 1929789Sahrens * If all siblings have non-empty DTLs, this device may have the only 1930789Sahrens * valid copy of the data, which means we cannot safely detach it. 1931789Sahrens * 1932789Sahrens * XXX -- as in the vdev_offline() case, we really want a more 1933789Sahrens * precise DTL check. 1934789Sahrens */ 1935789Sahrens for (c = 0; c < pvd->vdev_children; c++) { 1936789Sahrens uint64_t dirty; 1937789Sahrens 1938789Sahrens cvd = pvd->vdev_child[c]; 1939789Sahrens if (cvd == vd) 1940789Sahrens continue; 1941789Sahrens if (vdev_is_dead(cvd)) 1942789Sahrens continue; 1943789Sahrens mutex_enter(&cvd->vdev_dtl_lock); 1944789Sahrens dirty = cvd->vdev_dtl_map.sm_space | 1945789Sahrens cvd->vdev_dtl_scrub.sm_space; 1946789Sahrens mutex_exit(&cvd->vdev_dtl_lock); 1947789Sahrens if (!dirty) 1948789Sahrens break; 1949789Sahrens } 19502082Seschrock 19512082Seschrock /* 19522082Seschrock * If we are a replacing or spare vdev, then we can always detach the 19532082Seschrock * latter child, as that is how one cancels the operation. 19542082Seschrock */ 19552082Seschrock if ((pvd->vdev_ops == &vdev_mirror_ops || vd->vdev_id != 1) && 19562082Seschrock c == pvd->vdev_children) 1957789Sahrens return (spa_vdev_exit(spa, NULL, txg, EBUSY)); 1958789Sahrens 1959789Sahrens /* 19602082Seschrock * If we are detaching the original disk from a spare, then it implies 19612082Seschrock * that the spare should become a real disk, and be removed from the 19622082Seschrock * active spare list for the pool. 19632082Seschrock */ 19642082Seschrock if (pvd->vdev_ops == &vdev_spare_ops && 19652082Seschrock vd->vdev_id == 0) 19662082Seschrock unspare = B_TRUE; 19672082Seschrock 19682082Seschrock /* 1969789Sahrens * Erase the disk labels so the disk can be used for other things. 1970789Sahrens * This must be done after all other error cases are handled, 1971789Sahrens * but before we disembowel vd (so we can still do I/O to it). 1972789Sahrens * But if we can't do it, don't treat the error as fatal -- 1973789Sahrens * it may be that the unwritability of the disk is the reason 1974789Sahrens * it's being detached! 1975789Sahrens */ 19763377Seschrock error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); 1977789Sahrens 1978789Sahrens /* 1979789Sahrens * Remove vd from its parent and compact the parent's children. 1980789Sahrens */ 1981789Sahrens vdev_remove_child(pvd, vd); 1982789Sahrens vdev_compact_children(pvd); 1983789Sahrens 1984789Sahrens /* 1985789Sahrens * Remember one of the remaining children so we can get tvd below. 1986789Sahrens */ 1987789Sahrens cvd = pvd->vdev_child[0]; 1988789Sahrens 1989789Sahrens /* 19902082Seschrock * If we need to remove the remaining child from the list of hot spares, 19912082Seschrock * do it now, marking the vdev as no longer a spare in the process. We 19922082Seschrock * must do this before vdev_remove_parent(), because that can change the 19932082Seschrock * GUID if it creates a new toplevel GUID. 19942082Seschrock */ 19952082Seschrock if (unspare) { 19962082Seschrock ASSERT(cvd->vdev_isspare); 19973377Seschrock spa_spare_remove(cvd); 19982082Seschrock unspare_guid = cvd->vdev_guid; 19992082Seschrock } 20002082Seschrock 20012082Seschrock /* 2002789Sahrens * If the parent mirror/replacing vdev only has one child, 2003789Sahrens * the parent is no longer needed. Remove it from the tree. 
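 * vdev_remove_parent() splices the surviving child into the parent's
 * slot in the tree.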
2004789Sahrens */ 2005789Sahrens if (pvd->vdev_children == 1) 2006789Sahrens vdev_remove_parent(cvd); 2007789Sahrens 2008789Sahrens /* 2009789Sahrens * We don't set tvd until now because the parent we just removed 2010789Sahrens * may have been the previous top-level vdev. 2011789Sahrens */ 2012789Sahrens tvd = cvd->vdev_top; 2013789Sahrens ASSERT(tvd->vdev_parent == rvd); 2014789Sahrens 2015789Sahrens /* 20163377Seschrock * Reevaluate the parent vdev state. 2017789Sahrens */ 2018*4451Seschrock vdev_propagate_state(cvd); 2019789Sahrens 2020789Sahrens /* 20213377Seschrock * If the device we just detached was smaller than the others, it may be 20223377Seschrock * possible to add metaslabs (i.e. grow the pool). vdev_metaslab_init() 20233377Seschrock * can't fail because the existing metaslabs are already in core, so 20243377Seschrock * there's nothing to read from disk. 2025789Sahrens */ 20261732Sbonwick VERIFY(vdev_metaslab_init(tvd, txg) == 0); 2027789Sahrens 2028789Sahrens vdev_config_dirty(tvd); 2029789Sahrens 2030789Sahrens /* 20313377Seschrock * Mark vd's DTL as dirty in this txg. vdev_dtl_sync() will see that 20323377Seschrock * vd->vdev_detached is set and free vd's DTL object in syncing context. 20333377Seschrock * But first make sure we're not on any *other* txg's DTL list, to 20343377Seschrock * prevent vd from being accessed after it's freed. 2035789Sahrens */ 2036789Sahrens for (t = 0; t < TXG_SIZE; t++) 2037789Sahrens (void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t); 20381732Sbonwick vd->vdev_detached = B_TRUE; 20391732Sbonwick vdev_dirty(tvd, VDD_DTL, vd, txg); 2040789Sahrens 2041*4451Seschrock spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE); 2042*4451Seschrock 20432082Seschrock error = spa_vdev_exit(spa, vd, txg, 0); 20442082Seschrock 20452082Seschrock /* 20463377Seschrock * If this was the removal of the original device in a hot spare vdev, 20473377Seschrock * then we want to go through and remove the device from the hot spare 20483377Seschrock * list of every other pool. 20492082Seschrock */ 20502082Seschrock if (unspare) { 20512082Seschrock spa = NULL; 20522082Seschrock mutex_enter(&spa_namespace_lock); 20532082Seschrock while ((spa = spa_next(spa)) != NULL) { 20542082Seschrock if (spa->spa_state != POOL_STATE_ACTIVE) 20552082Seschrock continue; 20562082Seschrock 20572082Seschrock (void) spa_vdev_remove(spa, unspare_guid, B_TRUE); 20582082Seschrock } 20592082Seschrock mutex_exit(&spa_namespace_lock); 20602082Seschrock } 20612082Seschrock 20622082Seschrock return (error); 20632082Seschrock } 20642082Seschrock 20652082Seschrock /* 20662082Seschrock * Remove a device from the pool. Currently, this supports removing only hot 20672082Seschrock * spares. 
20682082Seschrock */ 20692082Seschrock int 20702082Seschrock spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) 20712082Seschrock { 20722082Seschrock vdev_t *vd; 20732082Seschrock nvlist_t **spares, *nv, **newspares; 20742082Seschrock uint_t i, j, nspares; 20752082Seschrock int ret = 0; 20762082Seschrock 20772082Seschrock spa_config_enter(spa, RW_WRITER, FTAG); 20782082Seschrock 20792082Seschrock vd = spa_lookup_by_guid(spa, guid); 20802082Seschrock 20812082Seschrock nv = NULL; 20822082Seschrock if (spa->spa_spares != NULL && 20832082Seschrock nvlist_lookup_nvlist_array(spa->spa_sparelist, ZPOOL_CONFIG_SPARES, 20842082Seschrock &spares, &nspares) == 0) { 20852082Seschrock for (i = 0; i < nspares; i++) { 20862082Seschrock uint64_t theguid; 20872082Seschrock 20882082Seschrock VERIFY(nvlist_lookup_uint64(spares[i], 20892082Seschrock ZPOOL_CONFIG_GUID, &theguid) == 0); 20902082Seschrock if (theguid == guid) { 20912082Seschrock nv = spares[i]; 20922082Seschrock break; 20932082Seschrock } 20942082Seschrock } 20952082Seschrock } 20962082Seschrock 20972082Seschrock /* 20982082Seschrock * We only support removing a hot spare, and only if it's not currently 20992082Seschrock * in use in this pool. 21002082Seschrock */ 21012082Seschrock if (nv == NULL && vd == NULL) { 21022082Seschrock ret = ENOENT; 21032082Seschrock goto out; 21042082Seschrock } 21052082Seschrock 21062082Seschrock if (nv == NULL && vd != NULL) { 21072082Seschrock ret = ENOTSUP; 21082082Seschrock goto out; 21092082Seschrock } 21102082Seschrock 21112082Seschrock if (!unspare && nv != NULL && vd != NULL) { 21122082Seschrock ret = EBUSY; 21132082Seschrock goto out; 21142082Seschrock } 21152082Seschrock 21162082Seschrock if (nspares == 1) { 21172082Seschrock newspares = NULL; 21182082Seschrock } else { 21192082Seschrock newspares = kmem_alloc((nspares - 1) * sizeof (void *), 21202082Seschrock KM_SLEEP); 21212082Seschrock for (i = 0, j = 0; i < nspares; i++) { 21222082Seschrock if (spares[i] != nv) 21232082Seschrock VERIFY(nvlist_dup(spares[i], 21242082Seschrock &newspares[j++], KM_SLEEP) == 0); 21252082Seschrock } 21262082Seschrock } 21272082Seschrock 21282082Seschrock VERIFY(nvlist_remove(spa->spa_sparelist, ZPOOL_CONFIG_SPARES, 21292082Seschrock DATA_TYPE_NVLIST_ARRAY) == 0); 21302082Seschrock VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist, ZPOOL_CONFIG_SPARES, 21312082Seschrock newspares, nspares - 1) == 0); 21322082Seschrock for (i = 0; i < nspares - 1; i++) 21332082Seschrock nvlist_free(newspares[i]); 21342082Seschrock kmem_free(newspares, (nspares - 1) * sizeof (void *)); 21352082Seschrock spa_load_spares(spa); 21362082Seschrock spa->spa_sync_spares = B_TRUE; 21372082Seschrock 21382082Seschrock out: 21392082Seschrock spa_config_exit(spa, FTAG); 21402082Seschrock 21412082Seschrock return (ret); 2142789Sahrens } 2143789Sahrens 2144789Sahrens /* 2145*4451Seschrock * Find any device that's done replacing, or a vdev marked 'unspare' that's 2146*4451Seschrock * currently spared, so we can detach it. 2147789Sahrens */ 21481544Seschrock static vdev_t * 2149*4451Seschrock spa_vdev_resilver_done_hunt(vdev_t *vd) 2150789Sahrens { 21511544Seschrock vdev_t *newvd, *oldvd; 2152789Sahrens int c; 2153789Sahrens 21541544Seschrock for (c = 0; c < vd->vdev_children; c++) { 2155*4451Seschrock oldvd = spa_vdev_resilver_done_hunt(vd->vdev_child[c]); 21561544Seschrock if (oldvd != NULL) 21571544Seschrock return (oldvd); 21581544Seschrock } 2159789Sahrens 2160*4451Seschrock /* 2161*4451Seschrock * Check for a completed replacement.
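 * The replacement is done once the new child (child 1) has empty DTL
 * and scrub DTL maps; the old child (child 0) is then the one to
 * detach.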
2162*4451Seschrock */ 2163789Sahrens if (vd->vdev_ops == &vdev_replacing_ops && vd->vdev_children == 2) { 21641544Seschrock oldvd = vd->vdev_child[0]; 21651544Seschrock newvd = vd->vdev_child[1]; 2166789Sahrens 21671544Seschrock mutex_enter(&newvd->vdev_dtl_lock); 21681544Seschrock if (newvd->vdev_dtl_map.sm_space == 0 && 21691544Seschrock newvd->vdev_dtl_scrub.sm_space == 0) { 21701544Seschrock mutex_exit(&newvd->vdev_dtl_lock); 21711544Seschrock return (oldvd); 21721544Seschrock } 21731544Seschrock mutex_exit(&newvd->vdev_dtl_lock); 21741544Seschrock } 2175789Sahrens 2176*4451Seschrock /* 2177*4451Seschrock * Check for a completed resilver with the 'unspare' flag set. 2178*4451Seschrock */ 2179*4451Seschrock if (vd->vdev_ops == &vdev_spare_ops && vd->vdev_children == 2) { 2180*4451Seschrock newvd = vd->vdev_child[0]; 2181*4451Seschrock oldvd = vd->vdev_child[1]; 2182*4451Seschrock 2183*4451Seschrock mutex_enter(&newvd->vdev_dtl_lock); 2184*4451Seschrock if (newvd->vdev_unspare && 2185*4451Seschrock newvd->vdev_dtl_map.sm_space == 0 && 2186*4451Seschrock newvd->vdev_dtl_scrub.sm_space == 0) { 2187*4451Seschrock newvd->vdev_unspare = 0; 2188*4451Seschrock mutex_exit(&newvd->vdev_dtl_lock); 2189*4451Seschrock return (oldvd); 2190*4451Seschrock } 2191*4451Seschrock mutex_exit(&newvd->vdev_dtl_lock); 2192*4451Seschrock } 2193*4451Seschrock 21941544Seschrock return (NULL); 2195789Sahrens } 2196789Sahrens 21971544Seschrock static void 2198*4451Seschrock spa_vdev_resilver_done(spa_t *spa) 2199789Sahrens { 22001544Seschrock vdev_t *vd; 22012082Seschrock vdev_t *pvd; 22021544Seschrock uint64_t guid; 22032082Seschrock uint64_t pguid = 0; 2204789Sahrens 22051544Seschrock spa_config_enter(spa, RW_READER, FTAG); 2206789Sahrens 2207*4451Seschrock while ((vd = spa_vdev_resilver_done_hunt(spa->spa_root_vdev)) != NULL) { 22081544Seschrock guid = vd->vdev_guid; 22092082Seschrock /* 22102082Seschrock * If we have just finished replacing a hot spared device, then 22112082Seschrock * we need to detach the parent's first child (the original hot 22122082Seschrock * spare) as well. 22132082Seschrock */ 22142082Seschrock pvd = vd->vdev_parent; 22152082Seschrock if (pvd->vdev_parent->vdev_ops == &vdev_spare_ops && 22162082Seschrock pvd->vdev_id == 0) { 22172082Seschrock ASSERT(pvd->vdev_ops == &vdev_replacing_ops); 22182082Seschrock ASSERT(pvd->vdev_parent->vdev_children == 2); 22192082Seschrock pguid = pvd->vdev_parent->vdev_child[1]->vdev_guid; 22202082Seschrock } 22211544Seschrock spa_config_exit(spa, FTAG); 22221544Seschrock if (spa_vdev_detach(spa, guid, B_TRUE) != 0) 22231544Seschrock return; 22242082Seschrock if (pguid != 0 && spa_vdev_detach(spa, pguid, B_TRUE) != 0) 22252082Seschrock return; 22261544Seschrock spa_config_enter(spa, RW_READER, FTAG); 2227789Sahrens } 2228789Sahrens 22291544Seschrock spa_config_exit(spa, FTAG); 2230789Sahrens } 2231789Sahrens 2232789Sahrens /* 22331354Seschrock * Update the stored path for this vdev. Dirty the vdev configuration, relying 22341354Seschrock * on spa_vdev_enter/exit() to synchronize the labels and cache. 
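 * A guid that isn't in the vdev tree may still name an available hot
 * spare, in which case only the path stored in the spares nvlist is
 * updated.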
22351354Seschrock */ 22361354Seschrock int 22371354Seschrock spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath) 22381354Seschrock { 22391354Seschrock vdev_t *rvd, *vd; 22401354Seschrock uint64_t txg; 22411354Seschrock 22421354Seschrock rvd = spa->spa_root_vdev; 22431354Seschrock 22441354Seschrock txg = spa_vdev_enter(spa); 22451354Seschrock 22462082Seschrock if ((vd = vdev_lookup_by_guid(rvd, guid)) == NULL) { 22472082Seschrock /* 22482082Seschrock * Determine if this is a reference to a hot spare. In that 22492082Seschrock * case, update the path as stored in the spare list. 22502082Seschrock */ 22512082Seschrock nvlist_t **spares; 22522082Seschrock uint_t i, nspares; 22532082Seschrock if (spa->spa_sparelist != NULL) { 22542082Seschrock VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist, 22552082Seschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 22562082Seschrock for (i = 0; i < nspares; i++) { 22572082Seschrock uint64_t theguid; 22582082Seschrock VERIFY(nvlist_lookup_uint64(spares[i], 22592082Seschrock ZPOOL_CONFIG_GUID, &theguid) == 0); 22602082Seschrock if (theguid == guid) 22612082Seschrock break; 22622082Seschrock } 22632082Seschrock 22642082Seschrock if (i == nspares) 22652082Seschrock return (spa_vdev_exit(spa, NULL, txg, ENOENT)); 22662082Seschrock 22672082Seschrock VERIFY(nvlist_add_string(spares[i], 22682082Seschrock ZPOOL_CONFIG_PATH, newpath) == 0); 22692082Seschrock spa_load_spares(spa); 22702082Seschrock spa->spa_sync_spares = B_TRUE; 22712082Seschrock return (spa_vdev_exit(spa, NULL, txg, 0)); 22722082Seschrock } else { 22732082Seschrock return (spa_vdev_exit(spa, NULL, txg, ENOENT)); 22742082Seschrock } 22752082Seschrock } 22761354Seschrock 22771585Sbonwick if (!vd->vdev_ops->vdev_op_leaf) 22781585Sbonwick return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 22791585Sbonwick 22801354Seschrock spa_strfree(vd->vdev_path); 22811354Seschrock vd->vdev_path = spa_strdup(newpath); 22821354Seschrock 22831354Seschrock vdev_config_dirty(vd->vdev_top); 22841354Seschrock 22851354Seschrock return (spa_vdev_exit(spa, NULL, txg, 0)); 22861354Seschrock } 22871354Seschrock 22881354Seschrock /* 2289789Sahrens * ========================================================================== 2290789Sahrens * SPA Scrubbing 2291789Sahrens * ========================================================================== 2292789Sahrens */ 2293789Sahrens 2294789Sahrens static void 2295789Sahrens spa_scrub_io_done(zio_t *zio) 2296789Sahrens { 2297789Sahrens spa_t *spa = zio->io_spa; 2298789Sahrens 22994309Smaybee arc_data_buf_free(zio->io_data, zio->io_size); 2300789Sahrens 2301789Sahrens mutex_enter(&spa->spa_scrub_lock); 23021544Seschrock if (zio->io_error && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { 23031775Sbillm vdev_t *vd = zio->io_vd ? 
zio->io_vd : spa->spa_root_vdev; 2304789Sahrens spa->spa_scrub_errors++; 2305789Sahrens mutex_enter(&vd->vdev_stat_lock); 2306789Sahrens vd->vdev_stat.vs_scrub_errors++; 2307789Sahrens mutex_exit(&vd->vdev_stat_lock); 2308789Sahrens } 23093697Smishra 23103697Smishra if (--spa->spa_scrub_inflight < spa->spa_scrub_maxinflight) 23111544Seschrock cv_broadcast(&spa->spa_scrub_io_cv); 23123697Smishra 23133697Smishra ASSERT(spa->spa_scrub_inflight >= 0); 23143697Smishra 23151544Seschrock mutex_exit(&spa->spa_scrub_lock); 2316789Sahrens } 2317789Sahrens 2318789Sahrens static void 23191544Seschrock spa_scrub_io_start(spa_t *spa, blkptr_t *bp, int priority, int flags, 23201544Seschrock zbookmark_t *zb) 2321789Sahrens { 2322789Sahrens size_t size = BP_GET_LSIZE(bp); 23233697Smishra void *data; 2324789Sahrens 2325789Sahrens mutex_enter(&spa->spa_scrub_lock); 23263697Smishra /* 23273697Smishra * Do not give too much work to vdev(s). 23283697Smishra */ 23293697Smishra while (spa->spa_scrub_inflight >= spa->spa_scrub_maxinflight) { 23303697Smishra cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); 23313697Smishra } 2332789Sahrens spa->spa_scrub_inflight++; 2333789Sahrens mutex_exit(&spa->spa_scrub_lock); 2334789Sahrens 23354309Smaybee data = arc_data_buf_alloc(size); 23363697Smishra 23371544Seschrock if (zb->zb_level == -1 && BP_GET_TYPE(bp) != DMU_OT_OBJSET) 23381544Seschrock flags |= ZIO_FLAG_SPECULATIVE; /* intent log block */ 23391544Seschrock 23401807Sbonwick flags |= ZIO_FLAG_SCRUB_THREAD | ZIO_FLAG_CANFAIL; 23411544Seschrock 2342789Sahrens zio_nowait(zio_read(NULL, spa, bp, data, size, 23431544Seschrock spa_scrub_io_done, NULL, priority, flags, zb)); 2344789Sahrens } 2345789Sahrens 2346789Sahrens /* ARGSUSED */ 2347789Sahrens static int 2348789Sahrens spa_scrub_cb(traverse_blk_cache_t *bc, spa_t *spa, void *a) 2349789Sahrens { 2350789Sahrens blkptr_t *bp = &bc->bc_blkptr; 23511775Sbillm vdev_t *vd = spa->spa_root_vdev; 23521775Sbillm dva_t *dva = bp->blk_dva; 23531775Sbillm int needs_resilver = B_FALSE; 23541775Sbillm int d; 2355789Sahrens 23561775Sbillm if (bc->bc_errno) { 2357789Sahrens /* 2358789Sahrens * We can't scrub this block, but we can continue to scrub 2359789Sahrens * the rest of the pool. Note the error and move along. 2360789Sahrens */ 2361789Sahrens mutex_enter(&spa->spa_scrub_lock); 2362789Sahrens spa->spa_scrub_errors++; 2363789Sahrens mutex_exit(&spa->spa_scrub_lock); 2364789Sahrens 23651775Sbillm mutex_enter(&vd->vdev_stat_lock); 23661775Sbillm vd->vdev_stat.vs_scrub_errors++; 23671775Sbillm mutex_exit(&vd->vdev_stat_lock); 2368789Sahrens 2369789Sahrens return (ERESTART); 2370789Sahrens } 2371789Sahrens 2372789Sahrens ASSERT(bp->blk_birth < spa->spa_scrub_maxtxg); 2373789Sahrens 23741775Sbillm for (d = 0; d < BP_GET_NDVAS(bp); d++) { 23751775Sbillm vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d])); 23761775Sbillm 23771775Sbillm ASSERT(vd != NULL); 23781775Sbillm 23791775Sbillm /* 23801775Sbillm * Keep track of how much data we've examined so that 23811775Sbillm * zpool(1M) status can make useful progress reports. 
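 * The asize of each DVA is charged to the vdev it lives on, so a
 * block with multiple copies is counted once per copy.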
23821775Sbillm */ 23831775Sbillm mutex_enter(&vd->vdev_stat_lock); 23841775Sbillm vd->vdev_stat.vs_scrub_examined += DVA_GET_ASIZE(&dva[d]); 23851775Sbillm mutex_exit(&vd->vdev_stat_lock); 2386789Sahrens 23871775Sbillm if (spa->spa_scrub_type == POOL_SCRUB_RESILVER) { 23881775Sbillm if (DVA_GET_GANG(&dva[d])) { 23891775Sbillm /* 23901775Sbillm * Gang members may be spread across multiple 23911775Sbillm * vdevs, so the best we can do is look at the 23921775Sbillm * pool-wide DTL. 23931775Sbillm * XXX -- it would be better to change our 23941775Sbillm * allocation policy to ensure that this can't 23951775Sbillm * happen. 23961775Sbillm */ 23971775Sbillm vd = spa->spa_root_vdev; 23981775Sbillm } 23991775Sbillm if (vdev_dtl_contains(&vd->vdev_dtl_map, 24001775Sbillm bp->blk_birth, 1)) 24011775Sbillm needs_resilver = B_TRUE; 2402789Sahrens } 24031775Sbillm } 24041775Sbillm 24051775Sbillm if (spa->spa_scrub_type == POOL_SCRUB_EVERYTHING) 2406789Sahrens spa_scrub_io_start(spa, bp, ZIO_PRIORITY_SCRUB, 24071544Seschrock ZIO_FLAG_SCRUB, &bc->bc_bookmark); 24081775Sbillm else if (needs_resilver) 24091775Sbillm spa_scrub_io_start(spa, bp, ZIO_PRIORITY_RESILVER, 24101775Sbillm ZIO_FLAG_RESILVER, &bc->bc_bookmark); 2411789Sahrens 2412789Sahrens return (0); 2413789Sahrens } 2414789Sahrens 2415789Sahrens static void 2416789Sahrens spa_scrub_thread(spa_t *spa) 2417789Sahrens { 2418789Sahrens callb_cpr_t cprinfo; 2419789Sahrens traverse_handle_t *th = spa->spa_scrub_th; 2420789Sahrens vdev_t *rvd = spa->spa_root_vdev; 2421789Sahrens pool_scrub_type_t scrub_type = spa->spa_scrub_type; 2422789Sahrens int error = 0; 2423789Sahrens boolean_t complete; 2424789Sahrens 2425789Sahrens CALLB_CPR_INIT(&cprinfo, &spa->spa_scrub_lock, callb_generic_cpr, FTAG); 2426789Sahrens 2427797Sbonwick /* 2428797Sbonwick * If we're restarting due to a snapshot create/delete, 2429797Sbonwick * wait for that to complete. 2430797Sbonwick */ 2431797Sbonwick txg_wait_synced(spa_get_dsl(spa), 0); 2432797Sbonwick 24331544Seschrock dprintf("start %s mintxg=%llu maxtxg=%llu\n", 24341544Seschrock scrub_type == POOL_SCRUB_RESILVER ? 
"resilver" : "scrub", 24351544Seschrock spa->spa_scrub_mintxg, spa->spa_scrub_maxtxg); 24361544Seschrock 24371544Seschrock spa_config_enter(spa, RW_WRITER, FTAG); 24381544Seschrock vdev_reopen(rvd); /* purge all vdev caches */ 2439789Sahrens vdev_config_dirty(rvd); /* rewrite all disk labels */ 2440789Sahrens vdev_scrub_stat_update(rvd, scrub_type, B_FALSE); 24411544Seschrock spa_config_exit(spa, FTAG); 2442789Sahrens 2443789Sahrens mutex_enter(&spa->spa_scrub_lock); 2444789Sahrens spa->spa_scrub_errors = 0; 2445789Sahrens spa->spa_scrub_active = 1; 24461544Seschrock ASSERT(spa->spa_scrub_inflight == 0); 2447789Sahrens 2448789Sahrens while (!spa->spa_scrub_stop) { 2449789Sahrens CALLB_CPR_SAFE_BEGIN(&cprinfo); 24501544Seschrock while (spa->spa_scrub_suspended) { 2451789Sahrens spa->spa_scrub_active = 0; 2452789Sahrens cv_broadcast(&spa->spa_scrub_cv); 2453789Sahrens cv_wait(&spa->spa_scrub_cv, &spa->spa_scrub_lock); 2454789Sahrens spa->spa_scrub_active = 1; 2455789Sahrens } 2456789Sahrens CALLB_CPR_SAFE_END(&cprinfo, &spa->spa_scrub_lock); 2457789Sahrens 2458789Sahrens if (spa->spa_scrub_restart_txg != 0) 2459789Sahrens break; 2460789Sahrens 2461789Sahrens mutex_exit(&spa->spa_scrub_lock); 2462789Sahrens error = traverse_more(th); 2463789Sahrens mutex_enter(&spa->spa_scrub_lock); 2464789Sahrens if (error != EAGAIN) 2465789Sahrens break; 2466789Sahrens } 2467789Sahrens 2468789Sahrens while (spa->spa_scrub_inflight) 2469789Sahrens cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); 2470789Sahrens 24711601Sbonwick spa->spa_scrub_active = 0; 24721601Sbonwick cv_broadcast(&spa->spa_scrub_cv); 24731601Sbonwick 24741601Sbonwick mutex_exit(&spa->spa_scrub_lock); 24751601Sbonwick 24761601Sbonwick spa_config_enter(spa, RW_WRITER, FTAG); 24771601Sbonwick 24781601Sbonwick mutex_enter(&spa->spa_scrub_lock); 24791601Sbonwick 24801601Sbonwick /* 24811601Sbonwick * Note: we check spa_scrub_restart_txg under both spa_scrub_lock 24821601Sbonwick * AND the spa config lock to synchronize with any config changes 24831601Sbonwick * that revise the DTLs under spa_vdev_enter() / spa_vdev_exit(). 24841601Sbonwick */ 2485789Sahrens if (spa->spa_scrub_restart_txg != 0) 2486789Sahrens error = ERESTART; 2487789Sahrens 24881544Seschrock if (spa->spa_scrub_stop) 24891544Seschrock error = EINTR; 24901544Seschrock 2491789Sahrens /* 24921544Seschrock * Even if there were uncorrectable errors, we consider the scrub 24931544Seschrock * completed. The downside is that if there is a transient error during 24941544Seschrock * a resilver, we won't resilver the data properly to the target. But 24951544Seschrock * if the damage is permanent (more likely) we will resilver forever, 24961544Seschrock * which isn't really acceptable. Since there is enough information for 24971544Seschrock * the user to know what has failed and why, this seems like a more 24981544Seschrock * tractable approach. 2499789Sahrens */ 25001544Seschrock complete = (error == 0); 2501789Sahrens 25021544Seschrock dprintf("end %s to maxtxg=%llu %s, traverse=%d, %llu errors, stop=%u\n", 25031544Seschrock scrub_type == POOL_SCRUB_RESILVER ? "resilver" : "scrub", 2504789Sahrens spa->spa_scrub_maxtxg, complete ? "done" : "FAILED", 2505789Sahrens error, spa->spa_scrub_errors, spa->spa_scrub_stop); 2506789Sahrens 2507789Sahrens mutex_exit(&spa->spa_scrub_lock); 2508789Sahrens 2509789Sahrens /* 2510789Sahrens * If the scrub/resilver completed, update all DTLs to reflect this. 2511789Sahrens * Whether it succeeded or not, vacate all temporary scrub DTLs. 
2512789Sahrens */ 2513789Sahrens vdev_dtl_reassess(rvd, spa_last_synced_txg(spa) + 1, 2514789Sahrens complete ? spa->spa_scrub_maxtxg : 0, B_TRUE); 2515789Sahrens vdev_scrub_stat_update(rvd, POOL_SCRUB_NONE, complete); 25161544Seschrock spa_errlog_rotate(spa); 25171601Sbonwick 2518*4451Seschrock if (scrub_type == POOL_SCRUB_RESILVER && complete) 2519*4451Seschrock spa_event_notify(spa, NULL, ESC_ZFS_RESILVER_FINISH); 2520*4451Seschrock 25211544Seschrock spa_config_exit(spa, FTAG); 2522789Sahrens 2523789Sahrens mutex_enter(&spa->spa_scrub_lock); 2524789Sahrens 25251544Seschrock /* 25261544Seschrock * We may have finished replacing a device. 25271544Seschrock * Let the async thread assess this and handle the detach. 25281544Seschrock */ 2529*4451Seschrock spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); 2530789Sahrens 2531789Sahrens /* 2532789Sahrens * If we were told to restart, our final act is to start a new scrub. 2533789Sahrens */ 2534789Sahrens if (error == ERESTART) 25351544Seschrock spa_async_request(spa, scrub_type == POOL_SCRUB_RESILVER ? 25361544Seschrock SPA_ASYNC_RESILVER : SPA_ASYNC_SCRUB); 2537789Sahrens 25381544Seschrock spa->spa_scrub_type = POOL_SCRUB_NONE; 25391544Seschrock spa->spa_scrub_active = 0; 25401544Seschrock spa->spa_scrub_thread = NULL; 25411544Seschrock cv_broadcast(&spa->spa_scrub_cv); 2542789Sahrens CALLB_CPR_EXIT(&cprinfo); /* drops &spa->spa_scrub_lock */ 2543789Sahrens thread_exit(); 2544789Sahrens } 2545789Sahrens 2546789Sahrens void 2547789Sahrens spa_scrub_suspend(spa_t *spa) 2548789Sahrens { 2549789Sahrens mutex_enter(&spa->spa_scrub_lock); 25501544Seschrock spa->spa_scrub_suspended++; 2551789Sahrens while (spa->spa_scrub_active) { 2552789Sahrens cv_broadcast(&spa->spa_scrub_cv); 2553789Sahrens cv_wait(&spa->spa_scrub_cv, &spa->spa_scrub_lock); 2554789Sahrens } 2555789Sahrens while (spa->spa_scrub_inflight) 2556789Sahrens cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); 2557789Sahrens mutex_exit(&spa->spa_scrub_lock); 2558789Sahrens } 2559789Sahrens 2560789Sahrens void 2561789Sahrens spa_scrub_resume(spa_t *spa) 2562789Sahrens { 2563789Sahrens mutex_enter(&spa->spa_scrub_lock); 25641544Seschrock ASSERT(spa->spa_scrub_suspended != 0); 25651544Seschrock if (--spa->spa_scrub_suspended == 0) 2566789Sahrens cv_broadcast(&spa->spa_scrub_cv); 2567789Sahrens mutex_exit(&spa->spa_scrub_lock); 2568789Sahrens } 2569789Sahrens 2570789Sahrens void 2571789Sahrens spa_scrub_restart(spa_t *spa, uint64_t txg) 2572789Sahrens { 2573789Sahrens /* 2574789Sahrens * Something happened (e.g. snapshot create/delete) that means 2575789Sahrens * we must restart any in-progress scrubs. The itinerary will 2576789Sahrens * fix this properly. 2577789Sahrens */ 2578789Sahrens mutex_enter(&spa->spa_scrub_lock); 2579789Sahrens spa->spa_scrub_restart_txg = txg; 2580789Sahrens mutex_exit(&spa->spa_scrub_lock); 2581789Sahrens } 2582789Sahrens 25831544Seschrock int 25841544Seschrock spa_scrub(spa_t *spa, pool_scrub_type_t type, boolean_t force) 2585789Sahrens { 2586789Sahrens space_seg_t *ss; 2587789Sahrens uint64_t mintxg, maxtxg; 2588789Sahrens vdev_t *rvd = spa->spa_root_vdev; 2589789Sahrens 2590789Sahrens if ((uint_t)type >= POOL_SCRUB_TYPES) 2591789Sahrens return (ENOTSUP); 2592789Sahrens 25931544Seschrock mutex_enter(&spa->spa_scrub_lock); 25941544Seschrock 2595789Sahrens /* 2596789Sahrens * If there's a scrub or resilver already in progress, stop it. 
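 * A resilver is stopped only when 'force' is set; otherwise we return
 * EBUSY rather than discard resilver progress.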
2597789Sahrens */ 2598789Sahrens while (spa->spa_scrub_thread != NULL) { 2599789Sahrens /* 2600789Sahrens * Don't stop a resilver unless forced. 2601789Sahrens */ 26021544Seschrock if (spa->spa_scrub_type == POOL_SCRUB_RESILVER && !force) { 26031544Seschrock mutex_exit(&spa->spa_scrub_lock); 2604789Sahrens return (EBUSY); 26051544Seschrock } 2606789Sahrens spa->spa_scrub_stop = 1; 2607789Sahrens cv_broadcast(&spa->spa_scrub_cv); 2608789Sahrens cv_wait(&spa->spa_scrub_cv, &spa->spa_scrub_lock); 2609789Sahrens } 2610789Sahrens 2611789Sahrens /* 2612789Sahrens * Terminate the previous traverse. 2613789Sahrens */ 2614789Sahrens if (spa->spa_scrub_th != NULL) { 2615789Sahrens traverse_fini(spa->spa_scrub_th); 2616789Sahrens spa->spa_scrub_th = NULL; 2617789Sahrens } 2618789Sahrens 26191544Seschrock if (rvd == NULL) { 26201544Seschrock ASSERT(spa->spa_scrub_stop == 0); 26211544Seschrock ASSERT(spa->spa_scrub_type == type); 26221544Seschrock ASSERT(spa->spa_scrub_restart_txg == 0); 26231544Seschrock mutex_exit(&spa->spa_scrub_lock); 26241544Seschrock return (0); 26251544Seschrock } 2626789Sahrens 2627789Sahrens mintxg = TXG_INITIAL - 1; 2628789Sahrens maxtxg = spa_last_synced_txg(spa) + 1; 2629789Sahrens 26301544Seschrock mutex_enter(&rvd->vdev_dtl_lock); 2631789Sahrens 26321544Seschrock if (rvd->vdev_dtl_map.sm_space == 0) { 26331544Seschrock /* 26341544Seschrock * The pool-wide DTL is empty. 26351732Sbonwick * If this is a resilver, there's nothing to do except 26361732Sbonwick * check whether any in-progress replacements have completed. 26371544Seschrock */ 26381732Sbonwick if (type == POOL_SCRUB_RESILVER) { 26391544Seschrock type = POOL_SCRUB_NONE; 2640*4451Seschrock spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); 26411732Sbonwick } 26421544Seschrock } else { 26431544Seschrock /* 26441544Seschrock * The pool-wide DTL is non-empty. 26451544Seschrock * If this is a normal scrub, upgrade to a resilver instead. 26461544Seschrock */ 26471544Seschrock if (type == POOL_SCRUB_EVERYTHING) 26481544Seschrock type = POOL_SCRUB_RESILVER; 26491544Seschrock } 2650789Sahrens 26511544Seschrock if (type == POOL_SCRUB_RESILVER) { 2652789Sahrens /* 2653789Sahrens * Determine the resilvering boundaries. 2654789Sahrens * 2655789Sahrens * Note: (mintxg, maxtxg) is an open interval, 2656789Sahrens * i.e. mintxg and maxtxg themselves are not included. 2657789Sahrens * 2658789Sahrens * Note: for maxtxg, we MIN with spa_last_synced_txg(spa) + 1 2659789Sahrens * so we don't claim to resilver a txg that's still changing. 
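 * The bounds come straight from the pool-wide DTL: one txg before the
 * first missing txg, and the end of the last missing segment, so the
 * open interval covers exactly the missing txgs.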
2660789Sahrens */ 2661789Sahrens ss = avl_first(&rvd->vdev_dtl_map.sm_root); 26621544Seschrock mintxg = ss->ss_start - 1; 2663789Sahrens ss = avl_last(&rvd->vdev_dtl_map.sm_root); 26641544Seschrock maxtxg = MIN(ss->ss_end, maxtxg); 2665*4451Seschrock 2666*4451Seschrock spa_event_notify(spa, NULL, ESC_ZFS_RESILVER_START); 2667789Sahrens } 2668789Sahrens 26691544Seschrock mutex_exit(&rvd->vdev_dtl_lock); 26701544Seschrock 26711544Seschrock spa->spa_scrub_stop = 0; 26721544Seschrock spa->spa_scrub_type = type; 26731544Seschrock spa->spa_scrub_restart_txg = 0; 26741544Seschrock 26751544Seschrock if (type != POOL_SCRUB_NONE) { 26761544Seschrock spa->spa_scrub_mintxg = mintxg; 2677789Sahrens spa->spa_scrub_maxtxg = maxtxg; 2678789Sahrens spa->spa_scrub_th = traverse_init(spa, spa_scrub_cb, NULL, 26791635Sbonwick ADVANCE_PRE | ADVANCE_PRUNE | ADVANCE_ZIL, 26801635Sbonwick ZIO_FLAG_CANFAIL); 2681789Sahrens traverse_add_pool(spa->spa_scrub_th, mintxg, maxtxg); 2682789Sahrens spa->spa_scrub_thread = thread_create(NULL, 0, 2683789Sahrens spa_scrub_thread, spa, 0, &p0, TS_RUN, minclsyspri); 2684789Sahrens } 2685789Sahrens 26861544Seschrock mutex_exit(&spa->spa_scrub_lock); 26871544Seschrock 2688789Sahrens return (0); 2689789Sahrens } 2690789Sahrens 26911544Seschrock /* 26921544Seschrock * ========================================================================== 26931544Seschrock * SPA async task processing 26941544Seschrock * ========================================================================== 26951544Seschrock */ 26961544Seschrock 26971544Seschrock static void 2698*4451Seschrock spa_async_remove(spa_t *spa, vdev_t *vd) 2699789Sahrens { 27001544Seschrock vdev_t *tvd; 27011544Seschrock int c; 27021544Seschrock 2703*4451Seschrock for (c = 0; c < vd->vdev_children; c++) { 2704*4451Seschrock tvd = vd->vdev_child[c]; 2705*4451Seschrock if (tvd->vdev_remove_wanted) { 2706*4451Seschrock tvd->vdev_remove_wanted = 0; 2707*4451Seschrock vdev_set_state(tvd, B_FALSE, VDEV_STATE_REMOVED, 2708*4451Seschrock VDEV_AUX_NONE); 2709*4451Seschrock vdev_clear(spa, tvd); 2710*4451Seschrock vdev_config_dirty(tvd->vdev_top); 27111544Seschrock } 2712*4451Seschrock spa_async_remove(spa, tvd); 27131544Seschrock } 27141544Seschrock } 27151544Seschrock 27161544Seschrock static void 27171544Seschrock spa_async_thread(spa_t *spa) 27181544Seschrock { 27191544Seschrock int tasks; 2720*4451Seschrock uint64_t txg; 27211544Seschrock 27221544Seschrock ASSERT(spa->spa_sync_on); 2723789Sahrens 27241544Seschrock mutex_enter(&spa->spa_async_lock); 27251544Seschrock tasks = spa->spa_async_tasks; 27261544Seschrock spa->spa_async_tasks = 0; 27271544Seschrock mutex_exit(&spa->spa_async_lock); 27281544Seschrock 27291544Seschrock /* 27301635Sbonwick * See if the config needs to be updated. 27311635Sbonwick */ 27321635Sbonwick if (tasks & SPA_ASYNC_CONFIG_UPDATE) { 27331635Sbonwick mutex_enter(&spa_namespace_lock); 27341635Sbonwick spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); 27351635Sbonwick mutex_exit(&spa_namespace_lock); 27361635Sbonwick } 27371635Sbonwick 27381635Sbonwick /* 2739*4451Seschrock * See if any devices need to be marked REMOVED. 27401544Seschrock */ 2741*4451Seschrock if (tasks & SPA_ASYNC_REMOVE) { 2742*4451Seschrock txg = spa_vdev_enter(spa); 2743*4451Seschrock spa_async_remove(spa, spa->spa_root_vdev); 2744*4451Seschrock (void) spa_vdev_exit(spa, NULL, txg, 0); 2745*4451Seschrock } 27461544Seschrock 27471544Seschrock /* 27481544Seschrock * If any devices are done replacing, detach them. 
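 * (This is the SPA_ASYNC_RESILVER_DONE request posted by the scrub
 * thread.)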
27491544Seschrock */ 2750*4451Seschrock if (tasks & SPA_ASYNC_RESILVER_DONE) 2751*4451Seschrock spa_vdev_resilver_done(spa); 2752789Sahrens 27531544Seschrock /* 2754*4451Seschrock * Kick off a scrub. When starting a RESILVER scrub (or an EVERYTHING 2755*4451Seschrock * scrub which can become a resilver), we need to hold 2756*4451Seschrock * spa_namespace_lock() because the sysevent we post via 2757*4451Seschrock * spa_event_notify() needs to get the name of the pool. 27581544Seschrock */ 2759*4451Seschrock if (tasks & SPA_ASYNC_SCRUB) { 2760*4451Seschrock mutex_enter(&spa_namespace_lock); 27611544Seschrock VERIFY(spa_scrub(spa, POOL_SCRUB_EVERYTHING, B_TRUE) == 0); 2762*4451Seschrock mutex_exit(&spa_namespace_lock); 2763*4451Seschrock } 27641544Seschrock 27651544Seschrock /* 27661544Seschrock * Kick off a resilver. 27671544Seschrock */ 2768*4451Seschrock if (tasks & SPA_ASYNC_RESILVER) { 2769*4451Seschrock mutex_enter(&spa_namespace_lock); 27701544Seschrock VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0); 2771*4451Seschrock mutex_exit(&spa_namespace_lock); 2772*4451Seschrock } 27731544Seschrock 27741544Seschrock /* 27751544Seschrock * Let the world know that we're done. 27761544Seschrock */ 27771544Seschrock mutex_enter(&spa->spa_async_lock); 27781544Seschrock spa->spa_async_thread = NULL; 27791544Seschrock cv_broadcast(&spa->spa_async_cv); 27801544Seschrock mutex_exit(&spa->spa_async_lock); 27811544Seschrock thread_exit(); 27821544Seschrock } 27831544Seschrock 27841544Seschrock void 27851544Seschrock spa_async_suspend(spa_t *spa) 27861544Seschrock { 27871544Seschrock mutex_enter(&spa->spa_async_lock); 27881544Seschrock spa->spa_async_suspended++; 27891544Seschrock while (spa->spa_async_thread != NULL) 27901544Seschrock cv_wait(&spa->spa_async_cv, &spa->spa_async_lock); 27911544Seschrock mutex_exit(&spa->spa_async_lock); 27921544Seschrock } 27931544Seschrock 27941544Seschrock void 27951544Seschrock spa_async_resume(spa_t *spa) 27961544Seschrock { 27971544Seschrock mutex_enter(&spa->spa_async_lock); 27981544Seschrock ASSERT(spa->spa_async_suspended != 0); 27991544Seschrock spa->spa_async_suspended--; 28001544Seschrock mutex_exit(&spa->spa_async_lock); 28011544Seschrock } 28021544Seschrock 28031544Seschrock static void 28041544Seschrock spa_async_dispatch(spa_t *spa) 28051544Seschrock { 28061544Seschrock mutex_enter(&spa->spa_async_lock); 28071544Seschrock if (spa->spa_async_tasks && !spa->spa_async_suspended && 28081635Sbonwick spa->spa_async_thread == NULL && 28091635Sbonwick rootdir != NULL && !vn_is_readonly(rootdir)) 28101544Seschrock spa->spa_async_thread = thread_create(NULL, 0, 28111544Seschrock spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri); 28121544Seschrock mutex_exit(&spa->spa_async_lock); 28131544Seschrock } 28141544Seschrock 28151544Seschrock void 28161544Seschrock spa_async_request(spa_t *spa, int task) 28171544Seschrock { 28181544Seschrock mutex_enter(&spa->spa_async_lock); 28191544Seschrock spa->spa_async_tasks |= task; 28201544Seschrock mutex_exit(&spa->spa_async_lock); 2821789Sahrens } 2822789Sahrens 2823789Sahrens /* 2824789Sahrens * ========================================================================== 2825789Sahrens * SPA syncing routines 2826789Sahrens * ========================================================================== 2827789Sahrens */ 2828789Sahrens 2829789Sahrens static void 2830789Sahrens spa_sync_deferred_frees(spa_t *spa, uint64_t txg) 2831789Sahrens { 2832789Sahrens bplist_t *bpl = &spa->spa_sync_bplist; 2833789Sahrens dmu_tx_t *tx; 
2834789Sahrens blkptr_t blk; 2835789Sahrens uint64_t itor = 0; 2836789Sahrens zio_t *zio; 2837789Sahrens int error; 2838789Sahrens uint8_t c = 1; 2839789Sahrens 2840789Sahrens zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CONFIG_HELD); 2841789Sahrens 2842789Sahrens while (bplist_iterate(bpl, &itor, &blk) == 0) 2843789Sahrens zio_nowait(zio_free(zio, spa, txg, &blk, NULL, NULL)); 2844789Sahrens 2845789Sahrens error = zio_wait(zio); 2846789Sahrens ASSERT3U(error, ==, 0); 2847789Sahrens 2848789Sahrens tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); 2849789Sahrens bplist_vacate(bpl, tx); 2850789Sahrens 2851789Sahrens /* 2852789Sahrens * Pre-dirty the first block so we sync to convergence faster. 2853789Sahrens * (Usually only the first block is needed.) 2854789Sahrens */ 2855789Sahrens dmu_write(spa->spa_meta_objset, spa->spa_sync_bplist_obj, 0, 1, &c, tx); 2856789Sahrens dmu_tx_commit(tx); 2857789Sahrens } 2858789Sahrens 2859789Sahrens static void 28602082Seschrock spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx) 28612082Seschrock { 28622082Seschrock char *packed = NULL; 28632082Seschrock size_t nvsize = 0; 28642082Seschrock dmu_buf_t *db; 28652082Seschrock 28662082Seschrock VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0); 28672082Seschrock 28682082Seschrock packed = kmem_alloc(nvsize, KM_SLEEP); 28692082Seschrock 28702082Seschrock VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR, 28712082Seschrock KM_SLEEP) == 0); 28722082Seschrock 28732082Seschrock dmu_write(spa->spa_meta_objset, obj, 0, nvsize, packed, tx); 28742082Seschrock 28752082Seschrock kmem_free(packed, nvsize); 28762082Seschrock 28772082Seschrock VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); 28782082Seschrock dmu_buf_will_dirty(db, tx); 28792082Seschrock *(uint64_t *)db->db_data = nvsize; 28802082Seschrock dmu_buf_rele(db, FTAG); 28812082Seschrock } 28822082Seschrock 28832082Seschrock static void 28842082Seschrock spa_sync_spares(spa_t *spa, dmu_tx_t *tx) 28852082Seschrock { 28862082Seschrock nvlist_t *nvroot; 28872082Seschrock nvlist_t **spares; 28882082Seschrock int i; 28892082Seschrock 28902082Seschrock if (!spa->spa_sync_spares) 28912082Seschrock return; 28922082Seschrock 28932082Seschrock /* 28942082Seschrock * Update the MOS nvlist describing the list of available spares. 28952082Seschrock * spa_validate_spares() will have already made sure this nvlist is 2896*4451Seschrock * valid and the vdevs are labeled appropriately. 
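 * The list is stored as a packed XDR nvlist in a DMU_OT_PACKED_NVLIST
 * object, referenced from the pool directory under DMU_POOL_SPARES
 * (see spa_sync_nvlist() above).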
28972082Seschrock */ 28982082Seschrock if (spa->spa_spares_object == 0) { 28992082Seschrock spa->spa_spares_object = dmu_object_alloc(spa->spa_meta_objset, 29002082Seschrock DMU_OT_PACKED_NVLIST, 1 << 14, 29012082Seschrock DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx); 29022082Seschrock VERIFY(zap_update(spa->spa_meta_objset, 29032082Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SPARES, 29042082Seschrock sizeof (uint64_t), 1, &spa->spa_spares_object, tx) == 0); 29052082Seschrock } 29062082Seschrock 29072082Seschrock VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); 29082082Seschrock if (spa->spa_nspares == 0) { 29092082Seschrock VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 29102082Seschrock NULL, 0) == 0); 29112082Seschrock } else { 29122082Seschrock spares = kmem_alloc(spa->spa_nspares * sizeof (void *), 29132082Seschrock KM_SLEEP); 29142082Seschrock for (i = 0; i < spa->spa_nspares; i++) 29152082Seschrock spares[i] = vdev_config_generate(spa, 29162082Seschrock spa->spa_spares[i], B_FALSE, B_TRUE); 29172082Seschrock VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 29182082Seschrock spares, spa->spa_nspares) == 0); 29192082Seschrock for (i = 0; i < spa->spa_nspares; i++) 29202082Seschrock nvlist_free(spares[i]); 29212082Seschrock kmem_free(spares, spa->spa_nspares * sizeof (void *)); 29222082Seschrock } 29232082Seschrock 29242082Seschrock spa_sync_nvlist(spa, spa->spa_spares_object, nvroot, tx); 29252926Sek110237 nvlist_free(nvroot); 29262082Seschrock 29272082Seschrock spa->spa_sync_spares = B_FALSE; 29282082Seschrock } 29292082Seschrock 29302082Seschrock static void 2931789Sahrens spa_sync_config_object(spa_t *spa, dmu_tx_t *tx) 2932789Sahrens { 2933789Sahrens nvlist_t *config; 2934789Sahrens 2935789Sahrens if (list_is_empty(&spa->spa_dirty_list)) 2936789Sahrens return; 2937789Sahrens 2938789Sahrens config = spa_config_generate(spa, NULL, dmu_tx_get_txg(tx), B_FALSE); 2939789Sahrens 29401635Sbonwick if (spa->spa_config_syncing) 29411635Sbonwick nvlist_free(spa->spa_config_syncing); 29421635Sbonwick spa->spa_config_syncing = config; 2943789Sahrens 29442082Seschrock spa_sync_nvlist(spa, spa->spa_config_object, config, tx); 2945789Sahrens } 2946789Sahrens 29473912Slling static void 29483912Slling spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx) 29493912Slling { 29503912Slling spa_t *spa = arg1; 29513912Slling nvlist_t *nvp = arg2; 29523912Slling nvpair_t *nvpair; 29533912Slling objset_t *mos = spa->spa_meta_objset; 29543912Slling uint64_t zapobj; 2955*4451Seschrock uint64_t intval; 29563912Slling 29573912Slling mutex_enter(&spa->spa_props_lock); 29583912Slling if (spa->spa_pool_props_object == 0) { 29593912Slling zapobj = zap_create(mos, DMU_OT_POOL_PROPS, DMU_OT_NONE, 0, tx); 29603912Slling VERIFY(zapobj > 0); 29613912Slling 29623912Slling spa->spa_pool_props_object = zapobj; 29633912Slling 29643912Slling VERIFY(zap_update(mos, DMU_POOL_DIRECTORY_OBJECT, 29653912Slling DMU_POOL_PROPS, 8, 1, 29663912Slling &spa->spa_pool_props_object, tx) == 0); 29673912Slling } 29683912Slling mutex_exit(&spa->spa_props_lock); 29693912Slling 29703912Slling nvpair = NULL; 29713912Slling while ((nvpair = nvlist_next_nvpair(nvp, nvpair))) { 29723912Slling switch (zpool_name_to_prop(nvpair_name(nvpair))) { 2973*4451Seschrock case ZPOOL_PROP_BOOTFS: 29743912Slling VERIFY(nvlist_lookup_uint64(nvp, 29753912Slling nvpair_name(nvpair), &spa->spa_bootfs) == 0); 29763912Slling VERIFY(zap_update(mos, 29773912Slling spa->spa_pool_props_object, 2978*4451Seschrock 
zpool_prop_to_name(ZPOOL_PROP_BOOTFS), 8, 1, 29793912Slling &spa->spa_bootfs, tx) == 0); 29803912Slling break; 2981*4451Seschrock 2982*4451Seschrock case ZPOOL_PROP_AUTOREPLACE: 2983*4451Seschrock VERIFY(nvlist_lookup_uint64(nvp, 2984*4451Seschrock nvpair_name(nvpair), &intval) == 0); 2985*4451Seschrock VERIFY(zap_update(mos, 2986*4451Seschrock spa->spa_pool_props_object, 2987*4451Seschrock zpool_prop_to_name(ZPOOL_PROP_AUTOREPLACE), 8, 1, 2988*4451Seschrock &intval, tx) == 0); 2989*4451Seschrock break; 29903912Slling } 29913912Slling } 29923912Slling } 29933912Slling 2994789Sahrens /* 2995789Sahrens * Sync the specified transaction group. New blocks may be dirtied as 2996789Sahrens * part of the process, so we iterate until it converges. 2997789Sahrens */ 2998789Sahrens void 2999789Sahrens spa_sync(spa_t *spa, uint64_t txg) 3000789Sahrens { 3001789Sahrens dsl_pool_t *dp = spa->spa_dsl_pool; 3002789Sahrens objset_t *mos = spa->spa_meta_objset; 3003789Sahrens bplist_t *bpl = &spa->spa_sync_bplist; 30041635Sbonwick vdev_t *rvd = spa->spa_root_vdev; 3005789Sahrens vdev_t *vd; 3006789Sahrens dmu_tx_t *tx; 3007789Sahrens int dirty_vdevs; 3008789Sahrens 3009789Sahrens /* 3010789Sahrens * Lock out configuration changes. 3011789Sahrens */ 30121544Seschrock spa_config_enter(spa, RW_READER, FTAG); 3013789Sahrens 3014789Sahrens spa->spa_syncing_txg = txg; 3015789Sahrens spa->spa_sync_pass = 0; 3016789Sahrens 30171544Seschrock VERIFY(0 == bplist_open(bpl, mos, spa->spa_sync_bplist_obj)); 3018789Sahrens 30192082Seschrock tx = dmu_tx_create_assigned(dp, txg); 30202082Seschrock 30212082Seschrock /* 30222082Seschrock * If we are upgrading to ZFS_VERSION_RAIDZ_DEFLATE this txg, 30232082Seschrock * set spa_deflate if we have no raid-z vdevs. 30242082Seschrock */ 30252082Seschrock if (spa->spa_ubsync.ub_version < ZFS_VERSION_RAIDZ_DEFLATE && 30262082Seschrock spa->spa_uberblock.ub_version >= ZFS_VERSION_RAIDZ_DEFLATE) { 30272082Seschrock int i; 30282082Seschrock 30292082Seschrock for (i = 0; i < rvd->vdev_children; i++) { 30302082Seschrock vd = rvd->vdev_child[i]; 30312082Seschrock if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE) 30322082Seschrock break; 30332082Seschrock } 30342082Seschrock if (i == rvd->vdev_children) { 30352082Seschrock spa->spa_deflate = TRUE; 30362082Seschrock VERIFY(0 == zap_add(spa->spa_meta_objset, 30372082Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 30382082Seschrock sizeof (uint64_t), 1, &spa->spa_deflate, tx)); 30392082Seschrock } 30402082Seschrock } 30412082Seschrock 3042789Sahrens /* 3043789Sahrens * If anything has changed in this txg, push the deferred frees 3044789Sahrens * from the previous txg. If not, leave them alone so that we 3045789Sahrens * don't generate work on an otherwise idle system. 3046789Sahrens */ 3047789Sahrens if (!txg_list_empty(&dp->dp_dirty_datasets, txg) || 30482329Sek110237 !txg_list_empty(&dp->dp_dirty_dirs, txg) || 30492329Sek110237 !txg_list_empty(&dp->dp_sync_tasks, txg)) 3050789Sahrens spa_sync_deferred_frees(spa, txg); 3051789Sahrens 3052789Sahrens /* 3053789Sahrens * Iterate to convergence. 
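 *
 * Each pass below can dirty new metadata (the config object, the
 * spares list, the error logs, and per-vdev metaslab state), so we
 * keep making passes until one completes with no vdevs left on the
 * dirty list for this txg.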
3054789Sahrens */ 3055789Sahrens do { 3056789Sahrens spa->spa_sync_pass++; 3057789Sahrens 3058789Sahrens spa_sync_config_object(spa, tx); 30592082Seschrock spa_sync_spares(spa, tx); 30601544Seschrock spa_errlog_sync(spa, txg); 3061789Sahrens dsl_pool_sync(dp, txg); 3062789Sahrens 3063789Sahrens dirty_vdevs = 0; 3064789Sahrens while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg)) { 3065789Sahrens vdev_sync(vd, txg); 3066789Sahrens dirty_vdevs++; 3067789Sahrens } 3068789Sahrens 3069789Sahrens bplist_sync(bpl, tx); 3070789Sahrens } while (dirty_vdevs); 3071789Sahrens 3072789Sahrens bplist_close(bpl); 3073789Sahrens 3074789Sahrens dprintf("txg %llu passes %d\n", txg, spa->spa_sync_pass); 3075789Sahrens 3076789Sahrens /* 3077789Sahrens * Rewrite the vdev configuration (which includes the uberblock) 3078789Sahrens * to commit the transaction group. 30791635Sbonwick * 30801635Sbonwick * If there are any dirty vdevs, sync the uberblock to all vdevs. 30811635Sbonwick * Otherwise, pick a random top-level vdev that's known to be 30821635Sbonwick * visible in the config cache (see spa_vdev_add() for details). 30831635Sbonwick * If the write fails, try the next vdev until we've tried them all. 3084789Sahrens */ 30851635Sbonwick if (!list_is_empty(&spa->spa_dirty_list)) { 30861635Sbonwick VERIFY(vdev_config_sync(rvd, txg) == 0); 30871635Sbonwick } else { 30881635Sbonwick int children = rvd->vdev_children; 30891635Sbonwick int c0 = spa_get_random(children); 30901635Sbonwick int c; 30911635Sbonwick 30921635Sbonwick for (c = 0; c < children; c++) { 30931635Sbonwick vd = rvd->vdev_child[(c0 + c) % children]; 30941635Sbonwick if (vd->vdev_ms_array == 0) 30951635Sbonwick continue; 30961635Sbonwick if (vdev_config_sync(vd, txg) == 0) 30971635Sbonwick break; 30981635Sbonwick } 30991635Sbonwick if (c == children) 31001635Sbonwick VERIFY(vdev_config_sync(rvd, txg) == 0); 31011635Sbonwick } 31021635Sbonwick 31032082Seschrock dmu_tx_commit(tx); 31042082Seschrock 31051635Sbonwick /* 31061635Sbonwick * Clear the dirty config list. 31071635Sbonwick */ 31081635Sbonwick while ((vd = list_head(&spa->spa_dirty_list)) != NULL) 31091635Sbonwick vdev_config_clean(vd); 31101635Sbonwick 31111635Sbonwick /* 31121635Sbonwick * Now that the new config has synced transactionally, 31131635Sbonwick * let it become visible to the config cache. 31141635Sbonwick */ 31151635Sbonwick if (spa->spa_config_syncing != NULL) { 31161635Sbonwick spa_config_set(spa, spa->spa_config_syncing); 31171635Sbonwick spa->spa_config_txg = txg; 31181635Sbonwick spa->spa_config_syncing = NULL; 31191635Sbonwick } 3120789Sahrens 3121789Sahrens /* 3122789Sahrens * Make a stable copy of the fully synced uberblock. 3123789Sahrens * We use this as the root for pool traversals. 3124789Sahrens */ 3125789Sahrens spa->spa_traverse_wanted = 1; /* tells traverse_more() to stop */ 3126789Sahrens 3127789Sahrens spa_scrub_suspend(spa); /* stop scrubbing and finish I/Os */ 3128789Sahrens 3129789Sahrens rw_enter(&spa->spa_traverse_lock, RW_WRITER); 3130789Sahrens spa->spa_traverse_wanted = 0; 3131789Sahrens spa->spa_ubsync = spa->spa_uberblock; 3132789Sahrens rw_exit(&spa->spa_traverse_lock); 3133789Sahrens 3134789Sahrens spa_scrub_resume(spa); /* resume scrub with new ubsync */ 3135789Sahrens 3136789Sahrens /* 3137789Sahrens * Clean up the ZIL records for the synced txg. 3138789Sahrens */ 3139789Sahrens dsl_pool_zil_clean(dp); 3140789Sahrens 3141789Sahrens /* 3142789Sahrens * Update usable space statistics.
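 *
 * vdev_sync_done() runs for every vdev pulled off the TXG_CLEAN(txg)
 * list, settling that vdev's space accounting now that the uberblock
 * naming this txg is safely on disk.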
3143789Sahrens */ 3144789Sahrens while (vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg))) 3145789Sahrens vdev_sync_done(vd, txg); 3146789Sahrens 3147789Sahrens /* 3148789Sahrens * It had better be the case that we didn't dirty anything 31492082Seschrock * since vdev_config_sync(). 3150789Sahrens */ 3151789Sahrens ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg)); 3152789Sahrens ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg)); 3153789Sahrens ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg)); 3154789Sahrens ASSERT(bpl->bpl_queue == NULL); 3155789Sahrens 31561544Seschrock spa_config_exit(spa, FTAG); 31571544Seschrock 31581544Seschrock /* 31591544Seschrock * If any async tasks have been requested, kick them off. 31601544Seschrock */ 31611544Seschrock spa_async_dispatch(spa); 3162789Sahrens } 3163789Sahrens 3164789Sahrens /* 3165789Sahrens * Sync all pools. We don't want to hold the namespace lock across these 3166789Sahrens * operations, so we take a reference on the spa_t and drop the lock during the 3167789Sahrens * sync. 3168789Sahrens */ 3169789Sahrens void 3170789Sahrens spa_sync_allpools(void) 3171789Sahrens { 3172789Sahrens spa_t *spa = NULL; 3173789Sahrens mutex_enter(&spa_namespace_lock); 3174789Sahrens while ((spa = spa_next(spa)) != NULL) { 3175789Sahrens if (spa_state(spa) != POOL_STATE_ACTIVE) 3176789Sahrens continue; 3177789Sahrens spa_open_ref(spa, FTAG); 3178789Sahrens mutex_exit(&spa_namespace_lock); 3179789Sahrens txg_wait_synced(spa_get_dsl(spa), 0); 3180789Sahrens mutex_enter(&spa_namespace_lock); 3181789Sahrens spa_close(spa, FTAG); 3182789Sahrens } 3183789Sahrens mutex_exit(&spa_namespace_lock); 3184789Sahrens } 3185789Sahrens 3186789Sahrens /* 3187789Sahrens * ========================================================================== 3188789Sahrens * Miscellaneous routines 3189789Sahrens * ========================================================================== 3190789Sahrens */ 3191789Sahrens 3192789Sahrens /* 3193789Sahrens * Remove all pools in the system. 3194789Sahrens */ 3195789Sahrens void 3196789Sahrens spa_evict_all(void) 3197789Sahrens { 3198789Sahrens spa_t *spa; 3199789Sahrens 3200789Sahrens /* 3201789Sahrens * Remove all cached state. All pools should be closed now, 3202789Sahrens * so every spa in the AVL tree should be unreferenced. 3203789Sahrens */ 3204789Sahrens mutex_enter(&spa_namespace_lock); 3205789Sahrens while ((spa = spa_next(NULL)) != NULL) { 3206789Sahrens /* 32071544Seschrock * Stop async tasks. The async thread may need to detach 32081544Seschrock * a device that's been replaced, which requires grabbing 32091544Seschrock * spa_namespace_lock, so we must drop it here. 
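 *
 * Holding a reference (spa_open_ref) keeps this spa_t from going away
 * while the lock is dropped; spa_async_suspend() then waits for any
 * in-flight async thread to exit before we tear the pool down.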
3210789Sahrens */ 3211789Sahrens spa_open_ref(spa, FTAG); 3212789Sahrens mutex_exit(&spa_namespace_lock); 32131544Seschrock spa_async_suspend(spa); 3214789Sahrens VERIFY(spa_scrub(spa, POOL_SCRUB_NONE, B_TRUE) == 0); 3215789Sahrens mutex_enter(&spa_namespace_lock); 3216789Sahrens spa_close(spa, FTAG); 3217789Sahrens 3218789Sahrens if (spa->spa_state != POOL_STATE_UNINITIALIZED) { 3219789Sahrens spa_unload(spa); 3220789Sahrens spa_deactivate(spa); 3221789Sahrens } 3222789Sahrens spa_remove(spa); 3223789Sahrens } 3224789Sahrens mutex_exit(&spa_namespace_lock); 3225789Sahrens } 32261544Seschrock 32271544Seschrock vdev_t * 32281544Seschrock spa_lookup_by_guid(spa_t *spa, uint64_t guid) 32291544Seschrock { 32301544Seschrock return (vdev_lookup_by_guid(spa->spa_root_vdev, guid)); 32311544Seschrock } 32321760Seschrock 32331760Seschrock void 32341760Seschrock spa_upgrade(spa_t *spa) 32351760Seschrock { 32361760Seschrock spa_config_enter(spa, RW_WRITER, FTAG); 32371760Seschrock 32381760Seschrock /* 32391760Seschrock * This should only be called for a non-faulted pool, and since a 32401760Seschrock * future version would result in an unopenable pool, this shouldn't be 32411760Seschrock * possible. 32421760Seschrock */ 32431760Seschrock ASSERT(spa->spa_uberblock.ub_version <= ZFS_VERSION); 32441760Seschrock 32451760Seschrock spa->spa_uberblock.ub_version = ZFS_VERSION; 32461760Seschrock vdev_config_dirty(spa->spa_root_vdev); 32471760Seschrock 32481760Seschrock spa_config_exit(spa, FTAG); 32492082Seschrock 32502082Seschrock txg_wait_synced(spa_get_dsl(spa), 0); 32511760Seschrock } 32522082Seschrock 32532082Seschrock boolean_t 32542082Seschrock spa_has_spare(spa_t *spa, uint64_t guid) 32552082Seschrock { 32562082Seschrock int i; 32573377Seschrock uint64_t spareguid; 32582082Seschrock 32592082Seschrock for (i = 0; i < spa->spa_nspares; i++) 32602082Seschrock if (spa->spa_spares[i]->vdev_guid == guid) 32612082Seschrock return (B_TRUE); 32622082Seschrock 32633377Seschrock for (i = 0; i < spa->spa_pending_nspares; i++) { 32643377Seschrock if (nvlist_lookup_uint64(spa->spa_pending_spares[i], 32653377Seschrock ZPOOL_CONFIG_GUID, &spareguid) == 0 && 32663377Seschrock spareguid == guid) 32673377Seschrock return (B_TRUE); 32683377Seschrock } 32693377Seschrock 32702082Seschrock return (B_FALSE); 32712082Seschrock } 32723912Slling 32733912Slling int 32743912Slling spa_set_props(spa_t *spa, nvlist_t *nvp) 32753912Slling { 32763912Slling return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props, 32773912Slling spa, nvp, 3)); 32783912Slling } 32793912Slling 32803912Slling int 32813912Slling spa_get_props(spa_t *spa, nvlist_t **nvp) 32823912Slling { 32833912Slling zap_cursor_t zc; 32843912Slling zap_attribute_t za; 32853912Slling objset_t *mos = spa->spa_meta_objset; 32863912Slling zfs_source_t src; 3287*4451Seschrock zpool_prop_t prop; 32883912Slling nvlist_t *propval; 32893912Slling uint64_t value; 32903912Slling int err; 32913912Slling 32923912Slling VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); 32933912Slling 32943912Slling mutex_enter(&spa->spa_props_lock); 32953912Slling /* If no props object, then just return empty nvlist */ 32963912Slling if (spa->spa_pool_props_object == 0) { 32973912Slling mutex_exit(&spa->spa_props_lock); 32983912Slling return (0); 32993912Slling } 33003912Slling 33013912Slling for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object); 33023912Slling (err = zap_cursor_retrieve(&zc, &za)) == 0; 33033912Slling zap_cursor_advance(&zc)) { 33043912Slling 33053912Slling if 
((prop = zpool_name_to_prop(za.za_name)) == ZFS_PROP_INVAL) 33063912Slling continue; 33073912Slling 33083912Slling VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); 33093912Slling switch (za.za_integer_length) { 33103912Slling case 8: 3311*4451Seschrock if (zpool_prop_default_numeric(prop) == 33123912Slling za.za_first_integer) 33133912Slling src = ZFS_SRC_DEFAULT; 33143912Slling else 33153912Slling src = ZFS_SRC_LOCAL; 33163912Slling value = za.za_first_integer; 33173912Slling 3318*4451Seschrock if (prop == ZPOOL_PROP_BOOTFS) { 33193912Slling dsl_pool_t *dp; 33203912Slling dsl_dataset_t *ds = NULL; 33213912Slling char strval[MAXPATHLEN]; 33223912Slling 33233912Slling dp = spa_get_dsl(spa); 33243912Slling rw_enter(&dp->dp_config_rwlock, RW_READER); 33253912Slling if ((err = dsl_dataset_open_obj(dp, 33263912Slling za.za_first_integer, NULL, DS_MODE_NONE, 33273912Slling FTAG, &ds)) != 0) { 33283912Slling rw_exit(&dp->dp_config_rwlock); 33293912Slling break; 33303912Slling } 33313912Slling dsl_dataset_name(ds, strval); 33323912Slling dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 33333912Slling rw_exit(&dp->dp_config_rwlock); 33343912Slling 33353912Slling VERIFY(nvlist_add_uint64(propval, 33363912Slling ZFS_PROP_SOURCE, src) == 0); 33373912Slling VERIFY(nvlist_add_string(propval, 33383912Slling ZFS_PROP_VALUE, strval) == 0); 33393912Slling } else { 33403912Slling VERIFY(nvlist_add_uint64(propval, 33413912Slling ZFS_PROP_SOURCE, src) == 0); 33423912Slling VERIFY(nvlist_add_uint64(propval, 33433912Slling ZFS_PROP_VALUE, value) == 0); 33443912Slling } 33453912Slling VERIFY(nvlist_add_nvlist(*nvp, za.za_name, 33463912Slling propval) == 0); 33473912Slling break; 33483912Slling } 33493912Slling nvlist_free(propval); 33503912Slling } 33513912Slling zap_cursor_fini(&zc); 33523912Slling mutex_exit(&spa->spa_props_lock); 33533912Slling if (err && err != ENOENT) { 33543912Slling nvlist_free(*nvp); 33553912Slling return (err); 33563912Slling } 33573912Slling 33583912Slling return (0); 33593912Slling } 33603912Slling 33613912Slling /* 33623912Slling * If the bootfs property value is dsobj, clear it. 33633912Slling */ 33643912Slling void 33653912Slling spa_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx) 33663912Slling { 33673912Slling if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) { 33683912Slling VERIFY(zap_remove(spa->spa_meta_objset, 33693912Slling spa->spa_pool_props_object, 3370*4451Seschrock zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0); 33713912Slling spa->spa_bootfs = 0; 33723912Slling } 33733912Slling } 3374*4451Seschrock 3375*4451Seschrock /* 3376*4451Seschrock * Post a sysevent corresponding to the given event. The 'name' must be one of 3377*4451Seschrock * the event definitions in sys/sysevent/eventdefs.h. The payload will be 3378*4451Seschrock * filled in from the spa and (optionally) the vdev. This doesn't do anything 3379*4451Seschrock * in the userland libzpool, as we don't want consumers to misinterpret ztest 3380*4451Seschrock * or zdb as real changes. 
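 *
 * Note that attribute-allocation failures are not fatal: on any
 * sysevent_add_attr() failure we jump to the cleanup label below,
 * free whatever was built, and simply drop the event.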
3381*4451Seschrock */ 3382*4451Seschrock void 3383*4451Seschrock spa_event_notify(spa_t *spa, vdev_t *vd, const char *name) 3384*4451Seschrock { 3385*4451Seschrock #ifdef _KERNEL 3386*4451Seschrock sysevent_t *ev; 3387*4451Seschrock sysevent_attr_list_t *attr = NULL; 3388*4451Seschrock sysevent_value_t value; 3389*4451Seschrock sysevent_id_t eid; 3390*4451Seschrock 3391*4451Seschrock ev = sysevent_alloc(EC_ZFS, (char *)name, SUNW_KERN_PUB "zfs", 3392*4451Seschrock SE_SLEEP); 3393*4451Seschrock 3394*4451Seschrock value.value_type = SE_DATA_TYPE_STRING; 3395*4451Seschrock value.value.sv_string = spa_name(spa); 3396*4451Seschrock if (sysevent_add_attr(&attr, ZFS_EV_POOL_NAME, &value, SE_SLEEP) != 0) 3397*4451Seschrock goto done; 3398*4451Seschrock 3399*4451Seschrock value.value_type = SE_DATA_TYPE_UINT64; 3400*4451Seschrock value.value.sv_uint64 = spa_guid(spa); 3401*4451Seschrock if (sysevent_add_attr(&attr, ZFS_EV_POOL_GUID, &value, SE_SLEEP) != 0) 3402*4451Seschrock goto done; 3403*4451Seschrock 3404*4451Seschrock if (vd) { 3405*4451Seschrock value.value_type = SE_DATA_TYPE_UINT64; 3406*4451Seschrock value.value.sv_uint64 = vd->vdev_guid; 3407*4451Seschrock if (sysevent_add_attr(&attr, ZFS_EV_VDEV_GUID, &value, 3408*4451Seschrock SE_SLEEP) != 0) 3409*4451Seschrock goto done; 3410*4451Seschrock 3411*4451Seschrock if (vd->vdev_path) { 3412*4451Seschrock value.value_type = SE_DATA_TYPE_STRING; 3413*4451Seschrock value.value.sv_string = vd->vdev_path; 3414*4451Seschrock if (sysevent_add_attr(&attr, ZFS_EV_VDEV_PATH, 3415*4451Seschrock &value, SE_SLEEP) != 0) 3416*4451Seschrock goto done; 3417*4451Seschrock } 3418*4451Seschrock } 3419*4451Seschrock 3420*4451Seschrock (void) log_sysevent(ev, SE_SLEEP, &eid); 3421*4451Seschrock 3422*4451Seschrock done: 3423*4451Seschrock if (attr) 3424*4451Seschrock sysevent_free_attr(attr); 3425*4451Seschrock sysevent_free(ev); 3426*4451Seschrock #endif 3427*4451Seschrock } 3428
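
/*
 * Example usage (illustrative; the ESC_ZFS_* event-name strings are
 * assumed here -- see sys/sysevent/eventdefs.h for the definitive list):
 *
 *	spa_event_notify(spa, vd, ESC_ZFS_VDEV_CHECK);
 *	spa_event_notify(spa, NULL, ESC_ZFS_RESILVER_FINISH);
 *
 * Passing a NULL vdev posts a pool-wide event whose payload carries
 * only the pool name and guid.
 */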