1789Sahrens /* 2789Sahrens * CDDL HEADER START 3789Sahrens * 4789Sahrens * The contents of this file are subject to the terms of the 51544Seschrock * Common Development and Distribution License (the "License"). 61544Seschrock * You may not use this file except in compliance with the License. 7789Sahrens * 8789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9789Sahrens * or http://www.opensolaris.org/os/licensing. 10789Sahrens * See the License for the specific language governing permissions 11789Sahrens * and limitations under the License. 12789Sahrens * 13789Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15789Sahrens * If applicable, add the following below this CDDL HEADER, with the 16789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18789Sahrens * 19789Sahrens * CDDL HEADER END 20789Sahrens */ 212082Seschrock 22789Sahrens /* 235756Seschrock * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24789Sahrens * Use is subject to license terms. 25789Sahrens */ 26789Sahrens 27789Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 28789Sahrens 29789Sahrens /* 30789Sahrens * This file contains all the routines used when modifying on-disk SPA state. 31789Sahrens * This includes opening, importing, destroying, exporting a pool, and syncing a 32789Sahrens * pool. 
33789Sahrens */ 34789Sahrens 35789Sahrens #include <sys/zfs_context.h> 361544Seschrock #include <sys/fm/fs/zfs.h> 37789Sahrens #include <sys/spa_impl.h> 38789Sahrens #include <sys/zio.h> 39789Sahrens #include <sys/zio_checksum.h> 40789Sahrens #include <sys/zio_compress.h> 41789Sahrens #include <sys/dmu.h> 42789Sahrens #include <sys/dmu_tx.h> 43789Sahrens #include <sys/zap.h> 44789Sahrens #include <sys/zil.h> 45789Sahrens #include <sys/vdev_impl.h> 46789Sahrens #include <sys/metaslab.h> 47789Sahrens #include <sys/uberblock_impl.h> 48789Sahrens #include <sys/txg.h> 49789Sahrens #include <sys/avl.h> 50789Sahrens #include <sys/dmu_traverse.h> 513912Slling #include <sys/dmu_objset.h> 52789Sahrens #include <sys/unique.h> 53789Sahrens #include <sys/dsl_pool.h> 543912Slling #include <sys/dsl_dataset.h> 55789Sahrens #include <sys/dsl_dir.h> 56789Sahrens #include <sys/dsl_prop.h> 573912Slling #include <sys/dsl_synctask.h> 58789Sahrens #include <sys/fs/zfs.h> 595450Sbrendan #include <sys/arc.h> 60789Sahrens #include <sys/callb.h> 613975Sek110237 #include <sys/systeminfo.h> 623975Sek110237 #include <sys/sunddi.h> 63789Sahrens 645094Slling #include "zfs_prop.h" 655913Sperrin #include "zfs_comutil.h" 665094Slling 672986Sek110237 int zio_taskq_threads = 8; 682986Sek110237 695094Slling static void spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx); 705094Slling 715094Slling /* 725094Slling * ========================================================================== 735094Slling * SPA properties routines 745094Slling * ========================================================================== 755094Slling */ 765094Slling 775094Slling /* 785094Slling * Add a (source=src, propname=propval) list to an nvlist. 
795094Slling */ 80*5949Slling static void 815094Slling spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval, 825094Slling uint64_t intval, zprop_source_t src) 835094Slling { 845094Slling const char *propname = zpool_prop_to_name(prop); 855094Slling nvlist_t *propval; 86*5949Slling 87*5949Slling VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); 88*5949Slling VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0); 89*5949Slling 90*5949Slling if (strval != NULL) 91*5949Slling VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0); 92*5949Slling else 93*5949Slling VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0); 94*5949Slling 95*5949Slling VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0); 965094Slling nvlist_free(propval); 975094Slling } 985094Slling 995094Slling /* 1005094Slling * Get property values from the spa configuration. 1015094Slling */ 102*5949Slling static void 1035094Slling spa_prop_get_config(spa_t *spa, nvlist_t **nvp) 1045094Slling { 1055094Slling uint64_t size = spa_get_space(spa); 1065094Slling uint64_t used = spa_get_alloc(spa); 1075094Slling uint64_t cap, version; 1085094Slling zprop_source_t src = ZPROP_SRC_NONE; 1095363Seschrock char *cachefile; 1105363Seschrock size_t len; 1115094Slling 1125094Slling /* 1135094Slling * readonly properties 1145094Slling */ 115*5949Slling spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa->spa_name, 0, src); 116*5949Slling spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src); 117*5949Slling spa_prop_add_list(*nvp, ZPOOL_PROP_USED, NULL, used, src); 118*5949Slling spa_prop_add_list(*nvp, ZPOOL_PROP_AVAILABLE, NULL, size - used, src); 1195094Slling 1205094Slling cap = (size == 0) ? 
0 : (used * 100 / size); 121*5949Slling spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src); 122*5949Slling 123*5949Slling spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src); 124*5949Slling spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL, 125*5949Slling spa->spa_root_vdev->vdev_state, src); 1265094Slling 1275094Slling /* 1285094Slling * settable properties that are not stored in the pool property object. 1295094Slling */ 1305094Slling version = spa_version(spa); 1315094Slling if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) 1325094Slling src = ZPROP_SRC_DEFAULT; 1335094Slling else 1345094Slling src = ZPROP_SRC_LOCAL; 135*5949Slling spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src); 136*5949Slling 137*5949Slling if (spa->spa_root != NULL) 138*5949Slling spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root, 139*5949Slling 0, ZPROP_SRC_LOCAL); 1405094Slling 1415363Seschrock if (spa->spa_config_dir != NULL) { 1425363Seschrock if (strcmp(spa->spa_config_dir, "none") == 0) { 143*5949Slling spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 1445363Seschrock spa->spa_config_dir, 0, ZPROP_SRC_LOCAL); 1455363Seschrock } else { 1465363Seschrock len = strlen(spa->spa_config_dir) + 1475363Seschrock strlen(spa->spa_config_file) + 2; 1485363Seschrock cachefile = kmem_alloc(len, KM_SLEEP); 1495363Seschrock (void) snprintf(cachefile, len, "%s/%s", 1505363Seschrock spa->spa_config_dir, spa->spa_config_file); 151*5949Slling spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 1525363Seschrock cachefile, 0, ZPROP_SRC_LOCAL); 1535363Seschrock kmem_free(cachefile, len); 1545363Seschrock } 1555363Seschrock } 1565094Slling } 1575094Slling 1585094Slling /* 1595094Slling * Get zpool property values. 
1605094Slling */ 1615094Slling int 1625094Slling spa_prop_get(spa_t *spa, nvlist_t **nvp) 1635094Slling { 1645094Slling zap_cursor_t zc; 1655094Slling zap_attribute_t za; 1665094Slling objset_t *mos = spa->spa_meta_objset; 1675094Slling int err; 1685094Slling 169*5949Slling VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); 1705094Slling 1715094Slling /* 1725094Slling * Get properties from the spa config. 1735094Slling */ 174*5949Slling spa_prop_get_config(spa, nvp); 1755094Slling 1765094Slling mutex_enter(&spa->spa_props_lock); 1775094Slling /* If no pool property object, no more prop to get. */ 1785094Slling if (spa->spa_pool_props_object == 0) { 1795094Slling mutex_exit(&spa->spa_props_lock); 1805094Slling return (0); 1815094Slling } 1825094Slling 1835094Slling /* 1845094Slling * Get properties from the MOS pool property object. 1855094Slling */ 1865094Slling for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object); 1875094Slling (err = zap_cursor_retrieve(&zc, &za)) == 0; 1885094Slling zap_cursor_advance(&zc)) { 1895094Slling uint64_t intval = 0; 1905094Slling char *strval = NULL; 1915094Slling zprop_source_t src = ZPROP_SRC_DEFAULT; 1925094Slling zpool_prop_t prop; 1935094Slling 1945094Slling if ((prop = zpool_name_to_prop(za.za_name)) == ZPROP_INVAL) 1955094Slling continue; 1965094Slling 1975094Slling switch (za.za_integer_length) { 1985094Slling case 8: 1995094Slling /* integer property */ 2005094Slling if (za.za_first_integer != 2015094Slling zpool_prop_default_numeric(prop)) 2025094Slling src = ZPROP_SRC_LOCAL; 2035094Slling 2045094Slling if (prop == ZPOOL_PROP_BOOTFS) { 2055094Slling dsl_pool_t *dp; 2065094Slling dsl_dataset_t *ds = NULL; 2075094Slling 2085094Slling dp = spa_get_dsl(spa); 2095094Slling rw_enter(&dp->dp_config_rwlock, RW_READER); 2105094Slling if (err = dsl_dataset_open_obj(dp, 2115094Slling za.za_first_integer, NULL, DS_MODE_NONE, 2125094Slling FTAG, &ds)) { 2135094Slling rw_exit(&dp->dp_config_rwlock); 2145094Slling break; 
2155094Slling } 2165094Slling 2175094Slling strval = kmem_alloc( 2185094Slling MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, 2195094Slling KM_SLEEP); 2205094Slling dsl_dataset_name(ds, strval); 2215094Slling dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 2225094Slling rw_exit(&dp->dp_config_rwlock); 2235094Slling } else { 2245094Slling strval = NULL; 2255094Slling intval = za.za_first_integer; 2265094Slling } 2275094Slling 228*5949Slling spa_prop_add_list(*nvp, prop, strval, intval, src); 2295094Slling 2305094Slling if (strval != NULL) 2315094Slling kmem_free(strval, 2325094Slling MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); 2335094Slling 2345094Slling break; 2355094Slling 2365094Slling case 1: 2375094Slling /* string property */ 2385094Slling strval = kmem_alloc(za.za_num_integers, KM_SLEEP); 2395094Slling err = zap_lookup(mos, spa->spa_pool_props_object, 2405094Slling za.za_name, 1, za.za_num_integers, strval); 2415094Slling if (err) { 2425094Slling kmem_free(strval, za.za_num_integers); 2435094Slling break; 2445094Slling } 245*5949Slling spa_prop_add_list(*nvp, prop, strval, 0, src); 2465094Slling kmem_free(strval, za.za_num_integers); 2475094Slling break; 2485094Slling 2495094Slling default: 2505094Slling break; 2515094Slling } 2525094Slling } 2535094Slling zap_cursor_fini(&zc); 2545094Slling mutex_exit(&spa->spa_props_lock); 2555094Slling out: 2565094Slling if (err && err != ENOENT) { 2575094Slling nvlist_free(*nvp); 258*5949Slling *nvp = NULL; 2595094Slling return (err); 2605094Slling } 2615094Slling 2625094Slling return (0); 2635094Slling } 2645094Slling 2655094Slling /* 2665094Slling * Validate the given pool properties nvlist and modify the list 2675094Slling * for the property values to be set. 
 */
/*
 * Walk every pair in 'props', checking type and range for each supported
 * pool property.  On success the list may be rewritten in place: a bootfs
 * name is replaced by its dataset object number.  Returns 0 or an errno.
 */
static int
spa_prop_validate(spa_t *spa, nvlist_t *props)
{
	nvpair_t *elem;
	int error = 0, reset_bootfs = 0;
	uint64_t objnum;

	elem = NULL;
	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
		zpool_prop_t prop;
		char *propname, *strval;
		uint64_t intval;
		vdev_t *rvdev;
		char *vdev_type;
		objset_t *os;
		char *slash;

		propname = nvpair_name(elem);

		/* Any unrecognized property name fails the whole request. */
		if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL)
			return (EINVAL);

		switch (prop) {
		case ZPOOL_PROP_VERSION:
			/* Version may only move forward, up to SPA_VERSION. */
			error = nvpair_value_uint64(elem, &intval);
			if (!error &&
			    (intval < spa_version(spa) || intval > SPA_VERSION))
				error = EINVAL;
			break;

		case ZPOOL_PROP_DELEGATION:
		case ZPOOL_PROP_AUTOREPLACE:
			/* Boolean properties: only 0 or 1 are valid. */
			error = nvpair_value_uint64(elem, &intval);
			if (!error && intval > 1)
				error = EINVAL;
			break;

		case ZPOOL_PROP_BOOTFS:
			if (spa_version(spa) < SPA_VERSION_BOOTFS) {
				error = ENOTSUP;
				break;
			}

			/*
			 * A bootable filesystem can not be on a RAIDZ pool
			 * nor a striped pool with more than 1 device.
			 */
			rvdev = spa->spa_root_vdev;
			vdev_type =
			    rvdev->vdev_child[0]->vdev_ops->vdev_op_type;
			if (rvdev->vdev_children > 1 ||
			    strcmp(vdev_type, VDEV_TYPE_RAIDZ) == 0 ||
			    strcmp(vdev_type, VDEV_TYPE_MISSING) == 0) {
				error = ENOTSUP;
				break;
			}

			reset_bootfs = 1;

			error = nvpair_value_string(elem, &strval);

			if (!error) {
				/*
				 * An empty string means "clear bootfs":
				 * fall back to the property's default
				 * object number.
				 */
				if (strval == NULL || strval[0] == '\0') {
					objnum = zpool_prop_default_numeric(
					    ZPOOL_PROP_BOOTFS);
					break;
				}

				/*
				 * Otherwise translate the dataset name to
				 * its object number (also verifies the
				 * dataset exists and is a filesystem).
				 */
				if (error = dmu_objset_open(strval, DMU_OST_ZFS,
				    DS_MODE_STANDARD | DS_MODE_READONLY, &os))
					break;
				objnum = dmu_objset_id(os);
				dmu_objset_close(os);
			}
			break;
		case ZPOOL_PROP_FAILUREMODE:
			error = nvpair_value_uint64(elem, &intval);
			if (!error && (intval < ZIO_FAILURE_MODE_WAIT ||
			    intval > ZIO_FAILURE_MODE_PANIC))
				error = EINVAL;

			/*
			 * This is a special case which only occurs when
			 * the pool has completely failed. This allows
			 * the user to change the in-core failmode property
			 * without syncing it out to disk (I/Os might
			 * currently be blocked). We do this by returning
			 * EIO to the caller (spa_prop_set) to trick it
			 * into thinking we encountered a property validation
			 * error.
			 */
			if (!error && spa_state(spa) == POOL_STATE_IO_FAILURE) {
				spa->spa_failmode = intval;
				error = EIO;
			}
			break;

		case ZPOOL_PROP_CACHEFILE:
			if ((error = nvpair_value_string(elem, &strval)) != 0)
				break;

			/* Empty string and "none" are accepted sentinels. */
			if (strval[0] == '\0')
				break;

			if (strcmp(strval, "none") == 0)
				break;

			/* Anything else must be an absolute path ... */
			if (strval[0] != '/') {
				error = EINVAL;
				break;
			}

			slash = strrchr(strval, '/');
			ASSERT(slash != NULL);

			/* ... naming a file, not a directory, "." or "..". */
			if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
			    strcmp(slash, "/..") == 0)
				error = EINVAL;
			break;
		}

		if (error)
			break;
	}

	/*
	 * Replace the validated bootfs name with its object number so the
	 * sync task stores a number, not a string.
	 */
	if (!error && reset_bootfs) {
		error = nvlist_remove(props,
		    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING);

		if (!error) {
			error = nvlist_add_uint64(props,
			    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum);
		}
	}

	return (error);
}

/*
 * Validate 'nvp' and, if acceptable, hand it to a sync task
 * (spa_sync_props) to be written out.  Returns 0 or an errno.
 */
int
spa_prop_set(spa_t *spa, nvlist_t *nvp)
{
	int error;

	if ((error = spa_prop_validate(spa, nvp)) != 0)
		return (error);

	return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props,
	    spa, nvp, 3));
}

/*
 * If the bootfs property value is dsobj, clear it.
4215094Slling */ 4225094Slling void 4235094Slling spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx) 4245094Slling { 4255094Slling if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) { 4265094Slling VERIFY(zap_remove(spa->spa_meta_objset, 4275094Slling spa->spa_pool_props_object, 4285094Slling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0); 4295094Slling spa->spa_bootfs = 0; 4305094Slling } 4315094Slling } 4325094Slling 433789Sahrens /* 434789Sahrens * ========================================================================== 435789Sahrens * SPA state manipulation (open/create/destroy/import/export) 436789Sahrens * ========================================================================== 437789Sahrens */ 438789Sahrens 4391544Seschrock static int 4401544Seschrock spa_error_entry_compare(const void *a, const void *b) 4411544Seschrock { 4421544Seschrock spa_error_entry_t *sa = (spa_error_entry_t *)a; 4431544Seschrock spa_error_entry_t *sb = (spa_error_entry_t *)b; 4441544Seschrock int ret; 4451544Seschrock 4461544Seschrock ret = bcmp(&sa->se_bookmark, &sb->se_bookmark, 4471544Seschrock sizeof (zbookmark_t)); 4481544Seschrock 4491544Seschrock if (ret < 0) 4501544Seschrock return (-1); 4511544Seschrock else if (ret > 0) 4521544Seschrock return (1); 4531544Seschrock else 4541544Seschrock return (0); 4551544Seschrock } 4561544Seschrock 4571544Seschrock /* 4581544Seschrock * Utility function which retrieves copies of the current logs and 4591544Seschrock * re-initializes them in the process. 
 */
void
spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub)
{
	/* Caller must already hold the errlist lock across the swap. */
	ASSERT(MUTEX_HELD(&spa->spa_errlist_lock));

	/* Hand the current trees (and their contents) to the caller. */
	bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t));
	bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t));

	/* Re-initialize the in-core trees so error logging can continue. */
	avl_create(&spa->spa_errlist_scrub,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
	avl_create(&spa->spa_errlist_last,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
}

/*
 * Activate an uninitialized pool: allocate metaslab classes, the per-type
 * zio taskqs, and the lists/trees used while the pool is active.
 */
static void
spa_activate(spa_t *spa)
{
	int t;

	ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);

	spa->spa_state = POOL_STATE_ACTIVE;

	/* Separate allocation classes for normal data and the intent log. */
	spa->spa_normal_class = metaslab_class_create();
	spa->spa_log_class = metaslab_class_create();

	/* One issue taskq and one interrupt taskq per zio type. */
	for (t = 0; t < ZIO_TYPES; t++) {
		spa->spa_zio_issue_taskq[t] = taskq_create("spa_zio_issue",
		    zio_taskq_threads, maxclsyspri, 50, INT_MAX,
		    TASKQ_PREPOPULATE);
		spa->spa_zio_intr_taskq[t] = taskq_create("spa_zio_intr",
		    zio_taskq_threads, maxclsyspri, 50, INT_MAX,
		    TASKQ_PREPOPULATE);
	}

	list_create(&spa->spa_dirty_list, sizeof (vdev_t),
	    offsetof(vdev_t, vdev_dirty_node));
	list_create(&spa->spa_zio_list, sizeof (zio_t),
	    offsetof(zio_t, zio_link_node));

	txg_list_create(&spa->spa_vdev_txg_list,
	    offsetof(struct vdev, vdev_txg_node));

	avl_create(&spa->spa_errlist_scrub,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
	avl_create(&spa->spa_errlist_last,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
}

/*
 * Opposite of spa_activate(): tear down everything it set up, in reverse.
 * The pool must already have been unloaded (no sync thread, no dsl pool,
 * no vdev tree) — asserted below.
 */
static void
spa_deactivate(spa_t *spa)
{
	int t;

	ASSERT(spa->spa_sync_on == B_FALSE);
	ASSERT(spa->spa_dsl_pool == NULL);
	ASSERT(spa->spa_root_vdev == NULL);

	ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED);

	txg_list_destroy(&spa->spa_vdev_txg_list);

	list_destroy(&spa->spa_dirty_list);
	list_destroy(&spa->spa_zio_list);

	for (t = 0; t < ZIO_TYPES; t++) {
		taskq_destroy(spa->spa_zio_issue_taskq[t]);
		taskq_destroy(spa->spa_zio_intr_taskq[t]);
		spa->spa_zio_issue_taskq[t] = NULL;
		spa->spa_zio_intr_taskq[t] = NULL;
	}

	metaslab_class_destroy(spa->spa_normal_class);
	spa->spa_normal_class = NULL;

	metaslab_class_destroy(spa->spa_log_class);
	spa->spa_log_class = NULL;

	/*
	 * If this was part of an import or the open otherwise failed, we may
	 * still have errors left in the queues.  Empty them just in case.
	 */
	spa_errlog_drain(spa);

	avl_destroy(&spa->spa_errlist_scrub);
	avl_destroy(&spa->spa_errlist_last);

	spa->spa_state = POOL_STATE_UNINITIALIZED;
}

/*
 * Verify a pool configuration, and construct the vdev tree appropriately.  This
 * will create all the necessary vdevs in the appropriate layout, with each vdev
 * in the CLOSED state.  This will prep the pool before open/creation/import.
 * All vdev validation is done by the vdev_alloc() routine.
 */
static int
spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent,
    uint_t id, int atype)
{
	nvlist_t **child;
	uint_t c, children;
	int error;

	if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0)
		return (error);

	/* Leaf vdevs have no children; we are done. */
	if ((*vdp)->vdev_ops->vdev_op_leaf)
		return (0);

	/* An interior vdev without a child array is a malformed config. */
	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0) {
		vdev_free(*vdp);
		*vdp = NULL;
		return (EINVAL);
	}

	/*
	 * Recurse into each child.  On failure, freeing *vdp tears down the
	 * partially-built subtree (children are attached as they are built).
	 */
	for (c = 0; c < children; c++) {
		vdev_t *vd;
		if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c,
		    atype)) != 0) {
			vdev_free(*vdp);
			*vdp = NULL;
			return (error);
		}
	}

	ASSERT(*vdp != NULL);

	return (0);
}

/*
 * Opposite of spa_load().
 */
/*
 * Tear down the in-core state built by spa_load(): stop async work and
 * syncing, drain outstanding I/O, drop the L2 cache, close the DSL pool,
 * and free the vdev tree plus the spare/l2cache auxiliary arrays.
 * The teardown order mirrors the dependencies between these subsystems.
 */
static void
spa_unload(spa_t *spa)
{
	int i;

	/*
	 * Stop async tasks.
	 */
	spa_async_suspend(spa);

	/*
	 * Stop syncing.
	 */
	if (spa->spa_sync_on) {
		txg_sync_stop(spa->spa_dsl_pool);
		spa->spa_sync_on = B_FALSE;
	}

	/*
	 * Wait for any outstanding prefetch I/O to complete.
	 * (Taking and dropping the config lock as writer acts as a barrier.)
	 */
	spa_config_enter(spa, RW_WRITER, FTAG);
	spa_config_exit(spa, FTAG);

	/*
	 * Drop and purge level 2 cache
	 */
	spa_l2cache_drop(spa);

	/*
	 * Close the dsl pool.
	 */
	if (spa->spa_dsl_pool) {
		dsl_pool_close(spa->spa_dsl_pool);
		spa->spa_dsl_pool = NULL;
	}

	/*
	 * Close all vdevs.
	 */
	if (spa->spa_root_vdev)
		vdev_free(spa->spa_root_vdev);
	ASSERT(spa->spa_root_vdev == NULL);

	/* Free the hot-spare vdevs, their array, and the stashed config. */
	for (i = 0; i < spa->spa_spares.sav_count; i++)
		vdev_free(spa->spa_spares.sav_vdevs[i]);
	if (spa->spa_spares.sav_vdevs) {
		kmem_free(spa->spa_spares.sav_vdevs,
		    spa->spa_spares.sav_count * sizeof (void *));
		spa->spa_spares.sav_vdevs = NULL;
	}
	if (spa->spa_spares.sav_config) {
		nvlist_free(spa->spa_spares.sav_config);
		spa->spa_spares.sav_config = NULL;
	}

	/* Likewise for the l2cache devices. */
	for (i = 0; i < spa->spa_l2cache.sav_count; i++)
		vdev_free(spa->spa_l2cache.sav_vdevs[i]);
	if (spa->spa_l2cache.sav_vdevs) {
		kmem_free(spa->spa_l2cache.sav_vdevs,
		    spa->spa_l2cache.sav_count * sizeof (void *));
		spa->spa_l2cache.sav_vdevs = NULL;
	}
	if (spa->spa_l2cache.sav_config) {
		nvlist_free(spa->spa_l2cache.sav_config);
		spa->spa_l2cache.sav_config = NULL;
	}

	spa->spa_async_suspended = 0;
}

/*
 * Load (or re-load) the current list of vdevs describing the active spares for
 * this pool.  When this is called, we have some form of basic information in
 * 'spa_spares.sav_config'.  We parse this into vdevs, try to open them, and
 * then re-generate a more complete list including status information.
 */
static void
spa_load_spares(spa_t *spa)
{
	nvlist_t **spares;
	uint_t nspares;
	int i;
	vdev_t *vd, *tvd;

	/*
	 * First, close and free any existing spare vdevs.
	 */
	for (i = 0; i < spa->spa_spares.sav_count; i++) {
		vd = spa->spa_spares.sav_vdevs[i];

		/* Undo the call to spa_activate() below */
		if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid)) != NULL &&
		    tvd->vdev_isspare)
			spa_spare_remove(tvd);
		vdev_close(vd);
		vdev_free(vd);
	}

	if (spa->spa_spares.sav_vdevs)
		kmem_free(spa->spa_spares.sav_vdevs,
		    spa->spa_spares.sav_count * sizeof (void *));

	if (spa->spa_spares.sav_config == NULL)
		nspares = 0;
	else
		VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
		    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);

	spa->spa_spares.sav_count = (int)nspares;
	spa->spa_spares.sav_vdevs = NULL;

	if (nspares == 0)
		return;

	/*
	 * Construct the array of vdevs, opening them to get status in the
	 * process.  For each spare, there is potentially two different vdev_t
	 * structures associated with it: one in the list of spares (used only
	 * for basic validation purposes) and one in the active vdev
	 * configuration (if it's spared in).  During this phase we open and
	 * validate each vdev on the spare list.  If the vdev also exists in the
	 * active configuration, then we also mark this vdev as an active spare.
	 */
	spa->spa_spares.sav_vdevs = kmem_alloc(nspares * sizeof (void *),
	    KM_SLEEP);
	for (i = 0; i < spa->spa_spares.sav_count; i++) {
		VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0,
		    VDEV_ALLOC_SPARE) == 0);
		ASSERT(vd != NULL);

		spa->spa_spares.sav_vdevs[i] = vd;

		if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid)) != NULL) {
			if (!tvd->vdev_isspare)
				spa_spare_add(tvd);

			/*
			 * We only mark the spare active if we were successfully
			 * able to load the vdev.  Otherwise, importing a pool
			 * with a bad active spare would result in strange
			 * behavior, because multiple pool would think the spare
			 * is actively in use.
			 *
			 * There is a vulnerability here to an equally bizarre
			 * circumstance, where a dead active spare is later
			 * brought back to life (onlined or otherwise).  Given
			 * the rarity of this scenario, and the extra complexity
			 * it adds, we ignore the possibility.
			 */
			if (!vdev_is_dead(tvd))
				spa_spare_activate(tvd);
		}

		/* A spare that fails to open is left in the list unopened. */
		if (vdev_open(vd) != 0)
			continue;

		vd->vdev_top = vd;
		if (vdev_validate_aux(vd) == 0)
			spa_spare_add(vd);
	}

	/*
	 * Recompute the stashed list of spares, with status information
	 * this time.
	 */
	VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES,
	    DATA_TYPE_NVLIST_ARRAY) == 0);

	spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *),
	    KM_SLEEP);
	for (i = 0; i < spa->spa_spares.sav_count; i++)
		spares[i] = vdev_config_generate(spa,
		    spa->spa_spares.sav_vdevs[i], B_TRUE, B_TRUE, B_FALSE);
	VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
	    ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0);
	for (i = 0; i < spa->spa_spares.sav_count; i++)
		nvlist_free(spares[i]);
	kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *));
}

/*
 * Load (or re-load) the current list of vdevs describing the active l2cache for
 * this pool.  When this is called, we have some form of basic information in
 * 'spa_l2cache.sav_config'.  We parse this into vdevs, try to open them, and
 * then re-generate a more complete list including status information.
 * Devices which are already active have their details maintained, and are
 * not re-opened.
7945450Sbrendan */ 7955450Sbrendan static void 7965450Sbrendan spa_load_l2cache(spa_t *spa) 7975450Sbrendan { 7985450Sbrendan nvlist_t **l2cache; 7995450Sbrendan uint_t nl2cache; 8005450Sbrendan int i, j, oldnvdevs; 8015450Sbrendan uint64_t guid; 8025450Sbrendan vdev_t *vd, **oldvdevs, **newvdevs; 8035450Sbrendan spa_aux_vdev_t *sav = &spa->spa_l2cache; 8045450Sbrendan 8055450Sbrendan if (sav->sav_config != NULL) { 8065450Sbrendan VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, 8075450Sbrendan ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 8085450Sbrendan newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP); 8095450Sbrendan } else { 8105450Sbrendan nl2cache = 0; 8115450Sbrendan } 8125450Sbrendan 8135450Sbrendan oldvdevs = sav->sav_vdevs; 8145450Sbrendan oldnvdevs = sav->sav_count; 8155450Sbrendan sav->sav_vdevs = NULL; 8165450Sbrendan sav->sav_count = 0; 8175450Sbrendan 8185450Sbrendan /* 8195450Sbrendan * Process new nvlist of vdevs. 8205450Sbrendan */ 8215450Sbrendan for (i = 0; i < nl2cache; i++) { 8225450Sbrendan VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID, 8235450Sbrendan &guid) == 0); 8245450Sbrendan 8255450Sbrendan newvdevs[i] = NULL; 8265450Sbrendan for (j = 0; j < oldnvdevs; j++) { 8275450Sbrendan vd = oldvdevs[j]; 8285450Sbrendan if (vd != NULL && guid == vd->vdev_guid) { 8295450Sbrendan /* 8305450Sbrendan * Retain previous vdev for add/remove ops. 
8315450Sbrendan */ 8325450Sbrendan newvdevs[i] = vd; 8335450Sbrendan oldvdevs[j] = NULL; 8345450Sbrendan break; 8355450Sbrendan } 8365450Sbrendan } 8375450Sbrendan 8385450Sbrendan if (newvdevs[i] == NULL) { 8395450Sbrendan /* 8405450Sbrendan * Create new vdev 8415450Sbrendan */ 8425450Sbrendan VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0, 8435450Sbrendan VDEV_ALLOC_L2CACHE) == 0); 8445450Sbrendan ASSERT(vd != NULL); 8455450Sbrendan newvdevs[i] = vd; 8465450Sbrendan 8475450Sbrendan /* 8485450Sbrendan * Commit this vdev as an l2cache device, 8495450Sbrendan * even if it fails to open. 8505450Sbrendan */ 8515450Sbrendan spa_l2cache_add(vd); 8525450Sbrendan 8535450Sbrendan if (vdev_open(vd) != 0) 8545450Sbrendan continue; 8555450Sbrendan 8565450Sbrendan vd->vdev_top = vd; 8575450Sbrendan (void) vdev_validate_aux(vd); 8585450Sbrendan 8595450Sbrendan if (!vdev_is_dead(vd)) { 8605450Sbrendan uint64_t size; 8615450Sbrendan size = vdev_get_rsize(vd); 8625450Sbrendan ASSERT3U(size, >, 0); 8635450Sbrendan if (spa_mode & FWRITE) { 8645450Sbrendan l2arc_add_vdev(spa, vd, 8655450Sbrendan VDEV_LABEL_START_SIZE, 8665450Sbrendan size - VDEV_LABEL_START_SIZE); 8675450Sbrendan } 8685450Sbrendan spa_l2cache_activate(vd); 8695450Sbrendan } 8705450Sbrendan } 8715450Sbrendan } 8725450Sbrendan 8735450Sbrendan /* 8745450Sbrendan * Purge vdevs that were dropped 8755450Sbrendan */ 8765450Sbrendan for (i = 0; i < oldnvdevs; i++) { 8775450Sbrendan uint64_t pool; 8785450Sbrendan 8795450Sbrendan vd = oldvdevs[i]; 8805450Sbrendan if (vd != NULL) { 8815450Sbrendan if (spa_mode & FWRITE && 8825450Sbrendan spa_l2cache_exists(vd->vdev_guid, &pool) && 8835450Sbrendan pool != 0ULL) { 8845450Sbrendan l2arc_remove_vdev(vd); 8855450Sbrendan } 8865450Sbrendan (void) vdev_close(vd); 8875450Sbrendan spa_l2cache_remove(vd); 8885450Sbrendan } 8895450Sbrendan } 8905450Sbrendan 8915450Sbrendan if (oldvdevs) 8925450Sbrendan kmem_free(oldvdevs, oldnvdevs * sizeof (void *)); 8935450Sbrendan 8945450Sbrendan 
if (sav->sav_config == NULL) 8955450Sbrendan goto out; 8965450Sbrendan 8975450Sbrendan sav->sav_vdevs = newvdevs; 8985450Sbrendan sav->sav_count = (int)nl2cache; 8995450Sbrendan 9005450Sbrendan /* 9015450Sbrendan * Recompute the stashed list of l2cache devices, with status 9025450Sbrendan * information this time. 9035450Sbrendan */ 9045450Sbrendan VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE, 9055450Sbrendan DATA_TYPE_NVLIST_ARRAY) == 0); 9065450Sbrendan 9075450Sbrendan l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); 9085450Sbrendan for (i = 0; i < sav->sav_count; i++) 9095450Sbrendan l2cache[i] = vdev_config_generate(spa, 9105450Sbrendan sav->sav_vdevs[i], B_TRUE, B_FALSE, B_TRUE); 9115450Sbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, 9125450Sbrendan ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0); 9135450Sbrendan out: 9145450Sbrendan for (i = 0; i < sav->sav_count; i++) 9155450Sbrendan nvlist_free(l2cache[i]); 9165450Sbrendan if (sav->sav_count) 9175450Sbrendan kmem_free(l2cache, sav->sav_count * sizeof (void *)); 9182082Seschrock } 9192082Seschrock 9202082Seschrock static int 9212082Seschrock load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value) 9222082Seschrock { 9232082Seschrock dmu_buf_t *db; 9242082Seschrock char *packed = NULL; 9252082Seschrock size_t nvsize = 0; 9262082Seschrock int error; 9272082Seschrock *value = NULL; 9282082Seschrock 9292082Seschrock VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); 9302082Seschrock nvsize = *(uint64_t *)db->db_data; 9312082Seschrock dmu_buf_rele(db, FTAG); 9322082Seschrock 9332082Seschrock packed = kmem_alloc(nvsize, KM_SLEEP); 9342082Seschrock error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed); 9352082Seschrock if (error == 0) 9362082Seschrock error = nvlist_unpack(packed, nvsize, value, 0); 9372082Seschrock kmem_free(packed, nvsize); 9382082Seschrock 9392082Seschrock return (error); 9402082Seschrock } 9412082Seschrock 9422082Seschrock /* 
9434451Seschrock * Checks to see if the given vdev could not be opened, in which case we post a 9444451Seschrock * sysevent to notify the autoreplace code that the device has been removed. 9454451Seschrock */ 9464451Seschrock static void 9474451Seschrock spa_check_removed(vdev_t *vd) 9484451Seschrock { 9494451Seschrock int c; 9504451Seschrock 9514451Seschrock for (c = 0; c < vd->vdev_children; c++) 9524451Seschrock spa_check_removed(vd->vdev_child[c]); 9534451Seschrock 9544451Seschrock if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) { 9554451Seschrock zfs_post_autoreplace(vd->vdev_spa, vd); 9564451Seschrock spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK); 9574451Seschrock } 9584451Seschrock } 9594451Seschrock 9604451Seschrock /* 961789Sahrens * Load an existing storage pool, using the pool's builtin spa_config as a 9621544Seschrock * source of configuration information. 963789Sahrens */ 964789Sahrens static int 9651544Seschrock spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig) 966789Sahrens { 967789Sahrens int error = 0; 968789Sahrens nvlist_t *nvroot = NULL; 969789Sahrens vdev_t *rvd; 970789Sahrens uberblock_t *ub = &spa->spa_uberblock; 9711635Sbonwick uint64_t config_cache_txg = spa->spa_config_txg; 972789Sahrens uint64_t pool_guid; 9732082Seschrock uint64_t version; 974789Sahrens zio_t *zio; 9754451Seschrock uint64_t autoreplace = 0; 976789Sahrens 9771544Seschrock spa->spa_load_state = state; 9781635Sbonwick 979789Sahrens if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) || 9801733Sbonwick nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) { 9811544Seschrock error = EINVAL; 9821544Seschrock goto out; 9831544Seschrock } 984789Sahrens 9852082Seschrock /* 9862082Seschrock * Versioning wasn't explicitly added to the label until later, so if 9872082Seschrock * it's not present treat it as the initial version. 
9882082Seschrock */ 9892082Seschrock if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) != 0) 9904577Sahrens version = SPA_VERSION_INITIAL; 9912082Seschrock 9921733Sbonwick (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, 9931733Sbonwick &spa->spa_config_txg); 9941733Sbonwick 9951635Sbonwick if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) && 9961544Seschrock spa_guid_exists(pool_guid, 0)) { 9971544Seschrock error = EEXIST; 9981544Seschrock goto out; 9991544Seschrock } 1000789Sahrens 10012174Seschrock spa->spa_load_guid = pool_guid; 10022174Seschrock 1003789Sahrens /* 10042082Seschrock * Parse the configuration into a vdev tree. We explicitly set the 10052082Seschrock * value that will be returned by spa_version() since parsing the 10062082Seschrock * configuration requires knowing the version number. 1007789Sahrens */ 10081544Seschrock spa_config_enter(spa, RW_WRITER, FTAG); 10092082Seschrock spa->spa_ubsync.ub_version = version; 10102082Seschrock error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_LOAD); 10111544Seschrock spa_config_exit(spa, FTAG); 1012789Sahrens 10132082Seschrock if (error != 0) 10141544Seschrock goto out; 1015789Sahrens 10161585Sbonwick ASSERT(spa->spa_root_vdev == rvd); 1017789Sahrens ASSERT(spa_guid(spa) == pool_guid); 1018789Sahrens 1019789Sahrens /* 1020789Sahrens * Try to open all vdevs, loading each label in the process. 1021789Sahrens */ 10224070Smc142369 error = vdev_open(rvd); 10234070Smc142369 if (error != 0) 10241544Seschrock goto out; 1025789Sahrens 1026789Sahrens /* 10271986Seschrock * Validate the labels for all leaf vdevs. We need to grab the config 10281986Seschrock * lock because all label I/O is done with the ZIO_FLAG_CONFIG_HELD 10291986Seschrock * flag. 
10301986Seschrock */ 10311986Seschrock spa_config_enter(spa, RW_READER, FTAG); 10321986Seschrock error = vdev_validate(rvd); 10331986Seschrock spa_config_exit(spa, FTAG); 10341986Seschrock 10354070Smc142369 if (error != 0) 10361986Seschrock goto out; 10371986Seschrock 10381986Seschrock if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { 10391986Seschrock error = ENXIO; 10401986Seschrock goto out; 10411986Seschrock } 10421986Seschrock 10431986Seschrock /* 1044789Sahrens * Find the best uberblock. 1045789Sahrens */ 1046789Sahrens bzero(ub, sizeof (uberblock_t)); 1047789Sahrens 1048789Sahrens zio = zio_root(spa, NULL, NULL, 1049789Sahrens ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE); 1050789Sahrens vdev_uberblock_load(zio, rvd, ub); 1051789Sahrens error = zio_wait(zio); 1052789Sahrens 1053789Sahrens /* 1054789Sahrens * If we weren't able to find a single valid uberblock, return failure. 1055789Sahrens */ 1056789Sahrens if (ub->ub_txg == 0) { 10571760Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 10581760Seschrock VDEV_AUX_CORRUPT_DATA); 10591544Seschrock error = ENXIO; 10601544Seschrock goto out; 10611544Seschrock } 10621544Seschrock 10631544Seschrock /* 10641544Seschrock * If the pool is newer than the code, we can't open it. 10651544Seschrock */ 10664577Sahrens if (ub->ub_version > SPA_VERSION) { 10671760Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 10681760Seschrock VDEV_AUX_VERSION_NEWER); 10691544Seschrock error = ENOTSUP; 10701544Seschrock goto out; 1071789Sahrens } 1072789Sahrens 1073789Sahrens /* 1074789Sahrens * If the vdev guid sum doesn't match the uberblock, we have an 1075789Sahrens * incomplete configuration. 
1076789Sahrens */ 10771732Sbonwick if (rvd->vdev_guid_sum != ub->ub_guid_sum && mosconfig) { 10781544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 10791544Seschrock VDEV_AUX_BAD_GUID_SUM); 10801544Seschrock error = ENXIO; 10811544Seschrock goto out; 1082789Sahrens } 1083789Sahrens 1084789Sahrens /* 1085789Sahrens * Initialize internal SPA structures. 1086789Sahrens */ 1087789Sahrens spa->spa_state = POOL_STATE_ACTIVE; 1088789Sahrens spa->spa_ubsync = spa->spa_uberblock; 1089789Sahrens spa->spa_first_txg = spa_last_synced_txg(spa) + 1; 10901544Seschrock error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool); 10911544Seschrock if (error) { 10921544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 10931544Seschrock VDEV_AUX_CORRUPT_DATA); 10941544Seschrock goto out; 10951544Seschrock } 1096789Sahrens spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset; 1097789Sahrens 10981544Seschrock if (zap_lookup(spa->spa_meta_objset, 1099789Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, 11001544Seschrock sizeof (uint64_t), 1, &spa->spa_config_object) != 0) { 11011544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 11021544Seschrock VDEV_AUX_CORRUPT_DATA); 11031544Seschrock error = EIO; 11041544Seschrock goto out; 11051544Seschrock } 1106789Sahrens 1107789Sahrens if (!mosconfig) { 11082082Seschrock nvlist_t *newconfig; 11093975Sek110237 uint64_t hostid; 11102082Seschrock 11112082Seschrock if (load_nvlist(spa, spa->spa_config_object, &newconfig) != 0) { 11121544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 11131544Seschrock VDEV_AUX_CORRUPT_DATA); 11141544Seschrock error = EIO; 11151544Seschrock goto out; 11161544Seschrock } 1117789Sahrens 11183975Sek110237 if (nvlist_lookup_uint64(newconfig, ZPOOL_CONFIG_HOSTID, 11193975Sek110237 &hostid) == 0) { 11203975Sek110237 char *hostname; 11213975Sek110237 unsigned long myhostid = 0; 11223975Sek110237 11233975Sek110237 VERIFY(nvlist_lookup_string(newconfig, 
11243975Sek110237 ZPOOL_CONFIG_HOSTNAME, &hostname) == 0); 11253975Sek110237 11263975Sek110237 (void) ddi_strtoul(hw_serial, NULL, 10, &myhostid); 11274178Slling if (hostid != 0 && myhostid != 0 && 11284178Slling (unsigned long)hostid != myhostid) { 11293975Sek110237 cmn_err(CE_WARN, "pool '%s' could not be " 11303975Sek110237 "loaded as it was last accessed by " 11313975Sek110237 "another system (host: %s hostid: 0x%lx). " 11323975Sek110237 "See: http://www.sun.com/msg/ZFS-8000-EY", 11333975Sek110237 spa->spa_name, hostname, 11343975Sek110237 (unsigned long)hostid); 11353975Sek110237 error = EBADF; 11363975Sek110237 goto out; 11373975Sek110237 } 11383975Sek110237 } 11393975Sek110237 1140789Sahrens spa_config_set(spa, newconfig); 1141789Sahrens spa_unload(spa); 1142789Sahrens spa_deactivate(spa); 1143789Sahrens spa_activate(spa); 1144789Sahrens 11451544Seschrock return (spa_load(spa, newconfig, state, B_TRUE)); 11461544Seschrock } 11471544Seschrock 11481544Seschrock if (zap_lookup(spa->spa_meta_objset, 11491544Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, 11501544Seschrock sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj) != 0) { 11511544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 11521544Seschrock VDEV_AUX_CORRUPT_DATA); 11531544Seschrock error = EIO; 11541544Seschrock goto out; 1155789Sahrens } 1156789Sahrens 11571544Seschrock /* 11582082Seschrock * Load the bit that tells us to use the new accounting function 11592082Seschrock * (raid-z deflation). If we have an older pool, this will not 11602082Seschrock * be present. 
11612082Seschrock */ 11622082Seschrock error = zap_lookup(spa->spa_meta_objset, 11632082Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 11642082Seschrock sizeof (uint64_t), 1, &spa->spa_deflate); 11652082Seschrock if (error != 0 && error != ENOENT) { 11662082Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 11672082Seschrock VDEV_AUX_CORRUPT_DATA); 11682082Seschrock error = EIO; 11692082Seschrock goto out; 11702082Seschrock } 11712082Seschrock 11722082Seschrock /* 11731544Seschrock * Load the persistent error log. If we have an older pool, this will 11741544Seschrock * not be present. 11751544Seschrock */ 11761544Seschrock error = zap_lookup(spa->spa_meta_objset, 11771544Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_LAST, 11781544Seschrock sizeof (uint64_t), 1, &spa->spa_errlog_last); 11791807Sbonwick if (error != 0 && error != ENOENT) { 11801544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 11811544Seschrock VDEV_AUX_CORRUPT_DATA); 11821544Seschrock error = EIO; 11831544Seschrock goto out; 11841544Seschrock } 11851544Seschrock 11861544Seschrock error = zap_lookup(spa->spa_meta_objset, 11871544Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_SCRUB, 11881544Seschrock sizeof (uint64_t), 1, &spa->spa_errlog_scrub); 11891544Seschrock if (error != 0 && error != ENOENT) { 11901544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 11911544Seschrock VDEV_AUX_CORRUPT_DATA); 11921544Seschrock error = EIO; 11931544Seschrock goto out; 11941544Seschrock } 1195789Sahrens 1196789Sahrens /* 11972926Sek110237 * Load the history object. If we have an older pool, this 11982926Sek110237 * will not be present. 
11992926Sek110237 */ 12002926Sek110237 error = zap_lookup(spa->spa_meta_objset, 12012926Sek110237 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_HISTORY, 12022926Sek110237 sizeof (uint64_t), 1, &spa->spa_history); 12032926Sek110237 if (error != 0 && error != ENOENT) { 12042926Sek110237 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 12052926Sek110237 VDEV_AUX_CORRUPT_DATA); 12062926Sek110237 error = EIO; 12072926Sek110237 goto out; 12082926Sek110237 } 12092926Sek110237 12102926Sek110237 /* 12112082Seschrock * Load any hot spares for this pool. 12122082Seschrock */ 12132082Seschrock error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 12145450Sbrendan DMU_POOL_SPARES, sizeof (uint64_t), 1, &spa->spa_spares.sav_object); 12152082Seschrock if (error != 0 && error != ENOENT) { 12162082Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 12172082Seschrock VDEV_AUX_CORRUPT_DATA); 12182082Seschrock error = EIO; 12192082Seschrock goto out; 12202082Seschrock } 12212082Seschrock if (error == 0) { 12224577Sahrens ASSERT(spa_version(spa) >= SPA_VERSION_SPARES); 12235450Sbrendan if (load_nvlist(spa, spa->spa_spares.sav_object, 12245450Sbrendan &spa->spa_spares.sav_config) != 0) { 12252082Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 12262082Seschrock VDEV_AUX_CORRUPT_DATA); 12272082Seschrock error = EIO; 12282082Seschrock goto out; 12292082Seschrock } 12302082Seschrock 12312082Seschrock spa_config_enter(spa, RW_WRITER, FTAG); 12322082Seschrock spa_load_spares(spa); 12332082Seschrock spa_config_exit(spa, FTAG); 12342082Seschrock } 12352082Seschrock 12365450Sbrendan /* 12375450Sbrendan * Load any level 2 ARC devices for this pool. 
12385450Sbrendan */ 12395450Sbrendan error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 12405450Sbrendan DMU_POOL_L2CACHE, sizeof (uint64_t), 1, 12415450Sbrendan &spa->spa_l2cache.sav_object); 12425450Sbrendan if (error != 0 && error != ENOENT) { 12435450Sbrendan vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 12445450Sbrendan VDEV_AUX_CORRUPT_DATA); 12455450Sbrendan error = EIO; 12465450Sbrendan goto out; 12475450Sbrendan } 12485450Sbrendan if (error == 0) { 12495450Sbrendan ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE); 12505450Sbrendan if (load_nvlist(spa, spa->spa_l2cache.sav_object, 12515450Sbrendan &spa->spa_l2cache.sav_config) != 0) { 12525450Sbrendan vdev_set_state(rvd, B_TRUE, 12535450Sbrendan VDEV_STATE_CANT_OPEN, 12545450Sbrendan VDEV_AUX_CORRUPT_DATA); 12555450Sbrendan error = EIO; 12565450Sbrendan goto out; 12575450Sbrendan } 12585450Sbrendan 12595450Sbrendan spa_config_enter(spa, RW_WRITER, FTAG); 12605450Sbrendan spa_load_l2cache(spa); 12615450Sbrendan spa_config_exit(spa, FTAG); 12625450Sbrendan } 12635450Sbrendan 12645094Slling spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); 12654543Smarks 12663912Slling error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 12673912Slling DMU_POOL_PROPS, sizeof (uint64_t), 1, &spa->spa_pool_props_object); 12683912Slling 12693912Slling if (error && error != ENOENT) { 12703912Slling vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 12713912Slling VDEV_AUX_CORRUPT_DATA); 12723912Slling error = EIO; 12733912Slling goto out; 12743912Slling } 12753912Slling 12763912Slling if (error == 0) { 12773912Slling (void) zap_lookup(spa->spa_meta_objset, 12783912Slling spa->spa_pool_props_object, 12794451Seschrock zpool_prop_to_name(ZPOOL_PROP_BOOTFS), 12803912Slling sizeof (uint64_t), 1, &spa->spa_bootfs); 12814451Seschrock (void) zap_lookup(spa->spa_meta_objset, 12824451Seschrock spa->spa_pool_props_object, 12834451Seschrock zpool_prop_to_name(ZPOOL_PROP_AUTOREPLACE), 
12844451Seschrock sizeof (uint64_t), 1, &autoreplace); 12854543Smarks (void) zap_lookup(spa->spa_meta_objset, 12864543Smarks spa->spa_pool_props_object, 12874543Smarks zpool_prop_to_name(ZPOOL_PROP_DELEGATION), 12884543Smarks sizeof (uint64_t), 1, &spa->spa_delegation); 12895329Sgw25295 (void) zap_lookup(spa->spa_meta_objset, 12905329Sgw25295 spa->spa_pool_props_object, 12915329Sgw25295 zpool_prop_to_name(ZPOOL_PROP_FAILUREMODE), 12925329Sgw25295 sizeof (uint64_t), 1, &spa->spa_failmode); 12933912Slling } 12943912Slling 12952082Seschrock /* 12964451Seschrock * If the 'autoreplace' property is set, then post a resource notifying 12974451Seschrock * the ZFS DE that it should not issue any faults for unopenable 12984451Seschrock * devices. We also iterate over the vdevs, and post a sysevent for any 12994451Seschrock * unopenable vdevs so that the normal autoreplace handler can take 13004451Seschrock * over. 13014451Seschrock */ 13025756Seschrock if (autoreplace && state != SPA_LOAD_TRYIMPORT) 13034451Seschrock spa_check_removed(spa->spa_root_vdev); 13044451Seschrock 13054451Seschrock /* 13061986Seschrock * Load the vdev state for all toplevel vdevs. 1307789Sahrens */ 13081986Seschrock vdev_load(rvd); 1309789Sahrens 1310789Sahrens /* 1311789Sahrens * Propagate the leaf DTLs we just loaded all the way up the tree. 1312789Sahrens */ 13131544Seschrock spa_config_enter(spa, RW_WRITER, FTAG); 1314789Sahrens vdev_dtl_reassess(rvd, 0, 0, B_FALSE); 13151544Seschrock spa_config_exit(spa, FTAG); 1316789Sahrens 1317789Sahrens /* 1318789Sahrens * Check the state of the root vdev. If it can't be opened, it 1319789Sahrens * indicates one or more toplevel vdevs are faulted. 
1320789Sahrens */ 13211544Seschrock if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { 13221544Seschrock error = ENXIO; 13231544Seschrock goto out; 13241544Seschrock } 1325789Sahrens 13261544Seschrock if ((spa_mode & FWRITE) && state != SPA_LOAD_TRYIMPORT) { 13271635Sbonwick dmu_tx_t *tx; 13281635Sbonwick int need_update = B_FALSE; 13291585Sbonwick int c; 13301601Sbonwick 13311635Sbonwick /* 13321635Sbonwick * Claim log blocks that haven't been committed yet. 13331635Sbonwick * This must all happen in a single txg. 13341635Sbonwick */ 13351601Sbonwick tx = dmu_tx_create_assigned(spa_get_dsl(spa), 1336789Sahrens spa_first_txg(spa)); 13372417Sahrens (void) dmu_objset_find(spa->spa_name, 13382417Sahrens zil_claim, tx, DS_FIND_CHILDREN); 1339789Sahrens dmu_tx_commit(tx); 1340789Sahrens 1341789Sahrens spa->spa_sync_on = B_TRUE; 1342789Sahrens txg_sync_start(spa->spa_dsl_pool); 1343789Sahrens 1344789Sahrens /* 1345789Sahrens * Wait for all claims to sync. 1346789Sahrens */ 1347789Sahrens txg_wait_synced(spa->spa_dsl_pool, 0); 13481585Sbonwick 13491585Sbonwick /* 13501635Sbonwick * If the config cache is stale, or we have uninitialized 13511635Sbonwick * metaslabs (see spa_vdev_add()), then update the config. 13521585Sbonwick */ 13531635Sbonwick if (config_cache_txg != spa->spa_config_txg || 13541635Sbonwick state == SPA_LOAD_IMPORT) 13551635Sbonwick need_update = B_TRUE; 13561635Sbonwick 13571635Sbonwick for (c = 0; c < rvd->vdev_children; c++) 13581635Sbonwick if (rvd->vdev_child[c]->vdev_ms_array == 0) 13591635Sbonwick need_update = B_TRUE; 13601585Sbonwick 13611585Sbonwick /* 13621635Sbonwick * Update the config cache asychronously in case we're the 13631635Sbonwick * root pool, in which case the config cache isn't writable yet. 
13641585Sbonwick */ 13651635Sbonwick if (need_update) 13661635Sbonwick spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); 1367789Sahrens } 1368789Sahrens 13691544Seschrock error = 0; 13701544Seschrock out: 13712082Seschrock if (error && error != EBADF) 13721544Seschrock zfs_ereport_post(FM_EREPORT_ZFS_POOL, spa, NULL, NULL, 0, 0); 13731544Seschrock spa->spa_load_state = SPA_LOAD_NONE; 13741544Seschrock spa->spa_ena = 0; 13751544Seschrock 13761544Seschrock return (error); 1377789Sahrens } 1378789Sahrens 1379789Sahrens /* 1380789Sahrens * Pool Open/Import 1381789Sahrens * 1382789Sahrens * The import case is identical to an open except that the configuration is sent 1383789Sahrens * down from userland, instead of grabbed from the configuration cache. For the 1384789Sahrens * case of an open, the pool configuration will exist in the 13854451Seschrock * POOL_STATE_UNINITIALIZED state. 1386789Sahrens * 1387789Sahrens * The stats information (gen/count/ustats) is used to gather vdev statistics at 1388789Sahrens * the same time open the pool, without having to keep around the spa_t in some 1389789Sahrens * ambiguous state. 1390789Sahrens */ 1391789Sahrens static int 1392789Sahrens spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t **config) 1393789Sahrens { 1394789Sahrens spa_t *spa; 1395789Sahrens int error; 1396789Sahrens int loaded = B_FALSE; 1397789Sahrens int locked = B_FALSE; 1398789Sahrens 1399789Sahrens *spapp = NULL; 1400789Sahrens 1401789Sahrens /* 1402789Sahrens * As disgusting as this is, we need to support recursive calls to this 1403789Sahrens * function because dsl_dir_open() is called during spa_load(), and ends 1404789Sahrens * up calling spa_open() again. The real fix is to figure out how to 1405789Sahrens * avoid dsl_dir_open() calling this in the first place. 
1406789Sahrens */ 1407789Sahrens if (mutex_owner(&spa_namespace_lock) != curthread) { 1408789Sahrens mutex_enter(&spa_namespace_lock); 1409789Sahrens locked = B_TRUE; 1410789Sahrens } 1411789Sahrens 1412789Sahrens if ((spa = spa_lookup(pool)) == NULL) { 1413789Sahrens if (locked) 1414789Sahrens mutex_exit(&spa_namespace_lock); 1415789Sahrens return (ENOENT); 1416789Sahrens } 1417789Sahrens if (spa->spa_state == POOL_STATE_UNINITIALIZED) { 1418789Sahrens 1419789Sahrens spa_activate(spa); 1420789Sahrens 14211635Sbonwick error = spa_load(spa, spa->spa_config, SPA_LOAD_OPEN, B_FALSE); 1422789Sahrens 1423789Sahrens if (error == EBADF) { 1424789Sahrens /* 14251986Seschrock * If vdev_validate() returns failure (indicated by 14261986Seschrock * EBADF), it indicates that one of the vdevs indicates 14271986Seschrock * that the pool has been exported or destroyed. If 14281986Seschrock * this is the case, the config cache is out of sync and 14291986Seschrock * we should remove the pool from the namespace. 1430789Sahrens */ 14312082Seschrock zfs_post_ok(spa, NULL); 1432789Sahrens spa_unload(spa); 1433789Sahrens spa_deactivate(spa); 1434789Sahrens spa_remove(spa); 1435789Sahrens spa_config_sync(); 1436789Sahrens if (locked) 1437789Sahrens mutex_exit(&spa_namespace_lock); 1438789Sahrens return (ENOENT); 14391544Seschrock } 14401544Seschrock 14411544Seschrock if (error) { 1442789Sahrens /* 1443789Sahrens * We can't open the pool, but we still have useful 1444789Sahrens * information: the state of each vdev after the 1445789Sahrens * attempted vdev_open(). Return this to the user. 
1446789Sahrens */ 14471635Sbonwick if (config != NULL && spa->spa_root_vdev != NULL) { 14481635Sbonwick spa_config_enter(spa, RW_READER, FTAG); 1449789Sahrens *config = spa_config_generate(spa, NULL, -1ULL, 1450789Sahrens B_TRUE); 14511635Sbonwick spa_config_exit(spa, FTAG); 14521635Sbonwick } 1453789Sahrens spa_unload(spa); 1454789Sahrens spa_deactivate(spa); 14551544Seschrock spa->spa_last_open_failed = B_TRUE; 1456789Sahrens if (locked) 1457789Sahrens mutex_exit(&spa_namespace_lock); 1458789Sahrens *spapp = NULL; 1459789Sahrens return (error); 14601544Seschrock } else { 14611544Seschrock zfs_post_ok(spa, NULL); 14621544Seschrock spa->spa_last_open_failed = B_FALSE; 1463789Sahrens } 1464789Sahrens 1465789Sahrens loaded = B_TRUE; 1466789Sahrens } 1467789Sahrens 1468789Sahrens spa_open_ref(spa, tag); 14694451Seschrock 14704451Seschrock /* 14714451Seschrock * If we just loaded the pool, resilver anything that's out of date. 14724451Seschrock */ 14734451Seschrock if (loaded && (spa_mode & FWRITE)) 14744451Seschrock VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0); 14754451Seschrock 1476789Sahrens if (locked) 1477789Sahrens mutex_exit(&spa_namespace_lock); 1478789Sahrens 1479789Sahrens *spapp = spa; 1480789Sahrens 1481789Sahrens if (config != NULL) { 14821544Seschrock spa_config_enter(spa, RW_READER, FTAG); 1483789Sahrens *config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); 14841544Seschrock spa_config_exit(spa, FTAG); 1485789Sahrens } 1486789Sahrens 1487789Sahrens return (0); 1488789Sahrens } 1489789Sahrens 1490789Sahrens int 1491789Sahrens spa_open(const char *name, spa_t **spapp, void *tag) 1492789Sahrens { 1493789Sahrens return (spa_open_common(name, spapp, tag, NULL)); 1494789Sahrens } 1495789Sahrens 14961544Seschrock /* 14971544Seschrock * Lookup the given spa_t, incrementing the inject count in the process, 14981544Seschrock * preventing it from being exported or destroyed. 
14991544Seschrock */ 15001544Seschrock spa_t * 15011544Seschrock spa_inject_addref(char *name) 15021544Seschrock { 15031544Seschrock spa_t *spa; 15041544Seschrock 15051544Seschrock mutex_enter(&spa_namespace_lock); 15061544Seschrock if ((spa = spa_lookup(name)) == NULL) { 15071544Seschrock mutex_exit(&spa_namespace_lock); 15081544Seschrock return (NULL); 15091544Seschrock } 15101544Seschrock spa->spa_inject_ref++; 15111544Seschrock mutex_exit(&spa_namespace_lock); 15121544Seschrock 15131544Seschrock return (spa); 15141544Seschrock } 15151544Seschrock 15161544Seschrock void 15171544Seschrock spa_inject_delref(spa_t *spa) 15181544Seschrock { 15191544Seschrock mutex_enter(&spa_namespace_lock); 15201544Seschrock spa->spa_inject_ref--; 15211544Seschrock mutex_exit(&spa_namespace_lock); 15221544Seschrock } 15231544Seschrock 15245450Sbrendan /* 15255450Sbrendan * Add spares device information to the nvlist. 15265450Sbrendan */ 15272082Seschrock static void 15282082Seschrock spa_add_spares(spa_t *spa, nvlist_t *config) 15292082Seschrock { 15302082Seschrock nvlist_t **spares; 15312082Seschrock uint_t i, nspares; 15322082Seschrock nvlist_t *nvroot; 15332082Seschrock uint64_t guid; 15342082Seschrock vdev_stat_t *vs; 15352082Seschrock uint_t vsc; 15363377Seschrock uint64_t pool; 15372082Seschrock 15385450Sbrendan if (spa->spa_spares.sav_count == 0) 15392082Seschrock return; 15402082Seschrock 15412082Seschrock VERIFY(nvlist_lookup_nvlist(config, 15422082Seschrock ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 15435450Sbrendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 15442082Seschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 15452082Seschrock if (nspares != 0) { 15462082Seschrock VERIFY(nvlist_add_nvlist_array(nvroot, 15472082Seschrock ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 15482082Seschrock VERIFY(nvlist_lookup_nvlist_array(nvroot, 15492082Seschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 15502082Seschrock 15512082Seschrock /* 15522082Seschrock 
* Go through and find any spares which have since been 15532082Seschrock * repurposed as an active spare. If this is the case, update 15542082Seschrock * their status appropriately. 15552082Seschrock */ 15562082Seschrock for (i = 0; i < nspares; i++) { 15572082Seschrock VERIFY(nvlist_lookup_uint64(spares[i], 15582082Seschrock ZPOOL_CONFIG_GUID, &guid) == 0); 15593377Seschrock if (spa_spare_exists(guid, &pool) && pool != 0ULL) { 15602082Seschrock VERIFY(nvlist_lookup_uint64_array( 15612082Seschrock spares[i], ZPOOL_CONFIG_STATS, 15622082Seschrock (uint64_t **)&vs, &vsc) == 0); 15632082Seschrock vs->vs_state = VDEV_STATE_CANT_OPEN; 15642082Seschrock vs->vs_aux = VDEV_AUX_SPARED; 15652082Seschrock } 15662082Seschrock } 15672082Seschrock } 15682082Seschrock } 15692082Seschrock 15705450Sbrendan /* 15715450Sbrendan * Add l2cache device information to the nvlist, including vdev stats. 15725450Sbrendan */ 15735450Sbrendan static void 15745450Sbrendan spa_add_l2cache(spa_t *spa, nvlist_t *config) 15755450Sbrendan { 15765450Sbrendan nvlist_t **l2cache; 15775450Sbrendan uint_t i, j, nl2cache; 15785450Sbrendan nvlist_t *nvroot; 15795450Sbrendan uint64_t guid; 15805450Sbrendan vdev_t *vd; 15815450Sbrendan vdev_stat_t *vs; 15825450Sbrendan uint_t vsc; 15835450Sbrendan 15845450Sbrendan if (spa->spa_l2cache.sav_count == 0) 15855450Sbrendan return; 15865450Sbrendan 15875450Sbrendan spa_config_enter(spa, RW_READER, FTAG); 15885450Sbrendan 15895450Sbrendan VERIFY(nvlist_lookup_nvlist(config, 15905450Sbrendan ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 15915450Sbrendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, 15925450Sbrendan ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 15935450Sbrendan if (nl2cache != 0) { 15945450Sbrendan VERIFY(nvlist_add_nvlist_array(nvroot, 15955450Sbrendan ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); 15965450Sbrendan VERIFY(nvlist_lookup_nvlist_array(nvroot, 15975450Sbrendan ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 
15985450Sbrendan 15995450Sbrendan /* 16005450Sbrendan * Update level 2 cache device stats. 16015450Sbrendan */ 16025450Sbrendan 16035450Sbrendan for (i = 0; i < nl2cache; i++) { 16045450Sbrendan VERIFY(nvlist_lookup_uint64(l2cache[i], 16055450Sbrendan ZPOOL_CONFIG_GUID, &guid) == 0); 16065450Sbrendan 16075450Sbrendan vd = NULL; 16085450Sbrendan for (j = 0; j < spa->spa_l2cache.sav_count; j++) { 16095450Sbrendan if (guid == 16105450Sbrendan spa->spa_l2cache.sav_vdevs[j]->vdev_guid) { 16115450Sbrendan vd = spa->spa_l2cache.sav_vdevs[j]; 16125450Sbrendan break; 16135450Sbrendan } 16145450Sbrendan } 16155450Sbrendan ASSERT(vd != NULL); 16165450Sbrendan 16175450Sbrendan VERIFY(nvlist_lookup_uint64_array(l2cache[i], 16185450Sbrendan ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0); 16195450Sbrendan vdev_get_stats(vd, vs); 16205450Sbrendan } 16215450Sbrendan } 16225450Sbrendan 16235450Sbrendan spa_config_exit(spa, FTAG); 16245450Sbrendan } 16255450Sbrendan 1626789Sahrens int 16271544Seschrock spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen) 1628789Sahrens { 1629789Sahrens int error; 1630789Sahrens spa_t *spa; 1631789Sahrens 1632789Sahrens *config = NULL; 1633789Sahrens error = spa_open_common(name, &spa, FTAG, config); 1634789Sahrens 16352082Seschrock if (spa && *config != NULL) { 16361544Seschrock VERIFY(nvlist_add_uint64(*config, ZPOOL_CONFIG_ERRCOUNT, 16371544Seschrock spa_get_errlog_size(spa)) == 0); 16381544Seschrock 16392082Seschrock spa_add_spares(spa, *config); 16405450Sbrendan spa_add_l2cache(spa, *config); 16412082Seschrock } 16422082Seschrock 16431544Seschrock /* 16441544Seschrock * We want to get the alternate root even for faulted pools, so we cheat 16451544Seschrock * and call spa_lookup() directly. 
16461544Seschrock */ 16471544Seschrock if (altroot) { 16481544Seschrock if (spa == NULL) { 16491544Seschrock mutex_enter(&spa_namespace_lock); 16501544Seschrock spa = spa_lookup(name); 16511544Seschrock if (spa) 16521544Seschrock spa_altroot(spa, altroot, buflen); 16531544Seschrock else 16541544Seschrock altroot[0] = '\0'; 16551544Seschrock spa = NULL; 16561544Seschrock mutex_exit(&spa_namespace_lock); 16571544Seschrock } else { 16581544Seschrock spa_altroot(spa, altroot, buflen); 16591544Seschrock } 16601544Seschrock } 16611544Seschrock 1662789Sahrens if (spa != NULL) 1663789Sahrens spa_close(spa, FTAG); 1664789Sahrens 1665789Sahrens return (error); 1666789Sahrens } 1667789Sahrens 1668789Sahrens /* 16695450Sbrendan * Validate that the auxiliary device array is well formed. We must have an 16705450Sbrendan * array of nvlists, each which describes a valid leaf vdev. If this is an 16715450Sbrendan * import (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be 16725450Sbrendan * specified, as long as they are well-formed. 16732082Seschrock */ 16742082Seschrock static int 16755450Sbrendan spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode, 16765450Sbrendan spa_aux_vdev_t *sav, const char *config, uint64_t version, 16775450Sbrendan vdev_labeltype_t label) 16782082Seschrock { 16795450Sbrendan nvlist_t **dev; 16805450Sbrendan uint_t i, ndev; 16812082Seschrock vdev_t *vd; 16822082Seschrock int error; 16832082Seschrock 16842082Seschrock /* 16855450Sbrendan * It's acceptable to have no devs specified. 16862082Seschrock */ 16875450Sbrendan if (nvlist_lookup_nvlist_array(nvroot, config, &dev, &ndev) != 0) 16882082Seschrock return (0); 16892082Seschrock 16905450Sbrendan if (ndev == 0) 16912082Seschrock return (EINVAL); 16922082Seschrock 16932082Seschrock /* 16945450Sbrendan * Make sure the pool is formatted with a version that supports this 16955450Sbrendan * device type. 
16962082Seschrock */ 16975450Sbrendan if (spa_version(spa) < version) 16982082Seschrock return (ENOTSUP); 16992082Seschrock 17003377Seschrock /* 17015450Sbrendan * Set the pending device list so we correctly handle device in-use 17023377Seschrock * checking. 17033377Seschrock */ 17045450Sbrendan sav->sav_pending = dev; 17055450Sbrendan sav->sav_npending = ndev; 17065450Sbrendan 17075450Sbrendan for (i = 0; i < ndev; i++) { 17085450Sbrendan if ((error = spa_config_parse(spa, &vd, dev[i], NULL, 0, 17092082Seschrock mode)) != 0) 17103377Seschrock goto out; 17112082Seschrock 17122082Seschrock if (!vd->vdev_ops->vdev_op_leaf) { 17132082Seschrock vdev_free(vd); 17143377Seschrock error = EINVAL; 17153377Seschrock goto out; 17162082Seschrock } 17172082Seschrock 17185450Sbrendan /* 17195450Sbrendan * The L2ARC currently only supports disk devices. 17205450Sbrendan */ 17215450Sbrendan if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) && 17225450Sbrendan strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) { 17235450Sbrendan error = ENOTBLK; 17245450Sbrendan goto out; 17255450Sbrendan } 17265450Sbrendan 17272082Seschrock vd->vdev_top = vd; 17283377Seschrock 17293377Seschrock if ((error = vdev_open(vd)) == 0 && 17305450Sbrendan (error = vdev_label_init(vd, crtxg, label)) == 0) { 17315450Sbrendan VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID, 17323377Seschrock vd->vdev_guid) == 0); 17332082Seschrock } 17342082Seschrock 17352082Seschrock vdev_free(vd); 17363377Seschrock 17375450Sbrendan if (error && 17385450Sbrendan (mode != VDEV_ALLOC_SPARE && mode != VDEV_ALLOC_L2CACHE)) 17393377Seschrock goto out; 17403377Seschrock else 17413377Seschrock error = 0; 17422082Seschrock } 17432082Seschrock 17443377Seschrock out: 17455450Sbrendan sav->sav_pending = NULL; 17465450Sbrendan sav->sav_npending = 0; 17473377Seschrock return (error); 17482082Seschrock } 17492082Seschrock 17505450Sbrendan static int 17515450Sbrendan spa_validate_aux(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int 
mode) 17525450Sbrendan { 17535450Sbrendan int error; 17545450Sbrendan 17555450Sbrendan if ((error = spa_validate_aux_devs(spa, nvroot, crtxg, mode, 17565450Sbrendan &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES, 17575450Sbrendan VDEV_LABEL_SPARE)) != 0) { 17585450Sbrendan return (error); 17595450Sbrendan } 17605450Sbrendan 17615450Sbrendan return (spa_validate_aux_devs(spa, nvroot, crtxg, mode, 17625450Sbrendan &spa->spa_l2cache, ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE, 17635450Sbrendan VDEV_LABEL_L2CACHE)); 17645450Sbrendan } 17655450Sbrendan 17665450Sbrendan static void 17675450Sbrendan spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs, 17685450Sbrendan const char *config) 17695450Sbrendan { 17705450Sbrendan int i; 17715450Sbrendan 17725450Sbrendan if (sav->sav_config != NULL) { 17735450Sbrendan nvlist_t **olddevs; 17745450Sbrendan uint_t oldndevs; 17755450Sbrendan nvlist_t **newdevs; 17765450Sbrendan 17775450Sbrendan /* 17785450Sbrendan * Generate new dev list by concatentating with the 17795450Sbrendan * current dev list. 
17805450Sbrendan */ 17815450Sbrendan VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config, 17825450Sbrendan &olddevs, &oldndevs) == 0); 17835450Sbrendan 17845450Sbrendan newdevs = kmem_alloc(sizeof (void *) * 17855450Sbrendan (ndevs + oldndevs), KM_SLEEP); 17865450Sbrendan for (i = 0; i < oldndevs; i++) 17875450Sbrendan VERIFY(nvlist_dup(olddevs[i], &newdevs[i], 17885450Sbrendan KM_SLEEP) == 0); 17895450Sbrendan for (i = 0; i < ndevs; i++) 17905450Sbrendan VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs], 17915450Sbrendan KM_SLEEP) == 0); 17925450Sbrendan 17935450Sbrendan VERIFY(nvlist_remove(sav->sav_config, config, 17945450Sbrendan DATA_TYPE_NVLIST_ARRAY) == 0); 17955450Sbrendan 17965450Sbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, 17975450Sbrendan config, newdevs, ndevs + oldndevs) == 0); 17985450Sbrendan for (i = 0; i < oldndevs + ndevs; i++) 17995450Sbrendan nvlist_free(newdevs[i]); 18005450Sbrendan kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *)); 18015450Sbrendan } else { 18025450Sbrendan /* 18035450Sbrendan * Generate a new dev list. 
18045450Sbrendan */ 18055450Sbrendan VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME, 18065450Sbrendan KM_SLEEP) == 0); 18075450Sbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, config, 18085450Sbrendan devs, ndevs) == 0); 18095450Sbrendan } 18105450Sbrendan } 18115450Sbrendan 18125450Sbrendan /* 18135450Sbrendan * Stop and drop level 2 ARC devices 18145450Sbrendan */ 18155450Sbrendan void 18165450Sbrendan spa_l2cache_drop(spa_t *spa) 18175450Sbrendan { 18185450Sbrendan vdev_t *vd; 18195450Sbrendan int i; 18205450Sbrendan spa_aux_vdev_t *sav = &spa->spa_l2cache; 18215450Sbrendan 18225450Sbrendan for (i = 0; i < sav->sav_count; i++) { 18235450Sbrendan uint64_t pool; 18245450Sbrendan 18255450Sbrendan vd = sav->sav_vdevs[i]; 18265450Sbrendan ASSERT(vd != NULL); 18275450Sbrendan 18285450Sbrendan if (spa_mode & FWRITE && 18295450Sbrendan spa_l2cache_exists(vd->vdev_guid, &pool) && pool != 0ULL) { 18305450Sbrendan l2arc_remove_vdev(vd); 18315450Sbrendan } 18325450Sbrendan if (vd->vdev_isl2cache) 18335450Sbrendan spa_l2cache_remove(vd); 18345450Sbrendan vdev_clear_stats(vd); 18355450Sbrendan (void) vdev_close(vd); 18365450Sbrendan } 18375450Sbrendan } 18385450Sbrendan 18392082Seschrock /* 1840789Sahrens * Pool Creation 1841789Sahrens */ 1842789Sahrens int 18435094Slling spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, 18444715Sek110237 const char *history_str) 1845789Sahrens { 1846789Sahrens spa_t *spa; 18475094Slling char *altroot = NULL; 18481635Sbonwick vdev_t *rvd; 1849789Sahrens dsl_pool_t *dp; 1850789Sahrens dmu_tx_t *tx; 18512082Seschrock int c, error = 0; 1852789Sahrens uint64_t txg = TXG_INITIAL; 18535450Sbrendan nvlist_t **spares, **l2cache; 18545450Sbrendan uint_t nspares, nl2cache; 18555094Slling uint64_t version; 1856789Sahrens 1857789Sahrens /* 1858789Sahrens * If this pool already exists, return failure. 
1859789Sahrens */ 1860789Sahrens mutex_enter(&spa_namespace_lock); 1861789Sahrens if (spa_lookup(pool) != NULL) { 1862789Sahrens mutex_exit(&spa_namespace_lock); 1863789Sahrens return (EEXIST); 1864789Sahrens } 1865789Sahrens 1866789Sahrens /* 1867789Sahrens * Allocate a new spa_t structure. 1868789Sahrens */ 18695094Slling (void) nvlist_lookup_string(props, 18705094Slling zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); 18711635Sbonwick spa = spa_add(pool, altroot); 1872789Sahrens spa_activate(spa); 1873789Sahrens 1874789Sahrens spa->spa_uberblock.ub_txg = txg - 1; 18755094Slling 18765094Slling if (props && (error = spa_prop_validate(spa, props))) { 18775094Slling spa_unload(spa); 18785094Slling spa_deactivate(spa); 18795094Slling spa_remove(spa); 18805094Slling return (error); 18815094Slling } 18825094Slling 18835094Slling if (nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), 18845094Slling &version) != 0) 18855094Slling version = SPA_VERSION; 18865094Slling ASSERT(version <= SPA_VERSION); 18875094Slling spa->spa_uberblock.ub_version = version; 1888789Sahrens spa->spa_ubsync = spa->spa_uberblock; 1889789Sahrens 18901635Sbonwick /* 18911635Sbonwick * Create the root vdev. 
18921635Sbonwick */ 18931635Sbonwick spa_config_enter(spa, RW_WRITER, FTAG); 18941635Sbonwick 18952082Seschrock error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD); 18962082Seschrock 18972082Seschrock ASSERT(error != 0 || rvd != NULL); 18982082Seschrock ASSERT(error != 0 || spa->spa_root_vdev == rvd); 18992082Seschrock 19005913Sperrin if (error == 0 && !zfs_allocatable_devs(nvroot)) 19011635Sbonwick error = EINVAL; 19022082Seschrock 19032082Seschrock if (error == 0 && 19042082Seschrock (error = vdev_create(rvd, txg, B_FALSE)) == 0 && 19055450Sbrendan (error = spa_validate_aux(spa, nvroot, txg, 19062082Seschrock VDEV_ALLOC_ADD)) == 0) { 19072082Seschrock for (c = 0; c < rvd->vdev_children; c++) 19082082Seschrock vdev_init(rvd->vdev_child[c], txg); 19092082Seschrock vdev_config_dirty(rvd); 19101635Sbonwick } 19111635Sbonwick 19121635Sbonwick spa_config_exit(spa, FTAG); 1913789Sahrens 19142082Seschrock if (error != 0) { 1915789Sahrens spa_unload(spa); 1916789Sahrens spa_deactivate(spa); 1917789Sahrens spa_remove(spa); 1918789Sahrens mutex_exit(&spa_namespace_lock); 1919789Sahrens return (error); 1920789Sahrens } 1921789Sahrens 19222082Seschrock /* 19232082Seschrock * Get the list of spares, if specified. 19242082Seschrock */ 19252082Seschrock if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 19262082Seschrock &spares, &nspares) == 0) { 19275450Sbrendan VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME, 19282082Seschrock KM_SLEEP) == 0); 19295450Sbrendan VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, 19302082Seschrock ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 19312082Seschrock spa_config_enter(spa, RW_WRITER, FTAG); 19322082Seschrock spa_load_spares(spa); 19332082Seschrock spa_config_exit(spa, FTAG); 19345450Sbrendan spa->spa_spares.sav_sync = B_TRUE; 19355450Sbrendan } 19365450Sbrendan 19375450Sbrendan /* 19385450Sbrendan * Get the list of level 2 cache devices, if specified. 
19395450Sbrendan */ 19405450Sbrendan if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, 19415450Sbrendan &l2cache, &nl2cache) == 0) { 19425450Sbrendan VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, 19435450Sbrendan NV_UNIQUE_NAME, KM_SLEEP) == 0); 19445450Sbrendan VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, 19455450Sbrendan ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); 19465450Sbrendan spa_config_enter(spa, RW_WRITER, FTAG); 19475450Sbrendan spa_load_l2cache(spa); 19485450Sbrendan spa_config_exit(spa, FTAG); 19495450Sbrendan spa->spa_l2cache.sav_sync = B_TRUE; 19502082Seschrock } 19512082Seschrock 1952789Sahrens spa->spa_dsl_pool = dp = dsl_pool_create(spa, txg); 1953789Sahrens spa->spa_meta_objset = dp->dp_meta_objset; 1954789Sahrens 1955789Sahrens tx = dmu_tx_create_assigned(dp, txg); 1956789Sahrens 1957789Sahrens /* 1958789Sahrens * Create the pool config object. 1959789Sahrens */ 1960789Sahrens spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset, 1961789Sahrens DMU_OT_PACKED_NVLIST, 1 << 14, 1962789Sahrens DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx); 1963789Sahrens 19641544Seschrock if (zap_add(spa->spa_meta_objset, 1965789Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, 19661544Seschrock sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) { 19671544Seschrock cmn_err(CE_PANIC, "failed to add pool config"); 19681544Seschrock } 1969789Sahrens 19705094Slling /* Newly created pools with the right version are always deflated. */ 19715094Slling if (version >= SPA_VERSION_RAIDZ_DEFLATE) { 19725094Slling spa->spa_deflate = TRUE; 19735094Slling if (zap_add(spa->spa_meta_objset, 19745094Slling DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 19755094Slling sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) { 19765094Slling cmn_err(CE_PANIC, "failed to add deflate"); 19775094Slling } 19782082Seschrock } 19792082Seschrock 1980789Sahrens /* 1981789Sahrens * Create the deferred-free bplist object. 
Turn off compression 1982789Sahrens * because sync-to-convergence takes longer if the blocksize 1983789Sahrens * keeps changing. 1984789Sahrens */ 1985789Sahrens spa->spa_sync_bplist_obj = bplist_create(spa->spa_meta_objset, 1986789Sahrens 1 << 14, tx); 1987789Sahrens dmu_object_set_compress(spa->spa_meta_objset, spa->spa_sync_bplist_obj, 1988789Sahrens ZIO_COMPRESS_OFF, tx); 1989789Sahrens 19901544Seschrock if (zap_add(spa->spa_meta_objset, 1991789Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, 19921544Seschrock sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj, tx) != 0) { 19931544Seschrock cmn_err(CE_PANIC, "failed to add bplist"); 19941544Seschrock } 1995789Sahrens 19962926Sek110237 /* 19972926Sek110237 * Create the pool's history object. 19982926Sek110237 */ 19995094Slling if (version >= SPA_VERSION_ZPOOL_HISTORY) 20005094Slling spa_history_create_obj(spa, tx); 20015094Slling 20025094Slling /* 20035094Slling * Set pool properties. 20045094Slling */ 20055094Slling spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS); 20065094Slling spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); 20075329Sgw25295 spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE); 20085094Slling if (props) 20095094Slling spa_sync_props(spa, props, CRED(), tx); 20102926Sek110237 2011789Sahrens dmu_tx_commit(tx); 2012789Sahrens 2013789Sahrens spa->spa_sync_on = B_TRUE; 2014789Sahrens txg_sync_start(spa->spa_dsl_pool); 2015789Sahrens 2016789Sahrens /* 2017789Sahrens * We explicitly wait for the first transaction to complete so that our 2018789Sahrens * bean counters are appropriately updated. 
2019789Sahrens */ 2020789Sahrens txg_wait_synced(spa->spa_dsl_pool, txg); 2021789Sahrens 2022789Sahrens spa_config_sync(); 2023789Sahrens 20245094Slling if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL) 20254715Sek110237 (void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE); 20264715Sek110237 2027789Sahrens mutex_exit(&spa_namespace_lock); 2028789Sahrens 2029789Sahrens return (0); 2030789Sahrens } 2031789Sahrens 2032789Sahrens /* 2033789Sahrens * Import the given pool into the system. We set up the necessary spa_t and 2034789Sahrens * then call spa_load() to do the dirty work. 2035789Sahrens */ 2036789Sahrens int 20375094Slling spa_import(const char *pool, nvlist_t *config, nvlist_t *props) 2038789Sahrens { 2039789Sahrens spa_t *spa; 20405094Slling char *altroot = NULL; 2041789Sahrens int error; 20422082Seschrock nvlist_t *nvroot; 20435450Sbrendan nvlist_t **spares, **l2cache; 20445450Sbrendan uint_t nspares, nl2cache; 2045789Sahrens 2046789Sahrens /* 2047789Sahrens * If a pool with this name exists, return failure. 2048789Sahrens */ 2049789Sahrens mutex_enter(&spa_namespace_lock); 2050789Sahrens if (spa_lookup(pool) != NULL) { 2051789Sahrens mutex_exit(&spa_namespace_lock); 2052789Sahrens return (EEXIST); 2053789Sahrens } 2054789Sahrens 2055789Sahrens /* 20561635Sbonwick * Create and initialize the spa structure. 2057789Sahrens */ 20585094Slling (void) nvlist_lookup_string(props, 20595094Slling zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); 20601635Sbonwick spa = spa_add(pool, altroot); 2061789Sahrens spa_activate(spa); 2062789Sahrens 2063789Sahrens /* 20641635Sbonwick * Pass off the heavy lifting to spa_load(). 20651732Sbonwick * Pass TRUE for mosconfig because the user-supplied config 20661732Sbonwick * is actually the one to trust when doing an import. 
20671601Sbonwick */ 20681732Sbonwick error = spa_load(spa, config, SPA_LOAD_IMPORT, B_TRUE); 2069789Sahrens 20702082Seschrock spa_config_enter(spa, RW_WRITER, FTAG); 20712082Seschrock /* 20722082Seschrock * Toss any existing sparelist, as it doesn't have any validity anymore, 20732082Seschrock * and conflicts with spa_has_spare(). 20742082Seschrock */ 20755450Sbrendan if (spa->spa_spares.sav_config) { 20765450Sbrendan nvlist_free(spa->spa_spares.sav_config); 20775450Sbrendan spa->spa_spares.sav_config = NULL; 20782082Seschrock spa_load_spares(spa); 20792082Seschrock } 20805450Sbrendan if (spa->spa_l2cache.sav_config) { 20815450Sbrendan nvlist_free(spa->spa_l2cache.sav_config); 20825450Sbrendan spa->spa_l2cache.sav_config = NULL; 20835450Sbrendan spa_load_l2cache(spa); 20845450Sbrendan } 20852082Seschrock 20862082Seschrock VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 20872082Seschrock &nvroot) == 0); 20885450Sbrendan if (error == 0) 20895450Sbrendan error = spa_validate_aux(spa, nvroot, -1ULL, VDEV_ALLOC_SPARE); 20905450Sbrendan if (error == 0) 20915450Sbrendan error = spa_validate_aux(spa, nvroot, -1ULL, 20925450Sbrendan VDEV_ALLOC_L2CACHE); 20932082Seschrock spa_config_exit(spa, FTAG); 20942082Seschrock 20955094Slling if (error != 0 || (props && (error = spa_prop_set(spa, props)))) { 2096789Sahrens spa_unload(spa); 2097789Sahrens spa_deactivate(spa); 2098789Sahrens spa_remove(spa); 2099789Sahrens mutex_exit(&spa_namespace_lock); 2100789Sahrens return (error); 2101789Sahrens } 2102789Sahrens 21031635Sbonwick /* 21045450Sbrendan * Override any spares and level 2 cache devices as specified by 21055450Sbrendan * the user, as these may have correct device names/devids, etc. 
21062082Seschrock */ 21072082Seschrock if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 21082082Seschrock &spares, &nspares) == 0) { 21095450Sbrendan if (spa->spa_spares.sav_config) 21105450Sbrendan VERIFY(nvlist_remove(spa->spa_spares.sav_config, 21112082Seschrock ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0); 21122082Seschrock else 21135450Sbrendan VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, 21142082Seschrock NV_UNIQUE_NAME, KM_SLEEP) == 0); 21155450Sbrendan VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, 21162082Seschrock ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 21172082Seschrock spa_config_enter(spa, RW_WRITER, FTAG); 21182082Seschrock spa_load_spares(spa); 21192082Seschrock spa_config_exit(spa, FTAG); 21205450Sbrendan spa->spa_spares.sav_sync = B_TRUE; 21215450Sbrendan } 21225450Sbrendan if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, 21235450Sbrendan &l2cache, &nl2cache) == 0) { 21245450Sbrendan if (spa->spa_l2cache.sav_config) 21255450Sbrendan VERIFY(nvlist_remove(spa->spa_l2cache.sav_config, 21265450Sbrendan ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0); 21275450Sbrendan else 21285450Sbrendan VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, 21295450Sbrendan NV_UNIQUE_NAME, KM_SLEEP) == 0); 21305450Sbrendan VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, 21315450Sbrendan ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); 21325450Sbrendan spa_config_enter(spa, RW_WRITER, FTAG); 21335450Sbrendan spa_load_l2cache(spa); 21345450Sbrendan spa_config_exit(spa, FTAG); 21355450Sbrendan spa->spa_l2cache.sav_sync = B_TRUE; 21362082Seschrock } 21372082Seschrock 21382082Seschrock /* 21391635Sbonwick * Update the config cache to include the newly-imported pool. 21401635Sbonwick */ 21414627Sck153898 if (spa_mode & FWRITE) 21424627Sck153898 spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); 21431635Sbonwick 2144789Sahrens /* 2145789Sahrens * Resilver anything that's out of date. 
2146789Sahrens */ 2147789Sahrens if (spa_mode & FWRITE) 2148789Sahrens VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0); 2149789Sahrens 21504451Seschrock mutex_exit(&spa_namespace_lock); 21514451Seschrock 2152789Sahrens return (0); 2153789Sahrens } 2154789Sahrens 2155789Sahrens /* 2156789Sahrens * This (illegal) pool name is used when temporarily importing a spa_t in order 2157789Sahrens * to get the vdev stats associated with the imported devices. 2158789Sahrens */ 2159789Sahrens #define TRYIMPORT_NAME "$import" 2160789Sahrens 2161789Sahrens nvlist_t * 2162789Sahrens spa_tryimport(nvlist_t *tryconfig) 2163789Sahrens { 2164789Sahrens nvlist_t *config = NULL; 2165789Sahrens char *poolname; 2166789Sahrens spa_t *spa; 2167789Sahrens uint64_t state; 2168789Sahrens 2169789Sahrens if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname)) 2170789Sahrens return (NULL); 2171789Sahrens 2172789Sahrens if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state)) 2173789Sahrens return (NULL); 2174789Sahrens 21751635Sbonwick /* 21761635Sbonwick * Create and initialize the spa structure. 21771635Sbonwick */ 2178789Sahrens mutex_enter(&spa_namespace_lock); 21791635Sbonwick spa = spa_add(TRYIMPORT_NAME, NULL); 2180789Sahrens spa_activate(spa); 2181789Sahrens 2182789Sahrens /* 21831635Sbonwick * Pass off the heavy lifting to spa_load(). 21841732Sbonwick * Pass TRUE for mosconfig because the user-supplied config 21851732Sbonwick * is actually the one to trust when doing an import. 2186789Sahrens */ 21871732Sbonwick (void) spa_load(spa, tryconfig, SPA_LOAD_TRYIMPORT, B_TRUE); 2188789Sahrens 2189789Sahrens /* 2190789Sahrens * If 'tryconfig' was at least parsable, return the current config. 
2191789Sahrens */ 2192789Sahrens if (spa->spa_root_vdev != NULL) { 21931635Sbonwick spa_config_enter(spa, RW_READER, FTAG); 2194789Sahrens config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); 21951635Sbonwick spa_config_exit(spa, FTAG); 2196789Sahrens VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, 2197789Sahrens poolname) == 0); 2198789Sahrens VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, 2199789Sahrens state) == 0); 22003975Sek110237 VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP, 22013975Sek110237 spa->spa_uberblock.ub_timestamp) == 0); 22022082Seschrock 22032082Seschrock /* 22045450Sbrendan * Add the list of hot spares and level 2 cache devices. 22052082Seschrock */ 22062082Seschrock spa_add_spares(spa, config); 22075450Sbrendan spa_add_l2cache(spa, config); 2208789Sahrens } 2209789Sahrens 2210789Sahrens spa_unload(spa); 2211789Sahrens spa_deactivate(spa); 2212789Sahrens spa_remove(spa); 2213789Sahrens mutex_exit(&spa_namespace_lock); 2214789Sahrens 2215789Sahrens return (config); 2216789Sahrens } 2217789Sahrens 2218789Sahrens /* 2219789Sahrens * Pool export/destroy 2220789Sahrens * 2221789Sahrens * The act of destroying or exporting a pool is very simple. We make sure there 2222789Sahrens * is no more pending I/O and any references to the pool are gone. Then, we 2223789Sahrens * update the pool state and sync all the labels to disk, removing the 2224789Sahrens * configuration from the cache afterwards. 
2225789Sahrens */ 2226789Sahrens static int 22271775Sbillm spa_export_common(char *pool, int new_state, nvlist_t **oldconfig) 2228789Sahrens { 2229789Sahrens spa_t *spa; 2230789Sahrens 22311775Sbillm if (oldconfig) 22321775Sbillm *oldconfig = NULL; 22331775Sbillm 2234789Sahrens if (!(spa_mode & FWRITE)) 2235789Sahrens return (EROFS); 2236789Sahrens 2237789Sahrens mutex_enter(&spa_namespace_lock); 2238789Sahrens if ((spa = spa_lookup(pool)) == NULL) { 2239789Sahrens mutex_exit(&spa_namespace_lock); 2240789Sahrens return (ENOENT); 2241789Sahrens } 2242789Sahrens 2243789Sahrens /* 22441544Seschrock * Put a hold on the pool, drop the namespace lock, stop async tasks, 22451544Seschrock * reacquire the namespace lock, and see if we can export. 22461544Seschrock */ 22471544Seschrock spa_open_ref(spa, FTAG); 22481544Seschrock mutex_exit(&spa_namespace_lock); 22491544Seschrock spa_async_suspend(spa); 22501544Seschrock mutex_enter(&spa_namespace_lock); 22511544Seschrock spa_close(spa, FTAG); 22521544Seschrock 22531544Seschrock /* 2254789Sahrens * The pool will be in core if it's openable, 2255789Sahrens * in which case we can modify its state. 2256789Sahrens */ 2257789Sahrens if (spa->spa_state != POOL_STATE_UNINITIALIZED && spa->spa_sync_on) { 2258789Sahrens /* 2259789Sahrens * Objsets may be open only because they're dirty, so we 2260789Sahrens * have to force it to sync before checking spa_refcnt. 2261789Sahrens */ 2262789Sahrens spa_scrub_suspend(spa); 2263789Sahrens txg_wait_synced(spa->spa_dsl_pool, 0); 2264789Sahrens 22651544Seschrock /* 22661544Seschrock * A pool cannot be exported or destroyed if there are active 22671544Seschrock * references. If we are resetting a pool, allow references by 22681544Seschrock * fault injection handlers. 
22691544Seschrock */ 22701544Seschrock if (!spa_refcount_zero(spa) || 22711544Seschrock (spa->spa_inject_ref != 0 && 22721544Seschrock new_state != POOL_STATE_UNINITIALIZED)) { 2273789Sahrens spa_scrub_resume(spa); 22741544Seschrock spa_async_resume(spa); 2275789Sahrens mutex_exit(&spa_namespace_lock); 2276789Sahrens return (EBUSY); 2277789Sahrens } 2278789Sahrens 2279789Sahrens spa_scrub_resume(spa); 2280789Sahrens VERIFY(spa_scrub(spa, POOL_SCRUB_NONE, B_TRUE) == 0); 2281789Sahrens 2282789Sahrens /* 2283789Sahrens * We want this to be reflected on every label, 2284789Sahrens * so mark them all dirty. spa_unload() will do the 2285789Sahrens * final sync that pushes these changes out. 2286789Sahrens */ 22871544Seschrock if (new_state != POOL_STATE_UNINITIALIZED) { 22881601Sbonwick spa_config_enter(spa, RW_WRITER, FTAG); 22891544Seschrock spa->spa_state = new_state; 22901635Sbonwick spa->spa_final_txg = spa_last_synced_txg(spa) + 1; 22911544Seschrock vdev_config_dirty(spa->spa_root_vdev); 22921601Sbonwick spa_config_exit(spa, FTAG); 22931544Seschrock } 2294789Sahrens } 2295789Sahrens 22964451Seschrock spa_event_notify(spa, NULL, ESC_ZFS_POOL_DESTROY); 22974451Seschrock 2298789Sahrens if (spa->spa_state != POOL_STATE_UNINITIALIZED) { 2299789Sahrens spa_unload(spa); 2300789Sahrens spa_deactivate(spa); 2301789Sahrens } 2302789Sahrens 23031775Sbillm if (oldconfig && spa->spa_config) 23041775Sbillm VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0); 23051775Sbillm 23061544Seschrock if (new_state != POOL_STATE_UNINITIALIZED) { 23075363Seschrock spa_config_check(spa->spa_config_dir, 23085363Seschrock spa->spa_config_file); 23091544Seschrock spa_remove(spa); 23101544Seschrock spa_config_sync(); 23111544Seschrock } 2312789Sahrens mutex_exit(&spa_namespace_lock); 2313789Sahrens 2314789Sahrens return (0); 2315789Sahrens } 2316789Sahrens 2317789Sahrens /* 2318789Sahrens * Destroy a storage pool. 
2319789Sahrens */ 2320789Sahrens int 2321789Sahrens spa_destroy(char *pool) 2322789Sahrens { 23231775Sbillm return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL)); 2324789Sahrens } 2325789Sahrens 2326789Sahrens /* 2327789Sahrens * Export a storage pool. 2328789Sahrens */ 2329789Sahrens int 23301775Sbillm spa_export(char *pool, nvlist_t **oldconfig) 2331789Sahrens { 23321775Sbillm return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig)); 2333789Sahrens } 2334789Sahrens 2335789Sahrens /* 23361544Seschrock * Similar to spa_export(), this unloads the spa_t without actually removing it 23371544Seschrock * from the namespace in any way. 23381544Seschrock */ 23391544Seschrock int 23401544Seschrock spa_reset(char *pool) 23411544Seschrock { 23421775Sbillm return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL)); 23431544Seschrock } 23441544Seschrock 23451544Seschrock 23461544Seschrock /* 2347789Sahrens * ========================================================================== 2348789Sahrens * Device manipulation 2349789Sahrens * ========================================================================== 2350789Sahrens */ 2351789Sahrens 2352789Sahrens /* 23534527Sperrin * Add a device to a storage pool. 
 */
int
spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
{
	uint64_t txg;
	int c, error;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *vd, *tvd;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;

	txg = spa_vdev_enter(spa);

	if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0,
	    VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, NULL, txg, error));

	/*
	 * Publish the not-yet-added tree; every exit path below clears this
	 * before returning.
	 */
	spa->spa_pending_vdev = vd;

	/* Spares and l2cache arrays are optional in nvroot. */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
	    &nspares) != 0)
		nspares = 0;

	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache,
	    &nl2cache) != 0)
		nl2cache = 0;

	/* Nothing to add at all is a caller error. */
	if (vd->vdev_children == 0 && nspares == 0 && nl2cache == 0) {
		spa->spa_pending_vdev = NULL;
		return (spa_vdev_exit(spa, vd, txg, EINVAL));
	}

	if (vd->vdev_children != 0) {
		if ((error = vdev_create(vd, txg, B_FALSE)) != 0) {
			spa->spa_pending_vdev = NULL;
			return (spa_vdev_exit(spa, vd, txg, error));
		}
	}

	/*
	 * We must validate the spares and l2cache devices after checking the
	 * children.  Otherwise, vdev_inuse() will blindly overwrite the spare.
	 */
	if ((error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) != 0) {
		spa->spa_pending_vdev = NULL;
		return (spa_vdev_exit(spa, vd, txg, error));
	}

	spa->spa_pending_vdev = NULL;

	/*
	 * Transfer each new top-level vdev from vd to rvd.
	 */
	for (c = 0; c < vd->vdev_children; c++) {
		tvd = vd->vdev_child[c];
		vdev_remove_child(vd, tvd);
		tvd->vdev_id = rvd->vdev_children;
		vdev_add_child(rvd, tvd);
		vdev_config_dirty(tvd);
	}

	if (nspares != 0) {
		spa_set_aux_vdevs(&spa->spa_spares, spares, nspares,
		    ZPOOL_CONFIG_SPARES);
		spa_load_spares(spa);
		spa->spa_spares.sav_sync = B_TRUE;
	}

	if (nl2cache != 0) {
		spa_set_aux_vdevs(&spa->spa_l2cache, l2cache, nl2cache,
		    ZPOOL_CONFIG_L2CACHE);
		spa_load_l2cache(spa);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	/*
	 * We have to be careful when adding new vdevs to an existing pool.
	 * If other threads start allocating from these vdevs before we
	 * sync the config cache, and we lose power, then upon reboot we may
	 * fail to open the pool because there are DVAs that the config cache
	 * can't translate.  Therefore, we first add the vdevs without
	 * initializing metaslabs; sync the config cache (via spa_vdev_exit());
	 * and then let spa_config_update() initialize the new metaslabs.
	 *
	 * spa_load() checks for added-but-not-initialized vdevs, so that
	 * if we lose power at any point in this sequence, the remaining
	 * steps will be completed the next time we load the pool.
	 */
	(void) spa_vdev_exit(spa, vd, txg, 0);

	mutex_enter(&spa_namespace_lock);
	spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
	mutex_exit(&spa_namespace_lock);

	return (0);
}

/*
 * Attach a device to a mirror.  The arguments are the path to any device
 * in the mirror, and the nvroot for the new device.  If the path specifies
 * a device that is not mirrored, we automatically insert the mirror vdev.
 *
 * If 'replacing' is specified, the new device is intended to replace the
 * existing device; in this case the two devices are made into their own
 * mirror using the 'replacing' vdev, which is functionally identical to
 * the mirror vdev (it actually reuses all the same ops) but has a few
 * extra rules: you can't attach to it after it's been created, and upon
 * completion of resilvering, the first disk (the one being replaced)
 * is automatically detached.
 */
int
spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
{
	uint64_t txg, open_txg;
	int error;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd;
	vdev_ops_t *pvops;
	int is_log;

	txg = spa_vdev_enter(spa);

	oldvd = vdev_lookup_by_guid(rvd, guid);

	if (oldvd == NULL)
		return (spa_vdev_exit(spa, NULL, txg, ENODEV));

	if (!oldvd->vdev_ops->vdev_op_leaf)
		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));

	pvd = oldvd->vdev_parent;

	if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0,
	    VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, NULL, txg, EINVAL));

	/* The incoming nvroot must describe exactly one new leaf device. */
	if (newrootvd->vdev_children != 1)
		return (spa_vdev_exit(spa, newrootvd, txg, EINVAL));

	newvd = newrootvd->vdev_child[0];

	if (!newvd->vdev_ops->vdev_op_leaf)
		return (spa_vdev_exit(spa, newrootvd, txg, EINVAL));

	if ((error = vdev_create(newrootvd, txg, replacing)) != 0)
		return (spa_vdev_exit(spa, newrootvd, txg, error));

	/*
	 * Spares can't replace logs
	 */
	is_log = oldvd->vdev_islog;
	if (is_log && newvd->vdev_isspare)
		return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));

	if (!replacing) {
		/*
		 * For attach, the only allowable parent is a mirror or the root
		 * vdev.
		 */
		if (pvd->vdev_ops != &vdev_mirror_ops &&
		    pvd->vdev_ops != &vdev_root_ops)
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));

		pvops = &vdev_mirror_ops;
	} else {
		/*
		 * Active hot spares can only be replaced by inactive hot
		 * spares.
		 */
		if (pvd->vdev_ops == &vdev_spare_ops &&
		    pvd->vdev_child[1] == oldvd &&
		    !spa_has_spare(spa, newvd->vdev_guid))
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));

		/*
		 * If the source is a hot spare, and the parent isn't already a
		 * spare, then we want to create a new hot spare.  Otherwise, we
		 * want to create a replacing vdev.  The user is not allowed to
		 * attach to a spared vdev child unless the 'isspare' state is
		 * the same (spare replaces spare, non-spare replaces
		 * non-spare).
		 */
		if (pvd->vdev_ops == &vdev_replacing_ops)
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
		else if (pvd->vdev_ops == &vdev_spare_ops &&
		    newvd->vdev_isspare != oldvd->vdev_isspare)
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
		else if (pvd->vdev_ops != &vdev_spare_ops &&
		    newvd->vdev_isspare)
			pvops = &vdev_spare_ops;
		else
			pvops = &vdev_replacing_ops;
	}

	/*
	 * Compare the new device size with the replaceable/attachable
	 * device size.
	 */
	if (newvd->vdev_psize < vdev_get_rsize(oldvd))
		return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW));

	/*
	 * The new device cannot have a higher alignment requirement
	 * than the top-level vdev.
	 */
	if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift)
		return (spa_vdev_exit(spa, newrootvd, txg, EDOM));

	/*
	 * If this is an in-place replacement, update oldvd's path and devid
	 * to make it distinguishable from newvd, and unopenable from now on.
	 */
	if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) {
		spa_strfree(oldvd->vdev_path);
		/* +5 covers the appended "/old" plus the terminating NUL. */
		oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5,
		    KM_SLEEP);
		(void) sprintf(oldvd->vdev_path, "%s/%s",
		    newvd->vdev_path, "old");
		if (oldvd->vdev_devid != NULL) {
			spa_strfree(oldvd->vdev_devid);
			oldvd->vdev_devid = NULL;
		}
	}

	/*
	 * If the parent is not a mirror, or if we're replacing, insert the new
	 * mirror/replacing/spare vdev above oldvd.
	 */
	if (pvd->vdev_ops != pvops)
		pvd = vdev_add_parent(oldvd, pvops);

	ASSERT(pvd->vdev_top->vdev_parent == rvd);
	ASSERT(pvd->vdev_ops == pvops);
	ASSERT(oldvd->vdev_parent == pvd);

	/*
	 * Extract the new device from its root and add it to pvd.
	 */
	vdev_remove_child(newrootvd, newvd);
	newvd->vdev_id = pvd->vdev_children;
	vdev_add_child(pvd, newvd);

	/*
	 * If newvd is smaller than oldvd, but larger than its rsize,
	 * the addition of newvd may have decreased our parent's asize.
	 */
	pvd->vdev_asize = MIN(pvd->vdev_asize, newvd->vdev_asize);

	tvd = newvd->vdev_top;
	ASSERT(pvd->vdev_top == tvd);
	ASSERT(tvd->vdev_parent == rvd);

	vdev_config_dirty(tvd);

	/*
	 * Set newvd's DTL to [TXG_INITIAL, open_txg].  It will propagate
	 * upward when spa_vdev_exit() calls vdev_dtl_reassess().
	 */
	open_txg = txg + TXG_CONCURRENT_STATES - 1;

	mutex_enter(&newvd->vdev_dtl_lock);
	space_map_add(&newvd->vdev_dtl_map, TXG_INITIAL,
	    open_txg - TXG_INITIAL + 1);
	mutex_exit(&newvd->vdev_dtl_lock);

	if (newvd->vdev_isspare)
		spa_spare_activate(newvd);

	/*
	 * Mark newvd's DTL dirty in this txg.
	 */
	vdev_dirty(tvd, VDD_DTL, newvd, txg);

	(void) spa_vdev_exit(spa, newrootvd, open_txg, 0);

	/*
	 * Kick off a resilver to update newvd.  We need to grab the namespace
	 * lock because spa_scrub() needs to post a sysevent with the pool name.
 */
	mutex_enter(&spa_namespace_lock);
	VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0);
	mutex_exit(&spa_namespace_lock);

	return (0);
}

/*
 * Detach a device from a mirror or replacing vdev.
 * If 'replace_done' is specified, only detach if the parent
 * is a replacing vdev.
 */
int
spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done)
{
	uint64_t txg;
	int c, t, error;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *vd, *pvd, *cvd, *tvd;
	boolean_t unspare = B_FALSE;
	uint64_t unspare_guid;

	txg = spa_vdev_enter(spa);

	vd = vdev_lookup_by_guid(rvd, guid);

	if (vd == NULL)
		return (spa_vdev_exit(spa, NULL, txg, ENODEV));

	if (!vd->vdev_ops->vdev_op_leaf)
		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));

	pvd = vd->vdev_parent;

	/*
	 * If replace_done is specified, only remove this device if it's
	 * the first child of a replacing vdev.  For the 'spare' vdev, either
	 * disk can be removed.
	 */
	if (replace_done) {
		if (pvd->vdev_ops == &vdev_replacing_ops) {
			if (vd->vdev_id != 0)
				return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
		} else if (pvd->vdev_ops != &vdev_spare_ops) {
			return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
		}
	}

	ASSERT(pvd->vdev_ops != &vdev_spare_ops ||
	    spa_version(spa) >= SPA_VERSION_SPARES);

	/*
	 * Only mirror, replacing, and spare vdevs support detach.
	 */
	if (pvd->vdev_ops != &vdev_replacing_ops &&
	    pvd->vdev_ops != &vdev_mirror_ops &&
	    pvd->vdev_ops != &vdev_spare_ops)
		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));

	/*
	 * If there's only one replica, you can't detach it.
	 */
	if (pvd->vdev_children <= 1)
		return (spa_vdev_exit(spa, NULL, txg, EBUSY));

	/*
	 * If all siblings have non-empty DTLs, this device may have the only
	 * valid copy of the data, which means we cannot safely detach it.
	 *
	 * XXX -- as in the vdev_offline() case, we really want a more
	 * precise DTL check.
	 */
	for (c = 0; c < pvd->vdev_children; c++) {
		uint64_t dirty;

		cvd = pvd->vdev_child[c];
		if (cvd == vd)
			continue;
		if (vdev_is_dead(cvd))
			continue;
		mutex_enter(&cvd->vdev_dtl_lock);
		dirty = cvd->vdev_dtl_map.sm_space |
		    cvd->vdev_dtl_scrub.sm_space;
		mutex_exit(&cvd->vdev_dtl_lock);
		/* A sibling with an empty DTL holds a complete copy. */
		if (!dirty)
			break;
	}

	/*
	 * If we are a replacing or spare vdev, then we can always detach the
	 * latter child, as that is how one cancels the operation.
	 */
	if ((pvd->vdev_ops == &vdev_mirror_ops || vd->vdev_id != 1) &&
	    c == pvd->vdev_children)
		return (spa_vdev_exit(spa, NULL, txg, EBUSY));

	/*
	 * If we are detaching the original disk from a spare, then it implies
	 * that the spare should become a real disk, and be removed from the
	 * active spare list for the pool.
	 */
	if (pvd->vdev_ops == &vdev_spare_ops &&
	    vd->vdev_id == 0)
		unspare = B_TRUE;

	/*
	 * Erase the disk labels so the disk can be used for other things.
	 * This must be done after all other error cases are handled,
	 * but before we disembowel vd (so we can still do I/O to it).
	 * But if we can't do it, don't treat the error as fatal --
	 * it may be that the unwritability of the disk is the reason
	 * it's being detached!
	 */
	/* NB: this error is deliberately unused; it is overwritten below. */
	error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE);

	/*
	 * Remove vd from its parent and compact the parent's children.
	 */
	vdev_remove_child(pvd, vd);
	vdev_compact_children(pvd);

	/*
	 * Remember one of the remaining children so we can get tvd below.
	 */
	cvd = pvd->vdev_child[0];

	/*
	 * If we need to remove the remaining child from the list of hot spares,
	 * do it now, marking the vdev as no longer a spare in the process.  We
	 * must do this before vdev_remove_parent(), because that can change the
	 * GUID if it creates a new toplevel GUID.
	 */
	if (unspare) {
		ASSERT(cvd->vdev_isspare);
		spa_spare_remove(cvd);
		unspare_guid = cvd->vdev_guid;
	}

	/*
	 * If the parent mirror/replacing vdev only has one child,
	 * the parent is no longer needed.  Remove it from the tree.
	 */
	if (pvd->vdev_children == 1)
		vdev_remove_parent(cvd);

	/*
	 * We don't set tvd until now because the parent we just removed
	 * may have been the previous top-level vdev.
	 */
	tvd = cvd->vdev_top;
	ASSERT(tvd->vdev_parent == rvd);

	/*
	 * Reevaluate the parent vdev state.
	 */
	vdev_propagate_state(cvd);

	/*
	 * If the device we just detached was smaller than the others, it may be
	 * possible to add metaslabs (i.e. grow the pool).
* vdev_metaslab_init() can't fail because the existing metaslabs are
	 * already in core, so there's nothing to read from disk.
	 */
	VERIFY(vdev_metaslab_init(tvd, txg) == 0);

	vdev_config_dirty(tvd);

	/*
	 * Mark vd's DTL as dirty in this txg.  vdev_dtl_sync() will see that
	 * vd->vdev_detached is set and free vd's DTL object in syncing context.
	 * But first make sure we're not on any *other* txg's DTL list, to
	 * prevent vd from being accessed after it's freed.
	 */
	for (t = 0; t < TXG_SIZE; t++)
		(void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t);
	vd->vdev_detached = B_TRUE;
	vdev_dirty(tvd, VDD_DTL, vd, txg);

	spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE);

	error = spa_vdev_exit(spa, vd, txg, 0);

	/*
	 * If this was the removal of the original device in a hot spare vdev,
	 * then we want to go through and remove the device from the hot spare
	 * list of every other pool.
	 */
	if (unspare) {
		spa = NULL;
		mutex_enter(&spa_namespace_lock);
		while ((spa = spa_next(spa)) != NULL) {
			if (spa->spa_state != POOL_STATE_ACTIVE)
				continue;

			(void) spa_vdev_remove(spa, unspare_guid, B_TRUE);
		}
		mutex_exit(&spa_namespace_lock);
	}

	return (error);
}

/*
 * Remove a spares vdev from the nvlist config.
 *
 * Returns 0 on success; ENOENT if 'guid' is not a configured spare;
 * ENOTSUP if the guid only matches an active vdev (vd != NULL); EBUSY if
 * the spare is configured and active but 'unspare' was not requested.
 */
static int
spa_remove_spares(spa_aux_vdev_t *sav, uint64_t guid, boolean_t unspare,
    nvlist_t **spares, int nspares, vdev_t *vd)
{
	nvlist_t *nv, **newspares;
	int i, j;

	/* Locate the spare's nvlist entry by guid, if present. */
	nv = NULL;
	for (i = 0; i < nspares; i++) {
		uint64_t theguid;

		VERIFY(nvlist_lookup_uint64(spares[i],
		    ZPOOL_CONFIG_GUID, &theguid) == 0);
		if (theguid == guid) {
			nv = spares[i];
			break;
		}
	}

	/*
	 * Only remove the hot spare if it's not currently in use in this pool.
	 */
	if (nv == NULL && vd == NULL)
		return (ENOENT);

	if (nv == NULL && vd != NULL)
		return (ENOTSUP);

	if (!unspare && nv != NULL && vd != NULL)
		return (EBUSY);

	/* Build the replacement array with the target spare omitted. */
	if (nspares == 1) {
		newspares = NULL;
	} else {
		newspares = kmem_alloc((nspares - 1) * sizeof (void *),
		    KM_SLEEP);
		for (i = 0, j = 0; i < nspares; i++) {
			if (spares[i] != nv)
				VERIFY(nvlist_dup(spares[i],
				    &newspares[j++], KM_SLEEP) == 0);
		}
	}

	VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_SPARES,
	    DATA_TYPE_NVLIST_ARRAY) == 0);
	VERIFY(nvlist_add_nvlist_array(sav->sav_config,
	    ZPOOL_CONFIG_SPARES, newspares, nspares - 1) == 0);
	for (i = 0; i < nspares - 1; i++)
		nvlist_free(newspares[i]);
	kmem_free(newspares,
(nspares - 1) * sizeof (void *));

	return (0);
}

/*
 * Remove an l2cache vdev from the nvlist config.
 *
 * Returns 0 on success; ENOENT if 'guid' matches neither a configured
 * l2cache entry nor an open l2cache vdev; ENOTSUP if it only matches an
 * open vdev.
 */
static int
spa_remove_l2cache(spa_aux_vdev_t *sav, uint64_t guid, nvlist_t **l2cache,
    int nl2cache, vdev_t *vd)
{
	nvlist_t *nv, **newl2cache;
	int i, j;

	/* Locate the cache device's nvlist entry by guid, if present. */
	nv = NULL;
	for (i = 0; i < nl2cache; i++) {
		uint64_t theguid;

		VERIFY(nvlist_lookup_uint64(l2cache[i],
		    ZPOOL_CONFIG_GUID, &theguid) == 0);
		if (theguid == guid) {
			nv = l2cache[i];
			break;
		}
	}

	/*
	 * Unlike the spares case, also search the open aux vdevs if the
	 * caller didn't hand us one.
	 */
	if (vd == NULL) {
		for (i = 0; i < nl2cache; i++) {
			if (sav->sav_vdevs[i]->vdev_guid == guid) {
				vd = sav->sav_vdevs[i];
				break;
			}
		}
	}

	if (nv == NULL && vd == NULL)
		return (ENOENT);

	if (nv == NULL && vd != NULL)
		return (ENOTSUP);

	/* Build the replacement array with the target device omitted. */
	if (nl2cache == 1) {
		newl2cache = NULL;
	} else {
		newl2cache = kmem_alloc((nl2cache - 1) * sizeof (void *),
		    KM_SLEEP);
		for (i = 0, j = 0; i < nl2cache; i++) {
			if (l2cache[i] != nv)
				VERIFY(nvlist_dup(l2cache[i],
				    &newl2cache[j++], KM_SLEEP) == 0);
		}
	}

	VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE,
	    DATA_TYPE_NVLIST_ARRAY) == 0);
VERIFY(nvlist_add_nvlist_array(sav->sav_config,
	    ZPOOL_CONFIG_L2CACHE, newl2cache, nl2cache - 1) == 0);
	for (i = 0; i < nl2cache - 1; i++)
		nvlist_free(newl2cache[i]);
	kmem_free(newl2cache, (nl2cache - 1) * sizeof (void *));

	return (0);
}

/*
 * Remove a device from the pool.  Currently, this supports removing only hot
 * spares and level 2 ARC devices.
 */
int
spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
{
	vdev_t *vd;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;
	int error = 0;

	spa_config_enter(spa, RW_WRITER, FTAG);

	/*
	 * vd is non-NULL when the guid refers to a vdev that is currently
	 * open in this pool; the helpers use that to refuse unsafe removal.
	 */
	vd = spa_lookup_by_guid(spa, guid);

	if (spa->spa_spares.sav_vdevs != NULL &&
	    spa_spare_exists(guid, NULL) &&
	    nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
	    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0) {
		if ((error = spa_remove_spares(&spa->spa_spares, guid, unspare,
		    spares, nspares, vd)) != 0)
			goto out;
		spa_load_spares(spa);
		spa->spa_spares.sav_sync = B_TRUE;
		goto out;
	}

	if (spa->spa_l2cache.sav_vdevs != NULL &&
	    spa_l2cache_exists(guid, NULL) &&
	    nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config,
	    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0) {
		if ((error = spa_remove_l2cache(&spa->spa_l2cache, guid,
		    l2cache, nl2cache, vd)) != 0)
			goto out;
		spa_load_l2cache(spa);
spa->spa_l2cache.sav_sync = B_TRUE;
	}

out:
	spa_config_exit(spa, FTAG);
	return (error);
}

/*
 * Find any device that's done replacing, or a vdev marked 'unspare' that's
 * current spared, so we can detach it.
 */
static vdev_t *
spa_vdev_resilver_done_hunt(vdev_t *vd)
{
	vdev_t *newvd, *oldvd;
	int c;

	/* Depth-first: search all children before examining this vdev. */
	for (c = 0; c < vd->vdev_children; c++) {
		oldvd = spa_vdev_resilver_done_hunt(vd->vdev_child[c]);
		if (oldvd != NULL)
			return (oldvd);
	}

	/*
	 * Check for a completed replacement.
	 */
	if (vd->vdev_ops == &vdev_replacing_ops && vd->vdev_children == 2) {
		oldvd = vd->vdev_child[0];
		newvd = vd->vdev_child[1];

		/* Empty DTLs (both map and scrub) mean the resilver is done. */
		mutex_enter(&newvd->vdev_dtl_lock);
		if (newvd->vdev_dtl_map.sm_space == 0 &&
		    newvd->vdev_dtl_scrub.sm_space == 0) {
			mutex_exit(&newvd->vdev_dtl_lock);
			return (oldvd);
		}
		mutex_exit(&newvd->vdev_dtl_lock);
	}

	/*
	 * Check for a completed resilver with the 'unspare' flag set.
 */
	if (vd->vdev_ops == &vdev_spare_ops && vd->vdev_children == 2) {
		newvd = vd->vdev_child[0];
		oldvd = vd->vdev_child[1];

		mutex_enter(&newvd->vdev_dtl_lock);
		if (newvd->vdev_unspare &&
		    newvd->vdev_dtl_map.sm_space == 0 &&
		    newvd->vdev_dtl_scrub.sm_space == 0) {
			newvd->vdev_unspare = 0;
			mutex_exit(&newvd->vdev_dtl_lock);
			return (oldvd);
		}
		mutex_exit(&newvd->vdev_dtl_lock);
	}

	return (NULL);
}

/*
 * Detach any device found by spa_vdev_resilver_done_hunt(), repeating the
 * hunt until nothing further is eligible.  The config lock is dropped
 * around each spa_vdev_detach() call because that path acquires its own
 * locks via spa_vdev_enter().
 */
static void
spa_vdev_resilver_done(spa_t *spa)
{
	vdev_t *vd;
	vdev_t *pvd;
	uint64_t guid;
	uint64_t pguid = 0;

	spa_config_enter(spa, RW_READER, FTAG);

	while ((vd = spa_vdev_resilver_done_hunt(spa->spa_root_vdev)) != NULL) {
		guid = vd->vdev_guid;
		/*
		 * If we have just finished replacing a hot spared device, then
		 * we need to detach the parent's first child (the original hot
		 * spare) as well.
		 */
		pvd = vd->vdev_parent;
		if (pvd->vdev_parent->vdev_ops == &vdev_spare_ops &&
		    pvd->vdev_id == 0) {
			ASSERT(pvd->vdev_ops == &vdev_replacing_ops);
			ASSERT(pvd->vdev_parent->vdev_children == 2);
			pguid = pvd->vdev_parent->vdev_child[1]->vdev_guid;
		}
		spa_config_exit(spa, FTAG);
		if (spa_vdev_detach(spa, guid, B_TRUE) != 0)
			return;
		if (pguid != 0 && spa_vdev_detach(spa, pguid, B_TRUE) != 0)
			return;
		spa_config_enter(spa, RW_READER, FTAG);
	}

	spa_config_exit(spa, FTAG);
}

/*
 * Update the stored path for this vdev.  Dirty the vdev configuration, relying
 * on spa_vdev_enter/exit() to synchronize the labels and cache.
 */
int
spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath)
{
	vdev_t *rvd, *vd;
	uint64_t txg;

	rvd = spa->spa_root_vdev;

	txg = spa_vdev_enter(spa);

	if ((vd = vdev_lookup_by_guid(rvd, guid)) == NULL) {
		/*
		 * Determine if this is a reference to a hot spare or l2cache
		 * device.  If it is, update the path as stored in their
		 * device list.
 */
		nvlist_t **spares, **l2cache;
		uint_t i, nspares, nl2cache;

		if (spa->spa_spares.sav_config != NULL) {
			VERIFY(nvlist_lookup_nvlist_array(
			    spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES,
			    &spares, &nspares) == 0);
			for (i = 0; i < nspares; i++) {
				uint64_t theguid;
				VERIFY(nvlist_lookup_uint64(spares[i],
				    ZPOOL_CONFIG_GUID, &theguid) == 0);
				if (theguid == guid) {
					/* Rewrite the path in the aux nvlist. */
					VERIFY(nvlist_add_string(spares[i],
					    ZPOOL_CONFIG_PATH, newpath) == 0);
					spa_load_spares(spa);
					spa->spa_spares.sav_sync = B_TRUE;
					return (spa_vdev_exit(spa, NULL, txg,
					    0));
				}
			}
		}

		if (spa->spa_l2cache.sav_config != NULL) {
			VERIFY(nvlist_lookup_nvlist_array(
			    spa->spa_l2cache.sav_config, ZPOOL_CONFIG_L2CACHE,
			    &l2cache, &nl2cache) == 0);
			for (i = 0; i < nl2cache; i++) {
				uint64_t theguid;
				VERIFY(nvlist_lookup_uint64(l2cache[i],
				    ZPOOL_CONFIG_GUID, &theguid) == 0);
				if (theguid == guid) {
					/* Rewrite the path in the aux nvlist. */
					VERIFY(nvlist_add_string(l2cache[i],
					    ZPOOL_CONFIG_PATH, newpath) == 0);
					spa_load_l2cache(spa);
					spa->spa_l2cache.sav_sync = B_TRUE;
					return (spa_vdev_exit(spa, NULL, txg,
					    0));
				}
			}
		}

		return (spa_vdev_exit(spa, NULL, txg, ENOENT));
	}

	if (!vd->vdev_ops->vdev_op_leaf)
		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
spa_strfree(vd->vdev_path);
	vd->vdev_path = spa_strdup(newpath);

	vdev_config_dirty(vd->vdev_top);

	return (spa_vdev_exit(spa, NULL, txg, 0));
}

/*
 * ==========================================================================
 * SPA Scrubbing
 * ==========================================================================
 */

/*
 * zio completion callback for scrub/resilver reads issued by
 * spa_scrub_io_start(): account errors, release the data buffer, and
 * retire one unit of in-flight I/O (waking a throttled issuer if we
 * dropped below the maximum).
 */
static void
spa_scrub_io_done(zio_t *zio)
{
	spa_t *spa = zio->io_spa;

	arc_data_buf_free(zio->io_data, zio->io_size);

	mutex_enter(&spa->spa_scrub_lock);
	if (zio->io_error && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
		vdev_t *vd = zio->io_vd ? zio->io_vd : spa->spa_root_vdev;
		spa->spa_scrub_errors++;
		mutex_enter(&vd->vdev_stat_lock);
		vd->vdev_stat.vs_scrub_errors++;
		mutex_exit(&vd->vdev_stat_lock);
	}

	/* Pair with the inflight++ in spa_scrub_io_start(). */
	if (--spa->spa_scrub_inflight < spa->spa_scrub_maxinflight)
		cv_broadcast(&spa->spa_scrub_io_cv);

	ASSERT(spa->spa_scrub_inflight >= 0);

	mutex_exit(&spa->spa_scrub_lock);
}

/*
 * Issue one asynchronous scrub/resilver read for the given block pointer,
 * throttled by spa_scrub_maxinflight.
 */
static void
spa_scrub_io_start(spa_t *spa, blkptr_t *bp, int priority, int flags,
    zbookmark_t *zb)
{
	size_t size = BP_GET_LSIZE(bp);
	void *data;

	mutex_enter(&spa->spa_scrub_lock);
	/*
	 * Do not give too much work to vdev(s).
 */
	while (spa->spa_scrub_inflight >= spa->spa_scrub_maxinflight) {
		cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
	}
	spa->spa_scrub_inflight++;
	mutex_exit(&spa->spa_scrub_lock);

	data = arc_data_buf_alloc(size);

	if (zb->zb_level == -1 && BP_GET_TYPE(bp) != DMU_OT_OBJSET)
		flags |= ZIO_FLAG_SPECULATIVE;	/* intent log block */

	flags |= ZIO_FLAG_SCRUB_THREAD | ZIO_FLAG_CANFAIL;

	/* Completion (error accounting, buffer free) in spa_scrub_io_done(). */
	zio_nowait(zio_read(NULL, spa, bp, data, size,
	    spa_scrub_io_done, NULL, priority, flags, zb));
}

/* ARGSUSED */
/*
 * Per-block traversal callback for scrub/resilver: account examined space
 * per vdev and issue a read for blocks that need it.
 */
static int
spa_scrub_cb(traverse_blk_cache_t *bc, spa_t *spa, void *a)
{
	blkptr_t *bp = &bc->bc_blkptr;
	vdev_t *vd = spa->spa_root_vdev;
	dva_t *dva = bp->blk_dva;
	int needs_resilver = B_FALSE;
	int d;

	if (bc->bc_errno) {
		/*
		 * We can't scrub this block, but we can continue to scrub
		 * the rest of the pool.  Note the error and move along.
3231789Sahrens */ 3232789Sahrens mutex_enter(&spa->spa_scrub_lock); 3233789Sahrens spa->spa_scrub_errors++; 3234789Sahrens mutex_exit(&spa->spa_scrub_lock); 3235789Sahrens 32361775Sbillm mutex_enter(&vd->vdev_stat_lock); 32371775Sbillm vd->vdev_stat.vs_scrub_errors++; 32381775Sbillm mutex_exit(&vd->vdev_stat_lock); 3239789Sahrens 3240789Sahrens return (ERESTART); 3241789Sahrens } 3242789Sahrens 3243789Sahrens ASSERT(bp->blk_birth < spa->spa_scrub_maxtxg); 3244789Sahrens 32451775Sbillm for (d = 0; d < BP_GET_NDVAS(bp); d++) { 32461775Sbillm vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d])); 32471775Sbillm 32481775Sbillm ASSERT(vd != NULL); 32491775Sbillm 32501775Sbillm /* 32511775Sbillm * Keep track of how much data we've examined so that 32521775Sbillm * zpool(1M) status can make useful progress reports. 32531775Sbillm */ 32541775Sbillm mutex_enter(&vd->vdev_stat_lock); 32551775Sbillm vd->vdev_stat.vs_scrub_examined += DVA_GET_ASIZE(&dva[d]); 32561775Sbillm mutex_exit(&vd->vdev_stat_lock); 3257789Sahrens 32581775Sbillm if (spa->spa_scrub_type == POOL_SCRUB_RESILVER) { 32591775Sbillm if (DVA_GET_GANG(&dva[d])) { 32601775Sbillm /* 32611775Sbillm * Gang members may be spread across multiple 32621775Sbillm * vdevs, so the best we can do is look at the 32631775Sbillm * pool-wide DTL. 32641775Sbillm * XXX -- it would be better to change our 32651775Sbillm * allocation policy to ensure that this can't 32661775Sbillm * happen. 
32671775Sbillm */ 32681775Sbillm vd = spa->spa_root_vdev; 32691775Sbillm } 32701775Sbillm if (vdev_dtl_contains(&vd->vdev_dtl_map, 32711775Sbillm bp->blk_birth, 1)) 32721775Sbillm needs_resilver = B_TRUE; 3273789Sahrens } 32741775Sbillm } 32751775Sbillm 32761775Sbillm if (spa->spa_scrub_type == POOL_SCRUB_EVERYTHING) 3277789Sahrens spa_scrub_io_start(spa, bp, ZIO_PRIORITY_SCRUB, 32781544Seschrock ZIO_FLAG_SCRUB, &bc->bc_bookmark); 32791775Sbillm else if (needs_resilver) 32801775Sbillm spa_scrub_io_start(spa, bp, ZIO_PRIORITY_RESILVER, 32811775Sbillm ZIO_FLAG_RESILVER, &bc->bc_bookmark); 3282789Sahrens 3283789Sahrens return (0); 3284789Sahrens } 3285789Sahrens 3286789Sahrens static void 3287789Sahrens spa_scrub_thread(spa_t *spa) 3288789Sahrens { 3289789Sahrens callb_cpr_t cprinfo; 3290789Sahrens traverse_handle_t *th = spa->spa_scrub_th; 3291789Sahrens vdev_t *rvd = spa->spa_root_vdev; 3292789Sahrens pool_scrub_type_t scrub_type = spa->spa_scrub_type; 3293789Sahrens int error = 0; 3294789Sahrens boolean_t complete; 3295789Sahrens 3296789Sahrens CALLB_CPR_INIT(&cprinfo, &spa->spa_scrub_lock, callb_generic_cpr, FTAG); 3297789Sahrens 3298797Sbonwick /* 3299797Sbonwick * If we're restarting due to a snapshot create/delete, 3300797Sbonwick * wait for that to complete. 3301797Sbonwick */ 3302797Sbonwick txg_wait_synced(spa_get_dsl(spa), 0); 3303797Sbonwick 33041544Seschrock dprintf("start %s mintxg=%llu maxtxg=%llu\n", 33051544Seschrock scrub_type == POOL_SCRUB_RESILVER ? 
"resilver" : "scrub", 33061544Seschrock spa->spa_scrub_mintxg, spa->spa_scrub_maxtxg); 33071544Seschrock 33081544Seschrock spa_config_enter(spa, RW_WRITER, FTAG); 33091544Seschrock vdev_reopen(rvd); /* purge all vdev caches */ 3310789Sahrens vdev_config_dirty(rvd); /* rewrite all disk labels */ 3311789Sahrens vdev_scrub_stat_update(rvd, scrub_type, B_FALSE); 33121544Seschrock spa_config_exit(spa, FTAG); 3313789Sahrens 3314789Sahrens mutex_enter(&spa->spa_scrub_lock); 3315789Sahrens spa->spa_scrub_errors = 0; 3316789Sahrens spa->spa_scrub_active = 1; 33171544Seschrock ASSERT(spa->spa_scrub_inflight == 0); 3318789Sahrens 3319789Sahrens while (!spa->spa_scrub_stop) { 3320789Sahrens CALLB_CPR_SAFE_BEGIN(&cprinfo); 33211544Seschrock while (spa->spa_scrub_suspended) { 3322789Sahrens spa->spa_scrub_active = 0; 3323789Sahrens cv_broadcast(&spa->spa_scrub_cv); 3324789Sahrens cv_wait(&spa->spa_scrub_cv, &spa->spa_scrub_lock); 3325789Sahrens spa->spa_scrub_active = 1; 3326789Sahrens } 3327789Sahrens CALLB_CPR_SAFE_END(&cprinfo, &spa->spa_scrub_lock); 3328789Sahrens 3329789Sahrens if (spa->spa_scrub_restart_txg != 0) 3330789Sahrens break; 3331789Sahrens 3332789Sahrens mutex_exit(&spa->spa_scrub_lock); 3333789Sahrens error = traverse_more(th); 3334789Sahrens mutex_enter(&spa->spa_scrub_lock); 3335789Sahrens if (error != EAGAIN) 3336789Sahrens break; 3337789Sahrens } 3338789Sahrens 3339789Sahrens while (spa->spa_scrub_inflight) 3340789Sahrens cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); 3341789Sahrens 33421601Sbonwick spa->spa_scrub_active = 0; 33431601Sbonwick cv_broadcast(&spa->spa_scrub_cv); 33441601Sbonwick 33451601Sbonwick mutex_exit(&spa->spa_scrub_lock); 33461601Sbonwick 33471601Sbonwick spa_config_enter(spa, RW_WRITER, FTAG); 33481601Sbonwick 33491601Sbonwick mutex_enter(&spa->spa_scrub_lock); 33501601Sbonwick 33511601Sbonwick /* 33521601Sbonwick * Note: we check spa_scrub_restart_txg under both spa_scrub_lock 33531601Sbonwick * AND the spa config lock to 
synchronize with any config changes 33541601Sbonwick * that revise the DTLs under spa_vdev_enter() / spa_vdev_exit(). 33551601Sbonwick */ 3356789Sahrens if (spa->spa_scrub_restart_txg != 0) 3357789Sahrens error = ERESTART; 3358789Sahrens 33591544Seschrock if (spa->spa_scrub_stop) 33601544Seschrock error = EINTR; 33611544Seschrock 3362789Sahrens /* 33631544Seschrock * Even if there were uncorrectable errors, we consider the scrub 33641544Seschrock * completed. The downside is that if there is a transient error during 33651544Seschrock * a resilver, we won't resilver the data properly to the target. But 33661544Seschrock * if the damage is permanent (more likely) we will resilver forever, 33671544Seschrock * which isn't really acceptable. Since there is enough information for 33681544Seschrock * the user to know what has failed and why, this seems like a more 33691544Seschrock * tractable approach. 3370789Sahrens */ 33711544Seschrock complete = (error == 0); 3372789Sahrens 33731544Seschrock dprintf("end %s to maxtxg=%llu %s, traverse=%d, %llu errors, stop=%u\n", 33741544Seschrock scrub_type == POOL_SCRUB_RESILVER ? "resilver" : "scrub", 3375789Sahrens spa->spa_scrub_maxtxg, complete ? "done" : "FAILED", 3376789Sahrens error, spa->spa_scrub_errors, spa->spa_scrub_stop); 3377789Sahrens 3378789Sahrens mutex_exit(&spa->spa_scrub_lock); 3379789Sahrens 3380789Sahrens /* 3381789Sahrens * If the scrub/resilver completed, update all DTLs to reflect this. 3382789Sahrens * Whether it succeeded or not, vacate all temporary scrub DTLs. 3383789Sahrens */ 3384789Sahrens vdev_dtl_reassess(rvd, spa_last_synced_txg(spa) + 1, 3385789Sahrens complete ? 
spa->spa_scrub_maxtxg : 0, B_TRUE); 3386789Sahrens vdev_scrub_stat_update(rvd, POOL_SCRUB_NONE, complete); 33871544Seschrock spa_errlog_rotate(spa); 33881601Sbonwick 33894451Seschrock if (scrub_type == POOL_SCRUB_RESILVER && complete) 33904451Seschrock spa_event_notify(spa, NULL, ESC_ZFS_RESILVER_FINISH); 33914451Seschrock 33921544Seschrock spa_config_exit(spa, FTAG); 3393789Sahrens 3394789Sahrens mutex_enter(&spa->spa_scrub_lock); 3395789Sahrens 33961544Seschrock /* 33971544Seschrock * We may have finished replacing a device. 33981544Seschrock * Let the async thread assess this and handle the detach. 33991544Seschrock */ 34004451Seschrock spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); 3401789Sahrens 3402789Sahrens /* 3403789Sahrens * If we were told to restart, our final act is to start a new scrub. 3404789Sahrens */ 3405789Sahrens if (error == ERESTART) 34061544Seschrock spa_async_request(spa, scrub_type == POOL_SCRUB_RESILVER ? 34071544Seschrock SPA_ASYNC_RESILVER : SPA_ASYNC_SCRUB); 3408789Sahrens 34091544Seschrock spa->spa_scrub_type = POOL_SCRUB_NONE; 34101544Seschrock spa->spa_scrub_active = 0; 34111544Seschrock spa->spa_scrub_thread = NULL; 34121544Seschrock cv_broadcast(&spa->spa_scrub_cv); 3413789Sahrens CALLB_CPR_EXIT(&cprinfo); /* drops &spa->spa_scrub_lock */ 3414789Sahrens thread_exit(); 3415789Sahrens } 3416789Sahrens 3417789Sahrens void 3418789Sahrens spa_scrub_suspend(spa_t *spa) 3419789Sahrens { 3420789Sahrens mutex_enter(&spa->spa_scrub_lock); 34211544Seschrock spa->spa_scrub_suspended++; 3422789Sahrens while (spa->spa_scrub_active) { 3423789Sahrens cv_broadcast(&spa->spa_scrub_cv); 3424789Sahrens cv_wait(&spa->spa_scrub_cv, &spa->spa_scrub_lock); 3425789Sahrens } 3426789Sahrens while (spa->spa_scrub_inflight) 3427789Sahrens cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); 3428789Sahrens mutex_exit(&spa->spa_scrub_lock); 3429789Sahrens } 3430789Sahrens 3431789Sahrens void 3432789Sahrens spa_scrub_resume(spa_t *spa) 3433789Sahrens { 
3434789Sahrens mutex_enter(&spa->spa_scrub_lock); 34351544Seschrock ASSERT(spa->spa_scrub_suspended != 0); 34361544Seschrock if (--spa->spa_scrub_suspended == 0) 3437789Sahrens cv_broadcast(&spa->spa_scrub_cv); 3438789Sahrens mutex_exit(&spa->spa_scrub_lock); 3439789Sahrens } 3440789Sahrens 3441789Sahrens void 3442789Sahrens spa_scrub_restart(spa_t *spa, uint64_t txg) 3443789Sahrens { 3444789Sahrens /* 3445789Sahrens * Something happened (e.g. snapshot create/delete) that means 3446789Sahrens * we must restart any in-progress scrubs. The itinerary will 3447789Sahrens * fix this properly. 3448789Sahrens */ 3449789Sahrens mutex_enter(&spa->spa_scrub_lock); 3450789Sahrens spa->spa_scrub_restart_txg = txg; 3451789Sahrens mutex_exit(&spa->spa_scrub_lock); 3452789Sahrens } 3453789Sahrens 34541544Seschrock int 34551544Seschrock spa_scrub(spa_t *spa, pool_scrub_type_t type, boolean_t force) 3456789Sahrens { 3457789Sahrens space_seg_t *ss; 3458789Sahrens uint64_t mintxg, maxtxg; 3459789Sahrens vdev_t *rvd = spa->spa_root_vdev; 3460789Sahrens 34614808Sek110237 ASSERT(MUTEX_HELD(&spa_namespace_lock)); 34624808Sek110237 ASSERT(!spa_config_held(spa, RW_WRITER)); 34634808Sek110237 3464789Sahrens if ((uint_t)type >= POOL_SCRUB_TYPES) 3465789Sahrens return (ENOTSUP); 3466789Sahrens 34671544Seschrock mutex_enter(&spa->spa_scrub_lock); 34681544Seschrock 3469789Sahrens /* 3470789Sahrens * If there's a scrub or resilver already in progress, stop it. 3471789Sahrens */ 3472789Sahrens while (spa->spa_scrub_thread != NULL) { 3473789Sahrens /* 3474789Sahrens * Don't stop a resilver unless forced. 
3475789Sahrens */ 34761544Seschrock if (spa->spa_scrub_type == POOL_SCRUB_RESILVER && !force) { 34771544Seschrock mutex_exit(&spa->spa_scrub_lock); 3478789Sahrens return (EBUSY); 34791544Seschrock } 3480789Sahrens spa->spa_scrub_stop = 1; 3481789Sahrens cv_broadcast(&spa->spa_scrub_cv); 3482789Sahrens cv_wait(&spa->spa_scrub_cv, &spa->spa_scrub_lock); 3483789Sahrens } 3484789Sahrens 3485789Sahrens /* 3486789Sahrens * Terminate the previous traverse. 3487789Sahrens */ 3488789Sahrens if (spa->spa_scrub_th != NULL) { 3489789Sahrens traverse_fini(spa->spa_scrub_th); 3490789Sahrens spa->spa_scrub_th = NULL; 3491789Sahrens } 3492789Sahrens 34931544Seschrock if (rvd == NULL) { 34941544Seschrock ASSERT(spa->spa_scrub_stop == 0); 34951544Seschrock ASSERT(spa->spa_scrub_type == type); 34961544Seschrock ASSERT(spa->spa_scrub_restart_txg == 0); 34971544Seschrock mutex_exit(&spa->spa_scrub_lock); 34981544Seschrock return (0); 34991544Seschrock } 3500789Sahrens 3501789Sahrens mintxg = TXG_INITIAL - 1; 3502789Sahrens maxtxg = spa_last_synced_txg(spa) + 1; 3503789Sahrens 35041544Seschrock mutex_enter(&rvd->vdev_dtl_lock); 3505789Sahrens 35061544Seschrock if (rvd->vdev_dtl_map.sm_space == 0) { 35071544Seschrock /* 35081544Seschrock * The pool-wide DTL is empty. 35091732Sbonwick * If this is a resilver, there's nothing to do except 35101732Sbonwick * check whether any in-progress replacements have completed. 35111544Seschrock */ 35121732Sbonwick if (type == POOL_SCRUB_RESILVER) { 35131544Seschrock type = POOL_SCRUB_NONE; 35144451Seschrock spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); 35151732Sbonwick } 35161544Seschrock } else { 35171544Seschrock /* 35181544Seschrock * The pool-wide DTL is non-empty. 35191544Seschrock * If this is a normal scrub, upgrade to a resilver instead. 
35201544Seschrock */ 35211544Seschrock if (type == POOL_SCRUB_EVERYTHING) 35221544Seschrock type = POOL_SCRUB_RESILVER; 35231544Seschrock } 3524789Sahrens 35251544Seschrock if (type == POOL_SCRUB_RESILVER) { 3526789Sahrens /* 3527789Sahrens * Determine the resilvering boundaries. 3528789Sahrens * 3529789Sahrens * Note: (mintxg, maxtxg) is an open interval, 3530789Sahrens * i.e. mintxg and maxtxg themselves are not included. 3531789Sahrens * 3532789Sahrens * Note: for maxtxg, we MIN with spa_last_synced_txg(spa) + 1 3533789Sahrens * so we don't claim to resilver a txg that's still changing. 3534789Sahrens */ 3535789Sahrens ss = avl_first(&rvd->vdev_dtl_map.sm_root); 35361544Seschrock mintxg = ss->ss_start - 1; 3537789Sahrens ss = avl_last(&rvd->vdev_dtl_map.sm_root); 35381544Seschrock maxtxg = MIN(ss->ss_end, maxtxg); 35394451Seschrock 35404451Seschrock spa_event_notify(spa, NULL, ESC_ZFS_RESILVER_START); 3541789Sahrens } 3542789Sahrens 35431544Seschrock mutex_exit(&rvd->vdev_dtl_lock); 35441544Seschrock 35451544Seschrock spa->spa_scrub_stop = 0; 35461544Seschrock spa->spa_scrub_type = type; 35471544Seschrock spa->spa_scrub_restart_txg = 0; 35481544Seschrock 35491544Seschrock if (type != POOL_SCRUB_NONE) { 35501544Seschrock spa->spa_scrub_mintxg = mintxg; 3551789Sahrens spa->spa_scrub_maxtxg = maxtxg; 3552789Sahrens spa->spa_scrub_th = traverse_init(spa, spa_scrub_cb, NULL, 35531635Sbonwick ADVANCE_PRE | ADVANCE_PRUNE | ADVANCE_ZIL, 35541635Sbonwick ZIO_FLAG_CANFAIL); 3555789Sahrens traverse_add_pool(spa->spa_scrub_th, mintxg, maxtxg); 3556789Sahrens spa->spa_scrub_thread = thread_create(NULL, 0, 3557789Sahrens spa_scrub_thread, spa, 0, &p0, TS_RUN, minclsyspri); 3558789Sahrens } 3559789Sahrens 35601544Seschrock mutex_exit(&spa->spa_scrub_lock); 35611544Seschrock 3562789Sahrens return (0); 3563789Sahrens } 3564789Sahrens 35651544Seschrock /* 35661544Seschrock * ========================================================================== 35671544Seschrock * SPA async 
task processing 35681544Seschrock * ========================================================================== 35691544Seschrock */ 35701544Seschrock 35711544Seschrock static void 35724451Seschrock spa_async_remove(spa_t *spa, vdev_t *vd) 3573789Sahrens { 35741544Seschrock vdev_t *tvd; 35751544Seschrock int c; 35761544Seschrock 35774451Seschrock for (c = 0; c < vd->vdev_children; c++) { 35784451Seschrock tvd = vd->vdev_child[c]; 35794451Seschrock if (tvd->vdev_remove_wanted) { 35804451Seschrock tvd->vdev_remove_wanted = 0; 35814451Seschrock vdev_set_state(tvd, B_FALSE, VDEV_STATE_REMOVED, 35824451Seschrock VDEV_AUX_NONE); 35835329Sgw25295 vdev_clear(spa, tvd, B_TRUE); 35844451Seschrock vdev_config_dirty(tvd->vdev_top); 35851544Seschrock } 35864451Seschrock spa_async_remove(spa, tvd); 35871544Seschrock } 35881544Seschrock } 35891544Seschrock 35901544Seschrock static void 35911544Seschrock spa_async_thread(spa_t *spa) 35921544Seschrock { 35931544Seschrock int tasks; 35944451Seschrock uint64_t txg; 35951544Seschrock 35961544Seschrock ASSERT(spa->spa_sync_on); 3597789Sahrens 35981544Seschrock mutex_enter(&spa->spa_async_lock); 35991544Seschrock tasks = spa->spa_async_tasks; 36001544Seschrock spa->spa_async_tasks = 0; 36011544Seschrock mutex_exit(&spa->spa_async_lock); 36021544Seschrock 36031544Seschrock /* 36041635Sbonwick * See if the config needs to be updated. 36051635Sbonwick */ 36061635Sbonwick if (tasks & SPA_ASYNC_CONFIG_UPDATE) { 36071635Sbonwick mutex_enter(&spa_namespace_lock); 36081635Sbonwick spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); 36091635Sbonwick mutex_exit(&spa_namespace_lock); 36101635Sbonwick } 36111635Sbonwick 36121635Sbonwick /* 36134451Seschrock * See if any devices need to be marked REMOVED. 
36145329Sgw25295 * 36155329Sgw25295 * XXX - We avoid doing this when we are in 36165329Sgw25295 * I/O failure state since spa_vdev_enter() grabs 36175329Sgw25295 * the namespace lock and would not be able to obtain 36185329Sgw25295 * the writer config lock. 36191544Seschrock */ 36205329Sgw25295 if (tasks & SPA_ASYNC_REMOVE && 36215329Sgw25295 spa_state(spa) != POOL_STATE_IO_FAILURE) { 36224451Seschrock txg = spa_vdev_enter(spa); 36234451Seschrock spa_async_remove(spa, spa->spa_root_vdev); 36244451Seschrock (void) spa_vdev_exit(spa, NULL, txg, 0); 36254451Seschrock } 36261544Seschrock 36271544Seschrock /* 36281544Seschrock * If any devices are done replacing, detach them. 36291544Seschrock */ 36304451Seschrock if (tasks & SPA_ASYNC_RESILVER_DONE) 36314451Seschrock spa_vdev_resilver_done(spa); 3632789Sahrens 36331544Seschrock /* 36344451Seschrock * Kick off a scrub. When starting a RESILVER scrub (or an EVERYTHING 36354451Seschrock * scrub which can become a resilver), we need to hold 36364451Seschrock * spa_namespace_lock() because the sysevent we post via 36374451Seschrock * spa_event_notify() needs to get the name of the pool. 36381544Seschrock */ 36394451Seschrock if (tasks & SPA_ASYNC_SCRUB) { 36404451Seschrock mutex_enter(&spa_namespace_lock); 36411544Seschrock VERIFY(spa_scrub(spa, POOL_SCRUB_EVERYTHING, B_TRUE) == 0); 36424451Seschrock mutex_exit(&spa_namespace_lock); 36434451Seschrock } 36441544Seschrock 36451544Seschrock /* 36461544Seschrock * Kick off a resilver. 36471544Seschrock */ 36484451Seschrock if (tasks & SPA_ASYNC_RESILVER) { 36494451Seschrock mutex_enter(&spa_namespace_lock); 36501544Seschrock VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0); 36514451Seschrock mutex_exit(&spa_namespace_lock); 36524451Seschrock } 36531544Seschrock 36541544Seschrock /* 36551544Seschrock * Let the world know that we're done. 
36561544Seschrock */ 36571544Seschrock mutex_enter(&spa->spa_async_lock); 36581544Seschrock spa->spa_async_thread = NULL; 36591544Seschrock cv_broadcast(&spa->spa_async_cv); 36601544Seschrock mutex_exit(&spa->spa_async_lock); 36611544Seschrock thread_exit(); 36621544Seschrock } 36631544Seschrock 36641544Seschrock void 36651544Seschrock spa_async_suspend(spa_t *spa) 36661544Seschrock { 36671544Seschrock mutex_enter(&spa->spa_async_lock); 36681544Seschrock spa->spa_async_suspended++; 36691544Seschrock while (spa->spa_async_thread != NULL) 36701544Seschrock cv_wait(&spa->spa_async_cv, &spa->spa_async_lock); 36711544Seschrock mutex_exit(&spa->spa_async_lock); 36721544Seschrock } 36731544Seschrock 36741544Seschrock void 36751544Seschrock spa_async_resume(spa_t *spa) 36761544Seschrock { 36771544Seschrock mutex_enter(&spa->spa_async_lock); 36781544Seschrock ASSERT(spa->spa_async_suspended != 0); 36791544Seschrock spa->spa_async_suspended--; 36801544Seschrock mutex_exit(&spa->spa_async_lock); 36811544Seschrock } 36821544Seschrock 36831544Seschrock static void 36841544Seschrock spa_async_dispatch(spa_t *spa) 36851544Seschrock { 36861544Seschrock mutex_enter(&spa->spa_async_lock); 36871544Seschrock if (spa->spa_async_tasks && !spa->spa_async_suspended && 36881635Sbonwick spa->spa_async_thread == NULL && 36891635Sbonwick rootdir != NULL && !vn_is_readonly(rootdir)) 36901544Seschrock spa->spa_async_thread = thread_create(NULL, 0, 36911544Seschrock spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri); 36921544Seschrock mutex_exit(&spa->spa_async_lock); 36931544Seschrock } 36941544Seschrock 36951544Seschrock void 36961544Seschrock spa_async_request(spa_t *spa, int task) 36971544Seschrock { 36981544Seschrock mutex_enter(&spa->spa_async_lock); 36991544Seschrock spa->spa_async_tasks |= task; 37001544Seschrock mutex_exit(&spa->spa_async_lock); 3701789Sahrens } 3702789Sahrens 3703789Sahrens /* 3704789Sahrens * ========================================================================== 
3705789Sahrens * SPA syncing routines 3706789Sahrens * ========================================================================== 3707789Sahrens */ 3708789Sahrens 3709789Sahrens static void 3710789Sahrens spa_sync_deferred_frees(spa_t *spa, uint64_t txg) 3711789Sahrens { 3712789Sahrens bplist_t *bpl = &spa->spa_sync_bplist; 3713789Sahrens dmu_tx_t *tx; 3714789Sahrens blkptr_t blk; 3715789Sahrens uint64_t itor = 0; 3716789Sahrens zio_t *zio; 3717789Sahrens int error; 3718789Sahrens uint8_t c = 1; 3719789Sahrens 3720789Sahrens zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CONFIG_HELD); 3721789Sahrens 3722789Sahrens while (bplist_iterate(bpl, &itor, &blk) == 0) 3723789Sahrens zio_nowait(zio_free(zio, spa, txg, &blk, NULL, NULL)); 3724789Sahrens 3725789Sahrens error = zio_wait(zio); 3726789Sahrens ASSERT3U(error, ==, 0); 3727789Sahrens 3728789Sahrens tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); 3729789Sahrens bplist_vacate(bpl, tx); 3730789Sahrens 3731789Sahrens /* 3732789Sahrens * Pre-dirty the first block so we sync to convergence faster. 3733789Sahrens * (Usually only the first block is needed.) 
3734789Sahrens */ 3735789Sahrens dmu_write(spa->spa_meta_objset, spa->spa_sync_bplist_obj, 0, 1, &c, tx); 3736789Sahrens dmu_tx_commit(tx); 3737789Sahrens } 3738789Sahrens 3739789Sahrens static void 37402082Seschrock spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx) 37412082Seschrock { 37422082Seschrock char *packed = NULL; 37432082Seschrock size_t nvsize = 0; 37442082Seschrock dmu_buf_t *db; 37452082Seschrock 37462082Seschrock VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0); 37472082Seschrock 37482082Seschrock packed = kmem_alloc(nvsize, KM_SLEEP); 37492082Seschrock 37502082Seschrock VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR, 37512082Seschrock KM_SLEEP) == 0); 37522082Seschrock 37532082Seschrock dmu_write(spa->spa_meta_objset, obj, 0, nvsize, packed, tx); 37542082Seschrock 37552082Seschrock kmem_free(packed, nvsize); 37562082Seschrock 37572082Seschrock VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); 37582082Seschrock dmu_buf_will_dirty(db, tx); 37592082Seschrock *(uint64_t *)db->db_data = nvsize; 37602082Seschrock dmu_buf_rele(db, FTAG); 37612082Seschrock } 37622082Seschrock 37632082Seschrock static void 37645450Sbrendan spa_sync_aux_dev(spa_t *spa, spa_aux_vdev_t *sav, dmu_tx_t *tx, 37655450Sbrendan const char *config, const char *entry) 37662082Seschrock { 37672082Seschrock nvlist_t *nvroot; 37685450Sbrendan nvlist_t **list; 37692082Seschrock int i; 37702082Seschrock 37715450Sbrendan if (!sav->sav_sync) 37722082Seschrock return; 37732082Seschrock 37742082Seschrock /* 37755450Sbrendan * Update the MOS nvlist describing the list of available devices. 37765450Sbrendan * spa_validate_aux() will have already made sure this nvlist is 37774451Seschrock * valid and the vdevs are labeled appropriately. 
37782082Seschrock */ 37795450Sbrendan if (sav->sav_object == 0) { 37805450Sbrendan sav->sav_object = dmu_object_alloc(spa->spa_meta_objset, 37815450Sbrendan DMU_OT_PACKED_NVLIST, 1 << 14, DMU_OT_PACKED_NVLIST_SIZE, 37825450Sbrendan sizeof (uint64_t), tx); 37832082Seschrock VERIFY(zap_update(spa->spa_meta_objset, 37845450Sbrendan DMU_POOL_DIRECTORY_OBJECT, entry, sizeof (uint64_t), 1, 37855450Sbrendan &sav->sav_object, tx) == 0); 37862082Seschrock } 37872082Seschrock 37882082Seschrock VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); 37895450Sbrendan if (sav->sav_count == 0) { 37905450Sbrendan VERIFY(nvlist_add_nvlist_array(nvroot, config, NULL, 0) == 0); 37912082Seschrock } else { 37925450Sbrendan list = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); 37935450Sbrendan for (i = 0; i < sav->sav_count; i++) 37945450Sbrendan list[i] = vdev_config_generate(spa, sav->sav_vdevs[i], 37955450Sbrendan B_FALSE, B_FALSE, B_TRUE); 37965450Sbrendan VERIFY(nvlist_add_nvlist_array(nvroot, config, list, 37975450Sbrendan sav->sav_count) == 0); 37985450Sbrendan for (i = 0; i < sav->sav_count; i++) 37995450Sbrendan nvlist_free(list[i]); 38005450Sbrendan kmem_free(list, sav->sav_count * sizeof (void *)); 38012082Seschrock } 38022082Seschrock 38035450Sbrendan spa_sync_nvlist(spa, sav->sav_object, nvroot, tx); 38042926Sek110237 nvlist_free(nvroot); 38052082Seschrock 38065450Sbrendan sav->sav_sync = B_FALSE; 38072082Seschrock } 38082082Seschrock 38092082Seschrock static void 3810789Sahrens spa_sync_config_object(spa_t *spa, dmu_tx_t *tx) 3811789Sahrens { 3812789Sahrens nvlist_t *config; 3813789Sahrens 3814789Sahrens if (list_is_empty(&spa->spa_dirty_list)) 3815789Sahrens return; 3816789Sahrens 3817789Sahrens config = spa_config_generate(spa, NULL, dmu_tx_get_txg(tx), B_FALSE); 3818789Sahrens 38191635Sbonwick if (spa->spa_config_syncing) 38201635Sbonwick nvlist_free(spa->spa_config_syncing); 38211635Sbonwick spa->spa_config_syncing = config; 3822789Sahrens 
38232082Seschrock spa_sync_nvlist(spa, spa->spa_config_object, config, tx); 3824789Sahrens } 3825789Sahrens 38265094Slling /* 38275094Slling * Set zpool properties. 38285094Slling */ 38293912Slling static void 38304543Smarks spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 38313912Slling { 38323912Slling spa_t *spa = arg1; 38335094Slling objset_t *mos = spa->spa_meta_objset; 38343912Slling nvlist_t *nvp = arg2; 38355094Slling nvpair_t *elem; 38364451Seschrock uint64_t intval; 38375363Seschrock char *strval, *slash; 38385094Slling zpool_prop_t prop; 38395094Slling const char *propname; 38405094Slling zprop_type_t proptype; 38415094Slling 38425094Slling elem = NULL; 38435094Slling while ((elem = nvlist_next_nvpair(nvp, elem))) { 38445094Slling switch (prop = zpool_name_to_prop(nvpair_name(elem))) { 38455094Slling case ZPOOL_PROP_VERSION: 38465094Slling /* 38475094Slling * Only set version for non-zpool-creation cases 38485094Slling * (set/import). spa_create() needs special care 38495094Slling * for version setting. 38505094Slling */ 38515094Slling if (tx->tx_txg != TXG_INITIAL) { 38525094Slling VERIFY(nvpair_value_uint64(elem, 38535094Slling &intval) == 0); 38545094Slling ASSERT(intval <= SPA_VERSION); 38555094Slling ASSERT(intval >= spa_version(spa)); 38565094Slling spa->spa_uberblock.ub_version = intval; 38575094Slling vdev_config_dirty(spa->spa_root_vdev); 38585094Slling } 38595094Slling break; 38605094Slling 38615094Slling case ZPOOL_PROP_ALTROOT: 38625094Slling /* 38635094Slling * 'altroot' is a non-persistent property. It should 38645094Slling * have been set temporarily at creation or import time. 38655094Slling */ 38665094Slling ASSERT(spa->spa_root != NULL); 38675094Slling break; 38685094Slling 38695363Seschrock case ZPOOL_PROP_CACHEFILE: 38705094Slling /* 38715363Seschrock * 'cachefile' is a non-persistent property, but note 38725363Seschrock * an async request that the config cache needs to be 38735363Seschrock * udpated. 
38745094Slling */ 38755363Seschrock VERIFY(nvpair_value_string(elem, &strval) == 0); 38765363Seschrock if (spa->spa_config_dir) 38775363Seschrock spa_strfree(spa->spa_config_dir); 38785363Seschrock if (spa->spa_config_file) 38795363Seschrock spa_strfree(spa->spa_config_file); 38805363Seschrock 38815363Seschrock if (strval[0] == '\0') { 38825363Seschrock spa->spa_config_dir = NULL; 38835363Seschrock spa->spa_config_file = NULL; 38845363Seschrock } else if (strcmp(strval, "none") == 0) { 38855363Seschrock spa->spa_config_dir = spa_strdup(strval); 38865363Seschrock spa->spa_config_file = NULL; 38875363Seschrock } else { 38885621Seschrock /* 38895621Seschrock * If the cachefile is in the root directory, 38905621Seschrock * we will end up with an empty string for 38915621Seschrock * spa_config_dir. This value is only ever 38925621Seschrock * used when concatenated with '/', so an empty 38935621Seschrock * string still behaves correctly and keeps the 38945621Seschrock * rest of the code simple. 38955621Seschrock */ 38965363Seschrock slash = strrchr(strval, '/'); 38975363Seschrock ASSERT(slash != NULL); 38985363Seschrock *slash = '\0'; 38995621Seschrock if (strcmp(strval, spa_config_dir) == 0 && 39005621Seschrock strcmp(slash + 1, ZPOOL_CACHE_FILE) == 0) { 39015621Seschrock spa->spa_config_dir = NULL; 39025621Seschrock spa->spa_config_file = NULL; 39035621Seschrock } else { 39045621Seschrock spa->spa_config_dir = 39055621Seschrock spa_strdup(strval); 39065621Seschrock spa->spa_config_file = 39075621Seschrock spa_strdup(slash + 1); 39085621Seschrock } 39095363Seschrock } 39105363Seschrock spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); 39114543Smarks break; 39125094Slling default: 39135094Slling /* 39145094Slling * Set pool property values in the poolprops mos object. 
			 */
			mutex_enter(&spa->spa_props_lock);
			if (spa->spa_pool_props_object == 0) {
				objset_t *mos = spa->spa_meta_objset;

				/*
				 * Lazily create the pool-props ZAP object and
				 * record it in the pool directory.
				 */
				VERIFY((spa->spa_pool_props_object =
				    zap_create(mos, DMU_OT_POOL_PROPS,
				    DMU_OT_NONE, 0, tx)) > 0);

				VERIFY(zap_update(mos,
				    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS,
				    8, 1, &spa->spa_pool_props_object, tx)
				    == 0);
			}
			mutex_exit(&spa->spa_props_lock);

			/* normalize the property name */
			propname = zpool_prop_to_name(prop);
			proptype = zpool_prop_get_type(prop);

			if (nvpair_type(elem) == DATA_TYPE_STRING) {
				ASSERT(proptype == PROP_TYPE_STRING);
				VERIFY(nvpair_value_string(elem, &strval) == 0);
				VERIFY(zap_update(mos,
				    spa->spa_pool_props_object, propname,
				    1, strlen(strval) + 1, strval, tx) == 0);

			} else if (nvpair_type(elem) == DATA_TYPE_UINT64) {
				VERIFY(nvpair_value_uint64(elem, &intval) == 0);

				if (proptype == PROP_TYPE_INDEX) {
					const char *unused;
					/*
					 * Sanity check: an index property must
					 * map to a known string value.
					 */
					VERIFY(zpool_prop_index_to_string(
					    prop, intval, &unused) == 0);
				}
				VERIFY(zap_update(mos,
				    spa->spa_pool_props_object, propname,
				    8, 1, &intval, tx) == 0);
			} else {
				ASSERT(0); /* not allowed */
			}

			/*
			 * Refresh the in-core cached copy of the property,
			 * if we keep one.
			 */
			switch (prop) {
			case ZPOOL_PROP_DELEGATION:
				spa->spa_delegation = intval;
				break;
			case ZPOOL_PROP_BOOTFS:
				spa->spa_bootfs = intval;
				break;
			case ZPOOL_PROP_FAILUREMODE:
				spa->spa_failmode = intval;
				break;
			default:
				break;
			}
		}

		/* log internal history if this is not a zpool create */
		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY &&
		    tx->tx_txg != TXG_INITIAL) {
			spa_history_internal_log(LOG_POOL_PROPSET,
			    spa, tx, cr, "%s %lld %s",
			    nvpair_name(elem), intval, spa->spa_name);
		}
	}
}

/*
 * Sync the specified transaction group.  New blocks may be dirtied as
 * part of the process, so we iterate until it converges.
 */
void
spa_sync(spa_t *spa, uint64_t txg)
{
	dsl_pool_t *dp = spa->spa_dsl_pool;
	objset_t *mos = spa->spa_meta_objset;
	bplist_t *bpl = &spa->spa_sync_bplist;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *vd;
	vdev_t *svd[SPA_DVAS_PER_BP];	/* random subset to sync the ub to */
	int svdcount = 0;
	dmu_tx_t *tx;
	int dirty_vdevs;

	/*
	 * Lock out configuration changes.
	 */
	spa_config_enter(spa, RW_READER, FTAG);

	spa->spa_syncing_txg = txg;
	spa->spa_sync_pass = 0;	/* incremented at the top of each pass below */

	VERIFY(0 == bplist_open(bpl, mos, spa->spa_sync_bplist_obj));

	tx = dmu_tx_create_assigned(dp, txg);

	/*
	 * If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg,
	 * set spa_deflate if we have no raid-z vdevs.
	 */
	if (spa->spa_ubsync.ub_version < SPA_VERSION_RAIDZ_DEFLATE &&
	    spa->spa_uberblock.ub_version >= SPA_VERSION_RAIDZ_DEFLATE) {
		int i;

		/* Any child with a non-default deflate ratio disqualifies. */
		for (i = 0; i < rvd->vdev_children; i++) {
			vd = rvd->vdev_child[i];
			if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE)
				break;
		}
		if (i == rvd->vdev_children) {
			spa->spa_deflate = TRUE;
			VERIFY(0 == zap_add(spa->spa_meta_objset,
			    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
			    sizeof (uint64_t), 1, &spa->spa_deflate, tx));
		}
	}

	/*
	 * If anything has changed in this txg, push the deferred frees
	 * from the previous txg.  If not, leave them alone so that we
	 * don't generate work on an otherwise idle system.
	 */
	if (!txg_list_empty(&dp->dp_dirty_datasets, txg) ||
	    !txg_list_empty(&dp->dp_dirty_dirs, txg) ||
	    !txg_list_empty(&dp->dp_sync_tasks, txg))
		spa_sync_deferred_frees(spa, txg);

	/*
	 * Iterate to convergence.  Syncing one pass may dirty more vdevs,
	 * so we keep going until a pass completes with no dirty vdevs.
	 */
	do {
		spa->spa_sync_pass++;

		spa_sync_config_object(spa, tx);
		spa_sync_aux_dev(spa, &spa->spa_spares, tx,
		    ZPOOL_CONFIG_SPARES, DMU_POOL_SPARES);
		spa_sync_aux_dev(spa, &spa->spa_l2cache, tx,
		    ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE);
		spa_errlog_sync(spa, txg);
		dsl_pool_sync(dp, txg);

		dirty_vdevs = 0;
		/* NB: assignment in the loop condition is intentional */
		while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg)) {
			vdev_sync(vd, txg);
			dirty_vdevs++;
		}

		bplist_sync(bpl, tx);
	} while (dirty_vdevs);

	bplist_close(bpl);

	dprintf("txg %llu passes %d\n", txg, spa->spa_sync_pass);

	/*
	 * Rewrite the vdev configuration (which includes the uberblock)
	 * to commit the transaction group.
	 *
	 * If there are no dirty vdevs, we sync the uberblock to a few
	 * random top-level vdevs that are known to be visible in the
	 * config cache (see spa_vdev_add() for details).  If there *are*
	 * dirty vdevs -- or if the sync to our random subset fails --
	 * then sync the uberblock to all vdevs.
	 */
	if (list_is_empty(&spa->spa_dirty_list)) {
		int children = rvd->vdev_children;
		int c0 = spa_get_random(children);
		int c;

		/* Start at a random child; skip hole and log devices. */
		for (c = 0; c < children; c++) {
			vd = rvd->vdev_child[(c0 + c) % children];
			if (vd->vdev_ms_array == 0 || vd->vdev_islog)
				continue;
			svd[svdcount++] = vd;
			if (svdcount == SPA_DVAS_PER_BP)
				break;
		}
	}
	/* Fall back to syncing all children if the subset sync fails. */
	if (svdcount == 0 || vdev_config_sync(svd, svdcount, txg) != 0)
		VERIFY3U(vdev_config_sync(rvd->vdev_child,
		    rvd->vdev_children, txg), ==, 0);

	dmu_tx_commit(tx);

	/*
	 * Clear the dirty config list.
	 */
	while ((vd = list_head(&spa->spa_dirty_list)) != NULL)
		vdev_config_clean(vd);

	/*
	 * Now that the new config has synced transactionally,
	 * let it become visible to the config cache.
	 */
	if (spa->spa_config_syncing != NULL) {
		spa_config_set(spa, spa->spa_config_syncing);
		spa->spa_config_txg = txg;
		spa->spa_config_syncing = NULL;
	}

	/*
	 * Make a stable copy of the fully synced uberblock.
	 * We use this as the root for pool traversals.
	 */
	spa->spa_traverse_wanted = 1;	/* tells traverse_more() to stop */

	spa_scrub_suspend(spa);	/* stop scrubbing and finish I/Os */

	rw_enter(&spa->spa_traverse_lock, RW_WRITER);
	spa->spa_traverse_wanted = 0;
	spa->spa_ubsync = spa->spa_uberblock;
	rw_exit(&spa->spa_traverse_lock);

	spa_scrub_resume(spa);	/* resume scrub with new ubsync */

	/*
	 * Clean up the ZIL records for the synced txg.
	 */
	dsl_pool_zil_clean(dp);

	/*
	 * Update usable space statistics.
	 */
	while (vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)))
		vdev_sync_done(vd, txg);

	/*
	 * It had better be the case that we didn't dirty anything
	 * since vdev_config_sync().
	 */
	ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg));
	ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg));
	ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg));
	ASSERT(bpl->bpl_queue == NULL);

	spa_config_exit(spa, FTAG);

	/*
	 * If any async tasks have been requested, kick them off.
	 */
	spa_async_dispatch(spa);
}

/*
 * Sync all pools.  We don't want to hold the namespace lock across these
 * operations, so we take a reference on the spa_t and drop the lock during the
 * sync.
 */
void
spa_sync_allpools(void)
{
	spa_t *spa = NULL;
	mutex_enter(&spa_namespace_lock);
	while ((spa = spa_next(spa)) != NULL) {
		if (spa_state(spa) != POOL_STATE_ACTIVE)
			continue;
		/*
		 * The open ref keeps the spa alive while the namespace
		 * lock is dropped around the (potentially long) wait.
		 */
		spa_open_ref(spa, FTAG);
		mutex_exit(&spa_namespace_lock);
		txg_wait_synced(spa_get_dsl(spa), 0);
		mutex_enter(&spa_namespace_lock);
		spa_close(spa, FTAG);
	}
	mutex_exit(&spa_namespace_lock);
}

/*
 * ==========================================================================
 * Miscellaneous routines
 * ==========================================================================
 */

/*
 * Remove all pools in the system.
 */
void
spa_evict_all(void)
{
	spa_t *spa;

	/*
	 * Remove all cached state.  All pools should be closed now,
	 * so every spa in the AVL tree should be unreferenced.
	 */
	mutex_enter(&spa_namespace_lock);
	while ((spa = spa_next(NULL)) != NULL) {
		/*
		 * Stop async tasks.  The async thread may need to detach
		 * a device that's been replaced, which requires grabbing
		 * spa_namespace_lock, so we must drop it here.
		 */
		spa_open_ref(spa, FTAG);
		mutex_exit(&spa_namespace_lock);
		spa_async_suspend(spa);
		mutex_enter(&spa_namespace_lock);
		VERIFY(spa_scrub(spa, POOL_SCRUB_NONE, B_TRUE) == 0);
		spa_close(spa, FTAG);

		if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
			spa_unload(spa);
			spa_deactivate(spa);
		}
		spa_remove(spa);
	}
	mutex_exit(&spa_namespace_lock);
}

/*
 * Look up a vdev by guid, starting the search at this pool's root vdev
 * (delegates to vdev_lookup_by_guid()).
 */
vdev_t *
spa_lookup_by_guid(spa_t *spa, uint64_t guid)
{
	return (vdev_lookup_by_guid(spa->spa_root_vdev, guid));
}

/*
 * Upgrade the pool's on-disk version to 'version', dirty the config so the
 * new version is written out, and wait for the change to sync to disk.
 */
void
spa_upgrade(spa_t *spa, uint64_t version)
{
	spa_config_enter(spa, RW_WRITER, FTAG);

	/*
	 * This should only be called for a non-faulted pool, and since a
	 * future version would result in an unopenable pool, this shouldn't be
	 * possible.
	 */
	ASSERT(spa->spa_uberblock.ub_version <= SPA_VERSION);
	ASSERT(version >= spa->spa_uberblock.ub_version);

	spa->spa_uberblock.ub_version = version;
	vdev_config_dirty(spa->spa_root_vdev);

	spa_config_exit(spa, FTAG);

	txg_wait_synced(spa_get_dsl(spa), 0);
}

/*
 * Returns B_TRUE if 'guid' matches one of this pool's spares -- either an
 * active spare vdev or an entry still pending in the spare config.
 */
boolean_t
spa_has_spare(spa_t *spa, uint64_t guid)
{
	int i;
	uint64_t spareguid;
	spa_aux_vdev_t *sav = &spa->spa_spares;

	for (i = 0; i < sav->sav_count; i++)
		if (sav->sav_vdevs[i]->vdev_guid == guid)
			return (B_TRUE);

	for (i = 0; i < sav->sav_npending; i++) {
		if (nvlist_lookup_uint64(sav->sav_pending[i], ZPOOL_CONFIG_GUID,
		    &spareguid) == 0 && spareguid == guid)
			return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * Post a sysevent corresponding to the given event.  The 'name' must be one of
 * the event definitions in sys/sysevent/eventdefs.h.  The payload will be
 * filled in from the spa and (optionally) the vdev.  This doesn't do anything
 * in the userland libzpool, as we don't want consumers to misinterpret ztest
 * or zdb as real changes.
42744451Seschrock */ 42754451Seschrock void 42764451Seschrock spa_event_notify(spa_t *spa, vdev_t *vd, const char *name) 42774451Seschrock { 42784451Seschrock #ifdef _KERNEL 42794451Seschrock sysevent_t *ev; 42804451Seschrock sysevent_attr_list_t *attr = NULL; 42814451Seschrock sysevent_value_t value; 42824451Seschrock sysevent_id_t eid; 42834451Seschrock 42844451Seschrock ev = sysevent_alloc(EC_ZFS, (char *)name, SUNW_KERN_PUB "zfs", 42854451Seschrock SE_SLEEP); 42864451Seschrock 42874451Seschrock value.value_type = SE_DATA_TYPE_STRING; 42884451Seschrock value.value.sv_string = spa_name(spa); 42894451Seschrock if (sysevent_add_attr(&attr, ZFS_EV_POOL_NAME, &value, SE_SLEEP) != 0) 42904451Seschrock goto done; 42914451Seschrock 42924451Seschrock value.value_type = SE_DATA_TYPE_UINT64; 42934451Seschrock value.value.sv_uint64 = spa_guid(spa); 42944451Seschrock if (sysevent_add_attr(&attr, ZFS_EV_POOL_GUID, &value, SE_SLEEP) != 0) 42954451Seschrock goto done; 42964451Seschrock 42974451Seschrock if (vd) { 42984451Seschrock value.value_type = SE_DATA_TYPE_UINT64; 42994451Seschrock value.value.sv_uint64 = vd->vdev_guid; 43004451Seschrock if (sysevent_add_attr(&attr, ZFS_EV_VDEV_GUID, &value, 43014451Seschrock SE_SLEEP) != 0) 43024451Seschrock goto done; 43034451Seschrock 43044451Seschrock if (vd->vdev_path) { 43054451Seschrock value.value_type = SE_DATA_TYPE_STRING; 43064451Seschrock value.value.sv_string = vd->vdev_path; 43074451Seschrock if (sysevent_add_attr(&attr, ZFS_EV_VDEV_PATH, 43084451Seschrock &value, SE_SLEEP) != 0) 43094451Seschrock goto done; 43104451Seschrock } 43114451Seschrock } 43124451Seschrock 43135756Seschrock if (sysevent_attach_attributes(ev, attr) != 0) 43145756Seschrock goto done; 43155756Seschrock attr = NULL; 43165756Seschrock 43174451Seschrock (void) log_sysevent(ev, SE_SLEEP, &eid); 43184451Seschrock 43194451Seschrock done: 43204451Seschrock if (attr) 43214451Seschrock sysevent_free_attr(attr); 43224451Seschrock sysevent_free(ev); 
43234451Seschrock #endif 43244451Seschrock } 4325