/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * This file contains all the routines used when modifying on-disk SPA state.
 * This includes opening, importing, destroying, exporting a pool, and syncing
 * a pool.
 */

#include <sys/zfs_context.h>
#include <sys/fm/fs/zfs.h>
#include <sys/spa_impl.h>
#include <sys/zio.h>
#include <sys/zio_checksum.h>
#include <sys/zio_compress.h>
#include <sys/dmu.h>
#include <sys/dmu_tx.h>
#include <sys/zap.h>
#include <sys/zil.h>
#include <sys/vdev_impl.h>
#include <sys/metaslab.h>
#include <sys/uberblock_impl.h>
#include <sys/txg.h>
#include <sys/avl.h>
#include <sys/dmu_traverse.h>
#include <sys/dmu_objset.h>
#include <sys/unique.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_synctask.h>
#include <sys/fs/zfs.h>
#include <sys/arc.h>
#include <sys/callb.h>
#include <sys/systeminfo.h>
#include <sys/sunddi.h>

#include "zfs_prop.h"

int zio_taskq_threads = 8;

static void spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx);

/*
 * ==========================================================================
 * SPA properties routines
 * ==========================================================================
 */

/*
 * Add a (source=src, propname=propval) list to an nvlist.
 */
static int
spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval,
    uint64_t intval, zprop_source_t src)
{
	const char *propname = zpool_prop_to_name(prop);
	nvlist_t *propval;
	int err = 0;
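
	/*
	 * Each property is wrapped in a nested nvlist that records both
	 * where the value came from (ZPROP_SOURCE) and the value itself
	 * (ZPROP_VALUE, either a string or a uint64).
	 */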
	if (err = nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP))
		return (err);

	if (err = nvlist_add_uint64(propval, ZPROP_SOURCE, src))
		goto out;

	if (strval != NULL) {
		if (err = nvlist_add_string(propval, ZPROP_VALUE, strval))
			goto out;
	} else {
		if (err = nvlist_add_uint64(propval, ZPROP_VALUE, intval))
			goto out;
	}

	err = nvlist_add_nvlist(nvl, propname, propval);
out:
	nvlist_free(propval);
	return (err);
}

/*
 * Get property values from the spa configuration.
 */
static int
spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
{
	uint64_t size = spa_get_space(spa);
	uint64_t used = spa_get_alloc(spa);
	uint64_t cap, version;
	zprop_source_t src = ZPROP_SRC_NONE;
	int err;
	char *cachefile;
	size_t len;

	/*
	 * readonly properties
	 */
	if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa->spa_name,
	    0, src))
		return (err);

	if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src))
		return (err);

	if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_USED, NULL, used, src))
		return (err);

	if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_AVAILABLE, NULL,
	    size - used, src))
		return (err);
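
	/*
	 * Capacity is the percentage of pool space in use; guard against
	 * division by zero when the pool reports no space at all.
	 */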
	cap = (size == 0) ? 0 : (used * 100 / size);
	if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src))
		return (err);

	if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL,
	    spa_guid(spa), src))
		return (err);

	if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL,
	    spa->spa_root_vdev->vdev_state, src))
		return (err);

	/*
	 * settable properties that are not stored in the pool property object.
	 */
	version = spa_version(spa);
	if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION))
		src = ZPROP_SRC_DEFAULT;
	else
		src = ZPROP_SRC_LOCAL;
	if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL,
	    version, src))
		return (err);

	if (spa->spa_root != NULL) {
		src = ZPROP_SRC_LOCAL;
		if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT,
		    spa->spa_root, 0, src))
			return (err);
	}

	if (spa->spa_config_dir != NULL) {
		if (strcmp(spa->spa_config_dir, "none") == 0) {
			err = spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE,
			    spa->spa_config_dir, 0, ZPROP_SRC_LOCAL);
		} else {
			len = strlen(spa->spa_config_dir) +
			    strlen(spa->spa_config_file) + 2;
			cachefile = kmem_alloc(len, KM_SLEEP);
			(void) snprintf(cachefile, len, "%s/%s",
			    spa->spa_config_dir, spa->spa_config_file);
			err = spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE,
			    cachefile, 0, ZPROP_SRC_LOCAL);
			kmem_free(cachefile, len);
		}

		if (err)
			return (err);
	}

	return (0);
}

/*
 * Get zpool property values.
 */
int
spa_prop_get(spa_t *spa, nvlist_t **nvp)
{
	zap_cursor_t zc;
	zap_attribute_t za;
	objset_t *mos = spa->spa_meta_objset;
	int err;

	if (err = nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP))
		return (err);

	/*
	 * Get properties from the spa config.
	 */
	if (err = spa_prop_get_config(spa, nvp))
		goto out;

	mutex_enter(&spa->spa_props_lock);
	/* If no pool property object, no more props to get. */
	if (spa->spa_pool_props_object == 0) {
		mutex_exit(&spa->spa_props_lock);
		return (0);
	}

	/*
	 * Get properties from the MOS pool property object.
	 */
	for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object);
	    (err = zap_cursor_retrieve(&zc, &za)) == 0;
	    zap_cursor_advance(&zc)) {
		uint64_t intval = 0;
		char *strval = NULL;
		zprop_source_t src = ZPROP_SRC_DEFAULT;
		zpool_prop_t prop;

		if ((prop = zpool_name_to_prop(za.za_name)) == ZPROP_INVAL)
			continue;
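
		/*
		 * za_integer_length encodes the attribute's type: 8 means
		 * the value is a uint64 (for bootfs, the dataset's object
		 * number, which is translated to a name below), and 1 means
		 * the value is a byte array holding a string.
		 */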
		switch (za.za_integer_length) {
		case 8:
			/* integer property */
			if (za.za_first_integer !=
			    zpool_prop_default_numeric(prop))
				src = ZPROP_SRC_LOCAL;

			if (prop == ZPOOL_PROP_BOOTFS) {
				dsl_pool_t *dp;
				dsl_dataset_t *ds = NULL;

				dp = spa_get_dsl(spa);
				rw_enter(&dp->dp_config_rwlock, RW_READER);
				if (err = dsl_dataset_open_obj(dp,
				    za.za_first_integer, NULL, DS_MODE_NONE,
				    FTAG, &ds)) {
					rw_exit(&dp->dp_config_rwlock);
					break;
				}

				strval = kmem_alloc(
				    MAXNAMELEN + strlen(MOS_DIR_NAME) + 1,
				    KM_SLEEP);
				dsl_dataset_name(ds, strval);
				dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
				rw_exit(&dp->dp_config_rwlock);
			} else {
				strval = NULL;
				intval = za.za_first_integer;
			}

			err = spa_prop_add_list(*nvp, prop, strval,
			    intval, src);

			if (strval != NULL)
				kmem_free(strval,
				    MAXNAMELEN + strlen(MOS_DIR_NAME) + 1);

			break;

		case 1:
			/* string property */
			strval = kmem_alloc(za.za_num_integers, KM_SLEEP);
			err = zap_lookup(mos, spa->spa_pool_props_object,
			    za.za_name, 1, za.za_num_integers, strval);
			if (err) {
				kmem_free(strval, za.za_num_integers);
				break;
			}
			err = spa_prop_add_list(*nvp, prop, strval, 0, src);
			kmem_free(strval, za.za_num_integers);
			break;

		default:
			break;
		}
	}
	zap_cursor_fini(&zc);
	mutex_exit(&spa->spa_props_lock);
out:
	if (err && err != ENOENT) {
		nvlist_free(*nvp);
		return (err);
	}

	return (0);
}

/*
 * Validate the given pool properties nvlist and modify the list
 * for the property values to be set.
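 * On success, a bootfs value given by name has been replaced in the
 * list with the dataset's object number.
 */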
static int
spa_prop_validate(spa_t *spa, nvlist_t *props)
{
	nvpair_t *elem;
	int error = 0, reset_bootfs = 0;
	uint64_t objnum;

	elem = NULL;
	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
		zpool_prop_t prop;
		char *propname, *strval;
		uint64_t intval;
		vdev_t *rvdev;
		char *vdev_type;
		objset_t *os;
		char *slash;

		propname = nvpair_name(elem);

		if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL)
			return (EINVAL);

		switch (prop) {
		case ZPOOL_PROP_VERSION:
			error = nvpair_value_uint64(elem, &intval);
			if (!error &&
			    (intval < spa_version(spa) || intval > SPA_VERSION))
				error = EINVAL;
			break;

		case ZPOOL_PROP_DELEGATION:
		case ZPOOL_PROP_AUTOREPLACE:
			error = nvpair_value_uint64(elem, &intval);
			if (!error && intval > 1)
				error = EINVAL;
			break;

		case ZPOOL_PROP_BOOTFS:
			if (spa_version(spa) < SPA_VERSION_BOOTFS) {
				error = ENOTSUP;
				break;
			}

			/*
			 * A bootable filesystem cannot be on a RAIDZ pool
			 * or a striped pool with more than 1 device.
			 */
			rvdev = spa->spa_root_vdev;
			vdev_type =
			    rvdev->vdev_child[0]->vdev_ops->vdev_op_type;
			if (rvdev->vdev_children > 1 ||
			    strcmp(vdev_type, VDEV_TYPE_RAIDZ) == 0 ||
			    strcmp(vdev_type, VDEV_TYPE_MISSING) == 0) {
				error = ENOTSUP;
				break;
			}

			reset_bootfs = 1;

			error = nvpair_value_string(elem, &strval);

			if (!error) {
				if (strval == NULL || strval[0] == '\0') {
					objnum = zpool_prop_default_numeric(
					    ZPOOL_PROP_BOOTFS);
					break;
				}

				if (error = dmu_objset_open(strval, DMU_OST_ZFS,
				    DS_MODE_STANDARD | DS_MODE_READONLY, &os))
					break;
				objnum = dmu_objset_id(os);
				dmu_objset_close(os);
			}
			break;

		case ZPOOL_PROP_FAILUREMODE:
			error = nvpair_value_uint64(elem, &intval);
			if (!error && (intval < ZIO_FAILURE_MODE_WAIT ||
			    intval > ZIO_FAILURE_MODE_PANIC))
				error = EINVAL;

			/*
			 * This is a special case which only occurs when
			 * the pool has completely failed. This allows
			 * the user to change the in-core failmode property
			 * without syncing it out to disk (I/Os might
			 * currently be blocked). We do this by returning
			 * EIO to the caller (spa_prop_set) to trick it
			 * into thinking we encountered a property validation
			 * error.
			 */
			if (!error && spa_state(spa) == POOL_STATE_IO_FAILURE) {
				spa->spa_failmode = intval;
				error = EIO;
			}
			break;

		case ZPOOL_PROP_CACHEFILE:
			if ((error = nvpair_value_string(elem, &strval)) != 0)
				break;

			if (strval[0] == '\0')
				break;

			if (strcmp(strval, "none") == 0)
				break;

			if (strval[0] != '/') {
				error = EINVAL;
				break;
			}

			slash = strrchr(strval, '/');
			ASSERT(slash != NULL);

			if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
			    strcmp(slash, "/..") == 0)
				error = EINVAL;
			break;
		}

		if (error)
			break;
	}

	if (!error && reset_bootfs) {
		error = nvlist_remove(props,
		    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING);

		if (!error) {
			error = nvlist_add_uint64(props,
			    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum);
		}
	}

	return (error);
}

int
spa_prop_set(spa_t *spa, nvlist_t *nvp)
{
	int error;

	if ((error = spa_prop_validate(spa, nvp)) != 0)
		return (error);

	return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props,
	    spa, nvp, 3));
}

/*
 * If the bootfs property value is dsobj, clear it.
 */
void
spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx)
{
	if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) {
		VERIFY(zap_remove(spa->spa_meta_objset,
		    spa->spa_pool_props_object,
		    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0);
		spa->spa_bootfs = 0;
	}
}

/*
 * ==========================================================================
 * SPA state manipulation (open/create/destroy/import/export)
 * ==========================================================================
 */
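
/*
 * AVL comparison function for the error-list trees: entries are ordered
 * by the raw bytes of their bookmarks, with bcmp()'s result collapsed
 * to the -1/0/+1 values that AVL expects.
 */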
static int
spa_error_entry_compare(const void *a, const void *b)
{
	spa_error_entry_t *sa = (spa_error_entry_t *)a;
	spa_error_entry_t *sb = (spa_error_entry_t *)b;
	int ret;

	ret = bcmp(&sa->se_bookmark, &sb->se_bookmark,
	    sizeof (zbookmark_t));

	if (ret < 0)
		return (-1);
	else if (ret > 0)
		return (1);
	else
		return (0);
}

/*
 * Utility function which retrieves copies of the current logs and
 * re-initializes them in the process.
 */
void
spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub)
{
	ASSERT(MUTEX_HELD(&spa->spa_errlist_lock));

	bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t));
	bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t));

	avl_create(&spa->spa_errlist_scrub,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
	avl_create(&spa->spa_errlist_last,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
}

/*
 * Activate an uninitialized pool.
 */
static void
spa_activate(spa_t *spa)
{
	int t;

	ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);

	spa->spa_state = POOL_STATE_ACTIVE;

	spa->spa_normal_class = metaslab_class_create();
	spa->spa_log_class = metaslab_class_create();
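
	/*
	 * Create one issue taskq and one interrupt taskq per I/O type,
	 * each with zio_taskq_threads worker threads.
	 */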
	for (t = 0; t < ZIO_TYPES; t++) {
		spa->spa_zio_issue_taskq[t] = taskq_create("spa_zio_issue",
		    zio_taskq_threads, maxclsyspri, 50, INT_MAX,
		    TASKQ_PREPOPULATE);
		spa->spa_zio_intr_taskq[t] = taskq_create("spa_zio_intr",
		    zio_taskq_threads, maxclsyspri, 50, INT_MAX,
		    TASKQ_PREPOPULATE);
	}

	list_create(&spa->spa_dirty_list, sizeof (vdev_t),
	    offsetof(vdev_t, vdev_dirty_node));
	list_create(&spa->spa_zio_list, sizeof (zio_t),
	    offsetof(zio_t, zio_link_node));

	txg_list_create(&spa->spa_vdev_txg_list,
	    offsetof(struct vdev, vdev_txg_node));

	avl_create(&spa->spa_errlist_scrub,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
	avl_create(&spa->spa_errlist_last,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
}

/*
 * Opposite of spa_activate().
 */
static void
spa_deactivate(spa_t *spa)
{
	int t;

	ASSERT(spa->spa_sync_on == B_FALSE);
	ASSERT(spa->spa_dsl_pool == NULL);
	ASSERT(spa->spa_root_vdev == NULL);

	ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED);

	txg_list_destroy(&spa->spa_vdev_txg_list);

	list_destroy(&spa->spa_dirty_list);
	list_destroy(&spa->spa_zio_list);

	for (t = 0; t < ZIO_TYPES; t++) {
		taskq_destroy(spa->spa_zio_issue_taskq[t]);
		taskq_destroy(spa->spa_zio_intr_taskq[t]);
		spa->spa_zio_issue_taskq[t] = NULL;
		spa->spa_zio_intr_taskq[t] = NULL;
	}

	metaslab_class_destroy(spa->spa_normal_class);
	spa->spa_normal_class = NULL;

	metaslab_class_destroy(spa->spa_log_class);
	spa->spa_log_class = NULL;

	/*
	 * If this was part of an import or the open otherwise failed, we may
	 * still have errors left in the queues. Empty them just in case.
	 */
	spa_errlog_drain(spa);

	avl_destroy(&spa->spa_errlist_scrub);
	avl_destroy(&spa->spa_errlist_last);

	spa->spa_state = POOL_STATE_UNINITIALIZED;
}

/*
 * Verify a pool configuration, and construct the vdev tree appropriately. This
 * will create all the necessary vdevs in the appropriate layout, with each vdev
 * in the CLOSED state. This will prep the pool before open/creation/import.
 * All vdev validation is done by the vdev_alloc() routine.
 */
static int
spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent,
    uint_t id, int atype)
{
	nvlist_t **child;
	uint_t c, children;
	int error;

	if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0)
		return (error);

	if ((*vdp)->vdev_ops->vdev_op_leaf)
		return (0);
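
	/*
	 * An interior vdev must supply a ZPOOL_CONFIG_CHILDREN array; parse
	 * each child in turn, unwinding the partially built tree on failure.
	 */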
	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0) {
		vdev_free(*vdp);
		*vdp = NULL;
		return (EINVAL);
	}

	for (c = 0; c < children; c++) {
		vdev_t *vd;
		if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c,
		    atype)) != 0) {
			vdev_free(*vdp);
			*vdp = NULL;
			return (error);
		}
	}

	ASSERT(*vdp != NULL);

	return (0);
}

/*
 * Opposite of spa_load().
 */
static void
spa_unload(spa_t *spa)
{
	int i;

	/*
	 * Stop async tasks.
	 */
	spa_async_suspend(spa);

	/*
	 * Stop syncing.
	 */
	if (spa->spa_sync_on) {
		txg_sync_stop(spa->spa_dsl_pool);
		spa->spa_sync_on = B_FALSE;
	}

	/*
	 * Wait for any outstanding prefetch I/O to complete.
	 */
	spa_config_enter(spa, RW_WRITER, FTAG);
	spa_config_exit(spa, FTAG);

	/*
	 * Drop and purge level 2 cache.
	 */
	spa_l2cache_drop(spa);

	/*
	 * Close the dsl pool.
	 */
	if (spa->spa_dsl_pool) {
		dsl_pool_close(spa->spa_dsl_pool);
		spa->spa_dsl_pool = NULL;
	}

	/*
	 * Close all vdevs.
	 */
	if (spa->spa_root_vdev)
		vdev_free(spa->spa_root_vdev);
	ASSERT(spa->spa_root_vdev == NULL);

	for (i = 0; i < spa->spa_spares.sav_count; i++)
		vdev_free(spa->spa_spares.sav_vdevs[i]);
	if (spa->spa_spares.sav_vdevs) {
		kmem_free(spa->spa_spares.sav_vdevs,
		    spa->spa_spares.sav_count * sizeof (void *));
		spa->spa_spares.sav_vdevs = NULL;
	}
	if (spa->spa_spares.sav_config) {
		nvlist_free(spa->spa_spares.sav_config);
		spa->spa_spares.sav_config = NULL;
	}

	for (i = 0; i < spa->spa_l2cache.sav_count; i++)
		vdev_free(spa->spa_l2cache.sav_vdevs[i]);
	if (spa->spa_l2cache.sav_vdevs) {
		kmem_free(spa->spa_l2cache.sav_vdevs,
		    spa->spa_l2cache.sav_count * sizeof (void *));
		spa->spa_l2cache.sav_vdevs = NULL;
	}
	if (spa->spa_l2cache.sav_config) {
		nvlist_free(spa->spa_l2cache.sav_config);
		spa->spa_l2cache.sav_config = NULL;
	}

	spa->spa_async_suspended = 0;
}

/*
 * Load (or re-load) the current list of vdevs describing the active spares for
 * this pool. When this is called, we have some form of basic information in
 * 'spa_spares.sav_config'. We parse this into vdevs, try to open them, and
 * then re-generate a more complete list including status information.
 */
static void
spa_load_spares(spa_t *spa)
{
	nvlist_t **spares;
	uint_t nspares;
	int i;
	vdev_t *vd, *tvd;

	/*
	 * First, close and free any existing spare vdevs.
	 */
	for (i = 0; i < spa->spa_spares.sav_count; i++) {
		vd = spa->spa_spares.sav_vdevs[i];

		/* Undo the call to spa_activate() below */
		if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid)) != NULL &&
		    tvd->vdev_isspare)
			spa_spare_remove(tvd);
		vdev_close(vd);
		vdev_free(vd);
	}

	if (spa->spa_spares.sav_vdevs)
		kmem_free(spa->spa_spares.sav_vdevs,
		    spa->spa_spares.sav_count * sizeof (void *));

	if (spa->spa_spares.sav_config == NULL)
		nspares = 0;
	else
		VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
		    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);

	spa->spa_spares.sav_count = (int)nspares;
	spa->spa_spares.sav_vdevs = NULL;

	if (nspares == 0)
		return;

	/*
	 * Construct the array of vdevs, opening them to get status in the
	 * process. For each spare, there are potentially two different vdev_t
	 * structures associated with it: one in the list of spares (used only
	 * for basic validation purposes) and one in the active vdev
	 * configuration (if it's spared in). During this phase we open and
	 * validate each vdev on the spare list. If the vdev also exists in the
	 * active configuration, then we also mark this vdev as an active spare.
	 */
	spa->spa_spares.sav_vdevs = kmem_alloc(nspares * sizeof (void *),
	    KM_SLEEP);
	for (i = 0; i < spa->spa_spares.sav_count; i++) {
		VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0,
		    VDEV_ALLOC_SPARE) == 0);
		ASSERT(vd != NULL);

		spa->spa_spares.sav_vdevs[i] = vd;

		if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid)) != NULL) {
			if (!tvd->vdev_isspare)
				spa_spare_add(tvd);

			/*
			 * We only mark the spare active if we were successfully
			 * able to load the vdev. Otherwise, importing a pool
			 * with a bad active spare would result in strange
			 * behavior, because multiple pools would think the
			 * spare is actively in use.
			 *
			 * There is a vulnerability here to an equally bizarre
			 * circumstance, where a dead active spare is later
			 * brought back to life (onlined or otherwise). Given
			 * the rarity of this scenario, and the extra complexity
			 * it adds, we ignore the possibility.
			 */
			if (!vdev_is_dead(tvd))
				spa_spare_activate(tvd);
		}

		if (vdev_open(vd) != 0)
			continue;

		vd->vdev_top = vd;
		if (vdev_validate_aux(vd) == 0)
			spa_spare_add(vd);
	}

	/*
	 * Recompute the stashed list of spares, with status information
	 * this time.
	 */
	VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES,
	    DATA_TYPE_NVLIST_ARRAY) == 0);

	spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *),
	    KM_SLEEP);
	for (i = 0; i < spa->spa_spares.sav_count; i++)
		spares[i] = vdev_config_generate(spa,
		    spa->spa_spares.sav_vdevs[i], B_TRUE, B_TRUE, B_FALSE);
	VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
	    ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0);
	for (i = 0; i < spa->spa_spares.sav_count; i++)
		nvlist_free(spares[i]);
	kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *));
}

/*
 * Load (or re-load) the current list of vdevs describing the active l2cache for
 * this pool. When this is called, we have some form of basic information in
 * 'spa_l2cache.sav_config'. We parse this into vdevs, try to open them, and
 * then re-generate a more complete list including status information.
 * Devices which are already active have their details maintained, and are
 * not re-opened.
 */
static void
spa_load_l2cache(spa_t *spa)
{
	nvlist_t **l2cache;
	uint_t nl2cache;
	int i, j, oldnvdevs;
	uint64_t guid;
	vdev_t *vd, **oldvdevs, **newvdevs;
	spa_aux_vdev_t *sav = &spa->spa_l2cache;

	if (sav->sav_config != NULL) {
		VERIFY(nvlist_lookup_nvlist_array(sav->sav_config,
		    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
		newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP);
	} else {
		nl2cache = 0;
	}

	oldvdevs = sav->sav_vdevs;
	oldnvdevs = sav->sav_count;
	sav->sav_vdevs = NULL;
	sav->sav_count = 0;

	/*
	 * Process new nvlist of vdevs.
	 */
	for (i = 0; i < nl2cache; i++) {
		VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID,
		    &guid) == 0);

		newvdevs[i] = NULL;
		for (j = 0; j < oldnvdevs; j++) {
			vd = oldvdevs[j];
			if (vd != NULL && guid == vd->vdev_guid) {
				/*
				 * Retain previous vdev for add/remove ops.
				 */
				newvdevs[i] = vd;
				oldvdevs[j] = NULL;
				break;
			}
		}

		if (newvdevs[i] == NULL) {
			/*
			 * Create new vdev.
			 */
			VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0,
			    VDEV_ALLOC_L2CACHE) == 0);
			ASSERT(vd != NULL);
			newvdevs[i] = vd;

			/*
			 * Commit this vdev as an l2cache device,
			 * even if it fails to open.
			 */
			spa_l2cache_add(vd);

			if (vdev_open(vd) != 0)
				continue;

			vd->vdev_top = vd;
			(void) vdev_validate_aux(vd);
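
			/*
			 * If the device is healthy, mark it active; when the
			 * pool is open for write, also hand its usable range
			 * (everything past the front label region) to the
			 * L2ARC.
			 */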
			if (!vdev_is_dead(vd)) {
				uint64_t size;
				size = vdev_get_rsize(vd);
				ASSERT3U(size, >, 0);
				if (spa_mode & FWRITE) {
					l2arc_add_vdev(spa, vd,
					    VDEV_LABEL_START_SIZE,
					    size - VDEV_LABEL_START_SIZE);
				}
				spa_l2cache_activate(vd);
			}
		}
	}

	/*
	 * Purge vdevs that were dropped.
	 */
	for (i = 0; i < oldnvdevs; i++) {
		uint64_t pool;

		vd = oldvdevs[i];
		if (vd != NULL) {
			if (spa_mode & FWRITE &&
			    spa_l2cache_exists(vd->vdev_guid, &pool) &&
			    pool != 0ULL) {
				l2arc_remove_vdev(vd);
			}
			(void) vdev_close(vd);
			spa_l2cache_remove(vd);
		}
	}

	if (oldvdevs)
		kmem_free(oldvdevs, oldnvdevs * sizeof (void *));

	if (sav->sav_config == NULL)
		goto out;

	sav->sav_vdevs = newvdevs;
	sav->sav_count = (int)nl2cache;

	/*
	 * Recompute the stashed list of l2cache devices, with status
	 * information this time.
	 */
	VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE,
	    DATA_TYPE_NVLIST_ARRAY) == 0);

	l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP);
	for (i = 0; i < sav->sav_count; i++)
		l2cache[i] = vdev_config_generate(spa,
		    sav->sav_vdevs[i], B_TRUE, B_FALSE, B_TRUE);
	VERIFY(nvlist_add_nvlist_array(sav->sav_config,
	    ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0);
out:
	for (i = 0; i < sav->sav_count; i++)
		nvlist_free(l2cache[i]);
	if (sav->sav_count)
		kmem_free(l2cache, sav->sav_count * sizeof (void *));
}
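
/*
 * Read a packed nvlist object from the MOS: the packed size is stored in
 * the object's bonus buffer, and the packed bytes in the object itself.
 */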
static int
load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value)
{
	dmu_buf_t *db;
	char *packed = NULL;
	size_t nvsize = 0;
	int error;
	*value = NULL;

	VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
	nvsize = *(uint64_t *)db->db_data;
	dmu_buf_rele(db, FTAG);

	packed = kmem_alloc(nvsize, KM_SLEEP);
	error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed);
	if (error == 0)
		error = nvlist_unpack(packed, nvsize, value, 0);
	kmem_free(packed, nvsize);

	return (error);
}

/*
 * Checks to see if the given vdev could not be opened, in which case we post a
 * sysevent to notify the autoreplace code that the device has been removed.
 */
static void
spa_check_removed(vdev_t *vd)
{
	int c;

	for (c = 0; c < vd->vdev_children; c++)
		spa_check_removed(vd->vdev_child[c]);

	if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) {
		zfs_post_autoreplace(vd->vdev_spa, vd);
		spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK);
	}
}

/*
 * Load an existing storage pool, using the pool's builtin spa_config as a
 * source of configuration information.
 */
static int
spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
{
	int error = 0;
	nvlist_t *nvroot = NULL;
	vdev_t *rvd;
	uberblock_t *ub = &spa->spa_uberblock;
	uint64_t config_cache_txg = spa->spa_config_txg;
	uint64_t pool_guid;
	uint64_t version;
	zio_t *zio;
	uint64_t autoreplace = 0;

	spa->spa_load_state = state;

	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) ||
	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) {
		error = EINVAL;
		goto out;
	}

	/*
	 * Versioning wasn't explicitly added to the label until later, so if
	 * it's not present treat it as the initial version.
	 */
	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) != 0)
		version = SPA_VERSION_INITIAL;

	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
	    &spa->spa_config_txg);
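
	/*
	 * An import (or tryimport) fails outright if a pool with the same
	 * guid is already active in the namespace.
	 */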
	if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) &&
	    spa_guid_exists(pool_guid, 0)) {
		error = EEXIST;
		goto out;
	}

	spa->spa_load_guid = pool_guid;

	/*
	 * Parse the configuration into a vdev tree. We explicitly set the
	 * value that will be returned by spa_version() since parsing the
	 * configuration requires knowing the version number.
	 */
	spa_config_enter(spa, RW_WRITER, FTAG);
	spa->spa_ubsync.ub_version = version;
	error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_LOAD);
	spa_config_exit(spa, FTAG);

	if (error != 0)
		goto out;

	ASSERT(spa->spa_root_vdev == rvd);
	ASSERT(spa_guid(spa) == pool_guid);

	/*
	 * Try to open all vdevs, loading each label in the process.
	 */
	error = vdev_open(rvd);
	if (error != 0)
		goto out;

	/*
	 * Validate the labels for all leaf vdevs. We need to grab the config
	 * lock because all label I/O is done with the ZIO_FLAG_CONFIG_HELD
	 * flag.
	 */
	spa_config_enter(spa, RW_READER, FTAG);
	error = vdev_validate(rvd);
	spa_config_exit(spa, FTAG);

	if (error != 0)
		goto out;

	if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) {
		error = ENXIO;
		goto out;
	}

	/*
	 * Find the best uberblock.
	 */
	bzero(ub, sizeof (uberblock_t));

	zio = zio_root(spa, NULL, NULL,
	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE);
	vdev_uberblock_load(zio, rvd, ub);
	error = zio_wait(zio);

	/*
	 * If we weren't able to find a single valid uberblock, return failure.
	 */
	if (ub->ub_txg == 0) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = ENXIO;
		goto out;
	}

	/*
	 * If the pool is newer than the code, we can't open it.
	 */
	if (ub->ub_version > SPA_VERSION) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_VERSION_NEWER);
		error = ENOTSUP;
		goto out;
	}

	/*
	 * If the vdev guid sum doesn't match the uberblock, we have an
	 * incomplete configuration.
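	 * The check is deferred until spa_load() runs again with the config
	 * read back from the MOS (mosconfig != 0).
	 */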
	if (rvd->vdev_guid_sum != ub->ub_guid_sum && mosconfig) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_BAD_GUID_SUM);
		error = ENXIO;
		goto out;
	}

	/*
	 * Initialize internal SPA structures.
	 */
	spa->spa_state = POOL_STATE_ACTIVE;
	spa->spa_ubsync = spa->spa_uberblock;
	spa->spa_first_txg = spa_last_synced_txg(spa) + 1;
	error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool);
	if (error) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		goto out;
	}
	spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset;

	if (zap_lookup(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG,
	    sizeof (uint64_t), 1, &spa->spa_config_object) != 0) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = EIO;
		goto out;
	}

	if (!mosconfig) {
		nvlist_t *newconfig;
		uint64_t hostid;

		if (load_nvlist(spa, spa->spa_config_object, &newconfig) != 0) {
			vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
			    VDEV_AUX_CORRUPT_DATA);
			error = EIO;
			goto out;
		}
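
		/*
		 * Guard against importing a pool last written by another
		 * system: if the hostid recorded in the label is neither
		 * zero nor our own, refuse the load with EBADF.
		 */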
		if (nvlist_lookup_uint64(newconfig, ZPOOL_CONFIG_HOSTID,
		    &hostid) == 0) {
			char *hostname;
			unsigned long myhostid = 0;

			VERIFY(nvlist_lookup_string(newconfig,
			    ZPOOL_CONFIG_HOSTNAME, &hostname) == 0);

			(void) ddi_strtoul(hw_serial, NULL, 10, &myhostid);
			if (hostid != 0 && myhostid != 0 &&
			    (unsigned long)hostid != myhostid) {
				cmn_err(CE_WARN, "pool '%s' could not be "
				    "loaded as it was last accessed by "
				    "another system (host: %s hostid: 0x%lx). "
				    "See: http://www.sun.com/msg/ZFS-8000-EY",
				    spa->spa_name, hostname,
				    (unsigned long)hostid);
				error = EBADF;
				goto out;
			}
		}

		spa_config_set(spa, newconfig);
		spa_unload(spa);
		spa_deactivate(spa);
		spa_activate(spa);

		return (spa_load(spa, newconfig, state, B_TRUE));
	}

	if (zap_lookup(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST,
	    sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj) != 0) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = EIO;
		goto out;
	}

	/*
	 * Load the bit that tells us to use the new accounting function
	 * (raid-z deflation). If we have an older pool, this will not
	 * be present.
	 */
	error = zap_lookup(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
	    sizeof (uint64_t), 1, &spa->spa_deflate);
	if (error != 0 && error != ENOENT) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = EIO;
		goto out;
	}

	/*
	 * Load the persistent error log. If we have an older pool, this will
	 * not be present.
	 */
	error = zap_lookup(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_LAST,
	    sizeof (uint64_t), 1, &spa->spa_errlog_last);
	if (error != 0 && error != ENOENT) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = EIO;
		goto out;
	}

	error = zap_lookup(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_SCRUB,
	    sizeof (uint64_t), 1, &spa->spa_errlog_scrub);
	if (error != 0 && error != ENOENT) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = EIO;
		goto out;
	}

	/*
	 * Load the history object. If we have an older pool, this
	 * will not be present.
	 */
	error = zap_lookup(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_HISTORY,
	    sizeof (uint64_t), 1, &spa->spa_history);
	if (error != 0 && error != ENOENT) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = EIO;
		goto out;
	}

	/*
	 * Load any hot spares for this pool.
	 */
	error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
	    DMU_POOL_SPARES, sizeof (uint64_t), 1, &spa->spa_spares.sav_object);
	if (error != 0 && error != ENOENT) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = EIO;
		goto out;
	}
	if (error == 0) {
		ASSERT(spa_version(spa) >= SPA_VERSION_SPARES);
		if (load_nvlist(spa, spa->spa_spares.sav_object,
		    &spa->spa_spares.sav_config) != 0) {
			vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
			    VDEV_AUX_CORRUPT_DATA);
			error = EIO;
			goto out;
		}
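
		/*
		 * spa_load_spares() tears down and rebuilds the in-core
		 * spare vdevs, so it must run with the config lock held
		 * as writer.
		 */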
		spa_config_enter(spa, RW_WRITER, FTAG);
		spa_load_spares(spa);
		spa_config_exit(spa, FTAG);
	}

	/*
	 * Load any level 2 ARC devices for this pool.
	 */
	error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
	    DMU_POOL_L2CACHE, sizeof (uint64_t), 1,
	    &spa->spa_l2cache.sav_object);
	if (error != 0 && error != ENOENT) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = EIO;
		goto out;
	}
	if (error == 0) {
		ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE);
		if (load_nvlist(spa, spa->spa_l2cache.sav_object,
		    &spa->spa_l2cache.sav_config) != 0) {
			vdev_set_state(rvd, B_TRUE,
			    VDEV_STATE_CANT_OPEN,
			    VDEV_AUX_CORRUPT_DATA);
			error = EIO;
			goto out;
		}

		spa_config_enter(spa, RW_WRITER, FTAG);
		spa_load_l2cache(spa);
		spa_config_exit(spa, FTAG);
	}

	spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION);
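
	/*
	 * Look up the pool property object and cache the frequently used
	 * properties (bootfs, autoreplace, delegation, failmode) in the
	 * spa_t; entries that are missing simply leave the defaults alone.
	 */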
	error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
	    DMU_POOL_PROPS, sizeof (uint64_t), 1, &spa->spa_pool_props_object);

	if (error && error != ENOENT) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = EIO;
		goto out;
	}

	if (error == 0) {
		(void) zap_lookup(spa->spa_meta_objset,
		    spa->spa_pool_props_object,
		    zpool_prop_to_name(ZPOOL_PROP_BOOTFS),
		    sizeof (uint64_t), 1, &spa->spa_bootfs);
		(void) zap_lookup(spa->spa_meta_objset,
		    spa->spa_pool_props_object,
		    zpool_prop_to_name(ZPOOL_PROP_AUTOREPLACE),
		    sizeof (uint64_t), 1, &autoreplace);
		(void) zap_lookup(spa->spa_meta_objset,
		    spa->spa_pool_props_object,
		    zpool_prop_to_name(ZPOOL_PROP_DELEGATION),
		    sizeof (uint64_t), 1, &spa->spa_delegation);
		(void) zap_lookup(spa->spa_meta_objset,
		    spa->spa_pool_props_object,
		    zpool_prop_to_name(ZPOOL_PROP_FAILUREMODE),
		    sizeof (uint64_t), 1, &spa->spa_failmode);
	}

	/*
	 * If the 'autoreplace' property is set, then post a resource notifying
	 * the ZFS DE that it should not issue any faults for unopenable
	 * devices. We also iterate over the vdevs, and post a sysevent for any
	 * unopenable vdevs so that the normal autoreplace handler can take
	 * over.
	 */
	if (autoreplace)
		spa_check_removed(spa->spa_root_vdev);

	/*
	 * Load the vdev state for all toplevel vdevs.
	 */
	vdev_load(rvd);

	/*
	 * Propagate the leaf DTLs we just loaded all the way up the tree.
	 */
	spa_config_enter(spa, RW_WRITER, FTAG);
	vdev_dtl_reassess(rvd, 0, 0, B_FALSE);
	spa_config_exit(spa, FTAG);

	/*
	 * Check the state of the root vdev. If it can't be opened, it
	 * indicates one or more toplevel vdevs are faulted.
	 */
	if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) {
		error = ENXIO;
		goto out;
	}

	if ((spa_mode & FWRITE) && state != SPA_LOAD_TRYIMPORT) {
		dmu_tx_t *tx;
		int need_update = B_FALSE;
		int c;

		/*
		 * Claim log blocks that haven't been committed yet.
		 * This must all happen in a single txg.
		 */
		tx = dmu_tx_create_assigned(spa_get_dsl(spa),
		    spa_first_txg(spa));
		(void) dmu_objset_find(spa->spa_name,
		    zil_claim, tx, DS_FIND_CHILDREN);
		dmu_tx_commit(tx);

		spa->spa_sync_on = B_TRUE;
		txg_sync_start(spa->spa_dsl_pool);

		/*
		 * Wait for all claims to sync.
		 */
		txg_wait_synced(spa->spa_dsl_pool, 0);

		/*
		 * If the config cache is stale, or we have uninitialized
		 * metaslabs (see spa_vdev_add()), then update the config.
		 */
		if (config_cache_txg != spa->spa_config_txg ||
		    state == SPA_LOAD_IMPORT)
			need_update = B_TRUE;

		for (c = 0; c < rvd->vdev_children; c++)
			if (rvd->vdev_child[c]->vdev_ms_array == 0)
				need_update = B_TRUE;

		/*
		 * Update the config cache asynchronously in case we're the
		 * root pool, in which case the config cache isn't writable yet.
		 */
		if (need_update)
			spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
	}

	error = 0;
out:
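	/*
	 * EBADF indicates a stale or foreign config (the label says the
	 * pool was exported, or it was last written by another host), not
	 * a pool failure, so don't post an ereport for it.
	 */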
	if (error && error != EBADF)
		zfs_ereport_post(FM_EREPORT_ZFS_POOL, spa, NULL, NULL, 0, 0);
	spa->spa_load_state = SPA_LOAD_NONE;
	spa->spa_ena = 0;

	return (error);
}

/*
 * Pool Open/Import
 *
 * The import case is identical to an open except that the configuration is sent
 * down from userland, instead of grabbed from the configuration cache. For the
 * case of an open, the pool configuration will exist in the
 * POOL_STATE_UNINITIALIZED state.
 *
 * The stats information (gen/count/ustats) is used to gather vdev statistics at
 * the same time we open the pool, without having to keep around the spa_t in
 * some ambiguous state.
 */
static int
spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t **config)
{
	spa_t *spa;
	int error;
	int loaded = B_FALSE;
	int locked = B_FALSE;

	*spapp = NULL;

	/*
	 * As disgusting as this is, we need to support recursive calls to this
	 * function because dsl_dir_open() is called during spa_load(), and ends
	 * up calling spa_open() again. The real fix is to figure out how to
	 * avoid dsl_dir_open() calling this in the first place.
	 */
	if (mutex_owner(&spa_namespace_lock) != curthread) {
		mutex_enter(&spa_namespace_lock);
		locked = B_TRUE;
	}

	if ((spa = spa_lookup(pool)) == NULL) {
		if (locked)
			mutex_exit(&spa_namespace_lock);
		return (ENOENT);
	}
	if (spa->spa_state == POOL_STATE_UNINITIALIZED) {

		spa_activate(spa);

		error = spa_load(spa, spa->spa_config, SPA_LOAD_OPEN, B_FALSE);

		if (error == EBADF) {
			/*
			 * If vdev_validate() returns failure (indicated by
			 * EBADF), it means that one of the vdevs indicates
			 * that the pool has been exported or destroyed. If
			 * this is the case, the config cache is out of sync and
			 * we should remove the pool from the namespace.
			 */
			zfs_post_ok(spa, NULL);
			spa_unload(spa);
			spa_deactivate(spa);
			spa_remove(spa);
			spa_config_sync();
			if (locked)
				mutex_exit(&spa_namespace_lock);
			return (ENOENT);
		}

		if (error) {
			/*
			 * We can't open the pool, but we still have useful
			 * information: the state of each vdev after the
			 * attempted vdev_open(). Return this to the user.
			 */
			if (config != NULL && spa->spa_root_vdev != NULL) {
				spa_config_enter(spa, RW_READER, FTAG);
				*config = spa_config_generate(spa, NULL, -1ULL,
				    B_TRUE);
				spa_config_exit(spa, FTAG);
			}
			spa_unload(spa);
			spa_deactivate(spa);
			spa->spa_last_open_failed = B_TRUE;
			if (locked)
				mutex_exit(&spa_namespace_lock);
			*spapp = NULL;
			return (error);
		} else {
			zfs_post_ok(spa, NULL);
			spa->spa_last_open_failed = B_FALSE;
		}

		loaded = B_TRUE;
	}

	spa_open_ref(spa, tag);

	/*
	 * If we just loaded the pool, resilver anything that's out of date.
	 */
15074451Seschrock */ 15084451Seschrock if (loaded && (spa_mode & FWRITE)) 15094451Seschrock VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0); 15104451Seschrock 1511789Sahrens if (locked) 1512789Sahrens mutex_exit(&spa_namespace_lock); 1513789Sahrens 1514789Sahrens *spapp = spa; 1515789Sahrens 1516789Sahrens if (config != NULL) { 15171544Seschrock spa_config_enter(spa, RW_READER, FTAG); 1518789Sahrens *config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); 15191544Seschrock spa_config_exit(spa, FTAG); 1520789Sahrens } 1521789Sahrens 1522789Sahrens return (0); 1523789Sahrens } 1524789Sahrens 1525789Sahrens int 1526789Sahrens spa_open(const char *name, spa_t **spapp, void *tag) 1527789Sahrens { 1528789Sahrens return (spa_open_common(name, spapp, tag, NULL)); 1529789Sahrens } 1530789Sahrens 15311544Seschrock /* 15321544Seschrock * Lookup the given spa_t, incrementing the inject count in the process, 15331544Seschrock * preventing it from being exported or destroyed. 15341544Seschrock */ 15351544Seschrock spa_t * 15361544Seschrock spa_inject_addref(char *name) 15371544Seschrock { 15381544Seschrock spa_t *spa; 15391544Seschrock 15401544Seschrock mutex_enter(&spa_namespace_lock); 15411544Seschrock if ((spa = spa_lookup(name)) == NULL) { 15421544Seschrock mutex_exit(&spa_namespace_lock); 15431544Seschrock return (NULL); 15441544Seschrock } 15451544Seschrock spa->spa_inject_ref++; 15461544Seschrock mutex_exit(&spa_namespace_lock); 15471544Seschrock 15481544Seschrock return (spa); 15491544Seschrock } 15501544Seschrock 15511544Seschrock void 15521544Seschrock spa_inject_delref(spa_t *spa) 15531544Seschrock { 15541544Seschrock mutex_enter(&spa_namespace_lock); 15551544Seschrock spa->spa_inject_ref--; 15561544Seschrock mutex_exit(&spa_namespace_lock); 15571544Seschrock } 15581544Seschrock 15595450Sbrendan /* 15605450Sbrendan * Add spares device information to the nvlist. 15615450Sbrendan */ 15622082Seschrock static void 15632082Seschrock spa_add_spares(spa_t *spa, nvlist_t *config) 15642082Seschrock { 15652082Seschrock nvlist_t **spares; 15662082Seschrock uint_t i, nspares; 15672082Seschrock nvlist_t *nvroot; 15682082Seschrock uint64_t guid; 15692082Seschrock vdev_stat_t *vs; 15702082Seschrock uint_t vsc; 15713377Seschrock uint64_t pool; 15722082Seschrock 15735450Sbrendan if (spa->spa_spares.sav_count == 0) 15742082Seschrock return; 15752082Seschrock 15762082Seschrock VERIFY(nvlist_lookup_nvlist(config, 15772082Seschrock ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 15785450Sbrendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 15792082Seschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 15802082Seschrock if (nspares != 0) { 15812082Seschrock VERIFY(nvlist_add_nvlist_array(nvroot, 15822082Seschrock ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 15832082Seschrock VERIFY(nvlist_lookup_nvlist_array(nvroot, 15842082Seschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 15852082Seschrock 15862082Seschrock /* 15872082Seschrock * Go through and find any spares which have since been 15882082Seschrock * repurposed as an active spare. If this is the case, update 15892082Seschrock * their status appropriately. 
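 *
 * An available spare is one for which spa_spare_exists() reports no
 * owning pool; a spare that has been pulled into service comes back
 * with a nonzero pool guid, and the loop below reports it as
 * VDEV_STATE_CANT_OPEN with vs_aux set to VDEV_AUX_SPARED.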
15902082Seschrock */ 15912082Seschrock for (i = 0; i < nspares; i++) { 15922082Seschrock VERIFY(nvlist_lookup_uint64(spares[i], 15932082Seschrock ZPOOL_CONFIG_GUID, &guid) == 0); 15943377Seschrock if (spa_spare_exists(guid, &pool) && pool != 0ULL) { 15952082Seschrock VERIFY(nvlist_lookup_uint64_array( 15962082Seschrock spares[i], ZPOOL_CONFIG_STATS, 15972082Seschrock (uint64_t **)&vs, &vsc) == 0); 15982082Seschrock vs->vs_state = VDEV_STATE_CANT_OPEN; 15992082Seschrock vs->vs_aux = VDEV_AUX_SPARED; 16002082Seschrock } 16012082Seschrock } 16022082Seschrock } 16032082Seschrock } 16042082Seschrock 16055450Sbrendan /* 16065450Sbrendan * Add l2cache device information to the nvlist, including vdev stats. 16075450Sbrendan */ 16085450Sbrendan static void 16095450Sbrendan spa_add_l2cache(spa_t *spa, nvlist_t *config) 16105450Sbrendan { 16115450Sbrendan nvlist_t **l2cache; 16125450Sbrendan uint_t i, j, nl2cache; 16135450Sbrendan nvlist_t *nvroot; 16145450Sbrendan uint64_t guid; 16155450Sbrendan vdev_t *vd; 16165450Sbrendan vdev_stat_t *vs; 16175450Sbrendan uint_t vsc; 16185450Sbrendan 16195450Sbrendan if (spa->spa_l2cache.sav_count == 0) 16205450Sbrendan return; 16215450Sbrendan 16225450Sbrendan spa_config_enter(spa, RW_READER, FTAG); 16235450Sbrendan 16245450Sbrendan VERIFY(nvlist_lookup_nvlist(config, 16255450Sbrendan ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 16265450Sbrendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, 16275450Sbrendan ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 16285450Sbrendan if (nl2cache != 0) { 16295450Sbrendan VERIFY(nvlist_add_nvlist_array(nvroot, 16305450Sbrendan ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); 16315450Sbrendan VERIFY(nvlist_lookup_nvlist_array(nvroot, 16325450Sbrendan ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 16335450Sbrendan 16345450Sbrendan /* 16355450Sbrendan * Update level 2 cache device stats. 
16365450Sbrendan */
16375450Sbrendan
16385450Sbrendan for (i = 0; i < nl2cache; i++) {
16395450Sbrendan VERIFY(nvlist_lookup_uint64(l2cache[i],
16405450Sbrendan ZPOOL_CONFIG_GUID, &guid) == 0);
16415450Sbrendan
16425450Sbrendan vd = NULL;
16435450Sbrendan for (j = 0; j < spa->spa_l2cache.sav_count; j++) {
16445450Sbrendan if (guid ==
16455450Sbrendan spa->spa_l2cache.sav_vdevs[j]->vdev_guid) {
16465450Sbrendan vd = spa->spa_l2cache.sav_vdevs[j];
16475450Sbrendan break;
16485450Sbrendan }
16495450Sbrendan }
16505450Sbrendan ASSERT(vd != NULL);
16515450Sbrendan
16525450Sbrendan VERIFY(nvlist_lookup_uint64_array(l2cache[i],
16535450Sbrendan ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0);
16545450Sbrendan vdev_get_stats(vd, vs);
16555450Sbrendan }
16565450Sbrendan }
16575450Sbrendan
16585450Sbrendan spa_config_exit(spa, FTAG);
16595450Sbrendan }
16605450Sbrendan
1661789Sahrens int
16621544Seschrock spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen)
1663789Sahrens {
1664789Sahrens int error;
1665789Sahrens spa_t *spa;
1666789Sahrens
1667789Sahrens *config = NULL;
1668789Sahrens error = spa_open_common(name, &spa, FTAG, config);
1669789Sahrens
16702082Seschrock if (spa && *config != NULL) {
16711544Seschrock VERIFY(nvlist_add_uint64(*config, ZPOOL_CONFIG_ERRCOUNT,
16721544Seschrock spa_get_errlog_size(spa)) == 0);
16731544Seschrock
16742082Seschrock spa_add_spares(spa, *config);
16755450Sbrendan spa_add_l2cache(spa, *config);
16762082Seschrock }
16772082Seschrock
16781544Seschrock /*
16791544Seschrock * We want to get the alternate root even for faulted pools, so we cheat
16801544Seschrock * and call spa_lookup() directly.
16811544Seschrock */
16821544Seschrock if (altroot) {
16831544Seschrock if (spa == NULL) {
16841544Seschrock mutex_enter(&spa_namespace_lock);
16851544Seschrock spa = spa_lookup(name);
16861544Seschrock if (spa)
16871544Seschrock spa_altroot(spa, altroot, buflen);
16881544Seschrock else
16891544Seschrock altroot[0] = '\0';
16901544Seschrock spa = NULL;
16911544Seschrock mutex_exit(&spa_namespace_lock);
16921544Seschrock } else {
16931544Seschrock spa_altroot(spa, altroot, buflen);
16941544Seschrock }
16951544Seschrock }
16961544Seschrock
1697789Sahrens if (spa != NULL)
1698789Sahrens spa_close(spa, FTAG);
1699789Sahrens
1700789Sahrens return (error);
1701789Sahrens }
1702789Sahrens
1703789Sahrens /*
17045450Sbrendan * Validate that the auxiliary device array is well formed. We must have an
17055450Sbrendan * array of nvlists, each of which describes a valid leaf vdev. If this is an
17065450Sbrendan * import (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be
17075450Sbrendan * specified, as long as they are well-formed.
17082082Seschrock */
17092082Seschrock static int
17105450Sbrendan spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode,
17115450Sbrendan spa_aux_vdev_t *sav, const char *config, uint64_t version,
17125450Sbrendan vdev_labeltype_t label)
17132082Seschrock {
17145450Sbrendan nvlist_t **dev;
17155450Sbrendan uint_t i, ndev;
17162082Seschrock vdev_t *vd;
17172082Seschrock int error;
17182082Seschrock
17192082Seschrock /*
17205450Sbrendan * It's acceptable to have no devs specified.
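 *
 * A sketch of the nvroot shape consumed here, in terms of the
 * symbolic config names (the exact member fields are whatever
 * spa_config_parse() accepts for a leaf vdev):
 *
 *	nvroot
 *	    ZPOOL_CONFIG_SPARES:   array of leaf-vdev nvlists
 *	    ZPOOL_CONFIG_L2CACHE:  array of leaf-vdev nvlists
 *
 * Each entry is parsed into a single leaf vdev, opened, labeled, and
 * then freed again; only its ZPOOL_CONFIG_GUID is written back.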
17212082Seschrock */
17225450Sbrendan if (nvlist_lookup_nvlist_array(nvroot, config, &dev, &ndev) != 0)
17232082Seschrock return (0);
17242082Seschrock
17255450Sbrendan if (ndev == 0)
17262082Seschrock return (EINVAL);
17272082Seschrock
17282082Seschrock /*
17295450Sbrendan * Make sure the pool is formatted with a version that supports this
17305450Sbrendan * device type.
17312082Seschrock */
17325450Sbrendan if (spa_version(spa) < version)
17332082Seschrock return (ENOTSUP);
17342082Seschrock
17353377Seschrock /*
17365450Sbrendan * Set the pending device list so we correctly handle device in-use
17373377Seschrock * checking.
17383377Seschrock */
17395450Sbrendan sav->sav_pending = dev;
17405450Sbrendan sav->sav_npending = ndev;
17415450Sbrendan
17425450Sbrendan for (i = 0; i < ndev; i++) {
17435450Sbrendan if ((error = spa_config_parse(spa, &vd, dev[i], NULL, 0,
17442082Seschrock mode)) != 0)
17453377Seschrock goto out;
17462082Seschrock
17472082Seschrock if (!vd->vdev_ops->vdev_op_leaf) {
17482082Seschrock vdev_free(vd);
17493377Seschrock error = EINVAL;
17503377Seschrock goto out;
17512082Seschrock }
17522082Seschrock
17535450Sbrendan /*
17545450Sbrendan * The L2ARC currently only supports disk devices.
17555450Sbrendan */
17565450Sbrendan if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) &&
17575450Sbrendan strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) {
17585450Sbrendan error = ENOTBLK;
17595450Sbrendan goto out;
17605450Sbrendan }
17615450Sbrendan
17622082Seschrock vd->vdev_top = vd;
17633377Seschrock
17643377Seschrock if ((error = vdev_open(vd)) == 0 &&
17655450Sbrendan (error = vdev_label_init(vd, crtxg, label)) == 0) {
17665450Sbrendan VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID,
17673377Seschrock vd->vdev_guid) == 0);
17682082Seschrock }
17692082Seschrock
17702082Seschrock vdev_free(vd);
17713377Seschrock
17725450Sbrendan if (error &&
17735450Sbrendan (mode != VDEV_ALLOC_SPARE && mode != VDEV_ALLOC_L2CACHE))
17743377Seschrock goto out;
17753377Seschrock else
17763377Seschrock error = 0;
17772082Seschrock }
17782082Seschrock
17793377Seschrock out:
17805450Sbrendan sav->sav_pending = NULL;
17815450Sbrendan sav->sav_npending = 0;
17823377Seschrock return (error);
17832082Seschrock }
17842082Seschrock
17855450Sbrendan static int
17865450Sbrendan spa_validate_aux(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode)
17875450Sbrendan {
17885450Sbrendan int error;
17895450Sbrendan
17905450Sbrendan if ((error = spa_validate_aux_devs(spa, nvroot, crtxg, mode,
17915450Sbrendan &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES,
17925450Sbrendan VDEV_LABEL_SPARE)) != 0) {
17935450Sbrendan return (error);
17945450Sbrendan }
17955450Sbrendan
17965450Sbrendan return (spa_validate_aux_devs(spa, nvroot, crtxg, mode,
17975450Sbrendan &spa->spa_l2cache, ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE,
17985450Sbrendan VDEV_LABEL_L2CACHE));
17995450Sbrendan }
18005450Sbrendan
18015450Sbrendan static void
18025450Sbrendan spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs,
18035450Sbrendan const char *config)
18045450Sbrendan {
18055450Sbrendan int i;
18065450Sbrendan
18075450Sbrendan if (sav->sav_config != NULL) {
18085450Sbrendan nvlist_t **olddevs;
18095450Sbrendan uint_t oldndevs;
18105450Sbrendan nvlist_t **newdevs;
18115450Sbrendan
18125450Sbrendan /*
18135450Sbrendan * Generate new dev list by concatenating with the
18145450Sbrendan * current dev list.
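 *
 * E.g. an existing list [s1, s2] merged with a caller-supplied list
 * [s3] yields [s1, s2, s3]: the old entries are duplicated, the old
 * array is removed from sav_config, and the combined array is added
 * back in its place.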
18155450Sbrendan */ 18165450Sbrendan VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config, 18175450Sbrendan &olddevs, &oldndevs) == 0); 18185450Sbrendan 18195450Sbrendan newdevs = kmem_alloc(sizeof (void *) * 18205450Sbrendan (ndevs + oldndevs), KM_SLEEP); 18215450Sbrendan for (i = 0; i < oldndevs; i++) 18225450Sbrendan VERIFY(nvlist_dup(olddevs[i], &newdevs[i], 18235450Sbrendan KM_SLEEP) == 0); 18245450Sbrendan for (i = 0; i < ndevs; i++) 18255450Sbrendan VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs], 18265450Sbrendan KM_SLEEP) == 0); 18275450Sbrendan 18285450Sbrendan VERIFY(nvlist_remove(sav->sav_config, config, 18295450Sbrendan DATA_TYPE_NVLIST_ARRAY) == 0); 18305450Sbrendan 18315450Sbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, 18325450Sbrendan config, newdevs, ndevs + oldndevs) == 0); 18335450Sbrendan for (i = 0; i < oldndevs + ndevs; i++) 18345450Sbrendan nvlist_free(newdevs[i]); 18355450Sbrendan kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *)); 18365450Sbrendan } else { 18375450Sbrendan /* 18385450Sbrendan * Generate a new dev list. 18395450Sbrendan */ 18405450Sbrendan VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME, 18415450Sbrendan KM_SLEEP) == 0); 18425450Sbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, config, 18435450Sbrendan devs, ndevs) == 0); 18445450Sbrendan } 18455450Sbrendan } 18465450Sbrendan 18475450Sbrendan /* 18485450Sbrendan * Stop and drop level 2 ARC devices 18495450Sbrendan */ 18505450Sbrendan void 18515450Sbrendan spa_l2cache_drop(spa_t *spa) 18525450Sbrendan { 18535450Sbrendan vdev_t *vd; 18545450Sbrendan int i; 18555450Sbrendan spa_aux_vdev_t *sav = &spa->spa_l2cache; 18565450Sbrendan 18575450Sbrendan for (i = 0; i < sav->sav_count; i++) { 18585450Sbrendan uint64_t pool; 18595450Sbrendan 18605450Sbrendan vd = sav->sav_vdevs[i]; 18615450Sbrendan ASSERT(vd != NULL); 18625450Sbrendan 18635450Sbrendan if (spa_mode & FWRITE && 18645450Sbrendan spa_l2cache_exists(vd->vdev_guid, &pool) && pool != 0ULL) { 18655450Sbrendan l2arc_remove_vdev(vd); 18665450Sbrendan } 18675450Sbrendan if (vd->vdev_isl2cache) 18685450Sbrendan spa_l2cache_remove(vd); 18695450Sbrendan vdev_clear_stats(vd); 18705450Sbrendan (void) vdev_close(vd); 18715450Sbrendan } 18725450Sbrendan } 18735450Sbrendan 18742082Seschrock /* 1875789Sahrens * Pool Creation 1876789Sahrens */ 1877789Sahrens int 18785094Slling spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, 18794715Sek110237 const char *history_str) 1880789Sahrens { 1881789Sahrens spa_t *spa; 18825094Slling char *altroot = NULL; 18831635Sbonwick vdev_t *rvd; 1884789Sahrens dsl_pool_t *dp; 1885789Sahrens dmu_tx_t *tx; 18862082Seschrock int c, error = 0; 1887789Sahrens uint64_t txg = TXG_INITIAL; 18885450Sbrendan nvlist_t **spares, **l2cache; 18895450Sbrendan uint_t nspares, nl2cache; 18905094Slling uint64_t version; 1891789Sahrens 1892789Sahrens /* 1893789Sahrens * If this pool already exists, return failure. 1894789Sahrens */ 1895789Sahrens mutex_enter(&spa_namespace_lock); 1896789Sahrens if (spa_lookup(pool) != NULL) { 1897789Sahrens mutex_exit(&spa_namespace_lock); 1898789Sahrens return (EEXIST); 1899789Sahrens } 1900789Sahrens 1901789Sahrens /* 1902789Sahrens * Allocate a new spa_t structure. 
1903789Sahrens */ 19045094Slling (void) nvlist_lookup_string(props, 19055094Slling zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); 19061635Sbonwick spa = spa_add(pool, altroot); 1907789Sahrens spa_activate(spa); 1908789Sahrens 1909789Sahrens spa->spa_uberblock.ub_txg = txg - 1; 19105094Slling 19115094Slling if (props && (error = spa_prop_validate(spa, props))) { 19125094Slling spa_unload(spa); 19135094Slling spa_deactivate(spa); 19145094Slling spa_remove(spa); 19155094Slling return (error); 19165094Slling } 19175094Slling 19185094Slling if (nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), 19195094Slling &version) != 0) 19205094Slling version = SPA_VERSION; 19215094Slling ASSERT(version <= SPA_VERSION); 19225094Slling spa->spa_uberblock.ub_version = version; 1923789Sahrens spa->spa_ubsync = spa->spa_uberblock; 1924789Sahrens 19251635Sbonwick /* 19261635Sbonwick * Create the root vdev. 19271635Sbonwick */ 19281635Sbonwick spa_config_enter(spa, RW_WRITER, FTAG); 19291635Sbonwick 19302082Seschrock error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD); 19312082Seschrock 19322082Seschrock ASSERT(error != 0 || rvd != NULL); 19332082Seschrock ASSERT(error != 0 || spa->spa_root_vdev == rvd); 19342082Seschrock 19352082Seschrock if (error == 0 && rvd->vdev_children == 0) 19361635Sbonwick error = EINVAL; 19372082Seschrock 19382082Seschrock if (error == 0 && 19392082Seschrock (error = vdev_create(rvd, txg, B_FALSE)) == 0 && 19405450Sbrendan (error = spa_validate_aux(spa, nvroot, txg, 19412082Seschrock VDEV_ALLOC_ADD)) == 0) { 19422082Seschrock for (c = 0; c < rvd->vdev_children; c++) 19432082Seschrock vdev_init(rvd->vdev_child[c], txg); 19442082Seschrock vdev_config_dirty(rvd); 19451635Sbonwick } 19461635Sbonwick 19471635Sbonwick spa_config_exit(spa, FTAG); 1948789Sahrens 19492082Seschrock if (error != 0) { 1950789Sahrens spa_unload(spa); 1951789Sahrens spa_deactivate(spa); 1952789Sahrens spa_remove(spa); 1953789Sahrens mutex_exit(&spa_namespace_lock); 1954789Sahrens return (error); 1955789Sahrens } 1956789Sahrens 19572082Seschrock /* 19582082Seschrock * Get the list of spares, if specified. 19592082Seschrock */ 19602082Seschrock if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 19612082Seschrock &spares, &nspares) == 0) { 19625450Sbrendan VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME, 19632082Seschrock KM_SLEEP) == 0); 19645450Sbrendan VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, 19652082Seschrock ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 19662082Seschrock spa_config_enter(spa, RW_WRITER, FTAG); 19672082Seschrock spa_load_spares(spa); 19682082Seschrock spa_config_exit(spa, FTAG); 19695450Sbrendan spa->spa_spares.sav_sync = B_TRUE; 19705450Sbrendan } 19715450Sbrendan 19725450Sbrendan /* 19735450Sbrendan * Get the list of level 2 cache devices, if specified. 
19745450Sbrendan */ 19755450Sbrendan if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, 19765450Sbrendan &l2cache, &nl2cache) == 0) { 19775450Sbrendan VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, 19785450Sbrendan NV_UNIQUE_NAME, KM_SLEEP) == 0); 19795450Sbrendan VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, 19805450Sbrendan ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); 19815450Sbrendan spa_config_enter(spa, RW_WRITER, FTAG); 19825450Sbrendan spa_load_l2cache(spa); 19835450Sbrendan spa_config_exit(spa, FTAG); 19845450Sbrendan spa->spa_l2cache.sav_sync = B_TRUE; 19852082Seschrock } 19862082Seschrock 1987789Sahrens spa->spa_dsl_pool = dp = dsl_pool_create(spa, txg); 1988789Sahrens spa->spa_meta_objset = dp->dp_meta_objset; 1989789Sahrens 1990789Sahrens tx = dmu_tx_create_assigned(dp, txg); 1991789Sahrens 1992789Sahrens /* 1993789Sahrens * Create the pool config object. 1994789Sahrens */ 1995789Sahrens spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset, 1996789Sahrens DMU_OT_PACKED_NVLIST, 1 << 14, 1997789Sahrens DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx); 1998789Sahrens 19991544Seschrock if (zap_add(spa->spa_meta_objset, 2000789Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, 20011544Seschrock sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) { 20021544Seschrock cmn_err(CE_PANIC, "failed to add pool config"); 20031544Seschrock } 2004789Sahrens 20055094Slling /* Newly created pools with the right version are always deflated. */ 20065094Slling if (version >= SPA_VERSION_RAIDZ_DEFLATE) { 20075094Slling spa->spa_deflate = TRUE; 20085094Slling if (zap_add(spa->spa_meta_objset, 20095094Slling DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 20105094Slling sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) { 20115094Slling cmn_err(CE_PANIC, "failed to add deflate"); 20125094Slling } 20132082Seschrock } 20142082Seschrock 2015789Sahrens /* 2016789Sahrens * Create the deferred-free bplist object. Turn off compression 2017789Sahrens * because sync-to-convergence takes longer if the blocksize 2018789Sahrens * keeps changing. 2019789Sahrens */ 2020789Sahrens spa->spa_sync_bplist_obj = bplist_create(spa->spa_meta_objset, 2021789Sahrens 1 << 14, tx); 2022789Sahrens dmu_object_set_compress(spa->spa_meta_objset, spa->spa_sync_bplist_obj, 2023789Sahrens ZIO_COMPRESS_OFF, tx); 2024789Sahrens 20251544Seschrock if (zap_add(spa->spa_meta_objset, 2026789Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, 20271544Seschrock sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj, tx) != 0) { 20281544Seschrock cmn_err(CE_PANIC, "failed to add bplist"); 20291544Seschrock } 2030789Sahrens 20312926Sek110237 /* 20322926Sek110237 * Create the pool's history object. 20332926Sek110237 */ 20345094Slling if (version >= SPA_VERSION_ZPOOL_HISTORY) 20355094Slling spa_history_create_obj(spa, tx); 20365094Slling 20375094Slling /* 20385094Slling * Set pool properties. 
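 *
 * A minimal sketch of a props nvlist a caller might hand in (the
 * values are illustrative):
 *
 *	nvlist_t *props;
 *	VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, KM_SLEEP) == 0);
 *	VERIFY(nvlist_add_uint64(props,
 *	    zpool_prop_to_name(ZPOOL_PROP_VERSION), SPA_VERSION) == 0);
 *	VERIFY(nvlist_add_string(props,
 *	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), "/mnt") == 0);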
20395094Slling */ 20405094Slling spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS); 20415094Slling spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); 20425329Sgw25295 spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE); 20435094Slling if (props) 20445094Slling spa_sync_props(spa, props, CRED(), tx); 20452926Sek110237 2046789Sahrens dmu_tx_commit(tx); 2047789Sahrens 2048789Sahrens spa->spa_sync_on = B_TRUE; 2049789Sahrens txg_sync_start(spa->spa_dsl_pool); 2050789Sahrens 2051789Sahrens /* 2052789Sahrens * We explicitly wait for the first transaction to complete so that our 2053789Sahrens * bean counters are appropriately updated. 2054789Sahrens */ 2055789Sahrens txg_wait_synced(spa->spa_dsl_pool, txg); 2056789Sahrens 2057789Sahrens spa_config_sync(); 2058789Sahrens 20595094Slling if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL) 20604715Sek110237 (void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE); 20614715Sek110237 2062789Sahrens mutex_exit(&spa_namespace_lock); 2063789Sahrens 2064789Sahrens return (0); 2065789Sahrens } 2066789Sahrens 2067789Sahrens /* 2068789Sahrens * Import the given pool into the system. We set up the necessary spa_t and 2069789Sahrens * then call spa_load() to do the dirty work. 2070789Sahrens */ 2071789Sahrens int 20725094Slling spa_import(const char *pool, nvlist_t *config, nvlist_t *props) 2073789Sahrens { 2074789Sahrens spa_t *spa; 20755094Slling char *altroot = NULL; 2076789Sahrens int error; 20772082Seschrock nvlist_t *nvroot; 20785450Sbrendan nvlist_t **spares, **l2cache; 20795450Sbrendan uint_t nspares, nl2cache; 2080789Sahrens 2081789Sahrens /* 2082789Sahrens * If a pool with this name exists, return failure. 2083789Sahrens */ 2084789Sahrens mutex_enter(&spa_namespace_lock); 2085789Sahrens if (spa_lookup(pool) != NULL) { 2086789Sahrens mutex_exit(&spa_namespace_lock); 2087789Sahrens return (EEXIST); 2088789Sahrens } 2089789Sahrens 2090789Sahrens /* 20911635Sbonwick * Create and initialize the spa structure. 2092789Sahrens */ 20935094Slling (void) nvlist_lookup_string(props, 20945094Slling zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); 20951635Sbonwick spa = spa_add(pool, altroot); 2096789Sahrens spa_activate(spa); 2097789Sahrens 2098789Sahrens /* 20991635Sbonwick * Pass off the heavy lifting to spa_load(). 21001732Sbonwick * Pass TRUE for mosconfig because the user-supplied config 21011732Sbonwick * is actually the one to trust when doing an import. 21021601Sbonwick */ 21031732Sbonwick error = spa_load(spa, config, SPA_LOAD_IMPORT, B_TRUE); 2104789Sahrens 21052082Seschrock spa_config_enter(spa, RW_WRITER, FTAG); 21062082Seschrock /* 21072082Seschrock * Toss any existing sparelist, as it doesn't have any validity anymore, 21082082Seschrock * and conflicts with spa_has_spare(). 
21092082Seschrock */ 21105450Sbrendan if (spa->spa_spares.sav_config) { 21115450Sbrendan nvlist_free(spa->spa_spares.sav_config); 21125450Sbrendan spa->spa_spares.sav_config = NULL; 21132082Seschrock spa_load_spares(spa); 21142082Seschrock } 21155450Sbrendan if (spa->spa_l2cache.sav_config) { 21165450Sbrendan nvlist_free(spa->spa_l2cache.sav_config); 21175450Sbrendan spa->spa_l2cache.sav_config = NULL; 21185450Sbrendan spa_load_l2cache(spa); 21195450Sbrendan } 21202082Seschrock 21212082Seschrock VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 21222082Seschrock &nvroot) == 0); 21235450Sbrendan if (error == 0) 21245450Sbrendan error = spa_validate_aux(spa, nvroot, -1ULL, VDEV_ALLOC_SPARE); 21255450Sbrendan if (error == 0) 21265450Sbrendan error = spa_validate_aux(spa, nvroot, -1ULL, 21275450Sbrendan VDEV_ALLOC_L2CACHE); 21282082Seschrock spa_config_exit(spa, FTAG); 21292082Seschrock 21305094Slling if (error != 0 || (props && (error = spa_prop_set(spa, props)))) { 2131789Sahrens spa_unload(spa); 2132789Sahrens spa_deactivate(spa); 2133789Sahrens spa_remove(spa); 2134789Sahrens mutex_exit(&spa_namespace_lock); 2135789Sahrens return (error); 2136789Sahrens } 2137789Sahrens 21381635Sbonwick /* 21395450Sbrendan * Override any spares and level 2 cache devices as specified by 21405450Sbrendan * the user, as these may have correct device names/devids, etc. 21412082Seschrock */ 21422082Seschrock if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 21432082Seschrock &spares, &nspares) == 0) { 21445450Sbrendan if (spa->spa_spares.sav_config) 21455450Sbrendan VERIFY(nvlist_remove(spa->spa_spares.sav_config, 21462082Seschrock ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0); 21472082Seschrock else 21485450Sbrendan VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, 21492082Seschrock NV_UNIQUE_NAME, KM_SLEEP) == 0); 21505450Sbrendan VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, 21512082Seschrock ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 21522082Seschrock spa_config_enter(spa, RW_WRITER, FTAG); 21532082Seschrock spa_load_spares(spa); 21542082Seschrock spa_config_exit(spa, FTAG); 21555450Sbrendan spa->spa_spares.sav_sync = B_TRUE; 21565450Sbrendan } 21575450Sbrendan if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, 21585450Sbrendan &l2cache, &nl2cache) == 0) { 21595450Sbrendan if (spa->spa_l2cache.sav_config) 21605450Sbrendan VERIFY(nvlist_remove(spa->spa_l2cache.sav_config, 21615450Sbrendan ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0); 21625450Sbrendan else 21635450Sbrendan VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, 21645450Sbrendan NV_UNIQUE_NAME, KM_SLEEP) == 0); 21655450Sbrendan VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, 21665450Sbrendan ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); 21675450Sbrendan spa_config_enter(spa, RW_WRITER, FTAG); 21685450Sbrendan spa_load_l2cache(spa); 21695450Sbrendan spa_config_exit(spa, FTAG); 21705450Sbrendan spa->spa_l2cache.sav_sync = B_TRUE; 21712082Seschrock } 21722082Seschrock 21732082Seschrock /* 21741635Sbonwick * Update the config cache to include the newly-imported pool. 21751635Sbonwick */ 21764627Sck153898 if (spa_mode & FWRITE) 21774627Sck153898 spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); 21781635Sbonwick 2179789Sahrens /* 2180789Sahrens * Resilver anything that's out of date. 
2181789Sahrens */ 2182789Sahrens if (spa_mode & FWRITE) 2183789Sahrens VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0); 2184789Sahrens 21854451Seschrock mutex_exit(&spa_namespace_lock); 21864451Seschrock 2187789Sahrens return (0); 2188789Sahrens } 2189789Sahrens 2190789Sahrens /* 2191789Sahrens * This (illegal) pool name is used when temporarily importing a spa_t in order 2192789Sahrens * to get the vdev stats associated with the imported devices. 2193789Sahrens */ 2194789Sahrens #define TRYIMPORT_NAME "$import" 2195789Sahrens 2196789Sahrens nvlist_t * 2197789Sahrens spa_tryimport(nvlist_t *tryconfig) 2198789Sahrens { 2199789Sahrens nvlist_t *config = NULL; 2200789Sahrens char *poolname; 2201789Sahrens spa_t *spa; 2202789Sahrens uint64_t state; 2203789Sahrens 2204789Sahrens if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname)) 2205789Sahrens return (NULL); 2206789Sahrens 2207789Sahrens if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state)) 2208789Sahrens return (NULL); 2209789Sahrens 22101635Sbonwick /* 22111635Sbonwick * Create and initialize the spa structure. 22121635Sbonwick */ 2213789Sahrens mutex_enter(&spa_namespace_lock); 22141635Sbonwick spa = spa_add(TRYIMPORT_NAME, NULL); 2215789Sahrens spa_activate(spa); 2216789Sahrens 2217789Sahrens /* 22181635Sbonwick * Pass off the heavy lifting to spa_load(). 22191732Sbonwick * Pass TRUE for mosconfig because the user-supplied config 22201732Sbonwick * is actually the one to trust when doing an import. 2221789Sahrens */ 22221732Sbonwick (void) spa_load(spa, tryconfig, SPA_LOAD_TRYIMPORT, B_TRUE); 2223789Sahrens 2224789Sahrens /* 2225789Sahrens * If 'tryconfig' was at least parsable, return the current config. 2226789Sahrens */ 2227789Sahrens if (spa->spa_root_vdev != NULL) { 22281635Sbonwick spa_config_enter(spa, RW_READER, FTAG); 2229789Sahrens config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); 22301635Sbonwick spa_config_exit(spa, FTAG); 2231789Sahrens VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, 2232789Sahrens poolname) == 0); 2233789Sahrens VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, 2234789Sahrens state) == 0); 22353975Sek110237 VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP, 22363975Sek110237 spa->spa_uberblock.ub_timestamp) == 0); 22372082Seschrock 22382082Seschrock /* 22395450Sbrendan * Add the list of hot spares and level 2 cache devices. 22402082Seschrock */ 22412082Seschrock spa_add_spares(spa, config); 22425450Sbrendan spa_add_l2cache(spa, config); 2243789Sahrens } 2244789Sahrens 2245789Sahrens spa_unload(spa); 2246789Sahrens spa_deactivate(spa); 2247789Sahrens spa_remove(spa); 2248789Sahrens mutex_exit(&spa_namespace_lock); 2249789Sahrens 2250789Sahrens return (config); 2251789Sahrens } 2252789Sahrens 2253789Sahrens /* 2254789Sahrens * Pool export/destroy 2255789Sahrens * 2256789Sahrens * The act of destroying or exporting a pool is very simple. We make sure there 2257789Sahrens * is no more pending I/O and any references to the pool are gone. Then, we 2258789Sahrens * update the pool state and sync all the labels to disk, removing the 2259789Sahrens * configuration from the cache afterwards. 
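 *
 * The entry points below differ only in the new_state they pass to
 * spa_export_common():
 *
 *	spa_destroy()	POOL_STATE_DESTROYED
 *	spa_export()	POOL_STATE_EXPORTED
 *	spa_reset()	POOL_STATE_UNINITIALIZED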
2260789Sahrens */ 2261789Sahrens static int 22621775Sbillm spa_export_common(char *pool, int new_state, nvlist_t **oldconfig) 2263789Sahrens { 2264789Sahrens spa_t *spa; 2265789Sahrens 22661775Sbillm if (oldconfig) 22671775Sbillm *oldconfig = NULL; 22681775Sbillm 2269789Sahrens if (!(spa_mode & FWRITE)) 2270789Sahrens return (EROFS); 2271789Sahrens 2272789Sahrens mutex_enter(&spa_namespace_lock); 2273789Sahrens if ((spa = spa_lookup(pool)) == NULL) { 2274789Sahrens mutex_exit(&spa_namespace_lock); 2275789Sahrens return (ENOENT); 2276789Sahrens } 2277789Sahrens 2278789Sahrens /* 22791544Seschrock * Put a hold on the pool, drop the namespace lock, stop async tasks, 22801544Seschrock * reacquire the namespace lock, and see if we can export. 22811544Seschrock */ 22821544Seschrock spa_open_ref(spa, FTAG); 22831544Seschrock mutex_exit(&spa_namespace_lock); 22841544Seschrock spa_async_suspend(spa); 22851544Seschrock mutex_enter(&spa_namespace_lock); 22861544Seschrock spa_close(spa, FTAG); 22871544Seschrock 22881544Seschrock /* 2289789Sahrens * The pool will be in core if it's openable, 2290789Sahrens * in which case we can modify its state. 2291789Sahrens */ 2292789Sahrens if (spa->spa_state != POOL_STATE_UNINITIALIZED && spa->spa_sync_on) { 2293789Sahrens /* 2294789Sahrens * Objsets may be open only because they're dirty, so we 2295789Sahrens * have to force it to sync before checking spa_refcnt. 2296789Sahrens */ 2297789Sahrens spa_scrub_suspend(spa); 2298789Sahrens txg_wait_synced(spa->spa_dsl_pool, 0); 2299789Sahrens 23001544Seschrock /* 23011544Seschrock * A pool cannot be exported or destroyed if there are active 23021544Seschrock * references. If we are resetting a pool, allow references by 23031544Seschrock * fault injection handlers. 23041544Seschrock */ 23051544Seschrock if (!spa_refcount_zero(spa) || 23061544Seschrock (spa->spa_inject_ref != 0 && 23071544Seschrock new_state != POOL_STATE_UNINITIALIZED)) { 2308789Sahrens spa_scrub_resume(spa); 23091544Seschrock spa_async_resume(spa); 2310789Sahrens mutex_exit(&spa_namespace_lock); 2311789Sahrens return (EBUSY); 2312789Sahrens } 2313789Sahrens 2314789Sahrens spa_scrub_resume(spa); 2315789Sahrens VERIFY(spa_scrub(spa, POOL_SCRUB_NONE, B_TRUE) == 0); 2316789Sahrens 2317789Sahrens /* 2318789Sahrens * We want this to be reflected on every label, 2319789Sahrens * so mark them all dirty. spa_unload() will do the 2320789Sahrens * final sync that pushes these changes out. 
2321789Sahrens */ 23221544Seschrock if (new_state != POOL_STATE_UNINITIALIZED) { 23231601Sbonwick spa_config_enter(spa, RW_WRITER, FTAG); 23241544Seschrock spa->spa_state = new_state; 23251635Sbonwick spa->spa_final_txg = spa_last_synced_txg(spa) + 1; 23261544Seschrock vdev_config_dirty(spa->spa_root_vdev); 23271601Sbonwick spa_config_exit(spa, FTAG); 23281544Seschrock } 2329789Sahrens } 2330789Sahrens 23314451Seschrock spa_event_notify(spa, NULL, ESC_ZFS_POOL_DESTROY); 23324451Seschrock 2333789Sahrens if (spa->spa_state != POOL_STATE_UNINITIALIZED) { 2334789Sahrens spa_unload(spa); 2335789Sahrens spa_deactivate(spa); 2336789Sahrens } 2337789Sahrens 23381775Sbillm if (oldconfig && spa->spa_config) 23391775Sbillm VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0); 23401775Sbillm 23411544Seschrock if (new_state != POOL_STATE_UNINITIALIZED) { 23425363Seschrock spa_config_check(spa->spa_config_dir, 23435363Seschrock spa->spa_config_file); 23441544Seschrock spa_remove(spa); 23451544Seschrock spa_config_sync(); 23461544Seschrock } 2347789Sahrens mutex_exit(&spa_namespace_lock); 2348789Sahrens 2349789Sahrens return (0); 2350789Sahrens } 2351789Sahrens 2352789Sahrens /* 2353789Sahrens * Destroy a storage pool. 2354789Sahrens */ 2355789Sahrens int 2356789Sahrens spa_destroy(char *pool) 2357789Sahrens { 23581775Sbillm return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL)); 2359789Sahrens } 2360789Sahrens 2361789Sahrens /* 2362789Sahrens * Export a storage pool. 2363789Sahrens */ 2364789Sahrens int 23651775Sbillm spa_export(char *pool, nvlist_t **oldconfig) 2366789Sahrens { 23671775Sbillm return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig)); 2368789Sahrens } 2369789Sahrens 2370789Sahrens /* 23711544Seschrock * Similar to spa_export(), this unloads the spa_t without actually removing it 23721544Seschrock * from the namespace in any way. 23731544Seschrock */ 23741544Seschrock int 23751544Seschrock spa_reset(char *pool) 23761544Seschrock { 23771775Sbillm return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL)); 23781544Seschrock } 23791544Seschrock 23801544Seschrock 23811544Seschrock /* 2382789Sahrens * ========================================================================== 2383789Sahrens * Device manipulation 2384789Sahrens * ========================================================================== 2385789Sahrens */ 2386789Sahrens 2387789Sahrens /* 23884527Sperrin * Add a device to a storage pool. 
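 *
 * Like the other device operations below, this follows the
 * spa_vdev_enter()/spa_vdev_exit() pattern: spa_vdev_enter() returns
 * the txg in which the change will take effect, and every path out of
 * the function, success or error, returns through spa_vdev_exit().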
2389789Sahrens */ 2390789Sahrens int 2391789Sahrens spa_vdev_add(spa_t *spa, nvlist_t *nvroot) 2392789Sahrens { 2393789Sahrens uint64_t txg; 23941635Sbonwick int c, error; 2395789Sahrens vdev_t *rvd = spa->spa_root_vdev; 23961585Sbonwick vdev_t *vd, *tvd; 23975450Sbrendan nvlist_t **spares, **l2cache; 23985450Sbrendan uint_t nspares, nl2cache; 2399789Sahrens 2400789Sahrens txg = spa_vdev_enter(spa); 2401789Sahrens 24022082Seschrock if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0, 24032082Seschrock VDEV_ALLOC_ADD)) != 0) 24042082Seschrock return (spa_vdev_exit(spa, NULL, txg, error)); 24052082Seschrock 24063377Seschrock spa->spa_pending_vdev = vd; 2407789Sahrens 24085450Sbrendan if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, 24095450Sbrendan &nspares) != 0) 24102082Seschrock nspares = 0; 24112082Seschrock 24125450Sbrendan if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, 24135450Sbrendan &nl2cache) != 0) 24145450Sbrendan nl2cache = 0; 24155450Sbrendan 24165450Sbrendan if (vd->vdev_children == 0 && nspares == 0 && nl2cache == 0) { 24173377Seschrock spa->spa_pending_vdev = NULL; 24182082Seschrock return (spa_vdev_exit(spa, vd, txg, EINVAL)); 24193377Seschrock } 24202082Seschrock 24212082Seschrock if (vd->vdev_children != 0) { 24223377Seschrock if ((error = vdev_create(vd, txg, B_FALSE)) != 0) { 24233377Seschrock spa->spa_pending_vdev = NULL; 24242082Seschrock return (spa_vdev_exit(spa, vd, txg, error)); 24252082Seschrock } 24262082Seschrock } 24272082Seschrock 24283377Seschrock /* 24295450Sbrendan * We must validate the spares and l2cache devices after checking the 24305450Sbrendan * children. Otherwise, vdev_inuse() will blindly overwrite the spare. 24313377Seschrock */ 24325450Sbrendan if ((error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) != 0) { 24333377Seschrock spa->spa_pending_vdev = NULL; 24343377Seschrock return (spa_vdev_exit(spa, vd, txg, error)); 24353377Seschrock } 24363377Seschrock 24373377Seschrock spa->spa_pending_vdev = NULL; 24383377Seschrock 24393377Seschrock /* 24403377Seschrock * Transfer each new top-level vdev from vd to rvd. 24413377Seschrock */ 24423377Seschrock for (c = 0; c < vd->vdev_children; c++) { 24433377Seschrock tvd = vd->vdev_child[c]; 24443377Seschrock vdev_remove_child(vd, tvd); 24453377Seschrock tvd->vdev_id = rvd->vdev_children; 24463377Seschrock vdev_add_child(rvd, tvd); 24473377Seschrock vdev_config_dirty(tvd); 24483377Seschrock } 24493377Seschrock 24502082Seschrock if (nspares != 0) { 24515450Sbrendan spa_set_aux_vdevs(&spa->spa_spares, spares, nspares, 24525450Sbrendan ZPOOL_CONFIG_SPARES); 24532082Seschrock spa_load_spares(spa); 24545450Sbrendan spa->spa_spares.sav_sync = B_TRUE; 24555450Sbrendan } 24565450Sbrendan 24575450Sbrendan if (nl2cache != 0) { 24585450Sbrendan spa_set_aux_vdevs(&spa->spa_l2cache, l2cache, nl2cache, 24595450Sbrendan ZPOOL_CONFIG_L2CACHE); 24605450Sbrendan spa_load_l2cache(spa); 24615450Sbrendan spa->spa_l2cache.sav_sync = B_TRUE; 2462789Sahrens } 2463789Sahrens 2464789Sahrens /* 24651585Sbonwick * We have to be careful when adding new vdevs to an existing pool. 24661585Sbonwick * If other threads start allocating from these vdevs before we 24671585Sbonwick * sync the config cache, and we lose power, then upon reboot we may 24681585Sbonwick * fail to open the pool because there are DVAs that the config cache 24691585Sbonwick * can't translate. 
Therefore, we first add the vdevs without 24701585Sbonwick * initializing metaslabs; sync the config cache (via spa_vdev_exit()); 24711635Sbonwick * and then let spa_config_update() initialize the new metaslabs. 24721585Sbonwick * 24731585Sbonwick * spa_load() checks for added-but-not-initialized vdevs, so that 24741585Sbonwick * if we lose power at any point in this sequence, the remaining 24751585Sbonwick * steps will be completed the next time we load the pool. 2476789Sahrens */ 24771635Sbonwick (void) spa_vdev_exit(spa, vd, txg, 0); 24781585Sbonwick 24791635Sbonwick mutex_enter(&spa_namespace_lock); 24801635Sbonwick spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); 24811635Sbonwick mutex_exit(&spa_namespace_lock); 2482789Sahrens 24831635Sbonwick return (0); 2484789Sahrens } 2485789Sahrens 2486789Sahrens /* 2487789Sahrens * Attach a device to a mirror. The arguments are the path to any device 2488789Sahrens * in the mirror, and the nvroot for the new device. If the path specifies 2489789Sahrens * a device that is not mirrored, we automatically insert the mirror vdev. 2490789Sahrens * 2491789Sahrens * If 'replacing' is specified, the new device is intended to replace the 2492789Sahrens * existing device; in this case the two devices are made into their own 24934451Seschrock * mirror using the 'replacing' vdev, which is functionally identical to 2494789Sahrens * the mirror vdev (it actually reuses all the same ops) but has a few 2495789Sahrens * extra rules: you can't attach to it after it's been created, and upon 2496789Sahrens * completion of resilvering, the first disk (the one being replaced) 2497789Sahrens * is automatically detached. 2498789Sahrens */ 2499789Sahrens int 25001544Seschrock spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) 2501789Sahrens { 2502789Sahrens uint64_t txg, open_txg; 2503789Sahrens int error; 2504789Sahrens vdev_t *rvd = spa->spa_root_vdev; 2505789Sahrens vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd; 25062082Seschrock vdev_ops_t *pvops; 25074527Sperrin int is_log; 2508789Sahrens 2509789Sahrens txg = spa_vdev_enter(spa); 2510789Sahrens 25111544Seschrock oldvd = vdev_lookup_by_guid(rvd, guid); 2512789Sahrens 2513789Sahrens if (oldvd == NULL) 2514789Sahrens return (spa_vdev_exit(spa, NULL, txg, ENODEV)); 2515789Sahrens 25161585Sbonwick if (!oldvd->vdev_ops->vdev_op_leaf) 25171585Sbonwick return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 25181585Sbonwick 2519789Sahrens pvd = oldvd->vdev_parent; 2520789Sahrens 25212082Seschrock if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0, 25224451Seschrock VDEV_ALLOC_ADD)) != 0) 25234451Seschrock return (spa_vdev_exit(spa, NULL, txg, EINVAL)); 25244451Seschrock 25254451Seschrock if (newrootvd->vdev_children != 1) 2526789Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); 2527789Sahrens 2528789Sahrens newvd = newrootvd->vdev_child[0]; 2529789Sahrens 2530789Sahrens if (!newvd->vdev_ops->vdev_op_leaf) 2531789Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); 2532789Sahrens 25332082Seschrock if ((error = vdev_create(newrootvd, txg, replacing)) != 0) 2534789Sahrens return (spa_vdev_exit(spa, newrootvd, txg, error)); 2535789Sahrens 25364527Sperrin /* 25374527Sperrin * Spares can't replace logs 25384527Sperrin */ 25394527Sperrin is_log = oldvd->vdev_islog; 25404527Sperrin if (is_log && newvd->vdev_isspare) 25414527Sperrin return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 25424527Sperrin 25432082Seschrock if (!replacing) { 25442082Seschrock /* 25452082Seschrock * For 
attach, the only allowable parent is a mirror or the root 25462082Seschrock * vdev. 25472082Seschrock */ 25482082Seschrock if (pvd->vdev_ops != &vdev_mirror_ops && 25492082Seschrock pvd->vdev_ops != &vdev_root_ops) 25502082Seschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 25512082Seschrock 25522082Seschrock pvops = &vdev_mirror_ops; 25532082Seschrock } else { 25542082Seschrock /* 25552082Seschrock * Active hot spares can only be replaced by inactive hot 25562082Seschrock * spares. 25572082Seschrock */ 25582082Seschrock if (pvd->vdev_ops == &vdev_spare_ops && 25592082Seschrock pvd->vdev_child[1] == oldvd && 25602082Seschrock !spa_has_spare(spa, newvd->vdev_guid)) 25612082Seschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 25622082Seschrock 25632082Seschrock /* 25642082Seschrock * If the source is a hot spare, and the parent isn't already a 25652082Seschrock * spare, then we want to create a new hot spare. Otherwise, we 25663377Seschrock * want to create a replacing vdev. The user is not allowed to 25673377Seschrock * attach to a spared vdev child unless the 'isspare' state is 25683377Seschrock * the same (spare replaces spare, non-spare replaces 25693377Seschrock * non-spare). 25702082Seschrock */ 25712082Seschrock if (pvd->vdev_ops == &vdev_replacing_ops) 25722082Seschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 25733377Seschrock else if (pvd->vdev_ops == &vdev_spare_ops && 25743377Seschrock newvd->vdev_isspare != oldvd->vdev_isspare) 25753377Seschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 25762082Seschrock else if (pvd->vdev_ops != &vdev_spare_ops && 25772082Seschrock newvd->vdev_isspare) 25782082Seschrock pvops = &vdev_spare_ops; 25792082Seschrock else 25802082Seschrock pvops = &vdev_replacing_ops; 25812082Seschrock } 25822082Seschrock 25831175Slling /* 25841175Slling * Compare the new device size with the replaceable/attachable 25851175Slling * device size. 25861175Slling */ 25871175Slling if (newvd->vdev_psize < vdev_get_rsize(oldvd)) 2588789Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW)); 2589789Sahrens 25901732Sbonwick /* 25911732Sbonwick * The new device cannot have a higher alignment requirement 25921732Sbonwick * than the top-level vdev. 25931732Sbonwick */ 25941732Sbonwick if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift) 2595789Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EDOM)); 2596789Sahrens 2597789Sahrens /* 2598789Sahrens * If this is an in-place replacement, update oldvd's path and devid 2599789Sahrens * to make it distinguishable from newvd, and unopenable from now on. 2600789Sahrens */ 2601789Sahrens if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) { 2602789Sahrens spa_strfree(oldvd->vdev_path); 2603789Sahrens oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5, 2604789Sahrens KM_SLEEP); 2605789Sahrens (void) sprintf(oldvd->vdev_path, "%s/%s", 2606789Sahrens newvd->vdev_path, "old"); 2607789Sahrens if (oldvd->vdev_devid != NULL) { 2608789Sahrens spa_strfree(oldvd->vdev_devid); 2609789Sahrens oldvd->vdev_devid = NULL; 2610789Sahrens } 2611789Sahrens } 2612789Sahrens 2613789Sahrens /* 26142082Seschrock * If the parent is not a mirror, or if we're replacing, insert the new 26152082Seschrock * mirror/replacing/spare vdev above oldvd. 
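 *
 * E.g. attaching newvd to a previously unmirrored disk:
 *
 *	before:	root -> oldvd
 *	after:	root -> mirror -> { oldvd, newvd }
 *
 * with 'replacing' or 'spare' standing in for 'mirror' according to
 * the pvops chosen above.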
2616789Sahrens */ 2617789Sahrens if (pvd->vdev_ops != pvops) 2618789Sahrens pvd = vdev_add_parent(oldvd, pvops); 2619789Sahrens 2620789Sahrens ASSERT(pvd->vdev_top->vdev_parent == rvd); 2621789Sahrens ASSERT(pvd->vdev_ops == pvops); 2622789Sahrens ASSERT(oldvd->vdev_parent == pvd); 2623789Sahrens 2624789Sahrens /* 2625789Sahrens * Extract the new device from its root and add it to pvd. 2626789Sahrens */ 2627789Sahrens vdev_remove_child(newrootvd, newvd); 2628789Sahrens newvd->vdev_id = pvd->vdev_children; 2629789Sahrens vdev_add_child(pvd, newvd); 2630789Sahrens 26311544Seschrock /* 26321544Seschrock * If newvd is smaller than oldvd, but larger than its rsize, 26331544Seschrock * the addition of newvd may have decreased our parent's asize. 26341544Seschrock */ 26351544Seschrock pvd->vdev_asize = MIN(pvd->vdev_asize, newvd->vdev_asize); 26361544Seschrock 2637789Sahrens tvd = newvd->vdev_top; 2638789Sahrens ASSERT(pvd->vdev_top == tvd); 2639789Sahrens ASSERT(tvd->vdev_parent == rvd); 2640789Sahrens 2641789Sahrens vdev_config_dirty(tvd); 2642789Sahrens 2643789Sahrens /* 2644789Sahrens * Set newvd's DTL to [TXG_INITIAL, open_txg]. It will propagate 2645789Sahrens * upward when spa_vdev_exit() calls vdev_dtl_reassess(). 2646789Sahrens */ 2647789Sahrens open_txg = txg + TXG_CONCURRENT_STATES - 1; 2648789Sahrens 2649789Sahrens mutex_enter(&newvd->vdev_dtl_lock); 2650789Sahrens space_map_add(&newvd->vdev_dtl_map, TXG_INITIAL, 2651789Sahrens open_txg - TXG_INITIAL + 1); 2652789Sahrens mutex_exit(&newvd->vdev_dtl_lock); 2653789Sahrens 26543377Seschrock if (newvd->vdev_isspare) 26553377Seschrock spa_spare_activate(newvd); 26561544Seschrock 2657789Sahrens /* 2658789Sahrens * Mark newvd's DTL dirty in this txg. 2659789Sahrens */ 26601732Sbonwick vdev_dirty(tvd, VDD_DTL, newvd, txg); 2661789Sahrens 2662789Sahrens (void) spa_vdev_exit(spa, newrootvd, open_txg, 0); 2663789Sahrens 2664789Sahrens /* 26654451Seschrock * Kick off a resilver to update newvd. We need to grab the namespace 26664451Seschrock * lock because spa_scrub() needs to post a sysevent with the pool name. 2667789Sahrens */ 26684451Seschrock mutex_enter(&spa_namespace_lock); 2669789Sahrens VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0); 26704451Seschrock mutex_exit(&spa_namespace_lock); 2671789Sahrens 2672789Sahrens return (0); 2673789Sahrens } 2674789Sahrens 2675789Sahrens /* 2676789Sahrens * Detach a device from a mirror or replacing vdev. 2677789Sahrens * If 'replace_done' is specified, only detach if the parent 2678789Sahrens * is a replacing vdev. 2679789Sahrens */ 2680789Sahrens int 26811544Seschrock spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done) 2682789Sahrens { 2683789Sahrens uint64_t txg; 2684789Sahrens int c, t, error; 2685789Sahrens vdev_t *rvd = spa->spa_root_vdev; 2686789Sahrens vdev_t *vd, *pvd, *cvd, *tvd; 26872082Seschrock boolean_t unspare = B_FALSE; 26882082Seschrock uint64_t unspare_guid; 2689789Sahrens 2690789Sahrens txg = spa_vdev_enter(spa); 2691789Sahrens 26921544Seschrock vd = vdev_lookup_by_guid(rvd, guid); 2693789Sahrens 2694789Sahrens if (vd == NULL) 2695789Sahrens return (spa_vdev_exit(spa, NULL, txg, ENODEV)); 2696789Sahrens 26971585Sbonwick if (!vd->vdev_ops->vdev_op_leaf) 26981585Sbonwick return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 26991585Sbonwick 2700789Sahrens pvd = vd->vdev_parent; 2701789Sahrens 2702789Sahrens /* 2703789Sahrens * If replace_done is specified, only remove this device if it's 27042082Seschrock * the first child of a replacing vdev. 
For the 'spare' vdev, either 27052082Seschrock * disk can be removed. 2706789Sahrens */ 27072082Seschrock if (replace_done) { 27082082Seschrock if (pvd->vdev_ops == &vdev_replacing_ops) { 27092082Seschrock if (vd->vdev_id != 0) 27102082Seschrock return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 27112082Seschrock } else if (pvd->vdev_ops != &vdev_spare_ops) { 27122082Seschrock return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 27132082Seschrock } 27142082Seschrock } 27152082Seschrock 27162082Seschrock ASSERT(pvd->vdev_ops != &vdev_spare_ops || 27174577Sahrens spa_version(spa) >= SPA_VERSION_SPARES); 2718789Sahrens 2719789Sahrens /* 27202082Seschrock * Only mirror, replacing, and spare vdevs support detach. 2721789Sahrens */ 2722789Sahrens if (pvd->vdev_ops != &vdev_replacing_ops && 27232082Seschrock pvd->vdev_ops != &vdev_mirror_ops && 27242082Seschrock pvd->vdev_ops != &vdev_spare_ops) 2725789Sahrens return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 2726789Sahrens 2727789Sahrens /* 2728789Sahrens * If there's only one replica, you can't detach it. 2729789Sahrens */ 2730789Sahrens if (pvd->vdev_children <= 1) 2731789Sahrens return (spa_vdev_exit(spa, NULL, txg, EBUSY)); 2732789Sahrens 2733789Sahrens /* 2734789Sahrens * If all siblings have non-empty DTLs, this device may have the only 2735789Sahrens * valid copy of the data, which means we cannot safely detach it. 2736789Sahrens * 2737789Sahrens * XXX -- as in the vdev_offline() case, we really want a more 2738789Sahrens * precise DTL check. 2739789Sahrens */ 2740789Sahrens for (c = 0; c < pvd->vdev_children; c++) { 2741789Sahrens uint64_t dirty; 2742789Sahrens 2743789Sahrens cvd = pvd->vdev_child[c]; 2744789Sahrens if (cvd == vd) 2745789Sahrens continue; 2746789Sahrens if (vdev_is_dead(cvd)) 2747789Sahrens continue; 2748789Sahrens mutex_enter(&cvd->vdev_dtl_lock); 2749789Sahrens dirty = cvd->vdev_dtl_map.sm_space | 2750789Sahrens cvd->vdev_dtl_scrub.sm_space; 2751789Sahrens mutex_exit(&cvd->vdev_dtl_lock); 2752789Sahrens if (!dirty) 2753789Sahrens break; 2754789Sahrens } 27552082Seschrock 27562082Seschrock /* 27572082Seschrock * If we are a replacing or spare vdev, then we can always detach the 27582082Seschrock * latter child, as that is how one cancels the operation. 27592082Seschrock */ 27602082Seschrock if ((pvd->vdev_ops == &vdev_mirror_ops || vd->vdev_id != 1) && 27612082Seschrock c == pvd->vdev_children) 2762789Sahrens return (spa_vdev_exit(spa, NULL, txg, EBUSY)); 2763789Sahrens 2764789Sahrens /* 27652082Seschrock * If we are detaching the original disk from a spare, then it implies 27662082Seschrock * that the spare should become a real disk, and be removed from the 27672082Seschrock * active spare list for the pool. 27682082Seschrock */ 27692082Seschrock if (pvd->vdev_ops == &vdev_spare_ops && 27702082Seschrock vd->vdev_id == 0) 27712082Seschrock unspare = B_TRUE; 27722082Seschrock 27732082Seschrock /* 2774789Sahrens * Erase the disk labels so the disk can be used for other things. 2775789Sahrens * This must be done after all other error cases are handled, 2776789Sahrens * but before we disembowel vd (so we can still do I/O to it). 2777789Sahrens * But if we can't do it, don't treat the error as fatal -- 2778789Sahrens * it may be that the unwritability of the disk is the reason 2779789Sahrens * it's being detached! 2780789Sahrens */ 27813377Seschrock error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); 2782789Sahrens 2783789Sahrens /* 2784789Sahrens * Remove vd from its parent and compact the parent's children. 
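 *
 * E.g. detaching vd from a two-way mirror:
 *
 *	before:	root -> mirror -> { cvd, vd }
 *	after:	root -> cvd
 *
 * where the now single-child mirror is collapsed by
 * vdev_remove_parent() below.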
2785789Sahrens */ 2786789Sahrens vdev_remove_child(pvd, vd); 2787789Sahrens vdev_compact_children(pvd); 2788789Sahrens 2789789Sahrens /* 2790789Sahrens * Remember one of the remaining children so we can get tvd below. 2791789Sahrens */ 2792789Sahrens cvd = pvd->vdev_child[0]; 2793789Sahrens 2794789Sahrens /* 27952082Seschrock * If we need to remove the remaining child from the list of hot spares, 27962082Seschrock * do it now, marking the vdev as no longer a spare in the process. We 27972082Seschrock * must do this before vdev_remove_parent(), because that can change the 27982082Seschrock * GUID if it creates a new toplevel GUID. 27992082Seschrock */ 28002082Seschrock if (unspare) { 28012082Seschrock ASSERT(cvd->vdev_isspare); 28023377Seschrock spa_spare_remove(cvd); 28032082Seschrock unspare_guid = cvd->vdev_guid; 28042082Seschrock } 28052082Seschrock 28062082Seschrock /* 2807789Sahrens * If the parent mirror/replacing vdev only has one child, 2808789Sahrens * the parent is no longer needed. Remove it from the tree. 2809789Sahrens */ 2810789Sahrens if (pvd->vdev_children == 1) 2811789Sahrens vdev_remove_parent(cvd); 2812789Sahrens 2813789Sahrens /* 2814789Sahrens * We don't set tvd until now because the parent we just removed 2815789Sahrens * may have been the previous top-level vdev. 2816789Sahrens */ 2817789Sahrens tvd = cvd->vdev_top; 2818789Sahrens ASSERT(tvd->vdev_parent == rvd); 2819789Sahrens 2820789Sahrens /* 28213377Seschrock * Reevaluate the parent vdev state. 2822789Sahrens */ 28234451Seschrock vdev_propagate_state(cvd); 2824789Sahrens 2825789Sahrens /* 28263377Seschrock * If the device we just detached was smaller than the others, it may be 28273377Seschrock * possible to add metaslabs (i.e. grow the pool). vdev_metaslab_init() 28283377Seschrock * can't fail because the existing metaslabs are already in core, so 28293377Seschrock * there's nothing to read from disk. 2830789Sahrens */ 28311732Sbonwick VERIFY(vdev_metaslab_init(tvd, txg) == 0); 2832789Sahrens 2833789Sahrens vdev_config_dirty(tvd); 2834789Sahrens 2835789Sahrens /* 28363377Seschrock * Mark vd's DTL as dirty in this txg. vdev_dtl_sync() will see that 28373377Seschrock * vd->vdev_detached is set and free vd's DTL object in syncing context. 28383377Seschrock * But first make sure we're not on any *other* txg's DTL list, to 28393377Seschrock * prevent vd from being accessed after it's freed. 2840789Sahrens */ 2841789Sahrens for (t = 0; t < TXG_SIZE; t++) 2842789Sahrens (void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t); 28431732Sbonwick vd->vdev_detached = B_TRUE; 28441732Sbonwick vdev_dirty(tvd, VDD_DTL, vd, txg); 2845789Sahrens 28464451Seschrock spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE); 28474451Seschrock 28482082Seschrock error = spa_vdev_exit(spa, vd, txg, 0); 28492082Seschrock 28502082Seschrock /* 28513377Seschrock * If this was the removal of the original device in a hot spare vdev, 28523377Seschrock * then we want to go through and remove the device from the hot spare 28533377Seschrock * list of every other pool. 
28542082Seschrock */ 28552082Seschrock if (unspare) { 28562082Seschrock spa = NULL; 28572082Seschrock mutex_enter(&spa_namespace_lock); 28582082Seschrock while ((spa = spa_next(spa)) != NULL) { 28592082Seschrock if (spa->spa_state != POOL_STATE_ACTIVE) 28602082Seschrock continue; 28612082Seschrock 28622082Seschrock (void) spa_vdev_remove(spa, unspare_guid, B_TRUE); 28632082Seschrock } 28642082Seschrock mutex_exit(&spa_namespace_lock); 28652082Seschrock } 28662082Seschrock 28672082Seschrock return (error); 28682082Seschrock } 28692082Seschrock 28702082Seschrock /* 28715450Sbrendan * Remove a spares vdev from the nvlist config. 28722082Seschrock */ 28735450Sbrendan static int 28745450Sbrendan spa_remove_spares(spa_aux_vdev_t *sav, uint64_t guid, boolean_t unspare, 28755450Sbrendan nvlist_t **spares, int nspares, vdev_t *vd) 28762082Seschrock { 28775450Sbrendan nvlist_t *nv, **newspares; 28785450Sbrendan int i, j; 28792082Seschrock 28802082Seschrock nv = NULL; 28815450Sbrendan for (i = 0; i < nspares; i++) { 28825450Sbrendan uint64_t theguid; 28835450Sbrendan 28845450Sbrendan VERIFY(nvlist_lookup_uint64(spares[i], 28855450Sbrendan ZPOOL_CONFIG_GUID, &theguid) == 0); 28865450Sbrendan if (theguid == guid) { 28875450Sbrendan nv = spares[i]; 28885450Sbrendan break; 28892082Seschrock } 28902082Seschrock } 28912082Seschrock 28922082Seschrock /* 28935450Sbrendan * Only remove the hot spare if it's not currently in use in this pool. 28942082Seschrock */ 28955450Sbrendan if (nv == NULL && vd == NULL) 28965450Sbrendan return (ENOENT); 28975450Sbrendan 28985450Sbrendan if (nv == NULL && vd != NULL) 28995450Sbrendan return (ENOTSUP); 29005450Sbrendan 29015450Sbrendan if (!unspare && nv != NULL && vd != NULL) 29025450Sbrendan return (EBUSY); 29032082Seschrock 29042082Seschrock if (nspares == 1) { 29052082Seschrock newspares = NULL; 29062082Seschrock } else { 29072082Seschrock newspares = kmem_alloc((nspares - 1) * sizeof (void *), 29082082Seschrock KM_SLEEP); 29092082Seschrock for (i = 0, j = 0; i < nspares; i++) { 29102082Seschrock if (spares[i] != nv) 29112082Seschrock VERIFY(nvlist_dup(spares[i], 29122082Seschrock &newspares[j++], KM_SLEEP) == 0); 29132082Seschrock } 29142082Seschrock } 29152082Seschrock 29165450Sbrendan VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_SPARES, 29172082Seschrock DATA_TYPE_NVLIST_ARRAY) == 0); 29185450Sbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, 29195450Sbrendan ZPOOL_CONFIG_SPARES, newspares, nspares - 1) == 0); 29202082Seschrock for (i = 0; i < nspares - 1; i++) 29212082Seschrock nvlist_free(newspares[i]); 29222082Seschrock kmem_free(newspares, (nspares - 1) * sizeof (void *)); 29235450Sbrendan 29245450Sbrendan return (0); 29255450Sbrendan } 29265450Sbrendan 29275450Sbrendan /* 29285450Sbrendan * Remove an l2cache vdev from the nvlist config. 
29295450Sbrendan */ 29305450Sbrendan static int 29315450Sbrendan spa_remove_l2cache(spa_aux_vdev_t *sav, uint64_t guid, nvlist_t **l2cache, 29325450Sbrendan int nl2cache, vdev_t *vd) 29335450Sbrendan { 29345450Sbrendan nvlist_t *nv, **newl2cache; 29355450Sbrendan int i, j; 29365450Sbrendan 29375450Sbrendan nv = NULL; 29385450Sbrendan for (i = 0; i < nl2cache; i++) { 29395450Sbrendan uint64_t theguid; 29405450Sbrendan 29415450Sbrendan VERIFY(nvlist_lookup_uint64(l2cache[i], 29425450Sbrendan ZPOOL_CONFIG_GUID, &theguid) == 0); 29435450Sbrendan if (theguid == guid) { 29445450Sbrendan nv = l2cache[i]; 29455450Sbrendan break; 29465450Sbrendan } 29475450Sbrendan } 29485450Sbrendan 29495450Sbrendan if (vd == NULL) { 29505450Sbrendan for (i = 0; i < nl2cache; i++) { 29515450Sbrendan if (sav->sav_vdevs[i]->vdev_guid == guid) { 29525450Sbrendan vd = sav->sav_vdevs[i]; 29535450Sbrendan break; 29545450Sbrendan } 29555450Sbrendan } 29565450Sbrendan } 29575450Sbrendan 29585450Sbrendan if (nv == NULL && vd == NULL) 29595450Sbrendan return (ENOENT); 29605450Sbrendan 29615450Sbrendan if (nv == NULL && vd != NULL) 29625450Sbrendan return (ENOTSUP); 29635450Sbrendan 29645450Sbrendan if (nl2cache == 1) { 29655450Sbrendan newl2cache = NULL; 29665450Sbrendan } else { 29675450Sbrendan newl2cache = kmem_alloc((nl2cache - 1) * sizeof (void *), 29685450Sbrendan KM_SLEEP); 29695450Sbrendan for (i = 0, j = 0; i < nl2cache; i++) { 29705450Sbrendan if (l2cache[i] != nv) 29715450Sbrendan VERIFY(nvlist_dup(l2cache[i], 29725450Sbrendan &newl2cache[j++], KM_SLEEP) == 0); 29735450Sbrendan } 29745450Sbrendan } 29755450Sbrendan 29765450Sbrendan VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE, 29775450Sbrendan DATA_TYPE_NVLIST_ARRAY) == 0); 29785450Sbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, 29795450Sbrendan ZPOOL_CONFIG_L2CACHE, newl2cache, nl2cache - 1) == 0); 29805450Sbrendan for (i = 0; i < nl2cache - 1; i++) 29815450Sbrendan nvlist_free(newl2cache[i]); 29825450Sbrendan kmem_free(newl2cache, (nl2cache - 1) * sizeof (void *)); 29835450Sbrendan 29845450Sbrendan return (0); 29855450Sbrendan } 29865450Sbrendan 29875450Sbrendan /* 29885450Sbrendan * Remove a device from the pool. Currently, this supports removing only hot 29895450Sbrendan * spares and level 2 ARC devices. 
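 * The config lock is held as writer for the duration; on success the
 * affected aux vdev list is reloaded and marked for sync so the change
 * is pushed out with the pool config.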
29905450Sbrendan */
29915450Sbrendan int
29925450Sbrendan spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
29935450Sbrendan {
29945450Sbrendan vdev_t *vd;
29955450Sbrendan nvlist_t **spares, **l2cache;
29965450Sbrendan uint_t nspares, nl2cache;
29975450Sbrendan int error = 0;
29985450Sbrendan 
29995450Sbrendan spa_config_enter(spa, RW_WRITER, FTAG);
30005450Sbrendan 
30015450Sbrendan vd = spa_lookup_by_guid(spa, guid);
30025450Sbrendan 
30035450Sbrendan if (spa->spa_spares.sav_vdevs != NULL &&
30045450Sbrendan spa_spare_exists(guid, NULL) &&
30055450Sbrendan nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
30065450Sbrendan ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0) {
30075450Sbrendan if ((error = spa_remove_spares(&spa->spa_spares, guid, unspare,
30085450Sbrendan spares, nspares, vd)) != 0)
30095450Sbrendan goto out;
30105450Sbrendan spa_load_spares(spa);
30115450Sbrendan spa->spa_spares.sav_sync = B_TRUE;
30125450Sbrendan goto out;
30135450Sbrendan }
30145450Sbrendan 
30155450Sbrendan if (spa->spa_l2cache.sav_vdevs != NULL &&
30165450Sbrendan spa_l2cache_exists(guid, NULL) &&
30175450Sbrendan nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config,
30185450Sbrendan ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0) {
30195450Sbrendan if ((error = spa_remove_l2cache(&spa->spa_l2cache, guid,
30205450Sbrendan l2cache, nl2cache, vd)) != 0)
30215450Sbrendan goto out;
30225450Sbrendan spa_load_l2cache(spa);
30235450Sbrendan spa->spa_l2cache.sav_sync = B_TRUE;
30245450Sbrendan }
30252082Seschrock 
30262082Seschrock out:
30272082Seschrock spa_config_exit(spa, FTAG);
30285450Sbrendan return (error);
3029789Sahrens }
3030789Sahrens 
3031789Sahrens /*
30324451Seschrock * Find any device that's done replacing, or a vdev marked 'unspare' that's
30334451Seschrock * currently spared, so we can detach it.
3034789Sahrens */
30351544Seschrock static vdev_t *
30364451Seschrock spa_vdev_resilver_done_hunt(vdev_t *vd)
3037789Sahrens {
30381544Seschrock vdev_t *newvd, *oldvd;
3039789Sahrens int c;
3040789Sahrens 
30411544Seschrock for (c = 0; c < vd->vdev_children; c++) {
30424451Seschrock oldvd = spa_vdev_resilver_done_hunt(vd->vdev_child[c]);
30431544Seschrock if (oldvd != NULL)
30441544Seschrock return (oldvd);
30451544Seschrock }
3046789Sahrens 
30474451Seschrock /*
30484451Seschrock * Check for a completed replacement.
30494451Seschrock */
3050789Sahrens if (vd->vdev_ops == &vdev_replacing_ops && vd->vdev_children == 2) {
30511544Seschrock oldvd = vd->vdev_child[0];
30521544Seschrock newvd = vd->vdev_child[1];
3053789Sahrens 
30541544Seschrock mutex_enter(&newvd->vdev_dtl_lock);
30551544Seschrock if (newvd->vdev_dtl_map.sm_space == 0 &&
30561544Seschrock newvd->vdev_dtl_scrub.sm_space == 0) {
30571544Seschrock mutex_exit(&newvd->vdev_dtl_lock);
30581544Seschrock return (oldvd);
30591544Seschrock }
30601544Seschrock mutex_exit(&newvd->vdev_dtl_lock);
30611544Seschrock }
3062789Sahrens 
30634451Seschrock /*
30644451Seschrock * Check for a completed resilver with the 'unspare' flag set.
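 * Within a spare vdev the spare is child 0 and the original device is
 * child 1; once the spare's DTLs are empty we return the original so
 * it can be detached.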
30654451Seschrock */ 30664451Seschrock if (vd->vdev_ops == &vdev_spare_ops && vd->vdev_children == 2) { 30674451Seschrock newvd = vd->vdev_child[0]; 30684451Seschrock oldvd = vd->vdev_child[1]; 30694451Seschrock 30704451Seschrock mutex_enter(&newvd->vdev_dtl_lock); 30714451Seschrock if (newvd->vdev_unspare && 30724451Seschrock newvd->vdev_dtl_map.sm_space == 0 && 30734451Seschrock newvd->vdev_dtl_scrub.sm_space == 0) { 30744451Seschrock newvd->vdev_unspare = 0; 30754451Seschrock mutex_exit(&newvd->vdev_dtl_lock); 30764451Seschrock return (oldvd); 30774451Seschrock } 30784451Seschrock mutex_exit(&newvd->vdev_dtl_lock); 30794451Seschrock } 30804451Seschrock 30811544Seschrock return (NULL); 3082789Sahrens } 3083789Sahrens 30841544Seschrock static void 30854451Seschrock spa_vdev_resilver_done(spa_t *spa) 3086789Sahrens { 30871544Seschrock vdev_t *vd; 30882082Seschrock vdev_t *pvd; 30891544Seschrock uint64_t guid; 30902082Seschrock uint64_t pguid = 0; 3091789Sahrens 30921544Seschrock spa_config_enter(spa, RW_READER, FTAG); 3093789Sahrens 30944451Seschrock while ((vd = spa_vdev_resilver_done_hunt(spa->spa_root_vdev)) != NULL) { 30951544Seschrock guid = vd->vdev_guid; 30962082Seschrock /* 30972082Seschrock * If we have just finished replacing a hot spared device, then 30982082Seschrock * we need to detach the parent's first child (the original hot 30992082Seschrock * spare) as well. 31002082Seschrock */ 31012082Seschrock pvd = vd->vdev_parent; 31022082Seschrock if (pvd->vdev_parent->vdev_ops == &vdev_spare_ops && 31032082Seschrock pvd->vdev_id == 0) { 31042082Seschrock ASSERT(pvd->vdev_ops == &vdev_replacing_ops); 31052082Seschrock ASSERT(pvd->vdev_parent->vdev_children == 2); 31062082Seschrock pguid = pvd->vdev_parent->vdev_child[1]->vdev_guid; 31072082Seschrock } 31081544Seschrock spa_config_exit(spa, FTAG); 31091544Seschrock if (spa_vdev_detach(spa, guid, B_TRUE) != 0) 31101544Seschrock return; 31112082Seschrock if (pguid != 0 && spa_vdev_detach(spa, pguid, B_TRUE) != 0) 31122082Seschrock return; 31131544Seschrock spa_config_enter(spa, RW_READER, FTAG); 3114789Sahrens } 3115789Sahrens 31161544Seschrock spa_config_exit(spa, FTAG); 3117789Sahrens } 3118789Sahrens 3119789Sahrens /* 31201354Seschrock * Update the stored path for this vdev. Dirty the vdev configuration, relying 31211354Seschrock * on spa_vdev_enter/exit() to synchronize the labels and cache. 31221354Seschrock */ 31231354Seschrock int 31241354Seschrock spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath) 31251354Seschrock { 31261354Seschrock vdev_t *rvd, *vd; 31271354Seschrock uint64_t txg; 31281354Seschrock 31291354Seschrock rvd = spa->spa_root_vdev; 31301354Seschrock 31311354Seschrock txg = spa_vdev_enter(spa); 31321354Seschrock 31332082Seschrock if ((vd = vdev_lookup_by_guid(rvd, guid)) == NULL) { 31342082Seschrock /* 31355450Sbrendan * Determine if this is a reference to a hot spare or l2cache 31365450Sbrendan * device. If it is, update the path as stored in their 31375450Sbrendan * device list. 
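 * For these aux devices it should suffice to rewrite ZPOOL_CONFIG_PATH
 * in the nvlist entry and reload the list, as they are not part of the
 * root vdev tree (the GUID lookup above failed).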
31382082Seschrock */ 31395450Sbrendan nvlist_t **spares, **l2cache; 31405450Sbrendan uint_t i, nspares, nl2cache; 31415450Sbrendan 31425450Sbrendan if (spa->spa_spares.sav_config != NULL) { 31435450Sbrendan VERIFY(nvlist_lookup_nvlist_array( 31445450Sbrendan spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, 31455450Sbrendan &spares, &nspares) == 0); 31462082Seschrock for (i = 0; i < nspares; i++) { 31472082Seschrock uint64_t theguid; 31482082Seschrock VERIFY(nvlist_lookup_uint64(spares[i], 31492082Seschrock ZPOOL_CONFIG_GUID, &theguid) == 0); 31505450Sbrendan if (theguid == guid) { 31515450Sbrendan VERIFY(nvlist_add_string(spares[i], 31525450Sbrendan ZPOOL_CONFIG_PATH, newpath) == 0); 31535450Sbrendan spa_load_spares(spa); 31545450Sbrendan spa->spa_spares.sav_sync = B_TRUE; 31555450Sbrendan return (spa_vdev_exit(spa, NULL, txg, 31565450Sbrendan 0)); 31575450Sbrendan } 31582082Seschrock } 31592082Seschrock } 31605450Sbrendan 31615450Sbrendan if (spa->spa_l2cache.sav_config != NULL) { 31625450Sbrendan VERIFY(nvlist_lookup_nvlist_array( 31635450Sbrendan spa->spa_l2cache.sav_config, ZPOOL_CONFIG_L2CACHE, 31645450Sbrendan &l2cache, &nl2cache) == 0); 31655450Sbrendan for (i = 0; i < nl2cache; i++) { 31665450Sbrendan uint64_t theguid; 31675450Sbrendan VERIFY(nvlist_lookup_uint64(l2cache[i], 31685450Sbrendan ZPOOL_CONFIG_GUID, &theguid) == 0); 31695450Sbrendan if (theguid == guid) { 31705450Sbrendan VERIFY(nvlist_add_string(l2cache[i], 31715450Sbrendan ZPOOL_CONFIG_PATH, newpath) == 0); 31725450Sbrendan spa_load_l2cache(spa); 31735450Sbrendan spa->spa_l2cache.sav_sync = B_TRUE; 31745450Sbrendan return (spa_vdev_exit(spa, NULL, txg, 31755450Sbrendan 0)); 31765450Sbrendan } 31775450Sbrendan } 31785450Sbrendan } 31795450Sbrendan 31805450Sbrendan return (spa_vdev_exit(spa, NULL, txg, ENOENT)); 31812082Seschrock } 31821354Seschrock 31831585Sbonwick if (!vd->vdev_ops->vdev_op_leaf) 31841585Sbonwick return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 31851585Sbonwick 31861354Seschrock spa_strfree(vd->vdev_path); 31871354Seschrock vd->vdev_path = spa_strdup(newpath); 31881354Seschrock 31891354Seschrock vdev_config_dirty(vd->vdev_top); 31901354Seschrock 31911354Seschrock return (spa_vdev_exit(spa, NULL, txg, 0)); 31921354Seschrock } 31931354Seschrock 31941354Seschrock /* 3195789Sahrens * ========================================================================== 3196789Sahrens * SPA Scrubbing 3197789Sahrens * ========================================================================== 3198789Sahrens */ 3199789Sahrens 3200789Sahrens static void 3201789Sahrens spa_scrub_io_done(zio_t *zio) 3202789Sahrens { 3203789Sahrens spa_t *spa = zio->io_spa; 3204789Sahrens 32054309Smaybee arc_data_buf_free(zio->io_data, zio->io_size); 3206789Sahrens 3207789Sahrens mutex_enter(&spa->spa_scrub_lock); 32081544Seschrock if (zio->io_error && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { 32091775Sbillm vdev_t *vd = zio->io_vd ? 
zio->io_vd : spa->spa_root_vdev; 3210789Sahrens spa->spa_scrub_errors++; 3211789Sahrens mutex_enter(&vd->vdev_stat_lock); 3212789Sahrens vd->vdev_stat.vs_scrub_errors++; 3213789Sahrens mutex_exit(&vd->vdev_stat_lock); 3214789Sahrens } 32153697Smishra 32163697Smishra if (--spa->spa_scrub_inflight < spa->spa_scrub_maxinflight) 32171544Seschrock cv_broadcast(&spa->spa_scrub_io_cv); 32183697Smishra 32193697Smishra ASSERT(spa->spa_scrub_inflight >= 0); 32203697Smishra 32211544Seschrock mutex_exit(&spa->spa_scrub_lock); 3222789Sahrens } 3223789Sahrens 3224789Sahrens static void 32251544Seschrock spa_scrub_io_start(spa_t *spa, blkptr_t *bp, int priority, int flags, 32261544Seschrock zbookmark_t *zb) 3227789Sahrens { 3228789Sahrens size_t size = BP_GET_LSIZE(bp); 32293697Smishra void *data; 3230789Sahrens 3231789Sahrens mutex_enter(&spa->spa_scrub_lock); 32323697Smishra /* 32333697Smishra * Do not give too much work to vdev(s). 32343697Smishra */ 32353697Smishra while (spa->spa_scrub_inflight >= spa->spa_scrub_maxinflight) { 32363697Smishra cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); 32373697Smishra } 3238789Sahrens spa->spa_scrub_inflight++; 3239789Sahrens mutex_exit(&spa->spa_scrub_lock); 3240789Sahrens 32414309Smaybee data = arc_data_buf_alloc(size); 32423697Smishra 32431544Seschrock if (zb->zb_level == -1 && BP_GET_TYPE(bp) != DMU_OT_OBJSET) 32441544Seschrock flags |= ZIO_FLAG_SPECULATIVE; /* intent log block */ 32451544Seschrock 32461807Sbonwick flags |= ZIO_FLAG_SCRUB_THREAD | ZIO_FLAG_CANFAIL; 32471544Seschrock 3248789Sahrens zio_nowait(zio_read(NULL, spa, bp, data, size, 32491544Seschrock spa_scrub_io_done, NULL, priority, flags, zb)); 3250789Sahrens } 3251789Sahrens 3252789Sahrens /* ARGSUSED */ 3253789Sahrens static int 3254789Sahrens spa_scrub_cb(traverse_blk_cache_t *bc, spa_t *spa, void *a) 3255789Sahrens { 3256789Sahrens blkptr_t *bp = &bc->bc_blkptr; 32571775Sbillm vdev_t *vd = spa->spa_root_vdev; 32581775Sbillm dva_t *dva = bp->blk_dva; 32591775Sbillm int needs_resilver = B_FALSE; 32601775Sbillm int d; 3261789Sahrens 32621775Sbillm if (bc->bc_errno) { 3263789Sahrens /* 3264789Sahrens * We can't scrub this block, but we can continue to scrub 3265789Sahrens * the rest of the pool. Note the error and move along. 3266789Sahrens */ 3267789Sahrens mutex_enter(&spa->spa_scrub_lock); 3268789Sahrens spa->spa_scrub_errors++; 3269789Sahrens mutex_exit(&spa->spa_scrub_lock); 3270789Sahrens 32711775Sbillm mutex_enter(&vd->vdev_stat_lock); 32721775Sbillm vd->vdev_stat.vs_scrub_errors++; 32731775Sbillm mutex_exit(&vd->vdev_stat_lock); 3274789Sahrens 3275789Sahrens return (ERESTART); 3276789Sahrens } 3277789Sahrens 3278789Sahrens ASSERT(bp->blk_birth < spa->spa_scrub_maxtxg); 3279789Sahrens 32801775Sbillm for (d = 0; d < BP_GET_NDVAS(bp); d++) { 32811775Sbillm vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d])); 32821775Sbillm 32831775Sbillm ASSERT(vd != NULL); 32841775Sbillm 32851775Sbillm /* 32861775Sbillm * Keep track of how much data we've examined so that 32871775Sbillm * zpool(1M) status can make useful progress reports. 
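 * Each DVA's allocated size is charged to the toplevel vdev on which
 * it resides.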
32881775Sbillm */ 32891775Sbillm mutex_enter(&vd->vdev_stat_lock); 32901775Sbillm vd->vdev_stat.vs_scrub_examined += DVA_GET_ASIZE(&dva[d]); 32911775Sbillm mutex_exit(&vd->vdev_stat_lock); 3292789Sahrens 32931775Sbillm if (spa->spa_scrub_type == POOL_SCRUB_RESILVER) { 32941775Sbillm if (DVA_GET_GANG(&dva[d])) { 32951775Sbillm /* 32961775Sbillm * Gang members may be spread across multiple 32971775Sbillm * vdevs, so the best we can do is look at the 32981775Sbillm * pool-wide DTL. 32991775Sbillm * XXX -- it would be better to change our 33001775Sbillm * allocation policy to ensure that this can't 33011775Sbillm * happen. 33021775Sbillm */ 33031775Sbillm vd = spa->spa_root_vdev; 33041775Sbillm } 33051775Sbillm if (vdev_dtl_contains(&vd->vdev_dtl_map, 33061775Sbillm bp->blk_birth, 1)) 33071775Sbillm needs_resilver = B_TRUE; 3308789Sahrens } 33091775Sbillm } 33101775Sbillm 33111775Sbillm if (spa->spa_scrub_type == POOL_SCRUB_EVERYTHING) 3312789Sahrens spa_scrub_io_start(spa, bp, ZIO_PRIORITY_SCRUB, 33131544Seschrock ZIO_FLAG_SCRUB, &bc->bc_bookmark); 33141775Sbillm else if (needs_resilver) 33151775Sbillm spa_scrub_io_start(spa, bp, ZIO_PRIORITY_RESILVER, 33161775Sbillm ZIO_FLAG_RESILVER, &bc->bc_bookmark); 3317789Sahrens 3318789Sahrens return (0); 3319789Sahrens } 3320789Sahrens 3321789Sahrens static void 3322789Sahrens spa_scrub_thread(spa_t *spa) 3323789Sahrens { 3324789Sahrens callb_cpr_t cprinfo; 3325789Sahrens traverse_handle_t *th = spa->spa_scrub_th; 3326789Sahrens vdev_t *rvd = spa->spa_root_vdev; 3327789Sahrens pool_scrub_type_t scrub_type = spa->spa_scrub_type; 3328789Sahrens int error = 0; 3329789Sahrens boolean_t complete; 3330789Sahrens 3331789Sahrens CALLB_CPR_INIT(&cprinfo, &spa->spa_scrub_lock, callb_generic_cpr, FTAG); 3332789Sahrens 3333797Sbonwick /* 3334797Sbonwick * If we're restarting due to a snapshot create/delete, 3335797Sbonwick * wait for that to complete. 3336797Sbonwick */ 3337797Sbonwick txg_wait_synced(spa_get_dsl(spa), 0); 3338797Sbonwick 33391544Seschrock dprintf("start %s mintxg=%llu maxtxg=%llu\n", 33401544Seschrock scrub_type == POOL_SCRUB_RESILVER ? 
"resilver" : "scrub", 33411544Seschrock spa->spa_scrub_mintxg, spa->spa_scrub_maxtxg); 33421544Seschrock 33431544Seschrock spa_config_enter(spa, RW_WRITER, FTAG); 33441544Seschrock vdev_reopen(rvd); /* purge all vdev caches */ 3345789Sahrens vdev_config_dirty(rvd); /* rewrite all disk labels */ 3346789Sahrens vdev_scrub_stat_update(rvd, scrub_type, B_FALSE); 33471544Seschrock spa_config_exit(spa, FTAG); 3348789Sahrens 3349789Sahrens mutex_enter(&spa->spa_scrub_lock); 3350789Sahrens spa->spa_scrub_errors = 0; 3351789Sahrens spa->spa_scrub_active = 1; 33521544Seschrock ASSERT(spa->spa_scrub_inflight == 0); 3353789Sahrens 3354789Sahrens while (!spa->spa_scrub_stop) { 3355789Sahrens CALLB_CPR_SAFE_BEGIN(&cprinfo); 33561544Seschrock while (spa->spa_scrub_suspended) { 3357789Sahrens spa->spa_scrub_active = 0; 3358789Sahrens cv_broadcast(&spa->spa_scrub_cv); 3359789Sahrens cv_wait(&spa->spa_scrub_cv, &spa->spa_scrub_lock); 3360789Sahrens spa->spa_scrub_active = 1; 3361789Sahrens } 3362789Sahrens CALLB_CPR_SAFE_END(&cprinfo, &spa->spa_scrub_lock); 3363789Sahrens 3364789Sahrens if (spa->spa_scrub_restart_txg != 0) 3365789Sahrens break; 3366789Sahrens 3367789Sahrens mutex_exit(&spa->spa_scrub_lock); 3368789Sahrens error = traverse_more(th); 3369789Sahrens mutex_enter(&spa->spa_scrub_lock); 3370789Sahrens if (error != EAGAIN) 3371789Sahrens break; 3372789Sahrens } 3373789Sahrens 3374789Sahrens while (spa->spa_scrub_inflight) 3375789Sahrens cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); 3376789Sahrens 33771601Sbonwick spa->spa_scrub_active = 0; 33781601Sbonwick cv_broadcast(&spa->spa_scrub_cv); 33791601Sbonwick 33801601Sbonwick mutex_exit(&spa->spa_scrub_lock); 33811601Sbonwick 33821601Sbonwick spa_config_enter(spa, RW_WRITER, FTAG); 33831601Sbonwick 33841601Sbonwick mutex_enter(&spa->spa_scrub_lock); 33851601Sbonwick 33861601Sbonwick /* 33871601Sbonwick * Note: we check spa_scrub_restart_txg under both spa_scrub_lock 33881601Sbonwick * AND the spa config lock to synchronize with any config changes 33891601Sbonwick * that revise the DTLs under spa_vdev_enter() / spa_vdev_exit(). 33901601Sbonwick */ 3391789Sahrens if (spa->spa_scrub_restart_txg != 0) 3392789Sahrens error = ERESTART; 3393789Sahrens 33941544Seschrock if (spa->spa_scrub_stop) 33951544Seschrock error = EINTR; 33961544Seschrock 3397789Sahrens /* 33981544Seschrock * Even if there were uncorrectable errors, we consider the scrub 33991544Seschrock * completed. The downside is that if there is a transient error during 34001544Seschrock * a resilver, we won't resilver the data properly to the target. But 34011544Seschrock * if the damage is permanent (more likely) we will resilver forever, 34021544Seschrock * which isn't really acceptable. Since there is enough information for 34031544Seschrock * the user to know what has failed and why, this seems like a more 34041544Seschrock * tractable approach. 3405789Sahrens */ 34061544Seschrock complete = (error == 0); 3407789Sahrens 34081544Seschrock dprintf("end %s to maxtxg=%llu %s, traverse=%d, %llu errors, stop=%u\n", 34091544Seschrock scrub_type == POOL_SCRUB_RESILVER ? "resilver" : "scrub", 3410789Sahrens spa->spa_scrub_maxtxg, complete ? "done" : "FAILED", 3411789Sahrens error, spa->spa_scrub_errors, spa->spa_scrub_stop); 3412789Sahrens 3413789Sahrens mutex_exit(&spa->spa_scrub_lock); 3414789Sahrens 3415789Sahrens /* 3416789Sahrens * If the scrub/resilver completed, update all DTLs to reflect this. 3417789Sahrens * Whether it succeeded or not, vacate all temporary scrub DTLs. 
3418789Sahrens */ 3419789Sahrens vdev_dtl_reassess(rvd, spa_last_synced_txg(spa) + 1, 3420789Sahrens complete ? spa->spa_scrub_maxtxg : 0, B_TRUE); 3421789Sahrens vdev_scrub_stat_update(rvd, POOL_SCRUB_NONE, complete); 34221544Seschrock spa_errlog_rotate(spa); 34231601Sbonwick 34244451Seschrock if (scrub_type == POOL_SCRUB_RESILVER && complete) 34254451Seschrock spa_event_notify(spa, NULL, ESC_ZFS_RESILVER_FINISH); 34264451Seschrock 34271544Seschrock spa_config_exit(spa, FTAG); 3428789Sahrens 3429789Sahrens mutex_enter(&spa->spa_scrub_lock); 3430789Sahrens 34311544Seschrock /* 34321544Seschrock * We may have finished replacing a device. 34331544Seschrock * Let the async thread assess this and handle the detach. 34341544Seschrock */ 34354451Seschrock spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); 3436789Sahrens 3437789Sahrens /* 3438789Sahrens * If we were told to restart, our final act is to start a new scrub. 3439789Sahrens */ 3440789Sahrens if (error == ERESTART) 34411544Seschrock spa_async_request(spa, scrub_type == POOL_SCRUB_RESILVER ? 34421544Seschrock SPA_ASYNC_RESILVER : SPA_ASYNC_SCRUB); 3443789Sahrens 34441544Seschrock spa->spa_scrub_type = POOL_SCRUB_NONE; 34451544Seschrock spa->spa_scrub_active = 0; 34461544Seschrock spa->spa_scrub_thread = NULL; 34471544Seschrock cv_broadcast(&spa->spa_scrub_cv); 3448789Sahrens CALLB_CPR_EXIT(&cprinfo); /* drops &spa->spa_scrub_lock */ 3449789Sahrens thread_exit(); 3450789Sahrens } 3451789Sahrens 3452789Sahrens void 3453789Sahrens spa_scrub_suspend(spa_t *spa) 3454789Sahrens { 3455789Sahrens mutex_enter(&spa->spa_scrub_lock); 34561544Seschrock spa->spa_scrub_suspended++; 3457789Sahrens while (spa->spa_scrub_active) { 3458789Sahrens cv_broadcast(&spa->spa_scrub_cv); 3459789Sahrens cv_wait(&spa->spa_scrub_cv, &spa->spa_scrub_lock); 3460789Sahrens } 3461789Sahrens while (spa->spa_scrub_inflight) 3462789Sahrens cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); 3463789Sahrens mutex_exit(&spa->spa_scrub_lock); 3464789Sahrens } 3465789Sahrens 3466789Sahrens void 3467789Sahrens spa_scrub_resume(spa_t *spa) 3468789Sahrens { 3469789Sahrens mutex_enter(&spa->spa_scrub_lock); 34701544Seschrock ASSERT(spa->spa_scrub_suspended != 0); 34711544Seschrock if (--spa->spa_scrub_suspended == 0) 3472789Sahrens cv_broadcast(&spa->spa_scrub_cv); 3473789Sahrens mutex_exit(&spa->spa_scrub_lock); 3474789Sahrens } 3475789Sahrens 3476789Sahrens void 3477789Sahrens spa_scrub_restart(spa_t *spa, uint64_t txg) 3478789Sahrens { 3479789Sahrens /* 3480789Sahrens * Something happened (e.g. snapshot create/delete) that means 3481789Sahrens * we must restart any in-progress scrubs. The itinerary will 3482789Sahrens * fix this properly. 
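 * We just record the restart txg here; the scrub thread notices
 * spa_scrub_restart_txg, bails out with ERESTART, and the async
 * framework then launches a fresh scrub of the same type.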
3483789Sahrens */ 3484789Sahrens mutex_enter(&spa->spa_scrub_lock); 3485789Sahrens spa->spa_scrub_restart_txg = txg; 3486789Sahrens mutex_exit(&spa->spa_scrub_lock); 3487789Sahrens } 3488789Sahrens 34891544Seschrock int 34901544Seschrock spa_scrub(spa_t *spa, pool_scrub_type_t type, boolean_t force) 3491789Sahrens { 3492789Sahrens space_seg_t *ss; 3493789Sahrens uint64_t mintxg, maxtxg; 3494789Sahrens vdev_t *rvd = spa->spa_root_vdev; 3495789Sahrens 34964808Sek110237 ASSERT(MUTEX_HELD(&spa_namespace_lock)); 34974808Sek110237 ASSERT(!spa_config_held(spa, RW_WRITER)); 34984808Sek110237 3499789Sahrens if ((uint_t)type >= POOL_SCRUB_TYPES) 3500789Sahrens return (ENOTSUP); 3501789Sahrens 35021544Seschrock mutex_enter(&spa->spa_scrub_lock); 35031544Seschrock 3504789Sahrens /* 3505789Sahrens * If there's a scrub or resilver already in progress, stop it. 3506789Sahrens */ 3507789Sahrens while (spa->spa_scrub_thread != NULL) { 3508789Sahrens /* 3509789Sahrens * Don't stop a resilver unless forced. 3510789Sahrens */ 35111544Seschrock if (spa->spa_scrub_type == POOL_SCRUB_RESILVER && !force) { 35121544Seschrock mutex_exit(&spa->spa_scrub_lock); 3513789Sahrens return (EBUSY); 35141544Seschrock } 3515789Sahrens spa->spa_scrub_stop = 1; 3516789Sahrens cv_broadcast(&spa->spa_scrub_cv); 3517789Sahrens cv_wait(&spa->spa_scrub_cv, &spa->spa_scrub_lock); 3518789Sahrens } 3519789Sahrens 3520789Sahrens /* 3521789Sahrens * Terminate the previous traverse. 3522789Sahrens */ 3523789Sahrens if (spa->spa_scrub_th != NULL) { 3524789Sahrens traverse_fini(spa->spa_scrub_th); 3525789Sahrens spa->spa_scrub_th = NULL; 3526789Sahrens } 3527789Sahrens 35281544Seschrock if (rvd == NULL) { 35291544Seschrock ASSERT(spa->spa_scrub_stop == 0); 35301544Seschrock ASSERT(spa->spa_scrub_type == type); 35311544Seschrock ASSERT(spa->spa_scrub_restart_txg == 0); 35321544Seschrock mutex_exit(&spa->spa_scrub_lock); 35331544Seschrock return (0); 35341544Seschrock } 3535789Sahrens 3536789Sahrens mintxg = TXG_INITIAL - 1; 3537789Sahrens maxtxg = spa_last_synced_txg(spa) + 1; 3538789Sahrens 35391544Seschrock mutex_enter(&rvd->vdev_dtl_lock); 3540789Sahrens 35411544Seschrock if (rvd->vdev_dtl_map.sm_space == 0) { 35421544Seschrock /* 35431544Seschrock * The pool-wide DTL is empty. 35441732Sbonwick * If this is a resilver, there's nothing to do except 35451732Sbonwick * check whether any in-progress replacements have completed. 35461544Seschrock */ 35471732Sbonwick if (type == POOL_SCRUB_RESILVER) { 35481544Seschrock type = POOL_SCRUB_NONE; 35494451Seschrock spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); 35501732Sbonwick } 35511544Seschrock } else { 35521544Seschrock /* 35531544Seschrock * The pool-wide DTL is non-empty. 35541544Seschrock * If this is a normal scrub, upgrade to a resilver instead. 35551544Seschrock */ 35561544Seschrock if (type == POOL_SCRUB_EVERYTHING) 35571544Seschrock type = POOL_SCRUB_RESILVER; 35581544Seschrock } 3559789Sahrens 35601544Seschrock if (type == POOL_SCRUB_RESILVER) { 3561789Sahrens /* 3562789Sahrens * Determine the resilvering boundaries. 3563789Sahrens * 3564789Sahrens * Note: (mintxg, maxtxg) is an open interval, 3565789Sahrens * i.e. mintxg and maxtxg themselves are not included. 3566789Sahrens * 3567789Sahrens * Note: for maxtxg, we MIN with spa_last_synced_txg(spa) + 1 3568789Sahrens * so we don't claim to resilver a txg that's still changing. 
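 * The boundaries come from the pool-wide DTL: one txg before its
 * first missing segment, through the end of its last (clamped as
 * noted above).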
3569789Sahrens */ 3570789Sahrens ss = avl_first(&rvd->vdev_dtl_map.sm_root); 35711544Seschrock mintxg = ss->ss_start - 1; 3572789Sahrens ss = avl_last(&rvd->vdev_dtl_map.sm_root); 35731544Seschrock maxtxg = MIN(ss->ss_end, maxtxg); 35744451Seschrock 35754451Seschrock spa_event_notify(spa, NULL, ESC_ZFS_RESILVER_START); 3576789Sahrens } 3577789Sahrens 35781544Seschrock mutex_exit(&rvd->vdev_dtl_lock); 35791544Seschrock 35801544Seschrock spa->spa_scrub_stop = 0; 35811544Seschrock spa->spa_scrub_type = type; 35821544Seschrock spa->spa_scrub_restart_txg = 0; 35831544Seschrock 35841544Seschrock if (type != POOL_SCRUB_NONE) { 35851544Seschrock spa->spa_scrub_mintxg = mintxg; 3586789Sahrens spa->spa_scrub_maxtxg = maxtxg; 3587789Sahrens spa->spa_scrub_th = traverse_init(spa, spa_scrub_cb, NULL, 35881635Sbonwick ADVANCE_PRE | ADVANCE_PRUNE | ADVANCE_ZIL, 35891635Sbonwick ZIO_FLAG_CANFAIL); 3590789Sahrens traverse_add_pool(spa->spa_scrub_th, mintxg, maxtxg); 3591789Sahrens spa->spa_scrub_thread = thread_create(NULL, 0, 3592789Sahrens spa_scrub_thread, spa, 0, &p0, TS_RUN, minclsyspri); 3593789Sahrens } 3594789Sahrens 35951544Seschrock mutex_exit(&spa->spa_scrub_lock); 35961544Seschrock 3597789Sahrens return (0); 3598789Sahrens } 3599789Sahrens 36001544Seschrock /* 36011544Seschrock * ========================================================================== 36021544Seschrock * SPA async task processing 36031544Seschrock * ========================================================================== 36041544Seschrock */ 36051544Seschrock 36061544Seschrock static void 36074451Seschrock spa_async_remove(spa_t *spa, vdev_t *vd) 3608789Sahrens { 36091544Seschrock vdev_t *tvd; 36101544Seschrock int c; 36111544Seschrock 36124451Seschrock for (c = 0; c < vd->vdev_children; c++) { 36134451Seschrock tvd = vd->vdev_child[c]; 36144451Seschrock if (tvd->vdev_remove_wanted) { 36154451Seschrock tvd->vdev_remove_wanted = 0; 36164451Seschrock vdev_set_state(tvd, B_FALSE, VDEV_STATE_REMOVED, 36174451Seschrock VDEV_AUX_NONE); 36185329Sgw25295 vdev_clear(spa, tvd, B_TRUE); 36194451Seschrock vdev_config_dirty(tvd->vdev_top); 36201544Seschrock } 36214451Seschrock spa_async_remove(spa, tvd); 36221544Seschrock } 36231544Seschrock } 36241544Seschrock 36251544Seschrock static void 36261544Seschrock spa_async_thread(spa_t *spa) 36271544Seschrock { 36281544Seschrock int tasks; 36294451Seschrock uint64_t txg; 36301544Seschrock 36311544Seschrock ASSERT(spa->spa_sync_on); 3632789Sahrens 36331544Seschrock mutex_enter(&spa->spa_async_lock); 36341544Seschrock tasks = spa->spa_async_tasks; 36351544Seschrock spa->spa_async_tasks = 0; 36361544Seschrock mutex_exit(&spa->spa_async_lock); 36371544Seschrock 36381544Seschrock /* 36391635Sbonwick * See if the config needs to be updated. 36401635Sbonwick */ 36411635Sbonwick if (tasks & SPA_ASYNC_CONFIG_UPDATE) { 36421635Sbonwick mutex_enter(&spa_namespace_lock); 36431635Sbonwick spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); 36441635Sbonwick mutex_exit(&spa_namespace_lock); 36451635Sbonwick } 36461635Sbonwick 36471635Sbonwick /* 36484451Seschrock * See if any devices need to be marked REMOVED. 36495329Sgw25295 * 36505329Sgw25295 * XXX - We avoid doing this when we are in 36515329Sgw25295 * I/O failure state since spa_vdev_enter() grabs 36525329Sgw25295 * the namespace lock and would not be able to obtain 36535329Sgw25295 * the writer config lock. 
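 * spa_async_remove() walks the vdev tree, clears vdev_remove_wanted,
 * and moves each flagged vdev to the REMOVED state under the vdev
 * transaction started by spa_vdev_enter().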
36541544Seschrock */ 36555329Sgw25295 if (tasks & SPA_ASYNC_REMOVE && 36565329Sgw25295 spa_state(spa) != POOL_STATE_IO_FAILURE) { 36574451Seschrock txg = spa_vdev_enter(spa); 36584451Seschrock spa_async_remove(spa, spa->spa_root_vdev); 36594451Seschrock (void) spa_vdev_exit(spa, NULL, txg, 0); 36604451Seschrock } 36611544Seschrock 36621544Seschrock /* 36631544Seschrock * If any devices are done replacing, detach them. 36641544Seschrock */ 36654451Seschrock if (tasks & SPA_ASYNC_RESILVER_DONE) 36664451Seschrock spa_vdev_resilver_done(spa); 3667789Sahrens 36681544Seschrock /* 36694451Seschrock * Kick off a scrub. When starting a RESILVER scrub (or an EVERYTHING 36704451Seschrock * scrub which can become a resilver), we need to hold 36714451Seschrock * spa_namespace_lock() because the sysevent we post via 36724451Seschrock * spa_event_notify() needs to get the name of the pool. 36731544Seschrock */ 36744451Seschrock if (tasks & SPA_ASYNC_SCRUB) { 36754451Seschrock mutex_enter(&spa_namespace_lock); 36761544Seschrock VERIFY(spa_scrub(spa, POOL_SCRUB_EVERYTHING, B_TRUE) == 0); 36774451Seschrock mutex_exit(&spa_namespace_lock); 36784451Seschrock } 36791544Seschrock 36801544Seschrock /* 36811544Seschrock * Kick off a resilver. 36821544Seschrock */ 36834451Seschrock if (tasks & SPA_ASYNC_RESILVER) { 36844451Seschrock mutex_enter(&spa_namespace_lock); 36851544Seschrock VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0); 36864451Seschrock mutex_exit(&spa_namespace_lock); 36874451Seschrock } 36881544Seschrock 36891544Seschrock /* 36901544Seschrock * Let the world know that we're done. 36911544Seschrock */ 36921544Seschrock mutex_enter(&spa->spa_async_lock); 36931544Seschrock spa->spa_async_thread = NULL; 36941544Seschrock cv_broadcast(&spa->spa_async_cv); 36951544Seschrock mutex_exit(&spa->spa_async_lock); 36961544Seschrock thread_exit(); 36971544Seschrock } 36981544Seschrock 36991544Seschrock void 37001544Seschrock spa_async_suspend(spa_t *spa) 37011544Seschrock { 37021544Seschrock mutex_enter(&spa->spa_async_lock); 37031544Seschrock spa->spa_async_suspended++; 37041544Seschrock while (spa->spa_async_thread != NULL) 37051544Seschrock cv_wait(&spa->spa_async_cv, &spa->spa_async_lock); 37061544Seschrock mutex_exit(&spa->spa_async_lock); 37071544Seschrock } 37081544Seschrock 37091544Seschrock void 37101544Seschrock spa_async_resume(spa_t *spa) 37111544Seschrock { 37121544Seschrock mutex_enter(&spa->spa_async_lock); 37131544Seschrock ASSERT(spa->spa_async_suspended != 0); 37141544Seschrock spa->spa_async_suspended--; 37151544Seschrock mutex_exit(&spa->spa_async_lock); 37161544Seschrock } 37171544Seschrock 37181544Seschrock static void 37191544Seschrock spa_async_dispatch(spa_t *spa) 37201544Seschrock { 37211544Seschrock mutex_enter(&spa->spa_async_lock); 37221544Seschrock if (spa->spa_async_tasks && !spa->spa_async_suspended && 37231635Sbonwick spa->spa_async_thread == NULL && 37241635Sbonwick rootdir != NULL && !vn_is_readonly(rootdir)) 37251544Seschrock spa->spa_async_thread = thread_create(NULL, 0, 37261544Seschrock spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri); 37271544Seschrock mutex_exit(&spa->spa_async_lock); 37281544Seschrock } 37291544Seschrock 37301544Seschrock void 37311544Seschrock spa_async_request(spa_t *spa, int task) 37321544Seschrock { 37331544Seschrock mutex_enter(&spa->spa_async_lock); 37341544Seschrock spa->spa_async_tasks |= task; 37351544Seschrock mutex_exit(&spa->spa_async_lock); 3736789Sahrens } 3737789Sahrens 3738789Sahrens /* 3739789Sahrens * 
========================================================================== 3740789Sahrens * SPA syncing routines 3741789Sahrens * ========================================================================== 3742789Sahrens */ 3743789Sahrens 3744789Sahrens static void 3745789Sahrens spa_sync_deferred_frees(spa_t *spa, uint64_t txg) 3746789Sahrens { 3747789Sahrens bplist_t *bpl = &spa->spa_sync_bplist; 3748789Sahrens dmu_tx_t *tx; 3749789Sahrens blkptr_t blk; 3750789Sahrens uint64_t itor = 0; 3751789Sahrens zio_t *zio; 3752789Sahrens int error; 3753789Sahrens uint8_t c = 1; 3754789Sahrens 3755789Sahrens zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CONFIG_HELD); 3756789Sahrens 3757789Sahrens while (bplist_iterate(bpl, &itor, &blk) == 0) 3758789Sahrens zio_nowait(zio_free(zio, spa, txg, &blk, NULL, NULL)); 3759789Sahrens 3760789Sahrens error = zio_wait(zio); 3761789Sahrens ASSERT3U(error, ==, 0); 3762789Sahrens 3763789Sahrens tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); 3764789Sahrens bplist_vacate(bpl, tx); 3765789Sahrens 3766789Sahrens /* 3767789Sahrens * Pre-dirty the first block so we sync to convergence faster. 3768789Sahrens * (Usually only the first block is needed.) 3769789Sahrens */ 3770789Sahrens dmu_write(spa->spa_meta_objset, spa->spa_sync_bplist_obj, 0, 1, &c, tx); 3771789Sahrens dmu_tx_commit(tx); 3772789Sahrens } 3773789Sahrens 3774789Sahrens static void 37752082Seschrock spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx) 37762082Seschrock { 37772082Seschrock char *packed = NULL; 37782082Seschrock size_t nvsize = 0; 37792082Seschrock dmu_buf_t *db; 37802082Seschrock 37812082Seschrock VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0); 37822082Seschrock 37832082Seschrock packed = kmem_alloc(nvsize, KM_SLEEP); 37842082Seschrock 37852082Seschrock VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR, 37862082Seschrock KM_SLEEP) == 0); 37872082Seschrock 37882082Seschrock dmu_write(spa->spa_meta_objset, obj, 0, nvsize, packed, tx); 37892082Seschrock 37902082Seschrock kmem_free(packed, nvsize); 37912082Seschrock 37922082Seschrock VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); 37932082Seschrock dmu_buf_will_dirty(db, tx); 37942082Seschrock *(uint64_t *)db->db_data = nvsize; 37952082Seschrock dmu_buf_rele(db, FTAG); 37962082Seschrock } 37972082Seschrock 37982082Seschrock static void 37995450Sbrendan spa_sync_aux_dev(spa_t *spa, spa_aux_vdev_t *sav, dmu_tx_t *tx, 38005450Sbrendan const char *config, const char *entry) 38012082Seschrock { 38022082Seschrock nvlist_t *nvroot; 38035450Sbrendan nvlist_t **list; 38042082Seschrock int i; 38052082Seschrock 38065450Sbrendan if (!sav->sav_sync) 38072082Seschrock return; 38082082Seschrock 38092082Seschrock /* 38105450Sbrendan * Update the MOS nvlist describing the list of available devices. 38115450Sbrendan * spa_validate_aux() will have already made sure this nvlist is 38124451Seschrock * valid and the vdevs are labeled appropriately. 
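 * The list is stored as a packed nvlist in a MOS object; the object is
 * created on first use and linked from the pool directory under the
 * given 'entry' name.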
38132082Seschrock */ 38145450Sbrendan if (sav->sav_object == 0) { 38155450Sbrendan sav->sav_object = dmu_object_alloc(spa->spa_meta_objset, 38165450Sbrendan DMU_OT_PACKED_NVLIST, 1 << 14, DMU_OT_PACKED_NVLIST_SIZE, 38175450Sbrendan sizeof (uint64_t), tx); 38182082Seschrock VERIFY(zap_update(spa->spa_meta_objset, 38195450Sbrendan DMU_POOL_DIRECTORY_OBJECT, entry, sizeof (uint64_t), 1, 38205450Sbrendan &sav->sav_object, tx) == 0); 38212082Seschrock } 38222082Seschrock 38232082Seschrock VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); 38245450Sbrendan if (sav->sav_count == 0) { 38255450Sbrendan VERIFY(nvlist_add_nvlist_array(nvroot, config, NULL, 0) == 0); 38262082Seschrock } else { 38275450Sbrendan list = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); 38285450Sbrendan for (i = 0; i < sav->sav_count; i++) 38295450Sbrendan list[i] = vdev_config_generate(spa, sav->sav_vdevs[i], 38305450Sbrendan B_FALSE, B_FALSE, B_TRUE); 38315450Sbrendan VERIFY(nvlist_add_nvlist_array(nvroot, config, list, 38325450Sbrendan sav->sav_count) == 0); 38335450Sbrendan for (i = 0; i < sav->sav_count; i++) 38345450Sbrendan nvlist_free(list[i]); 38355450Sbrendan kmem_free(list, sav->sav_count * sizeof (void *)); 38362082Seschrock } 38372082Seschrock 38385450Sbrendan spa_sync_nvlist(spa, sav->sav_object, nvroot, tx); 38392926Sek110237 nvlist_free(nvroot); 38402082Seschrock 38415450Sbrendan sav->sav_sync = B_FALSE; 38422082Seschrock } 38432082Seschrock 38442082Seschrock static void 3845789Sahrens spa_sync_config_object(spa_t *spa, dmu_tx_t *tx) 3846789Sahrens { 3847789Sahrens nvlist_t *config; 3848789Sahrens 3849789Sahrens if (list_is_empty(&spa->spa_dirty_list)) 3850789Sahrens return; 3851789Sahrens 3852789Sahrens config = spa_config_generate(spa, NULL, dmu_tx_get_txg(tx), B_FALSE); 3853789Sahrens 38541635Sbonwick if (spa->spa_config_syncing) 38551635Sbonwick nvlist_free(spa->spa_config_syncing); 38561635Sbonwick spa->spa_config_syncing = config; 3857789Sahrens 38582082Seschrock spa_sync_nvlist(spa, spa->spa_config_object, config, tx); 3859789Sahrens } 3860789Sahrens 38615094Slling /* 38625094Slling * Set zpool properties. 38635094Slling */ 38643912Slling static void 38654543Smarks spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 38663912Slling { 38673912Slling spa_t *spa = arg1; 38685094Slling objset_t *mos = spa->spa_meta_objset; 38693912Slling nvlist_t *nvp = arg2; 38705094Slling nvpair_t *elem; 38714451Seschrock uint64_t intval; 38725363Seschrock char *strval, *slash; 38735094Slling zpool_prop_t prop; 38745094Slling const char *propname; 38755094Slling zprop_type_t proptype; 38765094Slling 38775094Slling elem = NULL; 38785094Slling while ((elem = nvlist_next_nvpair(nvp, elem))) { 38795094Slling switch (prop = zpool_name_to_prop(nvpair_name(elem))) { 38805094Slling case ZPOOL_PROP_VERSION: 38815094Slling /* 38825094Slling * Only set version for non-zpool-creation cases 38835094Slling * (set/import). spa_create() needs special care 38845094Slling * for version setting. 38855094Slling */ 38865094Slling if (tx->tx_txg != TXG_INITIAL) { 38875094Slling VERIFY(nvpair_value_uint64(elem, 38885094Slling &intval) == 0); 38895094Slling ASSERT(intval <= SPA_VERSION); 38905094Slling ASSERT(intval >= spa_version(spa)); 38915094Slling spa->spa_uberblock.ub_version = intval; 38925094Slling vdev_config_dirty(spa->spa_root_vdev); 38935094Slling } 38945094Slling break; 38955094Slling 38965094Slling case ZPOOL_PROP_ALTROOT: 38975094Slling /* 38985094Slling * 'altroot' is a non-persistent property. 
It should
38995094Slling * have been set temporarily at creation or import time.
39005094Slling */
39015094Slling ASSERT(spa->spa_root != NULL);
39025094Slling break;
39035094Slling 
39045363Seschrock case ZPOOL_PROP_CACHEFILE:
39055094Slling /*
39065363Seschrock * 'cachefile' is a non-persistent property, but note
39075363Seschrock * an async request that the config cache needs to be
39085363Seschrock * updated.
39095094Slling */
39105363Seschrock VERIFY(nvpair_value_string(elem, &strval) == 0);
39115363Seschrock if (spa->spa_config_dir)
39125363Seschrock spa_strfree(spa->spa_config_dir);
39135363Seschrock if (spa->spa_config_file)
39145363Seschrock spa_strfree(spa->spa_config_file);
39155363Seschrock 
39165363Seschrock if (strval[0] == '\0') {
39175363Seschrock spa->spa_config_dir = NULL;
39185363Seschrock spa->spa_config_file = NULL;
39195363Seschrock } else if (strcmp(strval, "none") == 0) {
39205363Seschrock spa->spa_config_dir = spa_strdup(strval);
39215363Seschrock spa->spa_config_file = NULL;
39225363Seschrock } else {
39235621Seschrock /*
39245621Seschrock * If the cachefile is in the root directory,
39255621Seschrock * we will end up with an empty string for
39265621Seschrock * spa_config_dir. This value is only ever
39275621Seschrock * used when concatenated with '/', so an empty
39285621Seschrock * string still behaves correctly and keeps the
39295621Seschrock * rest of the code simple.
39305621Seschrock */
39315363Seschrock slash = strrchr(strval, '/');
39325363Seschrock ASSERT(slash != NULL);
39335363Seschrock *slash = '\0';
39345621Seschrock if (strcmp(strval, spa_config_dir) == 0 &&
39355621Seschrock strcmp(slash + 1, ZPOOL_CACHE_FILE) == 0) {
39365621Seschrock spa->spa_config_dir = NULL;
39375621Seschrock spa->spa_config_file = NULL;
39385621Seschrock } else {
39395621Seschrock spa->spa_config_dir =
39405621Seschrock spa_strdup(strval);
39415621Seschrock spa->spa_config_file =
39425621Seschrock spa_strdup(slash + 1);
39435621Seschrock }
39445363Seschrock }
39455363Seschrock spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
39464543Smarks break;
39475094Slling default:
39485094Slling /*
39495094Slling * Set pool property values in the poolprops mos object.
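 * The props object is created on demand; string properties are stored
 * verbatim and numeric (including index) properties as uint64 values.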
39505094Slling */ 39515094Slling mutex_enter(&spa->spa_props_lock); 39525094Slling if (spa->spa_pool_props_object == 0) { 39535094Slling objset_t *mos = spa->spa_meta_objset; 39545094Slling 39555094Slling VERIFY((spa->spa_pool_props_object = 39565094Slling zap_create(mos, DMU_OT_POOL_PROPS, 39575094Slling DMU_OT_NONE, 0, tx)) > 0); 39585094Slling 39595094Slling VERIFY(zap_update(mos, 39605094Slling DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS, 39615094Slling 8, 1, &spa->spa_pool_props_object, tx) 39625094Slling == 0); 39635094Slling } 39645094Slling mutex_exit(&spa->spa_props_lock); 39655094Slling 39665094Slling /* normalize the property name */ 39675094Slling propname = zpool_prop_to_name(prop); 39685094Slling proptype = zpool_prop_get_type(prop); 39695094Slling 39705094Slling if (nvpair_type(elem) == DATA_TYPE_STRING) { 39715094Slling ASSERT(proptype == PROP_TYPE_STRING); 39725094Slling VERIFY(nvpair_value_string(elem, &strval) == 0); 39735094Slling VERIFY(zap_update(mos, 39745094Slling spa->spa_pool_props_object, propname, 39755094Slling 1, strlen(strval) + 1, strval, tx) == 0); 39765094Slling 39775094Slling } else if (nvpair_type(elem) == DATA_TYPE_UINT64) { 39785094Slling VERIFY(nvpair_value_uint64(elem, &intval) == 0); 39795094Slling 39805094Slling if (proptype == PROP_TYPE_INDEX) { 39815094Slling const char *unused; 39825094Slling VERIFY(zpool_prop_index_to_string( 39835094Slling prop, intval, &unused) == 0); 39845094Slling } 39855094Slling VERIFY(zap_update(mos, 39865094Slling spa->spa_pool_props_object, propname, 39875094Slling 8, 1, &intval, tx) == 0); 39885094Slling } else { 39895094Slling ASSERT(0); /* not allowed */ 39905094Slling } 39915094Slling 39925329Sgw25295 switch (prop) { 39935329Sgw25295 case ZPOOL_PROP_DELEGATION: 39945094Slling spa->spa_delegation = intval; 39955329Sgw25295 break; 39965329Sgw25295 case ZPOOL_PROP_BOOTFS: 39975094Slling spa->spa_bootfs = intval; 39985329Sgw25295 break; 39995329Sgw25295 case ZPOOL_PROP_FAILUREMODE: 40005329Sgw25295 spa->spa_failmode = intval; 40015329Sgw25295 break; 40025329Sgw25295 default: 40035329Sgw25295 break; 40045329Sgw25295 } 40053912Slling } 40065094Slling 40075094Slling /* log internal history if this is not a zpool create */ 40085094Slling if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY && 40095094Slling tx->tx_txg != TXG_INITIAL) { 40105094Slling spa_history_internal_log(LOG_POOL_PROPSET, 40115094Slling spa, tx, cr, "%s %lld %s", 40125094Slling nvpair_name(elem), intval, spa->spa_name); 40135094Slling } 40143912Slling } 40153912Slling } 40163912Slling 4017789Sahrens /* 4018789Sahrens * Sync the specified transaction group. New blocks may be dirtied as 4019789Sahrens * part of the process, so we iterate until it converges. 4020789Sahrens */ 4021789Sahrens void 4022789Sahrens spa_sync(spa_t *spa, uint64_t txg) 4023789Sahrens { 4024789Sahrens dsl_pool_t *dp = spa->spa_dsl_pool; 4025789Sahrens objset_t *mos = spa->spa_meta_objset; 4026789Sahrens bplist_t *bpl = &spa->spa_sync_bplist; 40271635Sbonwick vdev_t *rvd = spa->spa_root_vdev; 4028789Sahrens vdev_t *vd; 4029*5688Sbonwick vdev_t *svd[SPA_DVAS_PER_BP]; 4030*5688Sbonwick int svdcount = 0; 4031789Sahrens dmu_tx_t *tx; 4032789Sahrens int dirty_vdevs; 4033789Sahrens 4034789Sahrens /* 4035789Sahrens * Lock out configuration changes. 
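 * Holding the config lock as reader for the whole sync excludes
 * concurrent config changes, which take it as writer via
 * spa_vdev_enter().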
4036789Sahrens */ 40371544Seschrock spa_config_enter(spa, RW_READER, FTAG); 4038789Sahrens 4039789Sahrens spa->spa_syncing_txg = txg; 4040789Sahrens spa->spa_sync_pass = 0; 4041789Sahrens 40421544Seschrock VERIFY(0 == bplist_open(bpl, mos, spa->spa_sync_bplist_obj)); 4043789Sahrens 40442082Seschrock tx = dmu_tx_create_assigned(dp, txg); 40452082Seschrock 40462082Seschrock /* 40474577Sahrens * If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg, 40482082Seschrock * set spa_deflate if we have no raid-z vdevs. 40492082Seschrock */ 40504577Sahrens if (spa->spa_ubsync.ub_version < SPA_VERSION_RAIDZ_DEFLATE && 40514577Sahrens spa->spa_uberblock.ub_version >= SPA_VERSION_RAIDZ_DEFLATE) { 40522082Seschrock int i; 40532082Seschrock 40542082Seschrock for (i = 0; i < rvd->vdev_children; i++) { 40552082Seschrock vd = rvd->vdev_child[i]; 40562082Seschrock if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE) 40572082Seschrock break; 40582082Seschrock } 40592082Seschrock if (i == rvd->vdev_children) { 40602082Seschrock spa->spa_deflate = TRUE; 40612082Seschrock VERIFY(0 == zap_add(spa->spa_meta_objset, 40622082Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 40632082Seschrock sizeof (uint64_t), 1, &spa->spa_deflate, tx)); 40642082Seschrock } 40652082Seschrock } 40662082Seschrock 4067789Sahrens /* 4068789Sahrens * If anything has changed in this txg, push the deferred frees 4069789Sahrens * from the previous txg. If not, leave them alone so that we 4070789Sahrens * don't generate work on an otherwise idle system. 4071789Sahrens */ 4072789Sahrens if (!txg_list_empty(&dp->dp_dirty_datasets, txg) || 40732329Sek110237 !txg_list_empty(&dp->dp_dirty_dirs, txg) || 40742329Sek110237 !txg_list_empty(&dp->dp_sync_tasks, txg)) 4075789Sahrens spa_sync_deferred_frees(spa, txg); 4076789Sahrens 4077789Sahrens /* 4078789Sahrens * Iterate to convergence. 4079789Sahrens */ 4080789Sahrens do { 4081789Sahrens spa->spa_sync_pass++; 4082789Sahrens 4083789Sahrens spa_sync_config_object(spa, tx); 40845450Sbrendan spa_sync_aux_dev(spa, &spa->spa_spares, tx, 40855450Sbrendan ZPOOL_CONFIG_SPARES, DMU_POOL_SPARES); 40865450Sbrendan spa_sync_aux_dev(spa, &spa->spa_l2cache, tx, 40875450Sbrendan ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE); 40881544Seschrock spa_errlog_sync(spa, txg); 4089789Sahrens dsl_pool_sync(dp, txg); 4090789Sahrens 4091789Sahrens dirty_vdevs = 0; 4092789Sahrens while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg)) { 4093789Sahrens vdev_sync(vd, txg); 4094789Sahrens dirty_vdevs++; 4095789Sahrens } 4096789Sahrens 4097789Sahrens bplist_sync(bpl, tx); 4098789Sahrens } while (dirty_vdevs); 4099789Sahrens 4100789Sahrens bplist_close(bpl); 4101789Sahrens 4102789Sahrens dprintf("txg %llu passes %d\n", txg, spa->spa_sync_pass); 4103789Sahrens 4104789Sahrens /* 4105789Sahrens * Rewrite the vdev configuration (which includes the uberblock) 4106789Sahrens * to commit the transaction group. 41071635Sbonwick * 4108*5688Sbonwick * If there are no dirty vdevs, we sync the uberblock to a few 4109*5688Sbonwick * random top-level vdevs that are known to be visible in the 4110*5688Sbonwick * config cache (see spa_vdev_add() for details). If there *are* 4111*5688Sbonwick * dirty vdevs -- or if the sync to our random subset fails -- 4112*5688Sbonwick * then sync the uberblock to all vdevs. 
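 * The subset is capped at SPA_DVAS_PER_BP vdevs and skips log vdevs
 * and toplevels whose metaslab array has not been created yet.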
4113789Sahrens */ 4114*5688Sbonwick if (list_is_empty(&spa->spa_dirty_list)) { 41151635Sbonwick int children = rvd->vdev_children; 41161635Sbonwick int c0 = spa_get_random(children); 41171635Sbonwick int c; 41181635Sbonwick 41191635Sbonwick for (c = 0; c < children; c++) { 41201635Sbonwick vd = rvd->vdev_child[(c0 + c) % children]; 4121*5688Sbonwick if (vd->vdev_ms_array == 0 || vd->vdev_islog) 41221635Sbonwick continue; 4123*5688Sbonwick svd[svdcount++] = vd; 4124*5688Sbonwick if (svdcount == SPA_DVAS_PER_BP) 41251635Sbonwick break; 41261635Sbonwick } 41271635Sbonwick } 4128*5688Sbonwick if (svdcount == 0 || vdev_config_sync(svd, svdcount, txg) != 0) 4129*5688Sbonwick VERIFY3U(vdev_config_sync(rvd->vdev_child, 4130*5688Sbonwick rvd->vdev_children, txg), ==, 0); 41311635Sbonwick 41322082Seschrock dmu_tx_commit(tx); 41332082Seschrock 41341635Sbonwick /* 41351635Sbonwick * Clear the dirty config list. 41361635Sbonwick */ 41371635Sbonwick while ((vd = list_head(&spa->spa_dirty_list)) != NULL) 41381635Sbonwick vdev_config_clean(vd); 41391635Sbonwick 41401635Sbonwick /* 41411635Sbonwick * Now that the new config has synced transactionally, 41421635Sbonwick * let it become visible to the config cache. 41431635Sbonwick */ 41441635Sbonwick if (spa->spa_config_syncing != NULL) { 41451635Sbonwick spa_config_set(spa, spa->spa_config_syncing); 41461635Sbonwick spa->spa_config_txg = txg; 41471635Sbonwick spa->spa_config_syncing = NULL; 41481635Sbonwick } 4149789Sahrens 4150789Sahrens /* 4151789Sahrens * Make a stable copy of the fully synced uberblock. 4152789Sahrens * We use this as the root for pool traversals. 4153789Sahrens */ 4154789Sahrens spa->spa_traverse_wanted = 1; /* tells traverse_more() to stop */ 4155789Sahrens 4156789Sahrens spa_scrub_suspend(spa); /* stop scrubbing and finish I/Os */ 4157789Sahrens 4158789Sahrens rw_enter(&spa->spa_traverse_lock, RW_WRITER); 4159789Sahrens spa->spa_traverse_wanted = 0; 4160789Sahrens spa->spa_ubsync = spa->spa_uberblock; 4161789Sahrens rw_exit(&spa->spa_traverse_lock); 4162789Sahrens 4163789Sahrens spa_scrub_resume(spa); /* resume scrub with new ubsync */ 4164789Sahrens 4165789Sahrens /* 4166789Sahrens * Clean up the ZIL records for the synced txg. 4167789Sahrens */ 4168789Sahrens dsl_pool_zil_clean(dp); 4169789Sahrens 4170789Sahrens /* 4171789Sahrens * Update usable space statistics. 4172789Sahrens */ 4173789Sahrens while (vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg))) 4174789Sahrens vdev_sync_done(vd, txg); 4175789Sahrens 4176789Sahrens /* 4177789Sahrens * It had better be the case that we didn't dirty anything 41782082Seschrock * since vdev_config_sync(). 4179789Sahrens */ 4180789Sahrens ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg)); 4181789Sahrens ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg)); 4182789Sahrens ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg)); 4183789Sahrens ASSERT(bpl->bpl_queue == NULL); 4184789Sahrens 41851544Seschrock spa_config_exit(spa, FTAG); 41861544Seschrock 41871544Seschrock /* 41881544Seschrock * If any async tasks have been requested, kick them off. 41891544Seschrock */ 41901544Seschrock spa_async_dispatch(spa); 4191789Sahrens } 4192789Sahrens 4193789Sahrens /* 4194789Sahrens * Sync all pools. We don't want to hold the namespace lock across these 4195789Sahrens * operations, so we take a reference on the spa_t and drop the lock during the 4196789Sahrens * sync. 
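 * The reference taken via spa_open_ref() keeps each spa_t alive while
 * the namespace lock is dropped around txg_wait_synced().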
4197789Sahrens */ 4198789Sahrens void 4199789Sahrens spa_sync_allpools(void) 4200789Sahrens { 4201789Sahrens spa_t *spa = NULL; 4202789Sahrens mutex_enter(&spa_namespace_lock); 4203789Sahrens while ((spa = spa_next(spa)) != NULL) { 4204789Sahrens if (spa_state(spa) != POOL_STATE_ACTIVE) 4205789Sahrens continue; 4206789Sahrens spa_open_ref(spa, FTAG); 4207789Sahrens mutex_exit(&spa_namespace_lock); 4208789Sahrens txg_wait_synced(spa_get_dsl(spa), 0); 4209789Sahrens mutex_enter(&spa_namespace_lock); 4210789Sahrens spa_close(spa, FTAG); 4211789Sahrens } 4212789Sahrens mutex_exit(&spa_namespace_lock); 4213789Sahrens } 4214789Sahrens 4215789Sahrens /* 4216789Sahrens * ========================================================================== 4217789Sahrens * Miscellaneous routines 4218789Sahrens * ========================================================================== 4219789Sahrens */ 4220789Sahrens 4221789Sahrens /* 4222789Sahrens * Remove all pools in the system. 4223789Sahrens */ 4224789Sahrens void 4225789Sahrens spa_evict_all(void) 4226789Sahrens { 4227789Sahrens spa_t *spa; 4228789Sahrens 4229789Sahrens /* 4230789Sahrens * Remove all cached state. All pools should be closed now, 4231789Sahrens * so every spa in the AVL tree should be unreferenced. 4232789Sahrens */ 4233789Sahrens mutex_enter(&spa_namespace_lock); 4234789Sahrens while ((spa = spa_next(NULL)) != NULL) { 4235789Sahrens /* 42361544Seschrock * Stop async tasks. The async thread may need to detach 42371544Seschrock * a device that's been replaced, which requires grabbing 42381544Seschrock * spa_namespace_lock, so we must drop it here. 4239789Sahrens */ 4240789Sahrens spa_open_ref(spa, FTAG); 4241789Sahrens mutex_exit(&spa_namespace_lock); 42421544Seschrock spa_async_suspend(spa); 42434808Sek110237 mutex_enter(&spa_namespace_lock); 4244789Sahrens VERIFY(spa_scrub(spa, POOL_SCRUB_NONE, B_TRUE) == 0); 4245789Sahrens spa_close(spa, FTAG); 4246789Sahrens 4247789Sahrens if (spa->spa_state != POOL_STATE_UNINITIALIZED) { 4248789Sahrens spa_unload(spa); 4249789Sahrens spa_deactivate(spa); 4250789Sahrens } 4251789Sahrens spa_remove(spa); 4252789Sahrens } 4253789Sahrens mutex_exit(&spa_namespace_lock); 4254789Sahrens } 42551544Seschrock 42561544Seschrock vdev_t * 42571544Seschrock spa_lookup_by_guid(spa_t *spa, uint64_t guid) 42581544Seschrock { 42591544Seschrock return (vdev_lookup_by_guid(spa->spa_root_vdev, guid)); 42601544Seschrock } 42611760Seschrock 42621760Seschrock void 42635094Slling spa_upgrade(spa_t *spa, uint64_t version) 42641760Seschrock { 42651760Seschrock spa_config_enter(spa, RW_WRITER, FTAG); 42661760Seschrock 42671760Seschrock /* 42681760Seschrock * This should only be called for a non-faulted pool, and since a 42691760Seschrock * future version would result in an unopenable pool, this shouldn't be 42701760Seschrock * possible. 
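 * The version bump takes effect by dirtying the root vdev config;
 * after dropping the config lock we wait for the change to reach disk.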
42711760Seschrock */ 42724577Sahrens ASSERT(spa->spa_uberblock.ub_version <= SPA_VERSION); 42735094Slling ASSERT(version >= spa->spa_uberblock.ub_version); 42745094Slling 42755094Slling spa->spa_uberblock.ub_version = version; 42761760Seschrock vdev_config_dirty(spa->spa_root_vdev); 42771760Seschrock 42781760Seschrock spa_config_exit(spa, FTAG); 42792082Seschrock 42802082Seschrock txg_wait_synced(spa_get_dsl(spa), 0); 42811760Seschrock } 42822082Seschrock 42832082Seschrock boolean_t 42842082Seschrock spa_has_spare(spa_t *spa, uint64_t guid) 42852082Seschrock { 42862082Seschrock int i; 42873377Seschrock uint64_t spareguid; 42885450Sbrendan spa_aux_vdev_t *sav = &spa->spa_spares; 42895450Sbrendan 42905450Sbrendan for (i = 0; i < sav->sav_count; i++) 42915450Sbrendan if (sav->sav_vdevs[i]->vdev_guid == guid) 42922082Seschrock return (B_TRUE); 42932082Seschrock 42945450Sbrendan for (i = 0; i < sav->sav_npending; i++) { 42955450Sbrendan if (nvlist_lookup_uint64(sav->sav_pending[i], ZPOOL_CONFIG_GUID, 42965450Sbrendan &spareguid) == 0 && spareguid == guid) 42973377Seschrock return (B_TRUE); 42983377Seschrock } 42993377Seschrock 43002082Seschrock return (B_FALSE); 43012082Seschrock } 43023912Slling 43034451Seschrock /* 43044451Seschrock * Post a sysevent corresponding to the given event. The 'name' must be one of 43054451Seschrock * the event definitions in sys/sysevent/eventdefs.h. The payload will be 43064451Seschrock * filled in from the spa and (optionally) the vdev. This doesn't do anything 43074451Seschrock * in the userland libzpool, as we don't want consumers to misinterpret ztest 43084451Seschrock * or zdb as real changes. 43094451Seschrock */ 43104451Seschrock void 43114451Seschrock spa_event_notify(spa_t *spa, vdev_t *vd, const char *name) 43124451Seschrock { 43134451Seschrock #ifdef _KERNEL 43144451Seschrock sysevent_t *ev; 43154451Seschrock sysevent_attr_list_t *attr = NULL; 43164451Seschrock sysevent_value_t value; 43174451Seschrock sysevent_id_t eid; 43184451Seschrock 43194451Seschrock ev = sysevent_alloc(EC_ZFS, (char *)name, SUNW_KERN_PUB "zfs", 43204451Seschrock SE_SLEEP); 43214451Seschrock 43224451Seschrock value.value_type = SE_DATA_TYPE_STRING; 43234451Seschrock value.value.sv_string = spa_name(spa); 43244451Seschrock if (sysevent_add_attr(&attr, ZFS_EV_POOL_NAME, &value, SE_SLEEP) != 0) 43254451Seschrock goto done; 43264451Seschrock 43274451Seschrock value.value_type = SE_DATA_TYPE_UINT64; 43284451Seschrock value.value.sv_uint64 = spa_guid(spa); 43294451Seschrock if (sysevent_add_attr(&attr, ZFS_EV_POOL_GUID, &value, SE_SLEEP) != 0) 43304451Seschrock goto done; 43314451Seschrock 43324451Seschrock if (vd) { 43334451Seschrock value.value_type = SE_DATA_TYPE_UINT64; 43344451Seschrock value.value.sv_uint64 = vd->vdev_guid; 43354451Seschrock if (sysevent_add_attr(&attr, ZFS_EV_VDEV_GUID, &value, 43364451Seschrock SE_SLEEP) != 0) 43374451Seschrock goto done; 43384451Seschrock 43394451Seschrock if (vd->vdev_path) { 43404451Seschrock value.value_type = SE_DATA_TYPE_STRING; 43414451Seschrock value.value.sv_string = vd->vdev_path; 43424451Seschrock if (sysevent_add_attr(&attr, ZFS_EV_VDEV_PATH, 43434451Seschrock &value, SE_SLEEP) != 0) 43444451Seschrock goto done; 43454451Seschrock } 43464451Seschrock } 43474451Seschrock 43484451Seschrock (void) log_sysevent(ev, SE_SLEEP, &eid); 43494451Seschrock 43504451Seschrock done: 43514451Seschrock if (attr) 43524451Seschrock sysevent_free_attr(attr); 43534451Seschrock sysevent_free(ev); 43544451Seschrock #endif 43554451Seschrock } 4356