/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * This file contains all the routines used when modifying on-disk SPA state.
 * This includes opening, importing, destroying, exporting a pool, and syncing a
 * pool.
31789Sahrens */ 32789Sahrens 33789Sahrens #include <sys/zfs_context.h> 341544Seschrock #include <sys/fm/fs/zfs.h> 35789Sahrens #include <sys/spa_impl.h> 36789Sahrens #include <sys/zio.h> 37789Sahrens #include <sys/zio_checksum.h> 38789Sahrens #include <sys/zio_compress.h> 39789Sahrens #include <sys/dmu.h> 40789Sahrens #include <sys/dmu_tx.h> 41789Sahrens #include <sys/zap.h> 42789Sahrens #include <sys/zil.h> 43789Sahrens #include <sys/vdev_impl.h> 44789Sahrens #include <sys/metaslab.h> 45789Sahrens #include <sys/uberblock_impl.h> 46789Sahrens #include <sys/txg.h> 47789Sahrens #include <sys/avl.h> 48789Sahrens #include <sys/dmu_traverse.h> 493912Slling #include <sys/dmu_objset.h> 50789Sahrens #include <sys/unique.h> 51789Sahrens #include <sys/dsl_pool.h> 523912Slling #include <sys/dsl_dataset.h> 53789Sahrens #include <sys/dsl_dir.h> 54789Sahrens #include <sys/dsl_prop.h> 553912Slling #include <sys/dsl_synctask.h> 56789Sahrens #include <sys/fs/zfs.h> 575450Sbrendan #include <sys/arc.h> 58789Sahrens #include <sys/callb.h> 593975Sek110237 #include <sys/systeminfo.h> 603975Sek110237 #include <sys/sunddi.h> 616423Sgw25295 #include <sys/spa_boot.h> 62789Sahrens 638662SJordan.Vaughan@Sun.com #ifdef _KERNEL 648662SJordan.Vaughan@Sun.com #include <sys/zone.h> 658662SJordan.Vaughan@Sun.com #endif /* _KERNEL */ 668662SJordan.Vaughan@Sun.com 675094Slling #include "zfs_prop.h" 685913Sperrin #include "zfs_comutil.h" 695094Slling 709515SJonathan.Adams@Sun.COM enum zti_modes { 719515SJonathan.Adams@Sun.COM zti_mode_fixed, /* value is # of threads (min 1) */ 729515SJonathan.Adams@Sun.COM zti_mode_online_percent, /* value is % of online CPUs */ 739515SJonathan.Adams@Sun.COM zti_mode_tune, /* fill from zio_taskq_tune_* */ 749515SJonathan.Adams@Sun.COM zti_nmodes 757754SJeff.Bonwick@Sun.COM }; 762986Sek110237 779515SJonathan.Adams@Sun.COM #define ZTI_THREAD_FIX(n) { zti_mode_fixed, (n) } 789515SJonathan.Adams@Sun.COM #define ZTI_THREAD_PCT(n) { zti_mode_online_percent, (n) } 
799515SJonathan.Adams@Sun.COM #define ZTI_THREAD_TUNE { zti_mode_tune, 0 } 809515SJonathan.Adams@Sun.COM 819515SJonathan.Adams@Sun.COM #define ZTI_THREAD_ONE ZTI_THREAD_FIX(1) 829515SJonathan.Adams@Sun.COM 839515SJonathan.Adams@Sun.COM typedef struct zio_taskq_info { 849515SJonathan.Adams@Sun.COM const char *zti_name; 859515SJonathan.Adams@Sun.COM struct { 869515SJonathan.Adams@Sun.COM enum zti_modes zti_mode; 879515SJonathan.Adams@Sun.COM uint_t zti_value; 889515SJonathan.Adams@Sun.COM } zti_nthreads[ZIO_TASKQ_TYPES]; 899515SJonathan.Adams@Sun.COM } zio_taskq_info_t; 909515SJonathan.Adams@Sun.COM 919515SJonathan.Adams@Sun.COM static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = { 929515SJonathan.Adams@Sun.COM "issue", "intr" 939515SJonathan.Adams@Sun.COM }; 949515SJonathan.Adams@Sun.COM 959515SJonathan.Adams@Sun.COM const zio_taskq_info_t zio_taskqs[ZIO_TYPES] = { 969515SJonathan.Adams@Sun.COM /* ISSUE INTR */ 979515SJonathan.Adams@Sun.COM { "spa_zio_null", { ZTI_THREAD_ONE, ZTI_THREAD_ONE } }, 989515SJonathan.Adams@Sun.COM { "spa_zio_read", { ZTI_THREAD_FIX(8), ZTI_THREAD_TUNE } }, 999515SJonathan.Adams@Sun.COM { "spa_zio_write", { ZTI_THREAD_TUNE, ZTI_THREAD_FIX(8) } }, 1009515SJonathan.Adams@Sun.COM { "spa_zio_free", { ZTI_THREAD_ONE, ZTI_THREAD_ONE } }, 1019515SJonathan.Adams@Sun.COM { "spa_zio_claim", { ZTI_THREAD_ONE, ZTI_THREAD_ONE } }, 1029515SJonathan.Adams@Sun.COM { "spa_zio_ioctl", { ZTI_THREAD_ONE, ZTI_THREAD_ONE } }, 1039515SJonathan.Adams@Sun.COM }; 1049515SJonathan.Adams@Sun.COM 1059515SJonathan.Adams@Sun.COM enum zti_modes zio_taskq_tune_mode = zti_mode_online_percent; 1069515SJonathan.Adams@Sun.COM uint_t zio_taskq_tune_value = 80; /* #threads = 80% of # online CPUs */ 1079515SJonathan.Adams@Sun.COM 1085094Slling static void spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx); 1097214Slling static boolean_t spa_has_active_shared_spare(spa_t *spa); 1105094Slling 1115094Slling /* 1125094Slling * 
========================================================================== 1135094Slling * SPA properties routines 1145094Slling * ========================================================================== 1155094Slling */ 1165094Slling 1175094Slling /* 1185094Slling * Add a (source=src, propname=propval) list to an nvlist. 1195094Slling */ 1205949Slling static void 1215094Slling spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval, 1225094Slling uint64_t intval, zprop_source_t src) 1235094Slling { 1245094Slling const char *propname = zpool_prop_to_name(prop); 1255094Slling nvlist_t *propval; 1265949Slling 1275949Slling VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); 1285949Slling VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0); 1295949Slling 1305949Slling if (strval != NULL) 1315949Slling VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0); 1325949Slling else 1335949Slling VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0); 1345949Slling 1355949Slling VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0); 1365094Slling nvlist_free(propval); 1375094Slling } 1385094Slling 1395094Slling /* 1405094Slling * Get property values from the spa configuration. 
1415094Slling */ 1425949Slling static void 1435094Slling spa_prop_get_config(spa_t *spa, nvlist_t **nvp) 1445094Slling { 1458525SEric.Schrock@Sun.COM uint64_t size; 1468525SEric.Schrock@Sun.COM uint64_t used; 1475094Slling uint64_t cap, version; 1485094Slling zprop_source_t src = ZPROP_SRC_NONE; 1496643Seschrock spa_config_dirent_t *dp; 1505094Slling 1517754SJeff.Bonwick@Sun.COM ASSERT(MUTEX_HELD(&spa->spa_props_lock)); 1527754SJeff.Bonwick@Sun.COM 1538525SEric.Schrock@Sun.COM if (spa->spa_root_vdev != NULL) { 1548525SEric.Schrock@Sun.COM size = spa_get_space(spa); 1558525SEric.Schrock@Sun.COM used = spa_get_alloc(spa); 1568525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src); 1578525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src); 1588525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_USED, NULL, used, src); 1598525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_AVAILABLE, NULL, 1608525SEric.Schrock@Sun.COM size - used, src); 1618525SEric.Schrock@Sun.COM 1628525SEric.Schrock@Sun.COM cap = (size == 0) ? 
0 : (used * 100 / size); 1638525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src); 1648525SEric.Schrock@Sun.COM 1658525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL, 1668525SEric.Schrock@Sun.COM spa->spa_root_vdev->vdev_state, src); 1678525SEric.Schrock@Sun.COM 1688525SEric.Schrock@Sun.COM version = spa_version(spa); 1698525SEric.Schrock@Sun.COM if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) 1708525SEric.Schrock@Sun.COM src = ZPROP_SRC_DEFAULT; 1718525SEric.Schrock@Sun.COM else 1728525SEric.Schrock@Sun.COM src = ZPROP_SRC_LOCAL; 1738525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src); 1748525SEric.Schrock@Sun.COM } 1755949Slling 1765949Slling spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src); 1775949Slling 1785949Slling if (spa->spa_root != NULL) 1795949Slling spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root, 1805949Slling 0, ZPROP_SRC_LOCAL); 1815094Slling 1826643Seschrock if ((dp = list_head(&spa->spa_config_list)) != NULL) { 1836643Seschrock if (dp->scd_path == NULL) { 1845949Slling spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 1856643Seschrock "none", 0, ZPROP_SRC_LOCAL); 1866643Seschrock } else if (strcmp(dp->scd_path, spa_config_path) != 0) { 1875949Slling spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 1886643Seschrock dp->scd_path, 0, ZPROP_SRC_LOCAL); 1895363Seschrock } 1905363Seschrock } 1915094Slling } 1925094Slling 1935094Slling /* 1945094Slling * Get zpool property values. 
1955094Slling */ 1965094Slling int 1975094Slling spa_prop_get(spa_t *spa, nvlist_t **nvp) 1985094Slling { 1995094Slling zap_cursor_t zc; 2005094Slling zap_attribute_t za; 2015094Slling objset_t *mos = spa->spa_meta_objset; 2025094Slling int err; 2035094Slling 2045949Slling VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); 2055094Slling 2067754SJeff.Bonwick@Sun.COM mutex_enter(&spa->spa_props_lock); 2077754SJeff.Bonwick@Sun.COM 2085094Slling /* 2095094Slling * Get properties from the spa config. 2105094Slling */ 2115949Slling spa_prop_get_config(spa, nvp); 2125094Slling 2135094Slling /* If no pool property object, no more prop to get. */ 2145094Slling if (spa->spa_pool_props_object == 0) { 2155094Slling mutex_exit(&spa->spa_props_lock); 2165094Slling return (0); 2175094Slling } 2185094Slling 2195094Slling /* 2205094Slling * Get properties from the MOS pool property object. 2215094Slling */ 2225094Slling for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object); 2235094Slling (err = zap_cursor_retrieve(&zc, &za)) == 0; 2245094Slling zap_cursor_advance(&zc)) { 2255094Slling uint64_t intval = 0; 2265094Slling char *strval = NULL; 2275094Slling zprop_source_t src = ZPROP_SRC_DEFAULT; 2285094Slling zpool_prop_t prop; 2295094Slling 2305094Slling if ((prop = zpool_name_to_prop(za.za_name)) == ZPROP_INVAL) 2315094Slling continue; 2325094Slling 2335094Slling switch (za.za_integer_length) { 2345094Slling case 8: 2355094Slling /* integer property */ 2365094Slling if (za.za_first_integer != 2375094Slling zpool_prop_default_numeric(prop)) 2385094Slling src = ZPROP_SRC_LOCAL; 2395094Slling 2405094Slling if (prop == ZPOOL_PROP_BOOTFS) { 2415094Slling dsl_pool_t *dp; 2425094Slling dsl_dataset_t *ds = NULL; 2435094Slling 2445094Slling dp = spa_get_dsl(spa); 2455094Slling rw_enter(&dp->dp_config_rwlock, RW_READER); 2466689Smaybee if (err = dsl_dataset_hold_obj(dp, 2476689Smaybee za.za_first_integer, FTAG, &ds)) { 2485094Slling rw_exit(&dp->dp_config_rwlock); 2495094Slling 
break; 2505094Slling } 2515094Slling 2525094Slling strval = kmem_alloc( 2535094Slling MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, 2545094Slling KM_SLEEP); 2555094Slling dsl_dataset_name(ds, strval); 2566689Smaybee dsl_dataset_rele(ds, FTAG); 2575094Slling rw_exit(&dp->dp_config_rwlock); 2585094Slling } else { 2595094Slling strval = NULL; 2605094Slling intval = za.za_first_integer; 2615094Slling } 2625094Slling 2635949Slling spa_prop_add_list(*nvp, prop, strval, intval, src); 2645094Slling 2655094Slling if (strval != NULL) 2665094Slling kmem_free(strval, 2675094Slling MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); 2685094Slling 2695094Slling break; 2705094Slling 2715094Slling case 1: 2725094Slling /* string property */ 2735094Slling strval = kmem_alloc(za.za_num_integers, KM_SLEEP); 2745094Slling err = zap_lookup(mos, spa->spa_pool_props_object, 2755094Slling za.za_name, 1, za.za_num_integers, strval); 2765094Slling if (err) { 2775094Slling kmem_free(strval, za.za_num_integers); 2785094Slling break; 2795094Slling } 2805949Slling spa_prop_add_list(*nvp, prop, strval, 0, src); 2815094Slling kmem_free(strval, za.za_num_integers); 2825094Slling break; 2835094Slling 2845094Slling default: 2855094Slling break; 2865094Slling } 2875094Slling } 2885094Slling zap_cursor_fini(&zc); 2895094Slling mutex_exit(&spa->spa_props_lock); 2905094Slling out: 2915094Slling if (err && err != ENOENT) { 2925094Slling nvlist_free(*nvp); 2935949Slling *nvp = NULL; 2945094Slling return (err); 2955094Slling } 2965094Slling 2975094Slling return (0); 2985094Slling } 2995094Slling 3005094Slling /* 3015094Slling * Validate the given pool properties nvlist and modify the list 3025094Slling * for the property values to be set. 
3035094Slling */ 3045094Slling static int 3055094Slling spa_prop_validate(spa_t *spa, nvlist_t *props) 3065094Slling { 3075094Slling nvpair_t *elem; 3085094Slling int error = 0, reset_bootfs = 0; 3095094Slling uint64_t objnum; 3105094Slling 3115094Slling elem = NULL; 3125094Slling while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { 3135094Slling zpool_prop_t prop; 3145094Slling char *propname, *strval; 3155094Slling uint64_t intval; 3165094Slling objset_t *os; 3175363Seschrock char *slash; 3185094Slling 3195094Slling propname = nvpair_name(elem); 3205094Slling 3215094Slling if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) 3225094Slling return (EINVAL); 3235094Slling 3245094Slling switch (prop) { 3255094Slling case ZPOOL_PROP_VERSION: 3265094Slling error = nvpair_value_uint64(elem, &intval); 3275094Slling if (!error && 3285094Slling (intval < spa_version(spa) || intval > SPA_VERSION)) 3295094Slling error = EINVAL; 3305094Slling break; 3315094Slling 3325094Slling case ZPOOL_PROP_DELEGATION: 3335094Slling case ZPOOL_PROP_AUTOREPLACE: 3347538SRichard.Morris@Sun.COM case ZPOOL_PROP_LISTSNAPS: 3355094Slling error = nvpair_value_uint64(elem, &intval); 3365094Slling if (!error && intval > 1) 3375094Slling error = EINVAL; 3385094Slling break; 3395094Slling 3405094Slling case ZPOOL_PROP_BOOTFS: 3419630SJeff.Bonwick@Sun.COM /* 3429630SJeff.Bonwick@Sun.COM * If the pool version is less than SPA_VERSION_BOOTFS, 3439630SJeff.Bonwick@Sun.COM * or the pool is still being created (version == 0), 3449630SJeff.Bonwick@Sun.COM * the bootfs property cannot be set. 
3459630SJeff.Bonwick@Sun.COM */ 3465094Slling if (spa_version(spa) < SPA_VERSION_BOOTFS) { 3475094Slling error = ENOTSUP; 3485094Slling break; 3495094Slling } 3505094Slling 3515094Slling /* 3527042Sgw25295 * Make sure the vdev config is bootable 3535094Slling */ 3547042Sgw25295 if (!vdev_is_bootable(spa->spa_root_vdev)) { 3555094Slling error = ENOTSUP; 3565094Slling break; 3575094Slling } 3585094Slling 3595094Slling reset_bootfs = 1; 3605094Slling 3615094Slling error = nvpair_value_string(elem, &strval); 3625094Slling 3635094Slling if (!error) { 3647042Sgw25295 uint64_t compress; 3657042Sgw25295 3665094Slling if (strval == NULL || strval[0] == '\0') { 3675094Slling objnum = zpool_prop_default_numeric( 3685094Slling ZPOOL_PROP_BOOTFS); 3695094Slling break; 3705094Slling } 3715094Slling 3725094Slling if (error = dmu_objset_open(strval, DMU_OST_ZFS, 3736689Smaybee DS_MODE_USER | DS_MODE_READONLY, &os)) 3745094Slling break; 3757042Sgw25295 3767042Sgw25295 /* We don't support gzip bootable datasets */ 3777042Sgw25295 if ((error = dsl_prop_get_integer(strval, 3787042Sgw25295 zfs_prop_to_name(ZFS_PROP_COMPRESSION), 3797042Sgw25295 &compress, NULL)) == 0 && 3807042Sgw25295 !BOOTFS_COMPRESS_VALID(compress)) { 3817042Sgw25295 error = ENOTSUP; 3827042Sgw25295 } else { 3837042Sgw25295 objnum = dmu_objset_id(os); 3847042Sgw25295 } 3855094Slling dmu_objset_close(os); 3865094Slling } 3875094Slling break; 3887754SJeff.Bonwick@Sun.COM 3895329Sgw25295 case ZPOOL_PROP_FAILUREMODE: 3905329Sgw25295 error = nvpair_value_uint64(elem, &intval); 3915329Sgw25295 if (!error && (intval < ZIO_FAILURE_MODE_WAIT || 3925329Sgw25295 intval > ZIO_FAILURE_MODE_PANIC)) 3935329Sgw25295 error = EINVAL; 3945329Sgw25295 3955329Sgw25295 /* 3965329Sgw25295 * This is a special case which only occurs when 3975329Sgw25295 * the pool has completely failed. 
This allows 3985329Sgw25295 * the user to change the in-core failmode property 3995329Sgw25295 * without syncing it out to disk (I/Os might 4005329Sgw25295 * currently be blocked). We do this by returning 4015329Sgw25295 * EIO to the caller (spa_prop_set) to trick it 4025329Sgw25295 * into thinking we encountered a property validation 4035329Sgw25295 * error. 4045329Sgw25295 */ 4057754SJeff.Bonwick@Sun.COM if (!error && spa_suspended(spa)) { 4065329Sgw25295 spa->spa_failmode = intval; 4075329Sgw25295 error = EIO; 4085329Sgw25295 } 4095329Sgw25295 break; 4105363Seschrock 4115363Seschrock case ZPOOL_PROP_CACHEFILE: 4125363Seschrock if ((error = nvpair_value_string(elem, &strval)) != 0) 4135363Seschrock break; 4145363Seschrock 4155363Seschrock if (strval[0] == '\0') 4165363Seschrock break; 4175363Seschrock 4185363Seschrock if (strcmp(strval, "none") == 0) 4195363Seschrock break; 4205363Seschrock 4215363Seschrock if (strval[0] != '/') { 4225363Seschrock error = EINVAL; 4235363Seschrock break; 4245363Seschrock } 4255363Seschrock 4265363Seschrock slash = strrchr(strval, '/'); 4275363Seschrock ASSERT(slash != NULL); 4285363Seschrock 4295363Seschrock if (slash[1] == '\0' || strcmp(slash, "/.") == 0 || 4305363Seschrock strcmp(slash, "/..") == 0) 4315363Seschrock error = EINVAL; 4325363Seschrock break; 4335094Slling } 4345094Slling 4355094Slling if (error) 4365094Slling break; 4375094Slling } 4385094Slling 4395094Slling if (!error && reset_bootfs) { 4405094Slling error = nvlist_remove(props, 4415094Slling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING); 4425094Slling 4435094Slling if (!error) { 4445094Slling error = nvlist_add_uint64(props, 4455094Slling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum); 4465094Slling } 4475094Slling } 4485094Slling 4495094Slling return (error); 4505094Slling } 4515094Slling 4528525SEric.Schrock@Sun.COM void 4538525SEric.Schrock@Sun.COM spa_configfile_set(spa_t *spa, nvlist_t *nvp, boolean_t need_sync) 4548525SEric.Schrock@Sun.COM { 
4558525SEric.Schrock@Sun.COM char *cachefile; 4568525SEric.Schrock@Sun.COM spa_config_dirent_t *dp; 4578525SEric.Schrock@Sun.COM 4588525SEric.Schrock@Sun.COM if (nvlist_lookup_string(nvp, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), 4598525SEric.Schrock@Sun.COM &cachefile) != 0) 4608525SEric.Schrock@Sun.COM return; 4618525SEric.Schrock@Sun.COM 4628525SEric.Schrock@Sun.COM dp = kmem_alloc(sizeof (spa_config_dirent_t), 4638525SEric.Schrock@Sun.COM KM_SLEEP); 4648525SEric.Schrock@Sun.COM 4658525SEric.Schrock@Sun.COM if (cachefile[0] == '\0') 4668525SEric.Schrock@Sun.COM dp->scd_path = spa_strdup(spa_config_path); 4678525SEric.Schrock@Sun.COM else if (strcmp(cachefile, "none") == 0) 4688525SEric.Schrock@Sun.COM dp->scd_path = NULL; 4698525SEric.Schrock@Sun.COM else 4708525SEric.Schrock@Sun.COM dp->scd_path = spa_strdup(cachefile); 4718525SEric.Schrock@Sun.COM 4728525SEric.Schrock@Sun.COM list_insert_head(&spa->spa_config_list, dp); 4738525SEric.Schrock@Sun.COM if (need_sync) 4748525SEric.Schrock@Sun.COM spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); 4758525SEric.Schrock@Sun.COM } 4768525SEric.Schrock@Sun.COM 4775094Slling int 4785094Slling spa_prop_set(spa_t *spa, nvlist_t *nvp) 4795094Slling { 4805094Slling int error; 4818525SEric.Schrock@Sun.COM nvpair_t *elem; 4828525SEric.Schrock@Sun.COM boolean_t need_sync = B_FALSE; 4838525SEric.Schrock@Sun.COM zpool_prop_t prop; 4845094Slling 4855094Slling if ((error = spa_prop_validate(spa, nvp)) != 0) 4865094Slling return (error); 4875094Slling 4888525SEric.Schrock@Sun.COM elem = NULL; 4898525SEric.Schrock@Sun.COM while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) { 4908525SEric.Schrock@Sun.COM if ((prop = zpool_name_to_prop( 4918525SEric.Schrock@Sun.COM nvpair_name(elem))) == ZPROP_INVAL) 4928525SEric.Schrock@Sun.COM return (EINVAL); 4938525SEric.Schrock@Sun.COM 4948525SEric.Schrock@Sun.COM if (prop == ZPOOL_PROP_CACHEFILE || prop == ZPOOL_PROP_ALTROOT) 4958525SEric.Schrock@Sun.COM continue; 4968525SEric.Schrock@Sun.COM 
4978525SEric.Schrock@Sun.COM need_sync = B_TRUE; 4988525SEric.Schrock@Sun.COM break; 4998525SEric.Schrock@Sun.COM } 5008525SEric.Schrock@Sun.COM 5018525SEric.Schrock@Sun.COM if (need_sync) 5028525SEric.Schrock@Sun.COM return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props, 5038525SEric.Schrock@Sun.COM spa, nvp, 3)); 5048525SEric.Schrock@Sun.COM else 5058525SEric.Schrock@Sun.COM return (0); 5065094Slling } 5075094Slling 5085094Slling /* 5095094Slling * If the bootfs property value is dsobj, clear it. 5105094Slling */ 5115094Slling void 5125094Slling spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx) 5135094Slling { 5145094Slling if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) { 5155094Slling VERIFY(zap_remove(spa->spa_meta_objset, 5165094Slling spa->spa_pool_props_object, 5175094Slling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0); 5185094Slling spa->spa_bootfs = 0; 5195094Slling } 5205094Slling } 5215094Slling 522789Sahrens /* 523789Sahrens * ========================================================================== 524789Sahrens * SPA state manipulation (open/create/destroy/import/export) 525789Sahrens * ========================================================================== 526789Sahrens */ 527789Sahrens 5281544Seschrock static int 5291544Seschrock spa_error_entry_compare(const void *a, const void *b) 5301544Seschrock { 5311544Seschrock spa_error_entry_t *sa = (spa_error_entry_t *)a; 5321544Seschrock spa_error_entry_t *sb = (spa_error_entry_t *)b; 5331544Seschrock int ret; 5341544Seschrock 5351544Seschrock ret = bcmp(&sa->se_bookmark, &sb->se_bookmark, 5361544Seschrock sizeof (zbookmark_t)); 5371544Seschrock 5381544Seschrock if (ret < 0) 5391544Seschrock return (-1); 5401544Seschrock else if (ret > 0) 5411544Seschrock return (1); 5421544Seschrock else 5431544Seschrock return (0); 5441544Seschrock } 5451544Seschrock 5461544Seschrock /* 5471544Seschrock * Utility function which retrieves copies of the current logs and 
5481544Seschrock * re-initializes them in the process. 5491544Seschrock */ 5501544Seschrock void 5511544Seschrock spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub) 5521544Seschrock { 5531544Seschrock ASSERT(MUTEX_HELD(&spa->spa_errlist_lock)); 5541544Seschrock 5551544Seschrock bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t)); 5561544Seschrock bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t)); 5571544Seschrock 5581544Seschrock avl_create(&spa->spa_errlist_scrub, 5591544Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 5601544Seschrock offsetof(spa_error_entry_t, se_avl)); 5611544Seschrock avl_create(&spa->spa_errlist_last, 5621544Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 5631544Seschrock offsetof(spa_error_entry_t, se_avl)); 5641544Seschrock } 5651544Seschrock 566789Sahrens /* 567789Sahrens * Activate an uninitialized pool. 568789Sahrens */ 569789Sahrens static void 5708241SJeff.Bonwick@Sun.COM spa_activate(spa_t *spa, int mode) 571789Sahrens { 572789Sahrens ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); 573789Sahrens 574789Sahrens spa->spa_state = POOL_STATE_ACTIVE; 5758241SJeff.Bonwick@Sun.COM spa->spa_mode = mode; 576789Sahrens 5779480SGeorge.Wilson@Sun.COM spa->spa_normal_class = metaslab_class_create(zfs_metaslab_ops); 5789480SGeorge.Wilson@Sun.COM spa->spa_log_class = metaslab_class_create(zfs_metaslab_ops); 579789Sahrens 5807754SJeff.Bonwick@Sun.COM for (int t = 0; t < ZIO_TYPES; t++) { 5819515SJonathan.Adams@Sun.COM const zio_taskq_info_t *ztip = &zio_taskqs[t]; 5827754SJeff.Bonwick@Sun.COM for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { 5839515SJonathan.Adams@Sun.COM enum zti_modes mode = ztip->zti_nthreads[q].zti_mode; 5849515SJonathan.Adams@Sun.COM uint_t value = ztip->zti_nthreads[q].zti_value; 5859515SJonathan.Adams@Sun.COM char name[32]; 5869515SJonathan.Adams@Sun.COM 5879515SJonathan.Adams@Sun.COM (void) snprintf(name, sizeof (name), 5889515SJonathan.Adams@Sun.COM "%s_%s", 
ztip->zti_name, zio_taskq_types[q]); 5899515SJonathan.Adams@Sun.COM 5909515SJonathan.Adams@Sun.COM if (mode == zti_mode_tune) { 5919515SJonathan.Adams@Sun.COM mode = zio_taskq_tune_mode; 5929515SJonathan.Adams@Sun.COM value = zio_taskq_tune_value; 5939515SJonathan.Adams@Sun.COM if (mode == zti_mode_tune) 5949515SJonathan.Adams@Sun.COM mode = zti_mode_online_percent; 5959515SJonathan.Adams@Sun.COM } 5969515SJonathan.Adams@Sun.COM 5979515SJonathan.Adams@Sun.COM switch (mode) { 5989515SJonathan.Adams@Sun.COM case zti_mode_fixed: 5999515SJonathan.Adams@Sun.COM ASSERT3U(value, >=, 1); 6009515SJonathan.Adams@Sun.COM value = MAX(value, 1); 6019515SJonathan.Adams@Sun.COM 6029515SJonathan.Adams@Sun.COM spa->spa_zio_taskq[t][q] = taskq_create(name, 6039515SJonathan.Adams@Sun.COM value, maxclsyspri, 50, INT_MAX, 6049515SJonathan.Adams@Sun.COM TASKQ_PREPOPULATE); 6059515SJonathan.Adams@Sun.COM break; 6069515SJonathan.Adams@Sun.COM 6079515SJonathan.Adams@Sun.COM case zti_mode_online_percent: 6089515SJonathan.Adams@Sun.COM spa->spa_zio_taskq[t][q] = taskq_create(name, 6099515SJonathan.Adams@Sun.COM value, maxclsyspri, 50, INT_MAX, 6109515SJonathan.Adams@Sun.COM TASKQ_PREPOPULATE | TASKQ_THREADS_CPU_PCT); 6119515SJonathan.Adams@Sun.COM break; 6129515SJonathan.Adams@Sun.COM 6139515SJonathan.Adams@Sun.COM case zti_mode_tune: 6149515SJonathan.Adams@Sun.COM default: 6159515SJonathan.Adams@Sun.COM panic("unrecognized mode for " 6169515SJonathan.Adams@Sun.COM "zio_taskqs[%u]->zti_nthreads[%u] (%u:%u) " 6179515SJonathan.Adams@Sun.COM "in spa_activate()", 6189515SJonathan.Adams@Sun.COM t, q, mode, value); 6199515SJonathan.Adams@Sun.COM break; 6209515SJonathan.Adams@Sun.COM } 6217754SJeff.Bonwick@Sun.COM } 622789Sahrens } 623789Sahrens 6247754SJeff.Bonwick@Sun.COM list_create(&spa->spa_config_dirty_list, sizeof (vdev_t), 6257754SJeff.Bonwick@Sun.COM offsetof(vdev_t, vdev_config_dirty_node)); 6267754SJeff.Bonwick@Sun.COM list_create(&spa->spa_state_dirty_list, sizeof (vdev_t), 
6277754SJeff.Bonwick@Sun.COM offsetof(vdev_t, vdev_state_dirty_node)); 628789Sahrens 629789Sahrens txg_list_create(&spa->spa_vdev_txg_list, 630789Sahrens offsetof(struct vdev, vdev_txg_node)); 6311544Seschrock 6321544Seschrock avl_create(&spa->spa_errlist_scrub, 6331544Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 6341544Seschrock offsetof(spa_error_entry_t, se_avl)); 6351544Seschrock avl_create(&spa->spa_errlist_last, 6361544Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 6371544Seschrock offsetof(spa_error_entry_t, se_avl)); 638789Sahrens } 639789Sahrens 640789Sahrens /* 641789Sahrens * Opposite of spa_activate(). 642789Sahrens */ 643789Sahrens static void 644789Sahrens spa_deactivate(spa_t *spa) 645789Sahrens { 646789Sahrens ASSERT(spa->spa_sync_on == B_FALSE); 647789Sahrens ASSERT(spa->spa_dsl_pool == NULL); 648789Sahrens ASSERT(spa->spa_root_vdev == NULL); 6499630SJeff.Bonwick@Sun.COM ASSERT(spa->spa_async_zio_root == NULL); 650789Sahrens ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED); 651789Sahrens 652789Sahrens txg_list_destroy(&spa->spa_vdev_txg_list); 653789Sahrens 6547754SJeff.Bonwick@Sun.COM list_destroy(&spa->spa_config_dirty_list); 6557754SJeff.Bonwick@Sun.COM list_destroy(&spa->spa_state_dirty_list); 6567754SJeff.Bonwick@Sun.COM 6577754SJeff.Bonwick@Sun.COM for (int t = 0; t < ZIO_TYPES; t++) { 6587754SJeff.Bonwick@Sun.COM for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { 6597754SJeff.Bonwick@Sun.COM taskq_destroy(spa->spa_zio_taskq[t][q]); 6607754SJeff.Bonwick@Sun.COM spa->spa_zio_taskq[t][q] = NULL; 6617754SJeff.Bonwick@Sun.COM } 662789Sahrens } 663789Sahrens 664789Sahrens metaslab_class_destroy(spa->spa_normal_class); 665789Sahrens spa->spa_normal_class = NULL; 666789Sahrens 6674527Sperrin metaslab_class_destroy(spa->spa_log_class); 6684527Sperrin spa->spa_log_class = NULL; 6694527Sperrin 6701544Seschrock /* 6711544Seschrock * If this was part of an import or the open otherwise failed, we may 6721544Seschrock * still 
have errors left in the queues. Empty them just in case. 6731544Seschrock */ 6741544Seschrock spa_errlog_drain(spa); 6751544Seschrock 6761544Seschrock avl_destroy(&spa->spa_errlist_scrub); 6771544Seschrock avl_destroy(&spa->spa_errlist_last); 6781544Seschrock 679789Sahrens spa->spa_state = POOL_STATE_UNINITIALIZED; 680789Sahrens } 681789Sahrens 682789Sahrens /* 683789Sahrens * Verify a pool configuration, and construct the vdev tree appropriately. This 684789Sahrens * will create all the necessary vdevs in the appropriate layout, with each vdev 685789Sahrens * in the CLOSED state. This will prep the pool before open/creation/import. 686789Sahrens * All vdev validation is done by the vdev_alloc() routine. 687789Sahrens */ 6882082Seschrock static int 6892082Seschrock spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, 6902082Seschrock uint_t id, int atype) 691789Sahrens { 692789Sahrens nvlist_t **child; 693789Sahrens uint_t c, children; 6942082Seschrock int error; 6952082Seschrock 6962082Seschrock if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0) 6972082Seschrock return (error); 6982082Seschrock 6992082Seschrock if ((*vdp)->vdev_ops->vdev_op_leaf) 7002082Seschrock return (0); 701789Sahrens 7027754SJeff.Bonwick@Sun.COM error = nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 7037754SJeff.Bonwick@Sun.COM &child, &children); 7047754SJeff.Bonwick@Sun.COM 7057754SJeff.Bonwick@Sun.COM if (error == ENOENT) 7067754SJeff.Bonwick@Sun.COM return (0); 7077754SJeff.Bonwick@Sun.COM 7087754SJeff.Bonwick@Sun.COM if (error) { 7092082Seschrock vdev_free(*vdp); 7102082Seschrock *vdp = NULL; 7112082Seschrock return (EINVAL); 712789Sahrens } 713789Sahrens 714789Sahrens for (c = 0; c < children; c++) { 7152082Seschrock vdev_t *vd; 7162082Seschrock if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c, 7172082Seschrock atype)) != 0) { 7182082Seschrock vdev_free(*vdp); 7192082Seschrock *vdp = NULL; 7202082Seschrock return (error); 721789Sahrens } 
722789Sahrens } 723789Sahrens 7242082Seschrock ASSERT(*vdp != NULL); 7252082Seschrock 7262082Seschrock return (0); 727789Sahrens } 728789Sahrens 729789Sahrens /* 730789Sahrens * Opposite of spa_load(). 731789Sahrens */ 732789Sahrens static void 733789Sahrens spa_unload(spa_t *spa) 734789Sahrens { 7352082Seschrock int i; 7362082Seschrock 7377754SJeff.Bonwick@Sun.COM ASSERT(MUTEX_HELD(&spa_namespace_lock)); 7387754SJeff.Bonwick@Sun.COM 739789Sahrens /* 7401544Seschrock * Stop async tasks. 7411544Seschrock */ 7421544Seschrock spa_async_suspend(spa); 7431544Seschrock 7441544Seschrock /* 745789Sahrens * Stop syncing. 746789Sahrens */ 747789Sahrens if (spa->spa_sync_on) { 748789Sahrens txg_sync_stop(spa->spa_dsl_pool); 749789Sahrens spa->spa_sync_on = B_FALSE; 750789Sahrens } 751789Sahrens 752789Sahrens /* 7537754SJeff.Bonwick@Sun.COM * Wait for any outstanding async I/O to complete. 754789Sahrens */ 7559234SGeorge.Wilson@Sun.COM if (spa->spa_async_zio_root != NULL) { 7569234SGeorge.Wilson@Sun.COM (void) zio_wait(spa->spa_async_zio_root); 7579234SGeorge.Wilson@Sun.COM spa->spa_async_zio_root = NULL; 7589234SGeorge.Wilson@Sun.COM } 759789Sahrens 760789Sahrens /* 761789Sahrens * Close the dsl pool. 762789Sahrens */ 763789Sahrens if (spa->spa_dsl_pool) { 764789Sahrens dsl_pool_close(spa->spa_dsl_pool); 765789Sahrens spa->spa_dsl_pool = NULL; 766789Sahrens } 767789Sahrens 7688241SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 7698241SJeff.Bonwick@Sun.COM 7708241SJeff.Bonwick@Sun.COM /* 7718241SJeff.Bonwick@Sun.COM * Drop and purge level 2 cache 7728241SJeff.Bonwick@Sun.COM */ 7738241SJeff.Bonwick@Sun.COM spa_l2cache_drop(spa); 7748241SJeff.Bonwick@Sun.COM 775789Sahrens /* 776789Sahrens * Close all vdevs. 
777789Sahrens */ 7781585Sbonwick if (spa->spa_root_vdev) 779789Sahrens vdev_free(spa->spa_root_vdev); 7801585Sbonwick ASSERT(spa->spa_root_vdev == NULL); 7811544Seschrock 7825450Sbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) 7835450Sbrendan vdev_free(spa->spa_spares.sav_vdevs[i]); 7845450Sbrendan if (spa->spa_spares.sav_vdevs) { 7855450Sbrendan kmem_free(spa->spa_spares.sav_vdevs, 7865450Sbrendan spa->spa_spares.sav_count * sizeof (void *)); 7875450Sbrendan spa->spa_spares.sav_vdevs = NULL; 7885450Sbrendan } 7895450Sbrendan if (spa->spa_spares.sav_config) { 7905450Sbrendan nvlist_free(spa->spa_spares.sav_config); 7915450Sbrendan spa->spa_spares.sav_config = NULL; 7922082Seschrock } 7937377SEric.Schrock@Sun.COM spa->spa_spares.sav_count = 0; 7945450Sbrendan 7955450Sbrendan for (i = 0; i < spa->spa_l2cache.sav_count; i++) 7965450Sbrendan vdev_free(spa->spa_l2cache.sav_vdevs[i]); 7975450Sbrendan if (spa->spa_l2cache.sav_vdevs) { 7985450Sbrendan kmem_free(spa->spa_l2cache.sav_vdevs, 7995450Sbrendan spa->spa_l2cache.sav_count * sizeof (void *)); 8005450Sbrendan spa->spa_l2cache.sav_vdevs = NULL; 8015450Sbrendan } 8025450Sbrendan if (spa->spa_l2cache.sav_config) { 8035450Sbrendan nvlist_free(spa->spa_l2cache.sav_config); 8045450Sbrendan spa->spa_l2cache.sav_config = NULL; 8052082Seschrock } 8067377SEric.Schrock@Sun.COM spa->spa_l2cache.sav_count = 0; 8072082Seschrock 8081544Seschrock spa->spa_async_suspended = 0; 8098241SJeff.Bonwick@Sun.COM 8108241SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 811789Sahrens } 812789Sahrens 813789Sahrens /* 8142082Seschrock * Load (or re-load) the current list of vdevs describing the active spares for 8152082Seschrock * this pool. When this is called, we have some form of basic information in 8165450Sbrendan * 'spa_spares.sav_config'. We parse this into vdevs, try to open them, and 8175450Sbrendan * then re-generate a more complete list including status information. 
8182082Seschrock */ 8192082Seschrock static void 8202082Seschrock spa_load_spares(spa_t *spa) 8212082Seschrock { 8222082Seschrock nvlist_t **spares; 8232082Seschrock uint_t nspares; 8242082Seschrock int i; 8253377Seschrock vdev_t *vd, *tvd; 8262082Seschrock 8277754SJeff.Bonwick@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 8287754SJeff.Bonwick@Sun.COM 8292082Seschrock /* 8302082Seschrock * First, close and free any existing spare vdevs. 8312082Seschrock */ 8325450Sbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) { 8335450Sbrendan vd = spa->spa_spares.sav_vdevs[i]; 8343377Seschrock 8353377Seschrock /* Undo the call to spa_activate() below */ 8366643Seschrock if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, 8376643Seschrock B_FALSE)) != NULL && tvd->vdev_isspare) 8383377Seschrock spa_spare_remove(tvd); 8393377Seschrock vdev_close(vd); 8403377Seschrock vdev_free(vd); 8412082Seschrock } 8423377Seschrock 8435450Sbrendan if (spa->spa_spares.sav_vdevs) 8445450Sbrendan kmem_free(spa->spa_spares.sav_vdevs, 8455450Sbrendan spa->spa_spares.sav_count * sizeof (void *)); 8465450Sbrendan 8475450Sbrendan if (spa->spa_spares.sav_config == NULL) 8482082Seschrock nspares = 0; 8492082Seschrock else 8505450Sbrendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 8512082Seschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 8522082Seschrock 8535450Sbrendan spa->spa_spares.sav_count = (int)nspares; 8545450Sbrendan spa->spa_spares.sav_vdevs = NULL; 8552082Seschrock 8562082Seschrock if (nspares == 0) 8572082Seschrock return; 8582082Seschrock 8592082Seschrock /* 8602082Seschrock * Construct the array of vdevs, opening them to get status in the 8613377Seschrock * process. For each spare, there is potentially two different vdev_t 8623377Seschrock * structures associated with it: one in the list of spares (used only 8633377Seschrock * for basic validation purposes) and one in the active vdev 8643377Seschrock * configuration (if it's spared in). 
During this phase we open and 8653377Seschrock * validate each vdev on the spare list. If the vdev also exists in the 8663377Seschrock * active configuration, then we also mark this vdev as an active spare. 8672082Seschrock */ 8685450Sbrendan spa->spa_spares.sav_vdevs = kmem_alloc(nspares * sizeof (void *), 8695450Sbrendan KM_SLEEP); 8705450Sbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) { 8712082Seschrock VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0, 8722082Seschrock VDEV_ALLOC_SPARE) == 0); 8732082Seschrock ASSERT(vd != NULL); 8742082Seschrock 8755450Sbrendan spa->spa_spares.sav_vdevs[i] = vd; 8762082Seschrock 8776643Seschrock if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, 8786643Seschrock B_FALSE)) != NULL) { 8793377Seschrock if (!tvd->vdev_isspare) 8803377Seschrock spa_spare_add(tvd); 8813377Seschrock 8823377Seschrock /* 8833377Seschrock * We only mark the spare active if we were successfully 8843377Seschrock * able to load the vdev. Otherwise, importing a pool 8853377Seschrock * with a bad active spare would result in strange 8863377Seschrock * behavior, because multiple pool would think the spare 8873377Seschrock * is actively in use. 8883377Seschrock * 8893377Seschrock * There is a vulnerability here to an equally bizarre 8903377Seschrock * circumstance, where a dead active spare is later 8913377Seschrock * brought back to life (onlined or otherwise). Given 8923377Seschrock * the rarity of this scenario, and the extra complexity 8933377Seschrock * it adds, we ignore the possibility. 
8943377Seschrock */ 8953377Seschrock if (!vdev_is_dead(tvd)) 8963377Seschrock spa_spare_activate(tvd); 8973377Seschrock } 8983377Seschrock 8997754SJeff.Bonwick@Sun.COM vd->vdev_top = vd; 9009425SEric.Schrock@Sun.COM vd->vdev_aux = &spa->spa_spares; 9017754SJeff.Bonwick@Sun.COM 9022082Seschrock if (vdev_open(vd) != 0) 9032082Seschrock continue; 9042082Seschrock 9055450Sbrendan if (vdev_validate_aux(vd) == 0) 9065450Sbrendan spa_spare_add(vd); 9072082Seschrock } 9082082Seschrock 9092082Seschrock /* 9102082Seschrock * Recompute the stashed list of spares, with status information 9112082Seschrock * this time. 9122082Seschrock */ 9135450Sbrendan VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, 9142082Seschrock DATA_TYPE_NVLIST_ARRAY) == 0); 9152082Seschrock 9165450Sbrendan spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *), 9175450Sbrendan KM_SLEEP); 9185450Sbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) 9195450Sbrendan spares[i] = vdev_config_generate(spa, 9205450Sbrendan spa->spa_spares.sav_vdevs[i], B_TRUE, B_TRUE, B_FALSE); 9215450Sbrendan VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, 9225450Sbrendan ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0); 9235450Sbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) 9242082Seschrock nvlist_free(spares[i]); 9255450Sbrendan kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *)); 9265450Sbrendan } 9275450Sbrendan 9285450Sbrendan /* 9295450Sbrendan * Load (or re-load) the current list of vdevs describing the active l2cache for 9305450Sbrendan * this pool. When this is called, we have some form of basic information in 9315450Sbrendan * 'spa_l2cache.sav_config'. We parse this into vdevs, try to open them, and 9325450Sbrendan * then re-generate a more complete list including status information. 9335450Sbrendan * Devices which are already active have their details maintained, and are 9345450Sbrendan * not re-opened. 
9355450Sbrendan */ 9365450Sbrendan static void 9375450Sbrendan spa_load_l2cache(spa_t *spa) 9385450Sbrendan { 9395450Sbrendan nvlist_t **l2cache; 9405450Sbrendan uint_t nl2cache; 9415450Sbrendan int i, j, oldnvdevs; 9426643Seschrock uint64_t guid, size; 9435450Sbrendan vdev_t *vd, **oldvdevs, **newvdevs; 9445450Sbrendan spa_aux_vdev_t *sav = &spa->spa_l2cache; 9455450Sbrendan 9467754SJeff.Bonwick@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 9477754SJeff.Bonwick@Sun.COM 9485450Sbrendan if (sav->sav_config != NULL) { 9495450Sbrendan VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, 9505450Sbrendan ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 9515450Sbrendan newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP); 9525450Sbrendan } else { 9535450Sbrendan nl2cache = 0; 9545450Sbrendan } 9555450Sbrendan 9565450Sbrendan oldvdevs = sav->sav_vdevs; 9575450Sbrendan oldnvdevs = sav->sav_count; 9585450Sbrendan sav->sav_vdevs = NULL; 9595450Sbrendan sav->sav_count = 0; 9605450Sbrendan 9615450Sbrendan /* 9625450Sbrendan * Process new nvlist of vdevs. 9635450Sbrendan */ 9645450Sbrendan for (i = 0; i < nl2cache; i++) { 9655450Sbrendan VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID, 9665450Sbrendan &guid) == 0); 9675450Sbrendan 9685450Sbrendan newvdevs[i] = NULL; 9695450Sbrendan for (j = 0; j < oldnvdevs; j++) { 9705450Sbrendan vd = oldvdevs[j]; 9715450Sbrendan if (vd != NULL && guid == vd->vdev_guid) { 9725450Sbrendan /* 9735450Sbrendan * Retain previous vdev for add/remove ops. 
9745450Sbrendan */ 9755450Sbrendan newvdevs[i] = vd; 9765450Sbrendan oldvdevs[j] = NULL; 9775450Sbrendan break; 9785450Sbrendan } 9795450Sbrendan } 9805450Sbrendan 9815450Sbrendan if (newvdevs[i] == NULL) { 9825450Sbrendan /* 9835450Sbrendan * Create new vdev 9845450Sbrendan */ 9855450Sbrendan VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0, 9865450Sbrendan VDEV_ALLOC_L2CACHE) == 0); 9875450Sbrendan ASSERT(vd != NULL); 9885450Sbrendan newvdevs[i] = vd; 9895450Sbrendan 9905450Sbrendan /* 9915450Sbrendan * Commit this vdev as an l2cache device, 9925450Sbrendan * even if it fails to open. 9935450Sbrendan */ 9945450Sbrendan spa_l2cache_add(vd); 9955450Sbrendan 9966643Seschrock vd->vdev_top = vd; 9976643Seschrock vd->vdev_aux = sav; 9986643Seschrock 9996643Seschrock spa_l2cache_activate(vd); 10006643Seschrock 10015450Sbrendan if (vdev_open(vd) != 0) 10025450Sbrendan continue; 10035450Sbrendan 10045450Sbrendan (void) vdev_validate_aux(vd); 10055450Sbrendan 10065450Sbrendan if (!vdev_is_dead(vd)) { 10075450Sbrendan size = vdev_get_rsize(vd); 10086643Seschrock l2arc_add_vdev(spa, vd, 10096643Seschrock VDEV_LABEL_START_SIZE, 10106643Seschrock size - VDEV_LABEL_START_SIZE); 10115450Sbrendan } 10125450Sbrendan } 10135450Sbrendan } 10145450Sbrendan 10155450Sbrendan /* 10165450Sbrendan * Purge vdevs that were dropped 10175450Sbrendan */ 10185450Sbrendan for (i = 0; i < oldnvdevs; i++) { 10195450Sbrendan uint64_t pool; 10205450Sbrendan 10215450Sbrendan vd = oldvdevs[i]; 10225450Sbrendan if (vd != NULL) { 10238241SJeff.Bonwick@Sun.COM if (spa_l2cache_exists(vd->vdev_guid, &pool) && 10248241SJeff.Bonwick@Sun.COM pool != 0ULL && l2arc_vdev_present(vd)) 10255450Sbrendan l2arc_remove_vdev(vd); 10265450Sbrendan (void) vdev_close(vd); 10275450Sbrendan spa_l2cache_remove(vd); 10285450Sbrendan } 10295450Sbrendan } 10305450Sbrendan 10315450Sbrendan if (oldvdevs) 10325450Sbrendan kmem_free(oldvdevs, oldnvdevs * sizeof (void *)); 10335450Sbrendan 10345450Sbrendan if (sav->sav_config 
== NULL) 10355450Sbrendan goto out; 10365450Sbrendan 10375450Sbrendan sav->sav_vdevs = newvdevs; 10385450Sbrendan sav->sav_count = (int)nl2cache; 10395450Sbrendan 10405450Sbrendan /* 10415450Sbrendan * Recompute the stashed list of l2cache devices, with status 10425450Sbrendan * information this time. 10435450Sbrendan */ 10445450Sbrendan VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE, 10455450Sbrendan DATA_TYPE_NVLIST_ARRAY) == 0); 10465450Sbrendan 10475450Sbrendan l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); 10485450Sbrendan for (i = 0; i < sav->sav_count; i++) 10495450Sbrendan l2cache[i] = vdev_config_generate(spa, 10505450Sbrendan sav->sav_vdevs[i], B_TRUE, B_FALSE, B_TRUE); 10515450Sbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, 10525450Sbrendan ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0); 10535450Sbrendan out: 10545450Sbrendan for (i = 0; i < sav->sav_count; i++) 10555450Sbrendan nvlist_free(l2cache[i]); 10565450Sbrendan if (sav->sav_count) 10575450Sbrendan kmem_free(l2cache, sav->sav_count * sizeof (void *)); 10582082Seschrock } 10592082Seschrock 10602082Seschrock static int 10612082Seschrock load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value) 10622082Seschrock { 10632082Seschrock dmu_buf_t *db; 10642082Seschrock char *packed = NULL; 10652082Seschrock size_t nvsize = 0; 10662082Seschrock int error; 10672082Seschrock *value = NULL; 10682082Seschrock 10692082Seschrock VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); 10702082Seschrock nvsize = *(uint64_t *)db->db_data; 10712082Seschrock dmu_buf_rele(db, FTAG); 10722082Seschrock 10732082Seschrock packed = kmem_alloc(nvsize, KM_SLEEP); 10749512SNeil.Perrin@Sun.COM error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed, 10759512SNeil.Perrin@Sun.COM DMU_READ_PREFETCH); 10762082Seschrock if (error == 0) 10772082Seschrock error = nvlist_unpack(packed, nvsize, value, 0); 10782082Seschrock kmem_free(packed, nvsize); 10792082Seschrock 
10802082Seschrock return (error); 10812082Seschrock } 10822082Seschrock 10832082Seschrock /* 10844451Seschrock * Checks to see if the given vdev could not be opened, in which case we post a 10854451Seschrock * sysevent to notify the autoreplace code that the device has been removed. 10864451Seschrock */ 10874451Seschrock static void 10884451Seschrock spa_check_removed(vdev_t *vd) 10894451Seschrock { 10904451Seschrock int c; 10914451Seschrock 10924451Seschrock for (c = 0; c < vd->vdev_children; c++) 10934451Seschrock spa_check_removed(vd->vdev_child[c]); 10944451Seschrock 10954451Seschrock if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) { 10964451Seschrock zfs_post_autoreplace(vd->vdev_spa, vd); 10974451Seschrock spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK); 10984451Seschrock } 10994451Seschrock } 11004451Seschrock 11014451Seschrock /* 11029701SGeorge.Wilson@Sun.COM * Load the slog device state from the config object since it's possible 11039701SGeorge.Wilson@Sun.COM * that the label does not contain the most up-to-date information. 
11049701SGeorge.Wilson@Sun.COM */ 11059701SGeorge.Wilson@Sun.COM void 11069701SGeorge.Wilson@Sun.COM spa_load_log_state(spa_t *spa) 11079701SGeorge.Wilson@Sun.COM { 11089701SGeorge.Wilson@Sun.COM nvlist_t *nv, *nvroot, **child; 11099701SGeorge.Wilson@Sun.COM uint64_t is_log; 11109701SGeorge.Wilson@Sun.COM uint_t children, c; 11119701SGeorge.Wilson@Sun.COM vdev_t *rvd = spa->spa_root_vdev; 11129701SGeorge.Wilson@Sun.COM 11139701SGeorge.Wilson@Sun.COM VERIFY(load_nvlist(spa, spa->spa_config_object, &nv) == 0); 11149701SGeorge.Wilson@Sun.COM VERIFY(nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 11159701SGeorge.Wilson@Sun.COM VERIFY(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 11169701SGeorge.Wilson@Sun.COM &child, &children) == 0); 11179701SGeorge.Wilson@Sun.COM 11189701SGeorge.Wilson@Sun.COM for (c = 0; c < children; c++) { 11199701SGeorge.Wilson@Sun.COM vdev_t *tvd = rvd->vdev_child[c]; 11209701SGeorge.Wilson@Sun.COM 11219701SGeorge.Wilson@Sun.COM if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, 11229701SGeorge.Wilson@Sun.COM &is_log) == 0 && is_log) 11239701SGeorge.Wilson@Sun.COM vdev_load_log_state(tvd, child[c]); 11249701SGeorge.Wilson@Sun.COM } 11259701SGeorge.Wilson@Sun.COM nvlist_free(nv); 11269701SGeorge.Wilson@Sun.COM } 11279701SGeorge.Wilson@Sun.COM 11289701SGeorge.Wilson@Sun.COM /* 11297294Sperrin * Check for missing log devices 11307294Sperrin */ 11317294Sperrin int 11327294Sperrin spa_check_logs(spa_t *spa) 11337294Sperrin { 11347294Sperrin switch (spa->spa_log_state) { 11357294Sperrin case SPA_LOG_MISSING: 11367294Sperrin /* need to recheck in case slog has been restored */ 11377294Sperrin case SPA_LOG_UNKNOWN: 11387294Sperrin if (dmu_objset_find(spa->spa_name, zil_check_log_chain, NULL, 11397294Sperrin DS_FIND_CHILDREN)) { 11407294Sperrin spa->spa_log_state = SPA_LOG_MISSING; 11417294Sperrin return (1); 11427294Sperrin } 11437294Sperrin break; 11447294Sperrin } 11457294Sperrin return (0); 11467294Sperrin } 
11477294Sperrin 11487294Sperrin /* 1149789Sahrens * Load an existing storage pool, using the pool's builtin spa_config as a 11501544Seschrock * source of configuration information. 1151789Sahrens */ 1152789Sahrens static int 11531544Seschrock spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig) 1154789Sahrens { 1155789Sahrens int error = 0; 1156789Sahrens nvlist_t *nvroot = NULL; 1157789Sahrens vdev_t *rvd; 1158789Sahrens uberblock_t *ub = &spa->spa_uberblock; 11591635Sbonwick uint64_t config_cache_txg = spa->spa_config_txg; 1160789Sahrens uint64_t pool_guid; 11612082Seschrock uint64_t version; 11624451Seschrock uint64_t autoreplace = 0; 11638241SJeff.Bonwick@Sun.COM int orig_mode = spa->spa_mode; 11647294Sperrin char *ereport = FM_EREPORT_ZFS_POOL; 1165789Sahrens 11668241SJeff.Bonwick@Sun.COM /* 11678241SJeff.Bonwick@Sun.COM * If this is an untrusted config, access the pool in read-only mode. 11688241SJeff.Bonwick@Sun.COM * This prevents things like resilvering recently removed devices. 11698241SJeff.Bonwick@Sun.COM */ 11708241SJeff.Bonwick@Sun.COM if (!mosconfig) 11718241SJeff.Bonwick@Sun.COM spa->spa_mode = FREAD; 11728241SJeff.Bonwick@Sun.COM 11737754SJeff.Bonwick@Sun.COM ASSERT(MUTEX_HELD(&spa_namespace_lock)); 11747754SJeff.Bonwick@Sun.COM 11751544Seschrock spa->spa_load_state = state; 11761635Sbonwick 1177789Sahrens if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) || 11781733Sbonwick nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) { 11791544Seschrock error = EINVAL; 11801544Seschrock goto out; 11811544Seschrock } 1182789Sahrens 11832082Seschrock /* 11842082Seschrock * Versioning wasn't explicitly added to the label until later, so if 11852082Seschrock * it's not present treat it as the initial version. 
11862082Seschrock */ 11872082Seschrock if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) != 0) 11884577Sahrens version = SPA_VERSION_INITIAL; 11892082Seschrock 11901733Sbonwick (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, 11911733Sbonwick &spa->spa_config_txg); 11921733Sbonwick 11931635Sbonwick if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) && 11941544Seschrock spa_guid_exists(pool_guid, 0)) { 11951544Seschrock error = EEXIST; 11961544Seschrock goto out; 11971544Seschrock } 1198789Sahrens 11992174Seschrock spa->spa_load_guid = pool_guid; 12002174Seschrock 1201789Sahrens /* 12029234SGeorge.Wilson@Sun.COM * Create "The Godfather" zio to hold all async IOs 12039234SGeorge.Wilson@Sun.COM */ 12049630SJeff.Bonwick@Sun.COM spa->spa_async_zio_root = zio_root(spa, NULL, NULL, 12059630SJeff.Bonwick@Sun.COM ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); 12069234SGeorge.Wilson@Sun.COM 12079234SGeorge.Wilson@Sun.COM /* 12082082Seschrock * Parse the configuration into a vdev tree. We explicitly set the 12092082Seschrock * value that will be returned by spa_version() since parsing the 12102082Seschrock * configuration requires knowing the version number. 1211789Sahrens */ 12127754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 12132082Seschrock spa->spa_ubsync.ub_version = version; 12142082Seschrock error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_LOAD); 12157754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 1216789Sahrens 12172082Seschrock if (error != 0) 12181544Seschrock goto out; 1219789Sahrens 12201585Sbonwick ASSERT(spa->spa_root_vdev == rvd); 1221789Sahrens ASSERT(spa_guid(spa) == pool_guid); 1222789Sahrens 1223789Sahrens /* 1224789Sahrens * Try to open all vdevs, loading each label in the process. 
1225789Sahrens */ 12267754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 12274070Smc142369 error = vdev_open(rvd); 12287754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 12294070Smc142369 if (error != 0) 12301544Seschrock goto out; 1231789Sahrens 1232789Sahrens /* 12339276SMark.Musante@Sun.COM * We need to validate the vdev labels against the configuration that 12349276SMark.Musante@Sun.COM * we have in hand, which is dependent on the setting of mosconfig. If 12359276SMark.Musante@Sun.COM * mosconfig is true then we're validating the vdev labels based on 12369276SMark.Musante@Sun.COM * that config. Otherwise, we're validating against the cached config 12379276SMark.Musante@Sun.COM * (zpool.cache) that was read when we loaded the zfs module, and then 12389276SMark.Musante@Sun.COM * later we will recursively call spa_load() and validate against 12399276SMark.Musante@Sun.COM * the vdev config. 12401986Seschrock */ 12419276SMark.Musante@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 12429276SMark.Musante@Sun.COM error = vdev_validate(rvd); 12439276SMark.Musante@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 12449276SMark.Musante@Sun.COM if (error != 0) 12459276SMark.Musante@Sun.COM goto out; 12461986Seschrock 12471986Seschrock if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { 12481986Seschrock error = ENXIO; 12491986Seschrock goto out; 12501986Seschrock } 12511986Seschrock 12521986Seschrock /* 1253789Sahrens * Find the best uberblock. 1254789Sahrens */ 12557754SJeff.Bonwick@Sun.COM vdev_uberblock_load(NULL, rvd, ub); 1256789Sahrens 1257789Sahrens /* 1258789Sahrens * If we weren't able to find a single valid uberblock, return failure. 
1259789Sahrens */ 1260789Sahrens if (ub->ub_txg == 0) { 12611760Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 12621760Seschrock VDEV_AUX_CORRUPT_DATA); 12631544Seschrock error = ENXIO; 12641544Seschrock goto out; 12651544Seschrock } 12661544Seschrock 12671544Seschrock /* 12681544Seschrock * If the pool is newer than the code, we can't open it. 12691544Seschrock */ 12704577Sahrens if (ub->ub_version > SPA_VERSION) { 12711760Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 12721760Seschrock VDEV_AUX_VERSION_NEWER); 12731544Seschrock error = ENOTSUP; 12741544Seschrock goto out; 1275789Sahrens } 1276789Sahrens 1277789Sahrens /* 1278789Sahrens * If the vdev guid sum doesn't match the uberblock, we have an 1279789Sahrens * incomplete configuration. 1280789Sahrens */ 12811732Sbonwick if (rvd->vdev_guid_sum != ub->ub_guid_sum && mosconfig) { 12821544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 12831544Seschrock VDEV_AUX_BAD_GUID_SUM); 12841544Seschrock error = ENXIO; 12851544Seschrock goto out; 1286789Sahrens } 1287789Sahrens 1288789Sahrens /* 1289789Sahrens * Initialize internal SPA structures. 
1290789Sahrens */ 1291789Sahrens spa->spa_state = POOL_STATE_ACTIVE; 1292789Sahrens spa->spa_ubsync = spa->spa_uberblock; 1293789Sahrens spa->spa_first_txg = spa_last_synced_txg(spa) + 1; 12941544Seschrock error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool); 12951544Seschrock if (error) { 12961544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 12971544Seschrock VDEV_AUX_CORRUPT_DATA); 12981544Seschrock goto out; 12991544Seschrock } 1300789Sahrens spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset; 1301789Sahrens 13021544Seschrock if (zap_lookup(spa->spa_meta_objset, 1303789Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, 13041544Seschrock sizeof (uint64_t), 1, &spa->spa_config_object) != 0) { 13051544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 13061544Seschrock VDEV_AUX_CORRUPT_DATA); 13071544Seschrock error = EIO; 13081544Seschrock goto out; 13091544Seschrock } 1310789Sahrens 1311789Sahrens if (!mosconfig) { 13122082Seschrock nvlist_t *newconfig; 13133975Sek110237 uint64_t hostid; 13142082Seschrock 13152082Seschrock if (load_nvlist(spa, spa->spa_config_object, &newconfig) != 0) { 13161544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 13171544Seschrock VDEV_AUX_CORRUPT_DATA); 13181544Seschrock error = EIO; 13191544Seschrock goto out; 13201544Seschrock } 1321789Sahrens 13227706SLin.Ling@Sun.COM if (!spa_is_root(spa) && nvlist_lookup_uint64(newconfig, 13237706SLin.Ling@Sun.COM ZPOOL_CONFIG_HOSTID, &hostid) == 0) { 13243975Sek110237 char *hostname; 13253975Sek110237 unsigned long myhostid = 0; 13263975Sek110237 13273975Sek110237 VERIFY(nvlist_lookup_string(newconfig, 13283975Sek110237 ZPOOL_CONFIG_HOSTNAME, &hostname) == 0); 13293975Sek110237 13308662SJordan.Vaughan@Sun.com #ifdef _KERNEL 13318662SJordan.Vaughan@Sun.com myhostid = zone_get_hostid(NULL); 13328662SJordan.Vaughan@Sun.com #else /* _KERNEL */ 13338662SJordan.Vaughan@Sun.com /* 13348662SJordan.Vaughan@Sun.com * We're emulating the system's 
hostid in userland, so 13358662SJordan.Vaughan@Sun.com * we can't use zone_get_hostid(). 13368662SJordan.Vaughan@Sun.com */ 13373975Sek110237 (void) ddi_strtoul(hw_serial, NULL, 10, &myhostid); 13388662SJordan.Vaughan@Sun.com #endif /* _KERNEL */ 13394178Slling if (hostid != 0 && myhostid != 0 && 13408662SJordan.Vaughan@Sun.com hostid != myhostid) { 13413975Sek110237 cmn_err(CE_WARN, "pool '%s' could not be " 13423975Sek110237 "loaded as it was last accessed by " 13437706SLin.Ling@Sun.COM "another system (host: %s hostid: 0x%lx). " 13443975Sek110237 "See: http://www.sun.com/msg/ZFS-8000-EY", 13457754SJeff.Bonwick@Sun.COM spa_name(spa), hostname, 13463975Sek110237 (unsigned long)hostid); 13473975Sek110237 error = EBADF; 13483975Sek110237 goto out; 13493975Sek110237 } 13503975Sek110237 } 13513975Sek110237 1352789Sahrens spa_config_set(spa, newconfig); 1353789Sahrens spa_unload(spa); 1354789Sahrens spa_deactivate(spa); 13558241SJeff.Bonwick@Sun.COM spa_activate(spa, orig_mode); 1356789Sahrens 13571544Seschrock return (spa_load(spa, newconfig, state, B_TRUE)); 13581544Seschrock } 13591544Seschrock 13601544Seschrock if (zap_lookup(spa->spa_meta_objset, 13611544Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, 13621544Seschrock sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj) != 0) { 13631544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 13641544Seschrock VDEV_AUX_CORRUPT_DATA); 13651544Seschrock error = EIO; 13661544Seschrock goto out; 1367789Sahrens } 1368789Sahrens 13691544Seschrock /* 13702082Seschrock * Load the bit that tells us to use the new accounting function 13712082Seschrock * (raid-z deflation). If we have an older pool, this will not 13722082Seschrock * be present. 
13732082Seschrock */ 13742082Seschrock error = zap_lookup(spa->spa_meta_objset, 13752082Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 13762082Seschrock sizeof (uint64_t), 1, &spa->spa_deflate); 13772082Seschrock if (error != 0 && error != ENOENT) { 13782082Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 13792082Seschrock VDEV_AUX_CORRUPT_DATA); 13802082Seschrock error = EIO; 13812082Seschrock goto out; 13822082Seschrock } 13832082Seschrock 13842082Seschrock /* 13851544Seschrock * Load the persistent error log. If we have an older pool, this will 13861544Seschrock * not be present. 13871544Seschrock */ 13881544Seschrock error = zap_lookup(spa->spa_meta_objset, 13891544Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_LAST, 13901544Seschrock sizeof (uint64_t), 1, &spa->spa_errlog_last); 13911807Sbonwick if (error != 0 && error != ENOENT) { 13921544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 13931544Seschrock VDEV_AUX_CORRUPT_DATA); 13941544Seschrock error = EIO; 13951544Seschrock goto out; 13961544Seschrock } 13971544Seschrock 13981544Seschrock error = zap_lookup(spa->spa_meta_objset, 13991544Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_SCRUB, 14001544Seschrock sizeof (uint64_t), 1, &spa->spa_errlog_scrub); 14011544Seschrock if (error != 0 && error != ENOENT) { 14021544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 14031544Seschrock VDEV_AUX_CORRUPT_DATA); 14041544Seschrock error = EIO; 14051544Seschrock goto out; 14061544Seschrock } 1407789Sahrens 1408789Sahrens /* 14092926Sek110237 * Load the history object. If we have an older pool, this 14102926Sek110237 * will not be present. 
14112926Sek110237 */ 14122926Sek110237 error = zap_lookup(spa->spa_meta_objset, 14132926Sek110237 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_HISTORY, 14142926Sek110237 sizeof (uint64_t), 1, &spa->spa_history); 14152926Sek110237 if (error != 0 && error != ENOENT) { 14162926Sek110237 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 14172926Sek110237 VDEV_AUX_CORRUPT_DATA); 14182926Sek110237 error = EIO; 14192926Sek110237 goto out; 14202926Sek110237 } 14212926Sek110237 14222926Sek110237 /* 14232082Seschrock * Load any hot spares for this pool. 14242082Seschrock */ 14252082Seschrock error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 14265450Sbrendan DMU_POOL_SPARES, sizeof (uint64_t), 1, &spa->spa_spares.sav_object); 14272082Seschrock if (error != 0 && error != ENOENT) { 14282082Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 14292082Seschrock VDEV_AUX_CORRUPT_DATA); 14302082Seschrock error = EIO; 14312082Seschrock goto out; 14322082Seschrock } 14332082Seschrock if (error == 0) { 14344577Sahrens ASSERT(spa_version(spa) >= SPA_VERSION_SPARES); 14355450Sbrendan if (load_nvlist(spa, spa->spa_spares.sav_object, 14365450Sbrendan &spa->spa_spares.sav_config) != 0) { 14372082Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 14382082Seschrock VDEV_AUX_CORRUPT_DATA); 14392082Seschrock error = EIO; 14402082Seschrock goto out; 14412082Seschrock } 14422082Seschrock 14437754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 14442082Seschrock spa_load_spares(spa); 14457754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 14462082Seschrock } 14472082Seschrock 14485450Sbrendan /* 14495450Sbrendan * Load any level 2 ARC devices for this pool. 
14505450Sbrendan */ 14515450Sbrendan error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 14525450Sbrendan DMU_POOL_L2CACHE, sizeof (uint64_t), 1, 14535450Sbrendan &spa->spa_l2cache.sav_object); 14545450Sbrendan if (error != 0 && error != ENOENT) { 14555450Sbrendan vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 14565450Sbrendan VDEV_AUX_CORRUPT_DATA); 14575450Sbrendan error = EIO; 14585450Sbrendan goto out; 14595450Sbrendan } 14605450Sbrendan if (error == 0) { 14615450Sbrendan ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE); 14625450Sbrendan if (load_nvlist(spa, spa->spa_l2cache.sav_object, 14635450Sbrendan &spa->spa_l2cache.sav_config) != 0) { 14645450Sbrendan vdev_set_state(rvd, B_TRUE, 14655450Sbrendan VDEV_STATE_CANT_OPEN, 14665450Sbrendan VDEV_AUX_CORRUPT_DATA); 14675450Sbrendan error = EIO; 14685450Sbrendan goto out; 14695450Sbrendan } 14705450Sbrendan 14717754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 14725450Sbrendan spa_load_l2cache(spa); 14737754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 14745450Sbrendan } 14755450Sbrendan 14769701SGeorge.Wilson@Sun.COM spa_load_log_state(spa); 14779701SGeorge.Wilson@Sun.COM 14787294Sperrin if (spa_check_logs(spa)) { 14797294Sperrin vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 14807294Sperrin VDEV_AUX_BAD_LOG); 14817294Sperrin error = ENXIO; 14827294Sperrin ereport = FM_EREPORT_ZFS_LOG_REPLAY; 14837294Sperrin goto out; 14847294Sperrin } 14857294Sperrin 14867294Sperrin 14875094Slling spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); 14884543Smarks 14893912Slling error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 14903912Slling DMU_POOL_PROPS, sizeof (uint64_t), 1, &spa->spa_pool_props_object); 14913912Slling 14923912Slling if (error && error != ENOENT) { 14933912Slling vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 14943912Slling VDEV_AUX_CORRUPT_DATA); 14953912Slling error = EIO; 14963912Slling goto out; 
14973912Slling } 14983912Slling 14993912Slling if (error == 0) { 15003912Slling (void) zap_lookup(spa->spa_meta_objset, 15013912Slling spa->spa_pool_props_object, 15024451Seschrock zpool_prop_to_name(ZPOOL_PROP_BOOTFS), 15033912Slling sizeof (uint64_t), 1, &spa->spa_bootfs); 15044451Seschrock (void) zap_lookup(spa->spa_meta_objset, 15054451Seschrock spa->spa_pool_props_object, 15064451Seschrock zpool_prop_to_name(ZPOOL_PROP_AUTOREPLACE), 15074451Seschrock sizeof (uint64_t), 1, &autoreplace); 15084543Smarks (void) zap_lookup(spa->spa_meta_objset, 15094543Smarks spa->spa_pool_props_object, 15104543Smarks zpool_prop_to_name(ZPOOL_PROP_DELEGATION), 15114543Smarks sizeof (uint64_t), 1, &spa->spa_delegation); 15125329Sgw25295 (void) zap_lookup(spa->spa_meta_objset, 15135329Sgw25295 spa->spa_pool_props_object, 15145329Sgw25295 zpool_prop_to_name(ZPOOL_PROP_FAILUREMODE), 15155329Sgw25295 sizeof (uint64_t), 1, &spa->spa_failmode); 15163912Slling } 15173912Slling 15182082Seschrock /* 15194451Seschrock * If the 'autoreplace' property is set, then post a resource notifying 15204451Seschrock * the ZFS DE that it should not issue any faults for unopenable 15214451Seschrock * devices. We also iterate over the vdevs, and post a sysevent for any 15224451Seschrock * unopenable vdevs so that the normal autoreplace handler can take 15234451Seschrock * over. 15244451Seschrock */ 15255756Seschrock if (autoreplace && state != SPA_LOAD_TRYIMPORT) 15264451Seschrock spa_check_removed(spa->spa_root_vdev); 15274451Seschrock 15284451Seschrock /* 15291986Seschrock * Load the vdev state for all toplevel vdevs. 1530789Sahrens */ 15311986Seschrock vdev_load(rvd); 1532789Sahrens 1533789Sahrens /* 1534789Sahrens * Propagate the leaf DTLs we just loaded all the way up the tree. 
1535789Sahrens */ 15367754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1537789Sahrens vdev_dtl_reassess(rvd, 0, 0, B_FALSE); 15387754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 1539789Sahrens 1540789Sahrens /* 1541789Sahrens * Check the state of the root vdev. If it can't be opened, it 1542789Sahrens * indicates one or more toplevel vdevs are faulted. 1543789Sahrens */ 15441544Seschrock if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { 15451544Seschrock error = ENXIO; 15461544Seschrock goto out; 15471544Seschrock } 1548789Sahrens 15498241SJeff.Bonwick@Sun.COM if (spa_writeable(spa)) { 15501635Sbonwick dmu_tx_t *tx; 15511635Sbonwick int need_update = B_FALSE; 15528241SJeff.Bonwick@Sun.COM 15538241SJeff.Bonwick@Sun.COM ASSERT(state != SPA_LOAD_TRYIMPORT); 15541601Sbonwick 15551635Sbonwick /* 15561635Sbonwick * Claim log blocks that haven't been committed yet. 15571635Sbonwick * This must all happen in a single txg. 15581635Sbonwick */ 15591601Sbonwick tx = dmu_tx_create_assigned(spa_get_dsl(spa), 1560789Sahrens spa_first_txg(spa)); 15617754SJeff.Bonwick@Sun.COM (void) dmu_objset_find(spa_name(spa), 15622417Sahrens zil_claim, tx, DS_FIND_CHILDREN); 1563789Sahrens dmu_tx_commit(tx); 1564789Sahrens 15659701SGeorge.Wilson@Sun.COM spa->spa_log_state = SPA_LOG_GOOD; 1566789Sahrens spa->spa_sync_on = B_TRUE; 1567789Sahrens txg_sync_start(spa->spa_dsl_pool); 1568789Sahrens 1569789Sahrens /* 1570789Sahrens * Wait for all claims to sync. 1571789Sahrens */ 1572789Sahrens txg_wait_synced(spa->spa_dsl_pool, 0); 15731585Sbonwick 15741585Sbonwick /* 15751635Sbonwick * If the config cache is stale, or we have uninitialized 15761635Sbonwick * metaslabs (see spa_vdev_add()), then update the config. 
15771585Sbonwick */ 15781635Sbonwick if (config_cache_txg != spa->spa_config_txg || 15791635Sbonwick state == SPA_LOAD_IMPORT) 15801635Sbonwick need_update = B_TRUE; 15811635Sbonwick 15828241SJeff.Bonwick@Sun.COM for (int c = 0; c < rvd->vdev_children; c++) 15831635Sbonwick if (rvd->vdev_child[c]->vdev_ms_array == 0) 15841635Sbonwick need_update = B_TRUE; 15851585Sbonwick 15861585Sbonwick /* 15871635Sbonwick * Update the config cache asychronously in case we're the 15881635Sbonwick * root pool, in which case the config cache isn't writable yet. 15891585Sbonwick */ 15901635Sbonwick if (need_update) 15911635Sbonwick spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); 15928241SJeff.Bonwick@Sun.COM 15938241SJeff.Bonwick@Sun.COM /* 15948241SJeff.Bonwick@Sun.COM * Check all DTLs to see if anything needs resilvering. 15958241SJeff.Bonwick@Sun.COM */ 15968241SJeff.Bonwick@Sun.COM if (vdev_resilver_needed(rvd, NULL, NULL)) 15978241SJeff.Bonwick@Sun.COM spa_async_request(spa, SPA_ASYNC_RESILVER); 1598789Sahrens } 1599789Sahrens 16001544Seschrock error = 0; 16011544Seschrock out: 16027046Sahrens spa->spa_minref = refcount_count(&spa->spa_refcount); 16032082Seschrock if (error && error != EBADF) 16047294Sperrin zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0); 16051544Seschrock spa->spa_load_state = SPA_LOAD_NONE; 16061544Seschrock spa->spa_ena = 0; 16071544Seschrock 16081544Seschrock return (error); 1609789Sahrens } 1610789Sahrens 1611789Sahrens /* 1612789Sahrens * Pool Open/Import 1613789Sahrens * 1614789Sahrens * The import case is identical to an open except that the configuration is sent 1615789Sahrens * down from userland, instead of grabbed from the configuration cache. For the 1616789Sahrens * case of an open, the pool configuration will exist in the 16174451Seschrock * POOL_STATE_UNINITIALIZED state. 
1618789Sahrens * 1619789Sahrens * The stats information (gen/count/ustats) is used to gather vdev statistics at 1620789Sahrens * the same time open the pool, without having to keep around the spa_t in some 1621789Sahrens * ambiguous state. 1622789Sahrens */ 1623789Sahrens static int 1624789Sahrens spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t **config) 1625789Sahrens { 1626789Sahrens spa_t *spa; 1627789Sahrens int error; 1628789Sahrens int locked = B_FALSE; 1629789Sahrens 1630789Sahrens *spapp = NULL; 1631789Sahrens 1632789Sahrens /* 1633789Sahrens * As disgusting as this is, we need to support recursive calls to this 1634789Sahrens * function because dsl_dir_open() is called during spa_load(), and ends 1635789Sahrens * up calling spa_open() again. The real fix is to figure out how to 1636789Sahrens * avoid dsl_dir_open() calling this in the first place. 1637789Sahrens */ 1638789Sahrens if (mutex_owner(&spa_namespace_lock) != curthread) { 1639789Sahrens mutex_enter(&spa_namespace_lock); 1640789Sahrens locked = B_TRUE; 1641789Sahrens } 1642789Sahrens 1643789Sahrens if ((spa = spa_lookup(pool)) == NULL) { 1644789Sahrens if (locked) 1645789Sahrens mutex_exit(&spa_namespace_lock); 1646789Sahrens return (ENOENT); 1647789Sahrens } 1648789Sahrens if (spa->spa_state == POOL_STATE_UNINITIALIZED) { 1649789Sahrens 16508241SJeff.Bonwick@Sun.COM spa_activate(spa, spa_mode_global); 1651789Sahrens 16521635Sbonwick error = spa_load(spa, spa->spa_config, SPA_LOAD_OPEN, B_FALSE); 1653789Sahrens 1654789Sahrens if (error == EBADF) { 1655789Sahrens /* 16561986Seschrock * If vdev_validate() returns failure (indicated by 16571986Seschrock * EBADF), it indicates that one of the vdevs indicates 16581986Seschrock * that the pool has been exported or destroyed. If 16591986Seschrock * this is the case, the config cache is out of sync and 16601986Seschrock * we should remove the pool from the namespace. 
1661789Sahrens */ 1662789Sahrens spa_unload(spa); 1663789Sahrens spa_deactivate(spa); 16646643Seschrock spa_config_sync(spa, B_TRUE, B_TRUE); 1665789Sahrens spa_remove(spa); 1666789Sahrens if (locked) 1667789Sahrens mutex_exit(&spa_namespace_lock); 1668789Sahrens return (ENOENT); 16691544Seschrock } 16701544Seschrock 16711544Seschrock if (error) { 1672789Sahrens /* 1673789Sahrens * We can't open the pool, but we still have useful 1674789Sahrens * information: the state of each vdev after the 1675789Sahrens * attempted vdev_open(). Return this to the user. 1676789Sahrens */ 16777754SJeff.Bonwick@Sun.COM if (config != NULL && spa->spa_root_vdev != NULL) 1678789Sahrens *config = spa_config_generate(spa, NULL, -1ULL, 1679789Sahrens B_TRUE); 1680789Sahrens spa_unload(spa); 1681789Sahrens spa_deactivate(spa); 16821544Seschrock spa->spa_last_open_failed = B_TRUE; 1683789Sahrens if (locked) 1684789Sahrens mutex_exit(&spa_namespace_lock); 1685789Sahrens *spapp = NULL; 1686789Sahrens return (error); 16871544Seschrock } else { 16881544Seschrock spa->spa_last_open_failed = B_FALSE; 1689789Sahrens } 1690789Sahrens } 1691789Sahrens 1692789Sahrens spa_open_ref(spa, tag); 16934451Seschrock 1694789Sahrens if (locked) 1695789Sahrens mutex_exit(&spa_namespace_lock); 1696789Sahrens 1697789Sahrens *spapp = spa; 1698789Sahrens 16997754SJeff.Bonwick@Sun.COM if (config != NULL) 1700789Sahrens *config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); 1701789Sahrens 1702789Sahrens return (0); 1703789Sahrens } 1704789Sahrens 1705789Sahrens int 1706789Sahrens spa_open(const char *name, spa_t **spapp, void *tag) 1707789Sahrens { 1708789Sahrens return (spa_open_common(name, spapp, tag, NULL)); 1709789Sahrens } 1710789Sahrens 17111544Seschrock /* 17121544Seschrock * Lookup the given spa_t, incrementing the inject count in the process, 17131544Seschrock * preventing it from being exported or destroyed. 
17141544Seschrock */ 17151544Seschrock spa_t * 17161544Seschrock spa_inject_addref(char *name) 17171544Seschrock { 17181544Seschrock spa_t *spa; 17191544Seschrock 17201544Seschrock mutex_enter(&spa_namespace_lock); 17211544Seschrock if ((spa = spa_lookup(name)) == NULL) { 17221544Seschrock mutex_exit(&spa_namespace_lock); 17231544Seschrock return (NULL); 17241544Seschrock } 17251544Seschrock spa->spa_inject_ref++; 17261544Seschrock mutex_exit(&spa_namespace_lock); 17271544Seschrock 17281544Seschrock return (spa); 17291544Seschrock } 17301544Seschrock 17311544Seschrock void 17321544Seschrock spa_inject_delref(spa_t *spa) 17331544Seschrock { 17341544Seschrock mutex_enter(&spa_namespace_lock); 17351544Seschrock spa->spa_inject_ref--; 17361544Seschrock mutex_exit(&spa_namespace_lock); 17371544Seschrock } 17381544Seschrock 17395450Sbrendan /* 17405450Sbrendan * Add spares device information to the nvlist. 17415450Sbrendan */ 17422082Seschrock static void 17432082Seschrock spa_add_spares(spa_t *spa, nvlist_t *config) 17442082Seschrock { 17452082Seschrock nvlist_t **spares; 17462082Seschrock uint_t i, nspares; 17472082Seschrock nvlist_t *nvroot; 17482082Seschrock uint64_t guid; 17492082Seschrock vdev_stat_t *vs; 17502082Seschrock uint_t vsc; 17513377Seschrock uint64_t pool; 17522082Seschrock 17539425SEric.Schrock@Sun.COM ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); 17549425SEric.Schrock@Sun.COM 17555450Sbrendan if (spa->spa_spares.sav_count == 0) 17562082Seschrock return; 17572082Seschrock 17582082Seschrock VERIFY(nvlist_lookup_nvlist(config, 17592082Seschrock ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 17605450Sbrendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 17612082Seschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 17622082Seschrock if (nspares != 0) { 17632082Seschrock VERIFY(nvlist_add_nvlist_array(nvroot, 17642082Seschrock ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 17652082Seschrock VERIFY(nvlist_lookup_nvlist_array(nvroot, 
17662082Seschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 17672082Seschrock 17682082Seschrock /* 17692082Seschrock * Go through and find any spares which have since been 17702082Seschrock * repurposed as an active spare. If this is the case, update 17712082Seschrock * their status appropriately. 17722082Seschrock */ 17732082Seschrock for (i = 0; i < nspares; i++) { 17742082Seschrock VERIFY(nvlist_lookup_uint64(spares[i], 17752082Seschrock ZPOOL_CONFIG_GUID, &guid) == 0); 17767214Slling if (spa_spare_exists(guid, &pool, NULL) && 17777214Slling pool != 0ULL) { 17782082Seschrock VERIFY(nvlist_lookup_uint64_array( 17792082Seschrock spares[i], ZPOOL_CONFIG_STATS, 17802082Seschrock (uint64_t **)&vs, &vsc) == 0); 17812082Seschrock vs->vs_state = VDEV_STATE_CANT_OPEN; 17822082Seschrock vs->vs_aux = VDEV_AUX_SPARED; 17832082Seschrock } 17842082Seschrock } 17852082Seschrock } 17862082Seschrock } 17872082Seschrock 17885450Sbrendan /* 17895450Sbrendan * Add l2cache device information to the nvlist, including vdev stats. 
17905450Sbrendan */ 17915450Sbrendan static void 17925450Sbrendan spa_add_l2cache(spa_t *spa, nvlist_t *config) 17935450Sbrendan { 17945450Sbrendan nvlist_t **l2cache; 17955450Sbrendan uint_t i, j, nl2cache; 17965450Sbrendan nvlist_t *nvroot; 17975450Sbrendan uint64_t guid; 17985450Sbrendan vdev_t *vd; 17995450Sbrendan vdev_stat_t *vs; 18005450Sbrendan uint_t vsc; 18015450Sbrendan 18029425SEric.Schrock@Sun.COM ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); 18039425SEric.Schrock@Sun.COM 18045450Sbrendan if (spa->spa_l2cache.sav_count == 0) 18055450Sbrendan return; 18065450Sbrendan 18075450Sbrendan VERIFY(nvlist_lookup_nvlist(config, 18085450Sbrendan ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 18095450Sbrendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, 18105450Sbrendan ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 18115450Sbrendan if (nl2cache != 0) { 18125450Sbrendan VERIFY(nvlist_add_nvlist_array(nvroot, 18135450Sbrendan ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); 18145450Sbrendan VERIFY(nvlist_lookup_nvlist_array(nvroot, 18155450Sbrendan ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 18165450Sbrendan 18175450Sbrendan /* 18185450Sbrendan * Update level 2 cache device stats. 
18195450Sbrendan */ 18205450Sbrendan 18215450Sbrendan for (i = 0; i < nl2cache; i++) { 18225450Sbrendan VERIFY(nvlist_lookup_uint64(l2cache[i], 18235450Sbrendan ZPOOL_CONFIG_GUID, &guid) == 0); 18245450Sbrendan 18255450Sbrendan vd = NULL; 18265450Sbrendan for (j = 0; j < spa->spa_l2cache.sav_count; j++) { 18275450Sbrendan if (guid == 18285450Sbrendan spa->spa_l2cache.sav_vdevs[j]->vdev_guid) { 18295450Sbrendan vd = spa->spa_l2cache.sav_vdevs[j]; 18305450Sbrendan break; 18315450Sbrendan } 18325450Sbrendan } 18335450Sbrendan ASSERT(vd != NULL); 18345450Sbrendan 18355450Sbrendan VERIFY(nvlist_lookup_uint64_array(l2cache[i], 18365450Sbrendan ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0); 18375450Sbrendan vdev_get_stats(vd, vs); 18385450Sbrendan } 18395450Sbrendan } 18405450Sbrendan } 18415450Sbrendan 1842789Sahrens int 18431544Seschrock spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen) 1844789Sahrens { 1845789Sahrens int error; 1846789Sahrens spa_t *spa; 1847789Sahrens 1848789Sahrens *config = NULL; 1849789Sahrens error = spa_open_common(name, &spa, FTAG, config); 1850789Sahrens 18519425SEric.Schrock@Sun.COM if (spa != NULL) { 18529425SEric.Schrock@Sun.COM /* 18539425SEric.Schrock@Sun.COM * This still leaves a window of inconsistency where the spares 18549425SEric.Schrock@Sun.COM * or l2cache devices could change and the config would be 18559425SEric.Schrock@Sun.COM * self-inconsistent. 
18569425SEric.Schrock@Sun.COM */ 18579425SEric.Schrock@Sun.COM spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 18589425SEric.Schrock@Sun.COM 18599425SEric.Schrock@Sun.COM if (*config != NULL) { 18607754SJeff.Bonwick@Sun.COM VERIFY(nvlist_add_uint64(*config, 18619425SEric.Schrock@Sun.COM ZPOOL_CONFIG_ERRCOUNT, 18629425SEric.Schrock@Sun.COM spa_get_errlog_size(spa)) == 0); 18639425SEric.Schrock@Sun.COM 18649425SEric.Schrock@Sun.COM if (spa_suspended(spa)) 18659425SEric.Schrock@Sun.COM VERIFY(nvlist_add_uint64(*config, 18669425SEric.Schrock@Sun.COM ZPOOL_CONFIG_SUSPENDED, 18679425SEric.Schrock@Sun.COM spa->spa_failmode) == 0); 18689425SEric.Schrock@Sun.COM 18699425SEric.Schrock@Sun.COM spa_add_spares(spa, *config); 18709425SEric.Schrock@Sun.COM spa_add_l2cache(spa, *config); 18719425SEric.Schrock@Sun.COM } 18722082Seschrock } 18732082Seschrock 18741544Seschrock /* 18751544Seschrock * We want to get the alternate root even for faulted pools, so we cheat 18761544Seschrock * and call spa_lookup() directly. 18771544Seschrock */ 18781544Seschrock if (altroot) { 18791544Seschrock if (spa == NULL) { 18801544Seschrock mutex_enter(&spa_namespace_lock); 18811544Seschrock spa = spa_lookup(name); 18821544Seschrock if (spa) 18831544Seschrock spa_altroot(spa, altroot, buflen); 18841544Seschrock else 18851544Seschrock altroot[0] = '\0'; 18861544Seschrock spa = NULL; 18871544Seschrock mutex_exit(&spa_namespace_lock); 18881544Seschrock } else { 18891544Seschrock spa_altroot(spa, altroot, buflen); 18901544Seschrock } 18911544Seschrock } 18921544Seschrock 18939425SEric.Schrock@Sun.COM if (spa != NULL) { 18949425SEric.Schrock@Sun.COM spa_config_exit(spa, SCL_CONFIG, FTAG); 1895789Sahrens spa_close(spa, FTAG); 18969425SEric.Schrock@Sun.COM } 1897789Sahrens 1898789Sahrens return (error); 1899789Sahrens } 1900789Sahrens 1901789Sahrens /* 19025450Sbrendan * Validate that the auxiliary device array is well formed. 
We must have an 19035450Sbrendan * array of nvlists, each which describes a valid leaf vdev. If this is an 19045450Sbrendan * import (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be 19055450Sbrendan * specified, as long as they are well-formed. 19062082Seschrock */ 19072082Seschrock static int 19085450Sbrendan spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode, 19095450Sbrendan spa_aux_vdev_t *sav, const char *config, uint64_t version, 19105450Sbrendan vdev_labeltype_t label) 19112082Seschrock { 19125450Sbrendan nvlist_t **dev; 19135450Sbrendan uint_t i, ndev; 19142082Seschrock vdev_t *vd; 19152082Seschrock int error; 19162082Seschrock 19177754SJeff.Bonwick@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 19187754SJeff.Bonwick@Sun.COM 19192082Seschrock /* 19205450Sbrendan * It's acceptable to have no devs specified. 19212082Seschrock */ 19225450Sbrendan if (nvlist_lookup_nvlist_array(nvroot, config, &dev, &ndev) != 0) 19232082Seschrock return (0); 19242082Seschrock 19255450Sbrendan if (ndev == 0) 19262082Seschrock return (EINVAL); 19272082Seschrock 19282082Seschrock /* 19295450Sbrendan * Make sure the pool is formatted with a version that supports this 19305450Sbrendan * device type. 19312082Seschrock */ 19325450Sbrendan if (spa_version(spa) < version) 19332082Seschrock return (ENOTSUP); 19342082Seschrock 19353377Seschrock /* 19365450Sbrendan * Set the pending device list so we correctly handle device in-use 19373377Seschrock * checking. 
19383377Seschrock */ 19395450Sbrendan sav->sav_pending = dev; 19405450Sbrendan sav->sav_npending = ndev; 19415450Sbrendan 19425450Sbrendan for (i = 0; i < ndev; i++) { 19435450Sbrendan if ((error = spa_config_parse(spa, &vd, dev[i], NULL, 0, 19442082Seschrock mode)) != 0) 19453377Seschrock goto out; 19462082Seschrock 19472082Seschrock if (!vd->vdev_ops->vdev_op_leaf) { 19482082Seschrock vdev_free(vd); 19493377Seschrock error = EINVAL; 19503377Seschrock goto out; 19512082Seschrock } 19522082Seschrock 19535450Sbrendan /* 19547754SJeff.Bonwick@Sun.COM * The L2ARC currently only supports disk devices in 19557754SJeff.Bonwick@Sun.COM * kernel context. For user-level testing, we allow it. 19565450Sbrendan */ 19577754SJeff.Bonwick@Sun.COM #ifdef _KERNEL 19585450Sbrendan if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) && 19595450Sbrendan strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) { 19605450Sbrendan error = ENOTBLK; 19615450Sbrendan goto out; 19625450Sbrendan } 19637754SJeff.Bonwick@Sun.COM #endif 19642082Seschrock vd->vdev_top = vd; 19653377Seschrock 19663377Seschrock if ((error = vdev_open(vd)) == 0 && 19675450Sbrendan (error = vdev_label_init(vd, crtxg, label)) == 0) { 19685450Sbrendan VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID, 19693377Seschrock vd->vdev_guid) == 0); 19702082Seschrock } 19712082Seschrock 19722082Seschrock vdev_free(vd); 19733377Seschrock 19745450Sbrendan if (error && 19755450Sbrendan (mode != VDEV_ALLOC_SPARE && mode != VDEV_ALLOC_L2CACHE)) 19763377Seschrock goto out; 19773377Seschrock else 19783377Seschrock error = 0; 19792082Seschrock } 19802082Seschrock 19813377Seschrock out: 19825450Sbrendan sav->sav_pending = NULL; 19835450Sbrendan sav->sav_npending = 0; 19843377Seschrock return (error); 19852082Seschrock } 19862082Seschrock 19875450Sbrendan static int 19885450Sbrendan spa_validate_aux(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode) 19895450Sbrendan { 19905450Sbrendan int error; 19915450Sbrendan 
19927754SJeff.Bonwick@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 19937754SJeff.Bonwick@Sun.COM 19945450Sbrendan if ((error = spa_validate_aux_devs(spa, nvroot, crtxg, mode, 19955450Sbrendan &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES, 19965450Sbrendan VDEV_LABEL_SPARE)) != 0) { 19975450Sbrendan return (error); 19985450Sbrendan } 19995450Sbrendan 20005450Sbrendan return (spa_validate_aux_devs(spa, nvroot, crtxg, mode, 20015450Sbrendan &spa->spa_l2cache, ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE, 20025450Sbrendan VDEV_LABEL_L2CACHE)); 20035450Sbrendan } 20045450Sbrendan 20055450Sbrendan static void 20065450Sbrendan spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs, 20075450Sbrendan const char *config) 20085450Sbrendan { 20095450Sbrendan int i; 20105450Sbrendan 20115450Sbrendan if (sav->sav_config != NULL) { 20125450Sbrendan nvlist_t **olddevs; 20135450Sbrendan uint_t oldndevs; 20145450Sbrendan nvlist_t **newdevs; 20155450Sbrendan 20165450Sbrendan /* 20175450Sbrendan * Generate new dev list by concatentating with the 20185450Sbrendan * current dev list. 
20195450Sbrendan */ 20205450Sbrendan VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config, 20215450Sbrendan &olddevs, &oldndevs) == 0); 20225450Sbrendan 20235450Sbrendan newdevs = kmem_alloc(sizeof (void *) * 20245450Sbrendan (ndevs + oldndevs), KM_SLEEP); 20255450Sbrendan for (i = 0; i < oldndevs; i++) 20265450Sbrendan VERIFY(nvlist_dup(olddevs[i], &newdevs[i], 20275450Sbrendan KM_SLEEP) == 0); 20285450Sbrendan for (i = 0; i < ndevs; i++) 20295450Sbrendan VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs], 20305450Sbrendan KM_SLEEP) == 0); 20315450Sbrendan 20325450Sbrendan VERIFY(nvlist_remove(sav->sav_config, config, 20335450Sbrendan DATA_TYPE_NVLIST_ARRAY) == 0); 20345450Sbrendan 20355450Sbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, 20365450Sbrendan config, newdevs, ndevs + oldndevs) == 0); 20375450Sbrendan for (i = 0; i < oldndevs + ndevs; i++) 20385450Sbrendan nvlist_free(newdevs[i]); 20395450Sbrendan kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *)); 20405450Sbrendan } else { 20415450Sbrendan /* 20425450Sbrendan * Generate a new dev list. 
20435450Sbrendan */ 20445450Sbrendan VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME, 20455450Sbrendan KM_SLEEP) == 0); 20465450Sbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, config, 20475450Sbrendan devs, ndevs) == 0); 20485450Sbrendan } 20495450Sbrendan } 20505450Sbrendan 20515450Sbrendan /* 20525450Sbrendan * Stop and drop level 2 ARC devices 20535450Sbrendan */ 20545450Sbrendan void 20555450Sbrendan spa_l2cache_drop(spa_t *spa) 20565450Sbrendan { 20575450Sbrendan vdev_t *vd; 20585450Sbrendan int i; 20595450Sbrendan spa_aux_vdev_t *sav = &spa->spa_l2cache; 20605450Sbrendan 20615450Sbrendan for (i = 0; i < sav->sav_count; i++) { 20625450Sbrendan uint64_t pool; 20635450Sbrendan 20645450Sbrendan vd = sav->sav_vdevs[i]; 20655450Sbrendan ASSERT(vd != NULL); 20665450Sbrendan 20678241SJeff.Bonwick@Sun.COM if (spa_l2cache_exists(vd->vdev_guid, &pool) && 20688241SJeff.Bonwick@Sun.COM pool != 0ULL && l2arc_vdev_present(vd)) 20695450Sbrendan l2arc_remove_vdev(vd); 20705450Sbrendan if (vd->vdev_isl2cache) 20715450Sbrendan spa_l2cache_remove(vd); 20725450Sbrendan vdev_clear_stats(vd); 20735450Sbrendan (void) vdev_close(vd); 20745450Sbrendan } 20755450Sbrendan } 20765450Sbrendan 20772082Seschrock /* 2078789Sahrens * Pool Creation 2079789Sahrens */ 2080789Sahrens int 20815094Slling spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, 20827184Stimh const char *history_str, nvlist_t *zplprops) 2083789Sahrens { 2084789Sahrens spa_t *spa; 20855094Slling char *altroot = NULL; 20861635Sbonwick vdev_t *rvd; 2087789Sahrens dsl_pool_t *dp; 2088789Sahrens dmu_tx_t *tx; 20892082Seschrock int c, error = 0; 2090789Sahrens uint64_t txg = TXG_INITIAL; 20915450Sbrendan nvlist_t **spares, **l2cache; 20925450Sbrendan uint_t nspares, nl2cache; 20935094Slling uint64_t version; 2094789Sahrens 2095789Sahrens /* 2096789Sahrens * If this pool already exists, return failure. 
2097789Sahrens */ 2098789Sahrens mutex_enter(&spa_namespace_lock); 2099789Sahrens if (spa_lookup(pool) != NULL) { 2100789Sahrens mutex_exit(&spa_namespace_lock); 2101789Sahrens return (EEXIST); 2102789Sahrens } 2103789Sahrens 2104789Sahrens /* 2105789Sahrens * Allocate a new spa_t structure. 2106789Sahrens */ 21075094Slling (void) nvlist_lookup_string(props, 21085094Slling zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); 21091635Sbonwick spa = spa_add(pool, altroot); 21108241SJeff.Bonwick@Sun.COM spa_activate(spa, spa_mode_global); 2111789Sahrens 2112789Sahrens spa->spa_uberblock.ub_txg = txg - 1; 21135094Slling 21145094Slling if (props && (error = spa_prop_validate(spa, props))) { 21155094Slling spa_deactivate(spa); 21165094Slling spa_remove(spa); 21176643Seschrock mutex_exit(&spa_namespace_lock); 21185094Slling return (error); 21195094Slling } 21205094Slling 21215094Slling if (nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), 21225094Slling &version) != 0) 21235094Slling version = SPA_VERSION; 21245094Slling ASSERT(version <= SPA_VERSION); 21255094Slling spa->spa_uberblock.ub_version = version; 2126789Sahrens spa->spa_ubsync = spa->spa_uberblock; 2127789Sahrens 21281635Sbonwick /* 21299234SGeorge.Wilson@Sun.COM * Create "The Godfather" zio to hold all async IOs 21309234SGeorge.Wilson@Sun.COM */ 21319630SJeff.Bonwick@Sun.COM spa->spa_async_zio_root = zio_root(spa, NULL, NULL, 21329630SJeff.Bonwick@Sun.COM ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); 21339234SGeorge.Wilson@Sun.COM 21349234SGeorge.Wilson@Sun.COM /* 21351635Sbonwick * Create the root vdev. 
21361635Sbonwick */ 21377754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 21381635Sbonwick 21392082Seschrock error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD); 21402082Seschrock 21412082Seschrock ASSERT(error != 0 || rvd != NULL); 21422082Seschrock ASSERT(error != 0 || spa->spa_root_vdev == rvd); 21432082Seschrock 21445913Sperrin if (error == 0 && !zfs_allocatable_devs(nvroot)) 21451635Sbonwick error = EINVAL; 21462082Seschrock 21472082Seschrock if (error == 0 && 21482082Seschrock (error = vdev_create(rvd, txg, B_FALSE)) == 0 && 21495450Sbrendan (error = spa_validate_aux(spa, nvroot, txg, 21502082Seschrock VDEV_ALLOC_ADD)) == 0) { 21512082Seschrock for (c = 0; c < rvd->vdev_children; c++) 21522082Seschrock vdev_init(rvd->vdev_child[c], txg); 21532082Seschrock vdev_config_dirty(rvd); 21541635Sbonwick } 21551635Sbonwick 21567754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 2157789Sahrens 21582082Seschrock if (error != 0) { 2159789Sahrens spa_unload(spa); 2160789Sahrens spa_deactivate(spa); 2161789Sahrens spa_remove(spa); 2162789Sahrens mutex_exit(&spa_namespace_lock); 2163789Sahrens return (error); 2164789Sahrens } 2165789Sahrens 21662082Seschrock /* 21672082Seschrock * Get the list of spares, if specified. 
21682082Seschrock */ 21692082Seschrock if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 21702082Seschrock &spares, &nspares) == 0) { 21715450Sbrendan VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME, 21722082Seschrock KM_SLEEP) == 0); 21735450Sbrendan VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, 21742082Seschrock ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 21757754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 21762082Seschrock spa_load_spares(spa); 21777754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 21785450Sbrendan spa->spa_spares.sav_sync = B_TRUE; 21795450Sbrendan } 21805450Sbrendan 21815450Sbrendan /* 21825450Sbrendan * Get the list of level 2 cache devices, if specified. 21835450Sbrendan */ 21845450Sbrendan if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, 21855450Sbrendan &l2cache, &nl2cache) == 0) { 21865450Sbrendan VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, 21875450Sbrendan NV_UNIQUE_NAME, KM_SLEEP) == 0); 21885450Sbrendan VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, 21895450Sbrendan ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); 21907754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 21915450Sbrendan spa_load_l2cache(spa); 21927754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 21935450Sbrendan spa->spa_l2cache.sav_sync = B_TRUE; 21942082Seschrock } 21952082Seschrock 21967184Stimh spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg); 2197789Sahrens spa->spa_meta_objset = dp->dp_meta_objset; 2198789Sahrens 2199789Sahrens tx = dmu_tx_create_assigned(dp, txg); 2200789Sahrens 2201789Sahrens /* 2202789Sahrens * Create the pool config object. 
2203789Sahrens */ 2204789Sahrens spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset, 22057497STim.Haley@Sun.COM DMU_OT_PACKED_NVLIST, SPA_CONFIG_BLOCKSIZE, 2206789Sahrens DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx); 2207789Sahrens 22081544Seschrock if (zap_add(spa->spa_meta_objset, 2209789Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, 22101544Seschrock sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) { 22111544Seschrock cmn_err(CE_PANIC, "failed to add pool config"); 22121544Seschrock } 2213789Sahrens 22145094Slling /* Newly created pools with the right version are always deflated. */ 22155094Slling if (version >= SPA_VERSION_RAIDZ_DEFLATE) { 22165094Slling spa->spa_deflate = TRUE; 22175094Slling if (zap_add(spa->spa_meta_objset, 22185094Slling DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 22195094Slling sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) { 22205094Slling cmn_err(CE_PANIC, "failed to add deflate"); 22215094Slling } 22222082Seschrock } 22232082Seschrock 2224789Sahrens /* 2225789Sahrens * Create the deferred-free bplist object. Turn off compression 2226789Sahrens * because sync-to-convergence takes longer if the blocksize 2227789Sahrens * keeps changing. 2228789Sahrens */ 2229789Sahrens spa->spa_sync_bplist_obj = bplist_create(spa->spa_meta_objset, 2230789Sahrens 1 << 14, tx); 2231789Sahrens dmu_object_set_compress(spa->spa_meta_objset, spa->spa_sync_bplist_obj, 2232789Sahrens ZIO_COMPRESS_OFF, tx); 2233789Sahrens 22341544Seschrock if (zap_add(spa->spa_meta_objset, 2235789Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, 22361544Seschrock sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj, tx) != 0) { 22371544Seschrock cmn_err(CE_PANIC, "failed to add bplist"); 22381544Seschrock } 2239789Sahrens 22402926Sek110237 /* 22412926Sek110237 * Create the pool's history object. 
22422926Sek110237 */ 22435094Slling if (version >= SPA_VERSION_ZPOOL_HISTORY) 22445094Slling spa_history_create_obj(spa, tx); 22455094Slling 22465094Slling /* 22475094Slling * Set pool properties. 22485094Slling */ 22495094Slling spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS); 22505094Slling spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); 22515329Sgw25295 spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE); 22528525SEric.Schrock@Sun.COM if (props != NULL) { 22538525SEric.Schrock@Sun.COM spa_configfile_set(spa, props, B_FALSE); 22545094Slling spa_sync_props(spa, props, CRED(), tx); 22558525SEric.Schrock@Sun.COM } 22562926Sek110237 2257789Sahrens dmu_tx_commit(tx); 2258789Sahrens 2259789Sahrens spa->spa_sync_on = B_TRUE; 2260789Sahrens txg_sync_start(spa->spa_dsl_pool); 2261789Sahrens 2262789Sahrens /* 2263789Sahrens * We explicitly wait for the first transaction to complete so that our 2264789Sahrens * bean counters are appropriately updated. 2265789Sahrens */ 2266789Sahrens txg_wait_synced(spa->spa_dsl_pool, txg); 2267789Sahrens 22686643Seschrock spa_config_sync(spa, B_FALSE, B_TRUE); 2269789Sahrens 22705094Slling if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL) 22714715Sek110237 (void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE); 22724715Sek110237 22738667SGeorge.Wilson@Sun.COM spa->spa_minref = refcount_count(&spa->spa_refcount); 22748667SGeorge.Wilson@Sun.COM 2275789Sahrens mutex_exit(&spa_namespace_lock); 2276789Sahrens 2277789Sahrens return (0); 2278789Sahrens } 2279789Sahrens 22806423Sgw25295 #ifdef _KERNEL 22816423Sgw25295 /* 22826423Sgw25295 * Build a "root" vdev for a top level vdev read in from a rootpool 22836423Sgw25295 * device label. 
22846423Sgw25295 */ 22856423Sgw25295 static void 22866423Sgw25295 spa_build_rootpool_config(nvlist_t *config) 22876423Sgw25295 { 22886423Sgw25295 nvlist_t *nvtop, *nvroot; 22896423Sgw25295 uint64_t pgid; 22906423Sgw25295 22916423Sgw25295 /* 22926423Sgw25295 * Add this top-level vdev to the child array. 22936423Sgw25295 */ 22946423Sgw25295 VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvtop) 22956423Sgw25295 == 0); 22966423Sgw25295 VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pgid) 22976423Sgw25295 == 0); 22986423Sgw25295 22996423Sgw25295 /* 23006423Sgw25295 * Put this pool's top-level vdevs into a root vdev. 23016423Sgw25295 */ 23026423Sgw25295 VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); 23036423Sgw25295 VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) 23046423Sgw25295 == 0); 23056423Sgw25295 VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0); 23066423Sgw25295 VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0); 23076423Sgw25295 VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 23086423Sgw25295 &nvtop, 1) == 0); 23096423Sgw25295 23106423Sgw25295 /* 23116423Sgw25295 * Replace the existing vdev_tree with the new root vdev in 23126423Sgw25295 * this pool's configuration (remove the old, add the new). 23136423Sgw25295 */ 23146423Sgw25295 VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); 23156423Sgw25295 nvlist_free(nvroot); 23166423Sgw25295 } 23176423Sgw25295 23186423Sgw25295 /* 23196423Sgw25295 * Get the root pool information from the root disk, then import the root pool 23206423Sgw25295 * during the system boot up time. 
23216423Sgw25295 */ 23227539SLin.Ling@Sun.COM extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **); 23237147Staylor 23247147Staylor int 23257147Staylor spa_check_rootconf(char *devpath, char *devid, nvlist_t **bestconf, 23266423Sgw25295 uint64_t *besttxg) 23276423Sgw25295 { 23286423Sgw25295 nvlist_t *config; 23296423Sgw25295 uint64_t txg; 23307539SLin.Ling@Sun.COM int error; 23317539SLin.Ling@Sun.COM 23327539SLin.Ling@Sun.COM if (error = vdev_disk_read_rootlabel(devpath, devid, &config)) 23337539SLin.Ling@Sun.COM return (error); 23346423Sgw25295 23356423Sgw25295 VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) == 0); 23366423Sgw25295 23377147Staylor if (bestconf != NULL) 23386423Sgw25295 *bestconf = config; 23397539SLin.Ling@Sun.COM else 23407539SLin.Ling@Sun.COM nvlist_free(config); 23417147Staylor *besttxg = txg; 23427147Staylor return (0); 23436423Sgw25295 } 23446423Sgw25295 23456423Sgw25295 boolean_t 23466423Sgw25295 spa_rootdev_validate(nvlist_t *nv) 23476423Sgw25295 { 23486423Sgw25295 uint64_t ival; 23496423Sgw25295 23506423Sgw25295 if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &ival) == 0 || 23516423Sgw25295 nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED, &ival) == 0 || 23526423Sgw25295 nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED, &ival) == 0) 23536423Sgw25295 return (B_FALSE); 23546423Sgw25295 23556423Sgw25295 return (B_TRUE); 23566423Sgw25295 } 23576423Sgw25295 23587147Staylor 23597147Staylor /* 23607147Staylor * Given the boot device's physical path or devid, check if the device 23617147Staylor * is in a valid state. If so, return the configuration from the vdev 23627147Staylor * label. 
23637147Staylor */ 23647147Staylor int 23657147Staylor spa_get_rootconf(char *devpath, char *devid, nvlist_t **bestconf) 23667147Staylor { 23677147Staylor nvlist_t *conf = NULL; 23687147Staylor uint64_t txg = 0; 23697147Staylor nvlist_t *nvtop, **child; 23707147Staylor char *type; 23717147Staylor char *bootpath = NULL; 23727147Staylor uint_t children, c; 23737147Staylor char *tmp; 23747539SLin.Ling@Sun.COM int error; 23757147Staylor 23767147Staylor if (devpath && ((tmp = strchr(devpath, ' ')) != NULL)) 23777147Staylor *tmp = '\0'; 23787539SLin.Ling@Sun.COM if (error = spa_check_rootconf(devpath, devid, &conf, &txg)) { 23797147Staylor cmn_err(CE_NOTE, "error reading device label"); 23807539SLin.Ling@Sun.COM return (error); 23817147Staylor } 23827147Staylor if (txg == 0) { 23837147Staylor cmn_err(CE_NOTE, "this device is detached"); 23847147Staylor nvlist_free(conf); 23857147Staylor return (EINVAL); 23867147Staylor } 23877147Staylor 23887147Staylor VERIFY(nvlist_lookup_nvlist(conf, ZPOOL_CONFIG_VDEV_TREE, 23897147Staylor &nvtop) == 0); 23907147Staylor VERIFY(nvlist_lookup_string(nvtop, ZPOOL_CONFIG_TYPE, &type) == 0); 23917147Staylor 23927147Staylor if (strcmp(type, VDEV_TYPE_DISK) == 0) { 23937147Staylor if (spa_rootdev_validate(nvtop)) { 23947147Staylor goto out; 23957147Staylor } else { 23967147Staylor nvlist_free(conf); 23977147Staylor return (EINVAL); 23987147Staylor } 23997147Staylor } 24007147Staylor 24017147Staylor ASSERT(strcmp(type, VDEV_TYPE_MIRROR) == 0); 24027147Staylor 24037147Staylor VERIFY(nvlist_lookup_nvlist_array(nvtop, ZPOOL_CONFIG_CHILDREN, 24047147Staylor &child, &children) == 0); 24057147Staylor 24067147Staylor /* 24077147Staylor * Go thru vdevs in the mirror to see if the given device 24087147Staylor * has the most recent txg. Only the device with the most 24097147Staylor * recent txg has valid information and should be booted. 
24107147Staylor */ 24117147Staylor for (c = 0; c < children; c++) { 24127147Staylor char *cdevid, *cpath; 24137147Staylor uint64_t tmptxg; 24147147Staylor 24158242SLin.Ling@Sun.COM cpath = NULL; 24168242SLin.Ling@Sun.COM cdevid = NULL; 24179616SEric.Taylor@Sun.COM (void) nvlist_lookup_string(child[c], ZPOOL_CONFIG_PHYS_PATH, 24189616SEric.Taylor@Sun.COM &cpath); 24199616SEric.Taylor@Sun.COM (void) nvlist_lookup_string(child[c], ZPOOL_CONFIG_DEVID, 24209616SEric.Taylor@Sun.COM &cdevid); 24219616SEric.Taylor@Sun.COM if (cpath == NULL && cdevid == NULL) 24227147Staylor return (EINVAL); 24237687SLin.Ling@Sun.COM if ((spa_check_rootconf(cpath, cdevid, NULL, 24247687SLin.Ling@Sun.COM &tmptxg) == 0) && (tmptxg > txg)) { 24257147Staylor txg = tmptxg; 24267147Staylor VERIFY(nvlist_lookup_string(child[c], 24277147Staylor ZPOOL_CONFIG_PATH, &bootpath) == 0); 24287147Staylor } 24297147Staylor } 24307147Staylor 24317147Staylor /* Does the best device match the one we've booted from? */ 24327147Staylor if (bootpath) { 24337147Staylor cmn_err(CE_NOTE, "try booting from '%s'", bootpath); 24347147Staylor return (EINVAL); 24357147Staylor } 24367147Staylor out: 24377147Staylor *bestconf = conf; 24387147Staylor return (0); 24397147Staylor } 24407147Staylor 24416423Sgw25295 /* 24426423Sgw25295 * Import a root pool. 24436423Sgw25295 * 24447147Staylor * For x86. devpath_list will consist of devid and/or physpath name of 24457147Staylor * the vdev (e.g. "id1,sd@SSEAGATE..." or "/pci@1f,0/ide@d/disk@0,0:a"). 24467147Staylor * The GRUB "findroot" command will return the vdev we should boot. 24476423Sgw25295 * 24486423Sgw25295 * For Sparc, devpath_list consists the physpath name of the booting device 24496423Sgw25295 * no matter the rootpool is a single device pool or a mirrored pool. 24506423Sgw25295 * e.g. 
24516423Sgw25295 * "/pci@1f,0/ide@d/disk@0,0:a" 24526423Sgw25295 */ 24536423Sgw25295 int 24547147Staylor spa_import_rootpool(char *devpath, char *devid) 24556423Sgw25295 { 24566423Sgw25295 nvlist_t *conf = NULL; 24576423Sgw25295 char *pname; 24586423Sgw25295 int error; 24599425SEric.Schrock@Sun.COM spa_t *spa; 24606423Sgw25295 24616423Sgw25295 /* 24626423Sgw25295 * Get the vdev pathname and configuation from the most 24636423Sgw25295 * recently updated vdev (highest txg). 24646423Sgw25295 */ 24657147Staylor if (error = spa_get_rootconf(devpath, devid, &conf)) 24666423Sgw25295 goto msg_out; 24676423Sgw25295 24686423Sgw25295 /* 24696423Sgw25295 * Add type "root" vdev to the config. 24706423Sgw25295 */ 24716423Sgw25295 spa_build_rootpool_config(conf); 24726423Sgw25295 24736423Sgw25295 VERIFY(nvlist_lookup_string(conf, ZPOOL_CONFIG_POOL_NAME, &pname) == 0); 24746423Sgw25295 24759425SEric.Schrock@Sun.COM mutex_enter(&spa_namespace_lock); 24769425SEric.Schrock@Sun.COM if ((spa = spa_lookup(pname)) != NULL) { 24779425SEric.Schrock@Sun.COM /* 24789425SEric.Schrock@Sun.COM * Remove the existing root pool from the namespace so that we 24799425SEric.Schrock@Sun.COM * can replace it with the correct config we just read in. 24809425SEric.Schrock@Sun.COM */ 24819425SEric.Schrock@Sun.COM spa_remove(spa); 24829425SEric.Schrock@Sun.COM } 24839425SEric.Schrock@Sun.COM 24849425SEric.Schrock@Sun.COM spa = spa_add(pname, NULL); 24859425SEric.Schrock@Sun.COM 24869425SEric.Schrock@Sun.COM spa->spa_is_root = B_TRUE; 24879425SEric.Schrock@Sun.COM VERIFY(nvlist_dup(conf, &spa->spa_config, 0) == 0); 24889425SEric.Schrock@Sun.COM mutex_exit(&spa_namespace_lock); 24896423Sgw25295 24906423Sgw25295 nvlist_free(conf); 24919425SEric.Schrock@Sun.COM return (0); 24926423Sgw25295 24936423Sgw25295 msg_out: 24947147Staylor cmn_err(CE_NOTE, "\n" 24956423Sgw25295 " *************************************************** \n" 24966423Sgw25295 " * This device is not bootable! 
* \n" 24976423Sgw25295 " * It is either offlined or detached or faulted. * \n" 24986423Sgw25295 " * Please try to boot from a different device. * \n" 24997147Staylor " *************************************************** "); 25006423Sgw25295 25016423Sgw25295 return (error); 25026423Sgw25295 } 25036423Sgw25295 #endif 25046423Sgw25295 25056423Sgw25295 /* 25069425SEric.Schrock@Sun.COM * Take a pool and insert it into the namespace as if it had been loaded at 25079425SEric.Schrock@Sun.COM * boot. 25089425SEric.Schrock@Sun.COM */ 25099425SEric.Schrock@Sun.COM int 25109425SEric.Schrock@Sun.COM spa_import_verbatim(const char *pool, nvlist_t *config, nvlist_t *props) 25119425SEric.Schrock@Sun.COM { 25129425SEric.Schrock@Sun.COM spa_t *spa; 25139425SEric.Schrock@Sun.COM char *altroot = NULL; 25149425SEric.Schrock@Sun.COM 25159425SEric.Schrock@Sun.COM mutex_enter(&spa_namespace_lock); 25169425SEric.Schrock@Sun.COM if (spa_lookup(pool) != NULL) { 25179425SEric.Schrock@Sun.COM mutex_exit(&spa_namespace_lock); 25189425SEric.Schrock@Sun.COM return (EEXIST); 25199425SEric.Schrock@Sun.COM } 25209425SEric.Schrock@Sun.COM 25219425SEric.Schrock@Sun.COM (void) nvlist_lookup_string(props, 25229425SEric.Schrock@Sun.COM zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); 25239425SEric.Schrock@Sun.COM spa = spa_add(pool, altroot); 25249425SEric.Schrock@Sun.COM 25259425SEric.Schrock@Sun.COM VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0); 25269425SEric.Schrock@Sun.COM 25279425SEric.Schrock@Sun.COM if (props != NULL) 25289425SEric.Schrock@Sun.COM spa_configfile_set(spa, props, B_FALSE); 25299425SEric.Schrock@Sun.COM 25309425SEric.Schrock@Sun.COM spa_config_sync(spa, B_FALSE, B_TRUE); 25319425SEric.Schrock@Sun.COM 25329425SEric.Schrock@Sun.COM mutex_exit(&spa_namespace_lock); 25339425SEric.Schrock@Sun.COM 25349425SEric.Schrock@Sun.COM return (0); 25359425SEric.Schrock@Sun.COM } 25369425SEric.Schrock@Sun.COM 25379425SEric.Schrock@Sun.COM /* 25386423Sgw25295 * Import a non-root pool into 
the system. 25396423Sgw25295 */ 25406423Sgw25295 int 25416423Sgw25295 spa_import(const char *pool, nvlist_t *config, nvlist_t *props) 25426423Sgw25295 { 25439425SEric.Schrock@Sun.COM spa_t *spa; 25449425SEric.Schrock@Sun.COM char *altroot = NULL; 25459425SEric.Schrock@Sun.COM int error; 25469425SEric.Schrock@Sun.COM nvlist_t *nvroot; 25479425SEric.Schrock@Sun.COM nvlist_t **spares, **l2cache; 25489425SEric.Schrock@Sun.COM uint_t nspares, nl2cache; 25499425SEric.Schrock@Sun.COM 25509425SEric.Schrock@Sun.COM /* 25519425SEric.Schrock@Sun.COM * If a pool with this name exists, return failure. 25529425SEric.Schrock@Sun.COM */ 25539425SEric.Schrock@Sun.COM mutex_enter(&spa_namespace_lock); 25549425SEric.Schrock@Sun.COM if ((spa = spa_lookup(pool)) != NULL) { 25559425SEric.Schrock@Sun.COM mutex_exit(&spa_namespace_lock); 25569425SEric.Schrock@Sun.COM return (EEXIST); 25579425SEric.Schrock@Sun.COM } 25589425SEric.Schrock@Sun.COM 25599425SEric.Schrock@Sun.COM /* 25609425SEric.Schrock@Sun.COM * Create and initialize the spa structure. 25619425SEric.Schrock@Sun.COM */ 25629425SEric.Schrock@Sun.COM (void) nvlist_lookup_string(props, 25639425SEric.Schrock@Sun.COM zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); 25649425SEric.Schrock@Sun.COM spa = spa_add(pool, altroot); 25659425SEric.Schrock@Sun.COM spa_activate(spa, spa_mode_global); 25669425SEric.Schrock@Sun.COM 25679425SEric.Schrock@Sun.COM /* 25689630SJeff.Bonwick@Sun.COM * Don't start async tasks until we know everything is healthy. 25699630SJeff.Bonwick@Sun.COM */ 25709630SJeff.Bonwick@Sun.COM spa_async_suspend(spa); 25719630SJeff.Bonwick@Sun.COM 25729630SJeff.Bonwick@Sun.COM /* 25739425SEric.Schrock@Sun.COM * Pass off the heavy lifting to spa_load(). Pass TRUE for mosconfig 25749425SEric.Schrock@Sun.COM * because the user-supplied config is actually the one to trust when 25759425SEric.Schrock@Sun.COM * doing an import. 
25769425SEric.Schrock@Sun.COM */ 25779425SEric.Schrock@Sun.COM error = spa_load(spa, config, SPA_LOAD_IMPORT, B_TRUE); 25789425SEric.Schrock@Sun.COM 25799425SEric.Schrock@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 25809425SEric.Schrock@Sun.COM /* 25819425SEric.Schrock@Sun.COM * Toss any existing sparelist, as it doesn't have any validity 25829425SEric.Schrock@Sun.COM * anymore, and conflicts with spa_has_spare(). 25839425SEric.Schrock@Sun.COM */ 25849425SEric.Schrock@Sun.COM if (spa->spa_spares.sav_config) { 25859425SEric.Schrock@Sun.COM nvlist_free(spa->spa_spares.sav_config); 25869425SEric.Schrock@Sun.COM spa->spa_spares.sav_config = NULL; 25879425SEric.Schrock@Sun.COM spa_load_spares(spa); 25889425SEric.Schrock@Sun.COM } 25899425SEric.Schrock@Sun.COM if (spa->spa_l2cache.sav_config) { 25909425SEric.Schrock@Sun.COM nvlist_free(spa->spa_l2cache.sav_config); 25919425SEric.Schrock@Sun.COM spa->spa_l2cache.sav_config = NULL; 25929425SEric.Schrock@Sun.COM spa_load_l2cache(spa); 25939425SEric.Schrock@Sun.COM } 25949425SEric.Schrock@Sun.COM 25959425SEric.Schrock@Sun.COM VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 25969425SEric.Schrock@Sun.COM &nvroot) == 0); 25979425SEric.Schrock@Sun.COM if (error == 0) 25989425SEric.Schrock@Sun.COM error = spa_validate_aux(spa, nvroot, -1ULL, 25999425SEric.Schrock@Sun.COM VDEV_ALLOC_SPARE); 26009425SEric.Schrock@Sun.COM if (error == 0) 26019425SEric.Schrock@Sun.COM error = spa_validate_aux(spa, nvroot, -1ULL, 26029425SEric.Schrock@Sun.COM VDEV_ALLOC_L2CACHE); 26039425SEric.Schrock@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 26049425SEric.Schrock@Sun.COM 26059425SEric.Schrock@Sun.COM if (props != NULL) 26069425SEric.Schrock@Sun.COM spa_configfile_set(spa, props, B_FALSE); 26079425SEric.Schrock@Sun.COM 26089425SEric.Schrock@Sun.COM if (error != 0 || (props && spa_writeable(spa) && 26099425SEric.Schrock@Sun.COM (error = spa_prop_set(spa, props)))) { 26109425SEric.Schrock@Sun.COM spa_unload(spa); 
26119425SEric.Schrock@Sun.COM spa_deactivate(spa); 26129425SEric.Schrock@Sun.COM spa_remove(spa); 26139425SEric.Schrock@Sun.COM mutex_exit(&spa_namespace_lock); 26149425SEric.Schrock@Sun.COM return (error); 26159425SEric.Schrock@Sun.COM } 26169425SEric.Schrock@Sun.COM 26179630SJeff.Bonwick@Sun.COM spa_async_resume(spa); 26189630SJeff.Bonwick@Sun.COM 26199425SEric.Schrock@Sun.COM /* 26209425SEric.Schrock@Sun.COM * Override any spares and level 2 cache devices as specified by 26219425SEric.Schrock@Sun.COM * the user, as these may have correct device names/devids, etc. 26229425SEric.Schrock@Sun.COM */ 26239425SEric.Schrock@Sun.COM if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 26249425SEric.Schrock@Sun.COM &spares, &nspares) == 0) { 26259425SEric.Schrock@Sun.COM if (spa->spa_spares.sav_config) 26269425SEric.Schrock@Sun.COM VERIFY(nvlist_remove(spa->spa_spares.sav_config, 26279425SEric.Schrock@Sun.COM ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0); 26289425SEric.Schrock@Sun.COM else 26299425SEric.Schrock@Sun.COM VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, 26309425SEric.Schrock@Sun.COM NV_UNIQUE_NAME, KM_SLEEP) == 0); 26319425SEric.Schrock@Sun.COM VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, 26329425SEric.Schrock@Sun.COM ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 26339425SEric.Schrock@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 26349425SEric.Schrock@Sun.COM spa_load_spares(spa); 26359425SEric.Schrock@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 26369425SEric.Schrock@Sun.COM spa->spa_spares.sav_sync = B_TRUE; 26379425SEric.Schrock@Sun.COM } 26389425SEric.Schrock@Sun.COM if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, 26399425SEric.Schrock@Sun.COM &l2cache, &nl2cache) == 0) { 26409425SEric.Schrock@Sun.COM if (spa->spa_l2cache.sav_config) 26419425SEric.Schrock@Sun.COM VERIFY(nvlist_remove(spa->spa_l2cache.sav_config, 26429425SEric.Schrock@Sun.COM ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0); 
26439425SEric.Schrock@Sun.COM else 26449425SEric.Schrock@Sun.COM VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, 26459425SEric.Schrock@Sun.COM NV_UNIQUE_NAME, KM_SLEEP) == 0); 26469425SEric.Schrock@Sun.COM VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, 26479425SEric.Schrock@Sun.COM ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); 26489425SEric.Schrock@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 26499425SEric.Schrock@Sun.COM spa_load_l2cache(spa); 26509425SEric.Schrock@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 26519425SEric.Schrock@Sun.COM spa->spa_l2cache.sav_sync = B_TRUE; 26529425SEric.Schrock@Sun.COM } 26539425SEric.Schrock@Sun.COM 26549425SEric.Schrock@Sun.COM if (spa_writeable(spa)) { 26559425SEric.Schrock@Sun.COM /* 26569425SEric.Schrock@Sun.COM * Update the config cache to include the newly-imported pool. 26579425SEric.Schrock@Sun.COM */ 26589425SEric.Schrock@Sun.COM spa_config_update_common(spa, SPA_CONFIG_UPDATE_POOL, B_FALSE); 26599425SEric.Schrock@Sun.COM } 26609425SEric.Schrock@Sun.COM 26619425SEric.Schrock@Sun.COM mutex_exit(&spa_namespace_lock); 26629425SEric.Schrock@Sun.COM 26639425SEric.Schrock@Sun.COM return (0); 26646643Seschrock } 26656643Seschrock 26666643Seschrock 2667789Sahrens /* 2668789Sahrens * This (illegal) pool name is used when temporarily importing a spa_t in order 2669789Sahrens * to get the vdev stats associated with the imported devices. 
2670789Sahrens */ 2671789Sahrens #define TRYIMPORT_NAME "$import" 2672789Sahrens 2673789Sahrens nvlist_t * 2674789Sahrens spa_tryimport(nvlist_t *tryconfig) 2675789Sahrens { 2676789Sahrens nvlist_t *config = NULL; 2677789Sahrens char *poolname; 2678789Sahrens spa_t *spa; 2679789Sahrens uint64_t state; 26808680SLin.Ling@Sun.COM int error; 2681789Sahrens 2682789Sahrens if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname)) 2683789Sahrens return (NULL); 2684789Sahrens 2685789Sahrens if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state)) 2686789Sahrens return (NULL); 2687789Sahrens 26881635Sbonwick /* 26891635Sbonwick * Create and initialize the spa structure. 26901635Sbonwick */ 2691789Sahrens mutex_enter(&spa_namespace_lock); 26921635Sbonwick spa = spa_add(TRYIMPORT_NAME, NULL); 26938241SJeff.Bonwick@Sun.COM spa_activate(spa, FREAD); 2694789Sahrens 2695789Sahrens /* 26961635Sbonwick * Pass off the heavy lifting to spa_load(). 26971732Sbonwick * Pass TRUE for mosconfig because the user-supplied config 26981732Sbonwick * is actually the one to trust when doing an import. 2699789Sahrens */ 27008680SLin.Ling@Sun.COM error = spa_load(spa, tryconfig, SPA_LOAD_TRYIMPORT, B_TRUE); 2701789Sahrens 2702789Sahrens /* 2703789Sahrens * If 'tryconfig' was at least parsable, return the current config. 
2704789Sahrens */ 2705789Sahrens if (spa->spa_root_vdev != NULL) { 2706789Sahrens config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); 2707789Sahrens VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, 2708789Sahrens poolname) == 0); 2709789Sahrens VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, 2710789Sahrens state) == 0); 27113975Sek110237 VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP, 27123975Sek110237 spa->spa_uberblock.ub_timestamp) == 0); 27132082Seschrock 27142082Seschrock /* 27156423Sgw25295 * If the bootfs property exists on this pool then we 27166423Sgw25295 * copy it out so that external consumers can tell which 27176423Sgw25295 * pools are bootable. 27186423Sgw25295 */ 27198680SLin.Ling@Sun.COM if ((!error || error == EEXIST) && spa->spa_bootfs) { 27206423Sgw25295 char *tmpname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 27216423Sgw25295 27226423Sgw25295 /* 27236423Sgw25295 * We have to play games with the name since the 27246423Sgw25295 * pool was opened as TRYIMPORT_NAME. 27256423Sgw25295 */ 27267754SJeff.Bonwick@Sun.COM if (dsl_dsobj_to_dsname(spa_name(spa), 27276423Sgw25295 spa->spa_bootfs, tmpname) == 0) { 27286423Sgw25295 char *cp; 27296423Sgw25295 char *dsname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 27306423Sgw25295 27316423Sgw25295 cp = strchr(tmpname, '/'); 27326423Sgw25295 if (cp == NULL) { 27336423Sgw25295 (void) strlcpy(dsname, tmpname, 27346423Sgw25295 MAXPATHLEN); 27356423Sgw25295 } else { 27366423Sgw25295 (void) snprintf(dsname, MAXPATHLEN, 27376423Sgw25295 "%s/%s", poolname, ++cp); 27386423Sgw25295 } 27396423Sgw25295 VERIFY(nvlist_add_string(config, 27406423Sgw25295 ZPOOL_CONFIG_BOOTFS, dsname) == 0); 27416423Sgw25295 kmem_free(dsname, MAXPATHLEN); 27426423Sgw25295 } 27436423Sgw25295 kmem_free(tmpname, MAXPATHLEN); 27446423Sgw25295 } 27456423Sgw25295 27466423Sgw25295 /* 27475450Sbrendan * Add the list of hot spares and level 2 cache devices. 
27482082Seschrock */ 27499425SEric.Schrock@Sun.COM spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 27502082Seschrock spa_add_spares(spa, config); 27515450Sbrendan spa_add_l2cache(spa, config); 27529425SEric.Schrock@Sun.COM spa_config_exit(spa, SCL_CONFIG, FTAG); 2753789Sahrens } 2754789Sahrens 2755789Sahrens spa_unload(spa); 2756789Sahrens spa_deactivate(spa); 2757789Sahrens spa_remove(spa); 2758789Sahrens mutex_exit(&spa_namespace_lock); 2759789Sahrens 2760789Sahrens return (config); 2761789Sahrens } 2762789Sahrens 2763789Sahrens /* 2764789Sahrens * Pool export/destroy 2765789Sahrens * 2766789Sahrens * The act of destroying or exporting a pool is very simple. We make sure there 2767789Sahrens * is no more pending I/O and any references to the pool are gone. Then, we 2768789Sahrens * update the pool state and sync all the labels to disk, removing the 27698211SGeorge.Wilson@Sun.COM * configuration from the cache afterwards. If the 'hardforce' flag is set, then 27708211SGeorge.Wilson@Sun.COM * we don't sync the labels or remove the configuration cache. 2771789Sahrens */ 2772789Sahrens static int 27737214Slling spa_export_common(char *pool, int new_state, nvlist_t **oldconfig, 27748211SGeorge.Wilson@Sun.COM boolean_t force, boolean_t hardforce) 2775789Sahrens { 2776789Sahrens spa_t *spa; 2777789Sahrens 27781775Sbillm if (oldconfig) 27791775Sbillm *oldconfig = NULL; 27801775Sbillm 27818241SJeff.Bonwick@Sun.COM if (!(spa_mode_global & FWRITE)) 2782789Sahrens return (EROFS); 2783789Sahrens 2784789Sahrens mutex_enter(&spa_namespace_lock); 2785789Sahrens if ((spa = spa_lookup(pool)) == NULL) { 2786789Sahrens mutex_exit(&spa_namespace_lock); 2787789Sahrens return (ENOENT); 2788789Sahrens } 2789789Sahrens 2790789Sahrens /* 27911544Seschrock * Put a hold on the pool, drop the namespace lock, stop async tasks, 27921544Seschrock * reacquire the namespace lock, and see if we can export. 
27931544Seschrock */ 27941544Seschrock spa_open_ref(spa, FTAG); 27951544Seschrock mutex_exit(&spa_namespace_lock); 27961544Seschrock spa_async_suspend(spa); 27971544Seschrock mutex_enter(&spa_namespace_lock); 27981544Seschrock spa_close(spa, FTAG); 27991544Seschrock 28001544Seschrock /* 2801789Sahrens * The pool will be in core if it's openable, 2802789Sahrens * in which case we can modify its state. 2803789Sahrens */ 2804789Sahrens if (spa->spa_state != POOL_STATE_UNINITIALIZED && spa->spa_sync_on) { 2805789Sahrens /* 2806789Sahrens * Objsets may be open only because they're dirty, so we 2807789Sahrens * have to force it to sync before checking spa_refcnt. 2808789Sahrens */ 2809789Sahrens txg_wait_synced(spa->spa_dsl_pool, 0); 2810789Sahrens 28111544Seschrock /* 28121544Seschrock * A pool cannot be exported or destroyed if there are active 28131544Seschrock * references. If we are resetting a pool, allow references by 28141544Seschrock * fault injection handlers. 28151544Seschrock */ 28161544Seschrock if (!spa_refcount_zero(spa) || 28171544Seschrock (spa->spa_inject_ref != 0 && 28181544Seschrock new_state != POOL_STATE_UNINITIALIZED)) { 28191544Seschrock spa_async_resume(spa); 2820789Sahrens mutex_exit(&spa_namespace_lock); 2821789Sahrens return (EBUSY); 2822789Sahrens } 2823789Sahrens 2824789Sahrens /* 28257214Slling * A pool cannot be exported if it has an active shared spare. 28267214Slling * This is to prevent other pools stealing the active spare 28277214Slling * from an exported pool. At user's own will, such pool can 28287214Slling * be forcedly exported. 28297214Slling */ 28307214Slling if (!force && new_state == POOL_STATE_EXPORTED && 28317214Slling spa_has_active_shared_spare(spa)) { 28327214Slling spa_async_resume(spa); 28337214Slling mutex_exit(&spa_namespace_lock); 28347214Slling return (EXDEV); 28357214Slling } 28367214Slling 28377214Slling /* 2838789Sahrens * We want this to be reflected on every label, 2839789Sahrens * so mark them all dirty. 
spa_unload() will do the 2840789Sahrens * final sync that pushes these changes out. 2841789Sahrens */ 28428211SGeorge.Wilson@Sun.COM if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) { 28437754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 28441544Seschrock spa->spa_state = new_state; 28451635Sbonwick spa->spa_final_txg = spa_last_synced_txg(spa) + 1; 28461544Seschrock vdev_config_dirty(spa->spa_root_vdev); 28477754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 28481544Seschrock } 2849789Sahrens } 2850789Sahrens 28514451Seschrock spa_event_notify(spa, NULL, ESC_ZFS_POOL_DESTROY); 28524451Seschrock 2853789Sahrens if (spa->spa_state != POOL_STATE_UNINITIALIZED) { 2854789Sahrens spa_unload(spa); 2855789Sahrens spa_deactivate(spa); 2856789Sahrens } 2857789Sahrens 28581775Sbillm if (oldconfig && spa->spa_config) 28591775Sbillm VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0); 28601775Sbillm 28611544Seschrock if (new_state != POOL_STATE_UNINITIALIZED) { 28628211SGeorge.Wilson@Sun.COM if (!hardforce) 28638211SGeorge.Wilson@Sun.COM spa_config_sync(spa, B_TRUE, B_TRUE); 28641544Seschrock spa_remove(spa); 28651544Seschrock } 2866789Sahrens mutex_exit(&spa_namespace_lock); 2867789Sahrens 2868789Sahrens return (0); 2869789Sahrens } 2870789Sahrens 2871789Sahrens /* 2872789Sahrens * Destroy a storage pool. 2873789Sahrens */ 2874789Sahrens int 2875789Sahrens spa_destroy(char *pool) 2876789Sahrens { 28778211SGeorge.Wilson@Sun.COM return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL, 28788211SGeorge.Wilson@Sun.COM B_FALSE, B_FALSE)); 2879789Sahrens } 2880789Sahrens 2881789Sahrens /* 2882789Sahrens * Export a storage pool. 
2883789Sahrens */ 2884789Sahrens int 28858211SGeorge.Wilson@Sun.COM spa_export(char *pool, nvlist_t **oldconfig, boolean_t force, 28868211SGeorge.Wilson@Sun.COM boolean_t hardforce) 2887789Sahrens { 28888211SGeorge.Wilson@Sun.COM return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig, 28898211SGeorge.Wilson@Sun.COM force, hardforce)); 2890789Sahrens } 2891789Sahrens 2892789Sahrens /* 28931544Seschrock * Similar to spa_export(), this unloads the spa_t without actually removing it 28941544Seschrock * from the namespace in any way. 28951544Seschrock */ 28961544Seschrock int 28971544Seschrock spa_reset(char *pool) 28981544Seschrock { 28997214Slling return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL, 29008211SGeorge.Wilson@Sun.COM B_FALSE, B_FALSE)); 29011544Seschrock } 29021544Seschrock 29031544Seschrock /* 2904789Sahrens * ========================================================================== 2905789Sahrens * Device manipulation 2906789Sahrens * ========================================================================== 2907789Sahrens */ 2908789Sahrens 2909789Sahrens /* 29104527Sperrin * Add a device to a storage pool. 
2911789Sahrens */ 2912789Sahrens int 2913789Sahrens spa_vdev_add(spa_t *spa, nvlist_t *nvroot) 2914789Sahrens { 2915789Sahrens uint64_t txg; 29168241SJeff.Bonwick@Sun.COM int error; 2917789Sahrens vdev_t *rvd = spa->spa_root_vdev; 29181585Sbonwick vdev_t *vd, *tvd; 29195450Sbrendan nvlist_t **spares, **l2cache; 29205450Sbrendan uint_t nspares, nl2cache; 2921789Sahrens 2922789Sahrens txg = spa_vdev_enter(spa); 2923789Sahrens 29242082Seschrock if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0, 29252082Seschrock VDEV_ALLOC_ADD)) != 0) 29262082Seschrock return (spa_vdev_exit(spa, NULL, txg, error)); 29272082Seschrock 29287754SJeff.Bonwick@Sun.COM spa->spa_pending_vdev = vd; /* spa_vdev_exit() will clear this */ 2929789Sahrens 29305450Sbrendan if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, 29315450Sbrendan &nspares) != 0) 29322082Seschrock nspares = 0; 29332082Seschrock 29345450Sbrendan if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, 29355450Sbrendan &nl2cache) != 0) 29365450Sbrendan nl2cache = 0; 29375450Sbrendan 29387754SJeff.Bonwick@Sun.COM if (vd->vdev_children == 0 && nspares == 0 && nl2cache == 0) 29392082Seschrock return (spa_vdev_exit(spa, vd, txg, EINVAL)); 29407754SJeff.Bonwick@Sun.COM 29417754SJeff.Bonwick@Sun.COM if (vd->vdev_children != 0 && 29427754SJeff.Bonwick@Sun.COM (error = vdev_create(vd, txg, B_FALSE)) != 0) 29437754SJeff.Bonwick@Sun.COM return (spa_vdev_exit(spa, vd, txg, error)); 29442082Seschrock 29453377Seschrock /* 29465450Sbrendan * We must validate the spares and l2cache devices after checking the 29475450Sbrendan * children. Otherwise, vdev_inuse() will blindly overwrite the spare. 29483377Seschrock */ 29497754SJeff.Bonwick@Sun.COM if ((error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) != 0) 29503377Seschrock return (spa_vdev_exit(spa, vd, txg, error)); 29513377Seschrock 29523377Seschrock /* 29533377Seschrock * Transfer each new top-level vdev from vd to rvd. 
29543377Seschrock */ 29558241SJeff.Bonwick@Sun.COM for (int c = 0; c < vd->vdev_children; c++) { 29563377Seschrock tvd = vd->vdev_child[c]; 29573377Seschrock vdev_remove_child(vd, tvd); 29583377Seschrock tvd->vdev_id = rvd->vdev_children; 29593377Seschrock vdev_add_child(rvd, tvd); 29603377Seschrock vdev_config_dirty(tvd); 29613377Seschrock } 29623377Seschrock 29632082Seschrock if (nspares != 0) { 29645450Sbrendan spa_set_aux_vdevs(&spa->spa_spares, spares, nspares, 29655450Sbrendan ZPOOL_CONFIG_SPARES); 29662082Seschrock spa_load_spares(spa); 29675450Sbrendan spa->spa_spares.sav_sync = B_TRUE; 29685450Sbrendan } 29695450Sbrendan 29705450Sbrendan if (nl2cache != 0) { 29715450Sbrendan spa_set_aux_vdevs(&spa->spa_l2cache, l2cache, nl2cache, 29725450Sbrendan ZPOOL_CONFIG_L2CACHE); 29735450Sbrendan spa_load_l2cache(spa); 29745450Sbrendan spa->spa_l2cache.sav_sync = B_TRUE; 2975789Sahrens } 2976789Sahrens 2977789Sahrens /* 29781585Sbonwick * We have to be careful when adding new vdevs to an existing pool. 29791585Sbonwick * If other threads start allocating from these vdevs before we 29801585Sbonwick * sync the config cache, and we lose power, then upon reboot we may 29811585Sbonwick * fail to open the pool because there are DVAs that the config cache 29821585Sbonwick * can't translate. Therefore, we first add the vdevs without 29831585Sbonwick * initializing metaslabs; sync the config cache (via spa_vdev_exit()); 29841635Sbonwick * and then let spa_config_update() initialize the new metaslabs. 29851585Sbonwick * 29861585Sbonwick * spa_load() checks for added-but-not-initialized vdevs, so that 29871585Sbonwick * if we lose power at any point in this sequence, the remaining 29881585Sbonwick * steps will be completed the next time we load the pool. 
2989789Sahrens */ 29901635Sbonwick (void) spa_vdev_exit(spa, vd, txg, 0); 29911585Sbonwick 29921635Sbonwick mutex_enter(&spa_namespace_lock); 29931635Sbonwick spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); 29941635Sbonwick mutex_exit(&spa_namespace_lock); 2995789Sahrens 29961635Sbonwick return (0); 2997789Sahrens } 2998789Sahrens 2999789Sahrens /* 3000789Sahrens * Attach a device to a mirror. The arguments are the path to any device 3001789Sahrens * in the mirror, and the nvroot for the new device. If the path specifies 3002789Sahrens * a device that is not mirrored, we automatically insert the mirror vdev. 3003789Sahrens * 3004789Sahrens * If 'replacing' is specified, the new device is intended to replace the 3005789Sahrens * existing device; in this case the two devices are made into their own 30064451Seschrock * mirror using the 'replacing' vdev, which is functionally identical to 3007789Sahrens * the mirror vdev (it actually reuses all the same ops) but has a few 3008789Sahrens * extra rules: you can't attach to it after it's been created, and upon 3009789Sahrens * completion of resilvering, the first disk (the one being replaced) 3010789Sahrens * is automatically detached. 
3011789Sahrens */ 3012789Sahrens int 30131544Seschrock spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) 3014789Sahrens { 3015789Sahrens uint64_t txg, open_txg; 3016789Sahrens vdev_t *rvd = spa->spa_root_vdev; 3017789Sahrens vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd; 30182082Seschrock vdev_ops_t *pvops; 30197313SEric.Kustarz@Sun.COM dmu_tx_t *tx; 30207313SEric.Kustarz@Sun.COM char *oldvdpath, *newvdpath; 30217313SEric.Kustarz@Sun.COM int newvd_isspare; 30227313SEric.Kustarz@Sun.COM int error; 3023789Sahrens 3024789Sahrens txg = spa_vdev_enter(spa); 3025789Sahrens 30266643Seschrock oldvd = spa_lookup_by_guid(spa, guid, B_FALSE); 3027789Sahrens 3028789Sahrens if (oldvd == NULL) 3029789Sahrens return (spa_vdev_exit(spa, NULL, txg, ENODEV)); 3030789Sahrens 30311585Sbonwick if (!oldvd->vdev_ops->vdev_op_leaf) 30321585Sbonwick return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 30331585Sbonwick 3034789Sahrens pvd = oldvd->vdev_parent; 3035789Sahrens 30362082Seschrock if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0, 30374451Seschrock VDEV_ALLOC_ADD)) != 0) 30384451Seschrock return (spa_vdev_exit(spa, NULL, txg, EINVAL)); 30394451Seschrock 30404451Seschrock if (newrootvd->vdev_children != 1) 3041789Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); 3042789Sahrens 3043789Sahrens newvd = newrootvd->vdev_child[0]; 3044789Sahrens 3045789Sahrens if (!newvd->vdev_ops->vdev_op_leaf) 3046789Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); 3047789Sahrens 30482082Seschrock if ((error = vdev_create(newrootvd, txg, replacing)) != 0) 3049789Sahrens return (spa_vdev_exit(spa, newrootvd, txg, error)); 3050789Sahrens 30514527Sperrin /* 30524527Sperrin * Spares can't replace logs 30534527Sperrin */ 30547326SEric.Schrock@Sun.COM if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare) 30554527Sperrin return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 30564527Sperrin 30572082Seschrock if (!replacing) { 30582082Seschrock /* 
30592082Seschrock * For attach, the only allowable parent is a mirror or the root 30602082Seschrock * vdev. 30612082Seschrock */ 30622082Seschrock if (pvd->vdev_ops != &vdev_mirror_ops && 30632082Seschrock pvd->vdev_ops != &vdev_root_ops) 30642082Seschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 30652082Seschrock 30662082Seschrock pvops = &vdev_mirror_ops; 30672082Seschrock } else { 30682082Seschrock /* 30692082Seschrock * Active hot spares can only be replaced by inactive hot 30702082Seschrock * spares. 30712082Seschrock */ 30722082Seschrock if (pvd->vdev_ops == &vdev_spare_ops && 30732082Seschrock pvd->vdev_child[1] == oldvd && 30742082Seschrock !spa_has_spare(spa, newvd->vdev_guid)) 30752082Seschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 30762082Seschrock 30772082Seschrock /* 30782082Seschrock * If the source is a hot spare, and the parent isn't already a 30792082Seschrock * spare, then we want to create a new hot spare. Otherwise, we 30803377Seschrock * want to create a replacing vdev. The user is not allowed to 30813377Seschrock * attach to a spared vdev child unless the 'isspare' state is 30823377Seschrock * the same (spare replaces spare, non-spare replaces 30833377Seschrock * non-spare). 30842082Seschrock */ 30852082Seschrock if (pvd->vdev_ops == &vdev_replacing_ops) 30862082Seschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 30873377Seschrock else if (pvd->vdev_ops == &vdev_spare_ops && 30883377Seschrock newvd->vdev_isspare != oldvd->vdev_isspare) 30893377Seschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 30902082Seschrock else if (pvd->vdev_ops != &vdev_spare_ops && 30912082Seschrock newvd->vdev_isspare) 30922082Seschrock pvops = &vdev_spare_ops; 30932082Seschrock else 30942082Seschrock pvops = &vdev_replacing_ops; 30952082Seschrock } 30962082Seschrock 30971175Slling /* 30981175Slling * Compare the new device size with the replaceable/attachable 30991175Slling * device size. 
31001175Slling */ 31011175Slling if (newvd->vdev_psize < vdev_get_rsize(oldvd)) 3102789Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW)); 3103789Sahrens 31041732Sbonwick /* 31051732Sbonwick * The new device cannot have a higher alignment requirement 31061732Sbonwick * than the top-level vdev. 31071732Sbonwick */ 31081732Sbonwick if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift) 3109789Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EDOM)); 3110789Sahrens 3111789Sahrens /* 3112789Sahrens * If this is an in-place replacement, update oldvd's path and devid 3113789Sahrens * to make it distinguishable from newvd, and unopenable from now on. 3114789Sahrens */ 3115789Sahrens if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) { 3116789Sahrens spa_strfree(oldvd->vdev_path); 3117789Sahrens oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5, 3118789Sahrens KM_SLEEP); 3119789Sahrens (void) sprintf(oldvd->vdev_path, "%s/%s", 3120789Sahrens newvd->vdev_path, "old"); 3121789Sahrens if (oldvd->vdev_devid != NULL) { 3122789Sahrens spa_strfree(oldvd->vdev_devid); 3123789Sahrens oldvd->vdev_devid = NULL; 3124789Sahrens } 3125789Sahrens } 3126789Sahrens 3127789Sahrens /* 31282082Seschrock * If the parent is not a mirror, or if we're replacing, insert the new 31292082Seschrock * mirror/replacing/spare vdev above oldvd. 3130789Sahrens */ 3131789Sahrens if (pvd->vdev_ops != pvops) 3132789Sahrens pvd = vdev_add_parent(oldvd, pvops); 3133789Sahrens 3134789Sahrens ASSERT(pvd->vdev_top->vdev_parent == rvd); 3135789Sahrens ASSERT(pvd->vdev_ops == pvops); 3136789Sahrens ASSERT(oldvd->vdev_parent == pvd); 3137789Sahrens 3138789Sahrens /* 3139789Sahrens * Extract the new device from its root and add it to pvd. 
3140789Sahrens */ 3141789Sahrens vdev_remove_child(newrootvd, newvd); 3142789Sahrens newvd->vdev_id = pvd->vdev_children; 3143789Sahrens vdev_add_child(pvd, newvd); 3144789Sahrens 31451544Seschrock /* 31461544Seschrock * If newvd is smaller than oldvd, but larger than its rsize, 31471544Seschrock * the addition of newvd may have decreased our parent's asize. 31481544Seschrock */ 31491544Seschrock pvd->vdev_asize = MIN(pvd->vdev_asize, newvd->vdev_asize); 31501544Seschrock 3151789Sahrens tvd = newvd->vdev_top; 3152789Sahrens ASSERT(pvd->vdev_top == tvd); 3153789Sahrens ASSERT(tvd->vdev_parent == rvd); 3154789Sahrens 3155789Sahrens vdev_config_dirty(tvd); 3156789Sahrens 3157789Sahrens /* 3158789Sahrens * Set newvd's DTL to [TXG_INITIAL, open_txg]. It will propagate 3159789Sahrens * upward when spa_vdev_exit() calls vdev_dtl_reassess(). 3160789Sahrens */ 3161789Sahrens open_txg = txg + TXG_CONCURRENT_STATES - 1; 3162789Sahrens 31638241SJeff.Bonwick@Sun.COM vdev_dtl_dirty(newvd, DTL_MISSING, 31648241SJeff.Bonwick@Sun.COM TXG_INITIAL, open_txg - TXG_INITIAL + 1); 3165789Sahrens 31669425SEric.Schrock@Sun.COM if (newvd->vdev_isspare) { 31673377Seschrock spa_spare_activate(newvd); 31689425SEric.Schrock@Sun.COM spa_event_notify(spa, newvd, ESC_ZFS_VDEV_SPARE); 31699425SEric.Schrock@Sun.COM } 31709425SEric.Schrock@Sun.COM 31717754SJeff.Bonwick@Sun.COM oldvdpath = spa_strdup(oldvd->vdev_path); 31727754SJeff.Bonwick@Sun.COM newvdpath = spa_strdup(newvd->vdev_path); 31737313SEric.Kustarz@Sun.COM newvd_isspare = newvd->vdev_isspare; 31741544Seschrock 3175789Sahrens /* 3176789Sahrens * Mark newvd's DTL dirty in this txg. 
3177789Sahrens */ 31781732Sbonwick vdev_dirty(tvd, VDD_DTL, newvd, txg); 3179789Sahrens 3180789Sahrens (void) spa_vdev_exit(spa, newrootvd, open_txg, 0); 3181789Sahrens 31827313SEric.Kustarz@Sun.COM tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); 31837313SEric.Kustarz@Sun.COM if (dmu_tx_assign(tx, TXG_WAIT) == 0) { 31847313SEric.Kustarz@Sun.COM spa_history_internal_log(LOG_POOL_VDEV_ATTACH, spa, tx, 31857313SEric.Kustarz@Sun.COM CRED(), "%s vdev=%s %s vdev=%s", 31867313SEric.Kustarz@Sun.COM replacing && newvd_isspare ? "spare in" : 31877313SEric.Kustarz@Sun.COM replacing ? "replace" : "attach", newvdpath, 31887313SEric.Kustarz@Sun.COM replacing ? "for" : "to", oldvdpath); 31897313SEric.Kustarz@Sun.COM dmu_tx_commit(tx); 31907313SEric.Kustarz@Sun.COM } else { 31917313SEric.Kustarz@Sun.COM dmu_tx_abort(tx); 31927313SEric.Kustarz@Sun.COM } 31937313SEric.Kustarz@Sun.COM 31947313SEric.Kustarz@Sun.COM spa_strfree(oldvdpath); 31957313SEric.Kustarz@Sun.COM spa_strfree(newvdpath); 31967313SEric.Kustarz@Sun.COM 3197789Sahrens /* 31987046Sahrens * Kick off a resilver to update newvd. 3199789Sahrens */ 32007046Sahrens VERIFY3U(spa_scrub(spa, POOL_SCRUB_RESILVER), ==, 0); 3201789Sahrens 3202789Sahrens return (0); 3203789Sahrens } 3204789Sahrens 3205789Sahrens /* 3206789Sahrens * Detach a device from a mirror or replacing vdev. 3207789Sahrens * If 'replace_done' is specified, only detach if the parent 3208789Sahrens * is a replacing vdev. 
3209789Sahrens */ 3210789Sahrens int 32118241SJeff.Bonwick@Sun.COM spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) 3212789Sahrens { 3213789Sahrens uint64_t txg; 32148241SJeff.Bonwick@Sun.COM int error; 3215789Sahrens vdev_t *rvd = spa->spa_root_vdev; 3216789Sahrens vdev_t *vd, *pvd, *cvd, *tvd; 32172082Seschrock boolean_t unspare = B_FALSE; 32182082Seschrock uint64_t unspare_guid; 32196673Seschrock size_t len; 3220789Sahrens 3221789Sahrens txg = spa_vdev_enter(spa); 3222789Sahrens 32236643Seschrock vd = spa_lookup_by_guid(spa, guid, B_FALSE); 3224789Sahrens 3225789Sahrens if (vd == NULL) 3226789Sahrens return (spa_vdev_exit(spa, NULL, txg, ENODEV)); 3227789Sahrens 32281585Sbonwick if (!vd->vdev_ops->vdev_op_leaf) 32291585Sbonwick return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 32301585Sbonwick 3231789Sahrens pvd = vd->vdev_parent; 3232789Sahrens 3233789Sahrens /* 32348241SJeff.Bonwick@Sun.COM * If the parent/child relationship is not as expected, don't do it. 32358241SJeff.Bonwick@Sun.COM * Consider M(A,R(B,C)) -- that is, a mirror of A with a replacing 32368241SJeff.Bonwick@Sun.COM * vdev that's replacing B with C. The user's intent in replacing 32378241SJeff.Bonwick@Sun.COM * is to go from M(A,B) to M(A,C). If the user decides to cancel 32388241SJeff.Bonwick@Sun.COM * the replace by detaching C, the expected behavior is to end up 32398241SJeff.Bonwick@Sun.COM * M(A,B). But suppose that right after deciding to detach C, 32408241SJeff.Bonwick@Sun.COM * the replacement of B completes. We would have M(A,C), and then 32418241SJeff.Bonwick@Sun.COM * ask to detach C, which would leave us with just A -- not what 32428241SJeff.Bonwick@Sun.COM * the user wanted. To prevent this, we make sure that the 32438241SJeff.Bonwick@Sun.COM * parent/child relationship hasn't changed -- in this example, 32448241SJeff.Bonwick@Sun.COM * that C's parent is still the replacing vdev R. 
32458241SJeff.Bonwick@Sun.COM */ 32468241SJeff.Bonwick@Sun.COM if (pvd->vdev_guid != pguid && pguid != 0) 32478241SJeff.Bonwick@Sun.COM return (spa_vdev_exit(spa, NULL, txg, EBUSY)); 32488241SJeff.Bonwick@Sun.COM 32498241SJeff.Bonwick@Sun.COM /* 3250789Sahrens * If replace_done is specified, only remove this device if it's 32512082Seschrock * the first child of a replacing vdev. For the 'spare' vdev, either 32522082Seschrock * disk can be removed. 3253789Sahrens */ 32542082Seschrock if (replace_done) { 32552082Seschrock if (pvd->vdev_ops == &vdev_replacing_ops) { 32562082Seschrock if (vd->vdev_id != 0) 32572082Seschrock return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 32582082Seschrock } else if (pvd->vdev_ops != &vdev_spare_ops) { 32592082Seschrock return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 32602082Seschrock } 32612082Seschrock } 32622082Seschrock 32632082Seschrock ASSERT(pvd->vdev_ops != &vdev_spare_ops || 32644577Sahrens spa_version(spa) >= SPA_VERSION_SPARES); 3265789Sahrens 3266789Sahrens /* 32672082Seschrock * Only mirror, replacing, and spare vdevs support detach. 3268789Sahrens */ 3269789Sahrens if (pvd->vdev_ops != &vdev_replacing_ops && 32702082Seschrock pvd->vdev_ops != &vdev_mirror_ops && 32712082Seschrock pvd->vdev_ops != &vdev_spare_ops) 3272789Sahrens return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 3273789Sahrens 3274789Sahrens /* 32758241SJeff.Bonwick@Sun.COM * If this device has the only valid copy of some data, 32768241SJeff.Bonwick@Sun.COM * we cannot safely detach it. 
3277789Sahrens */ 32788241SJeff.Bonwick@Sun.COM if (vdev_dtl_required(vd)) 3279789Sahrens return (spa_vdev_exit(spa, NULL, txg, EBUSY)); 3280789Sahrens 32818241SJeff.Bonwick@Sun.COM ASSERT(pvd->vdev_children >= 2); 32828241SJeff.Bonwick@Sun.COM 3283789Sahrens /* 32846673Seschrock * If we are detaching the second disk from a replacing vdev, then 32856673Seschrock * check to see if we changed the original vdev's path to have "/old" 32866673Seschrock * at the end in spa_vdev_attach(). If so, undo that change now. 32876673Seschrock */ 32886673Seschrock if (pvd->vdev_ops == &vdev_replacing_ops && vd->vdev_id == 1 && 32896673Seschrock pvd->vdev_child[0]->vdev_path != NULL && 32906673Seschrock pvd->vdev_child[1]->vdev_path != NULL) { 32916673Seschrock ASSERT(pvd->vdev_child[1] == vd); 32926673Seschrock cvd = pvd->vdev_child[0]; 32936673Seschrock len = strlen(vd->vdev_path); 32946673Seschrock if (strncmp(cvd->vdev_path, vd->vdev_path, len) == 0 && 32956673Seschrock strcmp(cvd->vdev_path + len, "/old") == 0) { 32966673Seschrock spa_strfree(cvd->vdev_path); 32976673Seschrock cvd->vdev_path = spa_strdup(vd->vdev_path); 32986673Seschrock } 32996673Seschrock } 33006673Seschrock 33016673Seschrock /* 33022082Seschrock * If we are detaching the original disk from a spare, then it implies 33032082Seschrock * that the spare should become a real disk, and be removed from the 33042082Seschrock * active spare list for the pool. 33052082Seschrock */ 33062082Seschrock if (pvd->vdev_ops == &vdev_spare_ops && 33078241SJeff.Bonwick@Sun.COM vd->vdev_id == 0 && pvd->vdev_child[1]->vdev_isspare) 33082082Seschrock unspare = B_TRUE; 33092082Seschrock 33102082Seschrock /* 3311789Sahrens * Erase the disk labels so the disk can be used for other things. 3312789Sahrens * This must be done after all other error cases are handled, 3313789Sahrens * but before we disembowel vd (so we can still do I/O to it). 
3314789Sahrens * But if we can't do it, don't treat the error as fatal -- 3315789Sahrens * it may be that the unwritability of the disk is the reason 3316789Sahrens * it's being detached! 3317789Sahrens */ 33183377Seschrock error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); 3319789Sahrens 3320789Sahrens /* 3321789Sahrens * Remove vd from its parent and compact the parent's children. 3322789Sahrens */ 3323789Sahrens vdev_remove_child(pvd, vd); 3324789Sahrens vdev_compact_children(pvd); 3325789Sahrens 3326789Sahrens /* 3327789Sahrens * Remember one of the remaining children so we can get tvd below. 3328789Sahrens */ 3329789Sahrens cvd = pvd->vdev_child[0]; 3330789Sahrens 3331789Sahrens /* 33322082Seschrock * If we need to remove the remaining child from the list of hot spares, 33338241SJeff.Bonwick@Sun.COM * do it now, marking the vdev as no longer a spare in the process. 33348241SJeff.Bonwick@Sun.COM * We must do this before vdev_remove_parent(), because that can 33358241SJeff.Bonwick@Sun.COM * change the GUID if it creates a new toplevel GUID. For a similar 33368241SJeff.Bonwick@Sun.COM * reason, we must remove the spare now, in the same txg as the detach; 33378241SJeff.Bonwick@Sun.COM * otherwise someone could attach a new sibling, change the GUID, and 33388241SJeff.Bonwick@Sun.COM * the subsequent attempt to spa_vdev_remove(unspare_guid) would fail. 33392082Seschrock */ 33402082Seschrock if (unspare) { 33412082Seschrock ASSERT(cvd->vdev_isspare); 33423377Seschrock spa_spare_remove(cvd); 33432082Seschrock unspare_guid = cvd->vdev_guid; 33448241SJeff.Bonwick@Sun.COM (void) spa_vdev_remove(spa, unspare_guid, B_TRUE); 33452082Seschrock } 33462082Seschrock 33472082Seschrock /* 3348789Sahrens * If the parent mirror/replacing vdev only has one child, 3349789Sahrens * the parent is no longer needed. Remove it from the tree. 
3350789Sahrens */ 3351789Sahrens if (pvd->vdev_children == 1) 3352789Sahrens vdev_remove_parent(cvd); 3353789Sahrens 3354789Sahrens /* 3355789Sahrens * We don't set tvd until now because the parent we just removed 3356789Sahrens * may have been the previous top-level vdev. 3357789Sahrens */ 3358789Sahrens tvd = cvd->vdev_top; 3359789Sahrens ASSERT(tvd->vdev_parent == rvd); 3360789Sahrens 3361789Sahrens /* 33623377Seschrock * Reevaluate the parent vdev state. 3363789Sahrens */ 33644451Seschrock vdev_propagate_state(cvd); 3365789Sahrens 3366789Sahrens /* 33673377Seschrock * If the device we just detached was smaller than the others, it may be 33683377Seschrock * possible to add metaslabs (i.e. grow the pool). vdev_metaslab_init() 33693377Seschrock * can't fail because the existing metaslabs are already in core, so 33703377Seschrock * there's nothing to read from disk. 3371789Sahrens */ 33721732Sbonwick VERIFY(vdev_metaslab_init(tvd, txg) == 0); 3373789Sahrens 3374789Sahrens vdev_config_dirty(tvd); 3375789Sahrens 3376789Sahrens /* 33773377Seschrock * Mark vd's DTL as dirty in this txg. vdev_dtl_sync() will see that 33783377Seschrock * vd->vdev_detached is set and free vd's DTL object in syncing context. 33793377Seschrock * But first make sure we're not on any *other* txg's DTL list, to 33803377Seschrock * prevent vd from being accessed after it's freed. 
3381789Sahrens */ 33828241SJeff.Bonwick@Sun.COM for (int t = 0; t < TXG_SIZE; t++) 3383789Sahrens (void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t); 33841732Sbonwick vd->vdev_detached = B_TRUE; 33851732Sbonwick vdev_dirty(tvd, VDD_DTL, vd, txg); 3386789Sahrens 33874451Seschrock spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE); 33884451Seschrock 33892082Seschrock error = spa_vdev_exit(spa, vd, txg, 0); 33902082Seschrock 33912082Seschrock /* 33923377Seschrock * If this was the removal of the original device in a hot spare vdev, 33933377Seschrock * then we want to go through and remove the device from the hot spare 33943377Seschrock * list of every other pool. 33952082Seschrock */ 33962082Seschrock if (unspare) { 33978241SJeff.Bonwick@Sun.COM spa_t *myspa = spa; 33982082Seschrock spa = NULL; 33992082Seschrock mutex_enter(&spa_namespace_lock); 34002082Seschrock while ((spa = spa_next(spa)) != NULL) { 34012082Seschrock if (spa->spa_state != POOL_STATE_ACTIVE) 34022082Seschrock continue; 34038241SJeff.Bonwick@Sun.COM if (spa == myspa) 34048241SJeff.Bonwick@Sun.COM continue; 34057793SJeff.Bonwick@Sun.COM spa_open_ref(spa, FTAG); 34067793SJeff.Bonwick@Sun.COM mutex_exit(&spa_namespace_lock); 34072082Seschrock (void) spa_vdev_remove(spa, unspare_guid, B_TRUE); 34087793SJeff.Bonwick@Sun.COM mutex_enter(&spa_namespace_lock); 34097793SJeff.Bonwick@Sun.COM spa_close(spa, FTAG); 34102082Seschrock } 34112082Seschrock mutex_exit(&spa_namespace_lock); 34122082Seschrock } 34132082Seschrock 34142082Seschrock return (error); 34152082Seschrock } 34162082Seschrock 34177754SJeff.Bonwick@Sun.COM static nvlist_t * 34187754SJeff.Bonwick@Sun.COM spa_nvlist_lookup_by_guid(nvlist_t **nvpp, int count, uint64_t target_guid) 34192082Seschrock { 34207754SJeff.Bonwick@Sun.COM for (int i = 0; i < count; i++) { 34217754SJeff.Bonwick@Sun.COM uint64_t guid; 34227754SJeff.Bonwick@Sun.COM 34237754SJeff.Bonwick@Sun.COM VERIFY(nvlist_lookup_uint64(nvpp[i], ZPOOL_CONFIG_GUID, 
34247754SJeff.Bonwick@Sun.COM &guid) == 0); 34257754SJeff.Bonwick@Sun.COM 34267754SJeff.Bonwick@Sun.COM if (guid == target_guid) 34277754SJeff.Bonwick@Sun.COM return (nvpp[i]); 34282082Seschrock } 34292082Seschrock 34307754SJeff.Bonwick@Sun.COM return (NULL); 34315450Sbrendan } 34325450Sbrendan 34337754SJeff.Bonwick@Sun.COM static void 34347754SJeff.Bonwick@Sun.COM spa_vdev_remove_aux(nvlist_t *config, char *name, nvlist_t **dev, int count, 34357754SJeff.Bonwick@Sun.COM nvlist_t *dev_to_remove) 34365450Sbrendan { 34377754SJeff.Bonwick@Sun.COM nvlist_t **newdev = NULL; 34387754SJeff.Bonwick@Sun.COM 34397754SJeff.Bonwick@Sun.COM if (count > 1) 34407754SJeff.Bonwick@Sun.COM newdev = kmem_alloc((count - 1) * sizeof (void *), KM_SLEEP); 34417754SJeff.Bonwick@Sun.COM 34427754SJeff.Bonwick@Sun.COM for (int i = 0, j = 0; i < count; i++) { 34437754SJeff.Bonwick@Sun.COM if (dev[i] == dev_to_remove) 34447754SJeff.Bonwick@Sun.COM continue; 34457754SJeff.Bonwick@Sun.COM VERIFY(nvlist_dup(dev[i], &newdev[j++], KM_SLEEP) == 0); 34465450Sbrendan } 34475450Sbrendan 34487754SJeff.Bonwick@Sun.COM VERIFY(nvlist_remove(config, name, DATA_TYPE_NVLIST_ARRAY) == 0); 34497754SJeff.Bonwick@Sun.COM VERIFY(nvlist_add_nvlist_array(config, name, newdev, count - 1) == 0); 34507754SJeff.Bonwick@Sun.COM 34517754SJeff.Bonwick@Sun.COM for (int i = 0; i < count - 1; i++) 34527754SJeff.Bonwick@Sun.COM nvlist_free(newdev[i]); 34537754SJeff.Bonwick@Sun.COM 34547754SJeff.Bonwick@Sun.COM if (count > 1) 34557754SJeff.Bonwick@Sun.COM kmem_free(newdev, (count - 1) * sizeof (void *)); 34565450Sbrendan } 34575450Sbrendan 34585450Sbrendan /* 34595450Sbrendan * Remove a device from the pool. Currently, this supports removing only hot 34605450Sbrendan * spares and level 2 ARC devices. 
34615450Sbrendan */ 34625450Sbrendan int 34635450Sbrendan spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) 34645450Sbrendan { 34655450Sbrendan vdev_t *vd; 34667754SJeff.Bonwick@Sun.COM nvlist_t **spares, **l2cache, *nv; 34675450Sbrendan uint_t nspares, nl2cache; 34688241SJeff.Bonwick@Sun.COM uint64_t txg = 0; 34695450Sbrendan int error = 0; 34708241SJeff.Bonwick@Sun.COM boolean_t locked = MUTEX_HELD(&spa_namespace_lock); 34718241SJeff.Bonwick@Sun.COM 34728241SJeff.Bonwick@Sun.COM if (!locked) 34738241SJeff.Bonwick@Sun.COM txg = spa_vdev_enter(spa); 34745450Sbrendan 34756643Seschrock vd = spa_lookup_by_guid(spa, guid, B_FALSE); 34765450Sbrendan 34775450Sbrendan if (spa->spa_spares.sav_vdevs != NULL && 34785450Sbrendan nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 34797754SJeff.Bonwick@Sun.COM ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0 && 34807754SJeff.Bonwick@Sun.COM (nv = spa_nvlist_lookup_by_guid(spares, nspares, guid)) != NULL) { 34817754SJeff.Bonwick@Sun.COM /* 34827754SJeff.Bonwick@Sun.COM * Only remove the hot spare if it's not currently in use 34837754SJeff.Bonwick@Sun.COM * in this pool. 
34847754SJeff.Bonwick@Sun.COM */ 34857754SJeff.Bonwick@Sun.COM if (vd == NULL || unspare) { 34867754SJeff.Bonwick@Sun.COM spa_vdev_remove_aux(spa->spa_spares.sav_config, 34877754SJeff.Bonwick@Sun.COM ZPOOL_CONFIG_SPARES, spares, nspares, nv); 34887754SJeff.Bonwick@Sun.COM spa_load_spares(spa); 34897754SJeff.Bonwick@Sun.COM spa->spa_spares.sav_sync = B_TRUE; 34907754SJeff.Bonwick@Sun.COM } else { 34917754SJeff.Bonwick@Sun.COM error = EBUSY; 34927754SJeff.Bonwick@Sun.COM } 34937754SJeff.Bonwick@Sun.COM } else if (spa->spa_l2cache.sav_vdevs != NULL && 34945450Sbrendan nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, 34957754SJeff.Bonwick@Sun.COM ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0 && 34967754SJeff.Bonwick@Sun.COM (nv = spa_nvlist_lookup_by_guid(l2cache, nl2cache, guid)) != NULL) { 34977754SJeff.Bonwick@Sun.COM /* 34987754SJeff.Bonwick@Sun.COM * Cache devices can always be removed. 34997754SJeff.Bonwick@Sun.COM */ 35007754SJeff.Bonwick@Sun.COM spa_vdev_remove_aux(spa->spa_l2cache.sav_config, 35017754SJeff.Bonwick@Sun.COM ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache, nv); 35025450Sbrendan spa_load_l2cache(spa); 35035450Sbrendan spa->spa_l2cache.sav_sync = B_TRUE; 35047754SJeff.Bonwick@Sun.COM } else if (vd != NULL) { 35057754SJeff.Bonwick@Sun.COM /* 35067754SJeff.Bonwick@Sun.COM * Normal vdevs cannot be removed (yet). 35077754SJeff.Bonwick@Sun.COM */ 35087754SJeff.Bonwick@Sun.COM error = ENOTSUP; 35097754SJeff.Bonwick@Sun.COM } else { 35107754SJeff.Bonwick@Sun.COM /* 35117754SJeff.Bonwick@Sun.COM * There is no vdev of any kind with the specified guid. 
35127754SJeff.Bonwick@Sun.COM */ 35137754SJeff.Bonwick@Sun.COM error = ENOENT; 35145450Sbrendan } 35152082Seschrock 35168241SJeff.Bonwick@Sun.COM if (!locked) 35178241SJeff.Bonwick@Sun.COM return (spa_vdev_exit(spa, NULL, txg, error)); 35188241SJeff.Bonwick@Sun.COM 35198241SJeff.Bonwick@Sun.COM return (error); 3520789Sahrens } 3521789Sahrens 3522789Sahrens /* 35234451Seschrock * Find any device that's done replacing, or a vdev marked 'unspare' that's 35244451Seschrock * current spared, so we can detach it. 3525789Sahrens */ 35261544Seschrock static vdev_t * 35274451Seschrock spa_vdev_resilver_done_hunt(vdev_t *vd) 3528789Sahrens { 35291544Seschrock vdev_t *newvd, *oldvd; 3530789Sahrens int c; 3531789Sahrens 35321544Seschrock for (c = 0; c < vd->vdev_children; c++) { 35334451Seschrock oldvd = spa_vdev_resilver_done_hunt(vd->vdev_child[c]); 35341544Seschrock if (oldvd != NULL) 35351544Seschrock return (oldvd); 35361544Seschrock } 3537789Sahrens 35384451Seschrock /* 35394451Seschrock * Check for a completed replacement. 35404451Seschrock */ 3541789Sahrens if (vd->vdev_ops == &vdev_replacing_ops && vd->vdev_children == 2) { 35421544Seschrock oldvd = vd->vdev_child[0]; 35431544Seschrock newvd = vd->vdev_child[1]; 3544789Sahrens 35458241SJeff.Bonwick@Sun.COM if (vdev_dtl_empty(newvd, DTL_MISSING) && 35468241SJeff.Bonwick@Sun.COM !vdev_dtl_required(oldvd)) 35471544Seschrock return (oldvd); 35481544Seschrock } 3549789Sahrens 35504451Seschrock /* 35514451Seschrock * Check for a completed resilver with the 'unspare' flag set. 
35524451Seschrock */ 35534451Seschrock if (vd->vdev_ops == &vdev_spare_ops && vd->vdev_children == 2) { 35544451Seschrock newvd = vd->vdev_child[0]; 35554451Seschrock oldvd = vd->vdev_child[1]; 35564451Seschrock 35574451Seschrock if (newvd->vdev_unspare && 35588241SJeff.Bonwick@Sun.COM vdev_dtl_empty(newvd, DTL_MISSING) && 35598241SJeff.Bonwick@Sun.COM !vdev_dtl_required(oldvd)) { 35604451Seschrock newvd->vdev_unspare = 0; 35614451Seschrock return (oldvd); 35624451Seschrock } 35634451Seschrock } 35644451Seschrock 35651544Seschrock return (NULL); 3566789Sahrens } 3567789Sahrens 35681544Seschrock static void 35694451Seschrock spa_vdev_resilver_done(spa_t *spa) 3570789Sahrens { 35718241SJeff.Bonwick@Sun.COM vdev_t *vd, *pvd, *ppvd; 35728241SJeff.Bonwick@Sun.COM uint64_t guid, sguid, pguid, ppguid; 35738241SJeff.Bonwick@Sun.COM 35748241SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 3575789Sahrens 35764451Seschrock while ((vd = spa_vdev_resilver_done_hunt(spa->spa_root_vdev)) != NULL) { 35778241SJeff.Bonwick@Sun.COM pvd = vd->vdev_parent; 35788241SJeff.Bonwick@Sun.COM ppvd = pvd->vdev_parent; 35791544Seschrock guid = vd->vdev_guid; 35808241SJeff.Bonwick@Sun.COM pguid = pvd->vdev_guid; 35818241SJeff.Bonwick@Sun.COM ppguid = ppvd->vdev_guid; 35828241SJeff.Bonwick@Sun.COM sguid = 0; 35832082Seschrock /* 35842082Seschrock * If we have just finished replacing a hot spared device, then 35852082Seschrock * we need to detach the parent's first child (the original hot 35862082Seschrock * spare) as well. 
35872082Seschrock */ 35888241SJeff.Bonwick@Sun.COM if (ppvd->vdev_ops == &vdev_spare_ops && pvd->vdev_id == 0) { 35892082Seschrock ASSERT(pvd->vdev_ops == &vdev_replacing_ops); 35908241SJeff.Bonwick@Sun.COM ASSERT(ppvd->vdev_children == 2); 35918241SJeff.Bonwick@Sun.COM sguid = ppvd->vdev_child[1]->vdev_guid; 35922082Seschrock } 35938241SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 35948241SJeff.Bonwick@Sun.COM if (spa_vdev_detach(spa, guid, pguid, B_TRUE) != 0) 35951544Seschrock return; 35968241SJeff.Bonwick@Sun.COM if (sguid && spa_vdev_detach(spa, sguid, ppguid, B_TRUE) != 0) 35972082Seschrock return; 35988241SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 3599789Sahrens } 3600789Sahrens 36018241SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 3602789Sahrens } 3603789Sahrens 3604789Sahrens /* 36059425SEric.Schrock@Sun.COM * Update the stored path or FRU for this vdev. Dirty the vdev configuration, 36069425SEric.Schrock@Sun.COM * relying on spa_vdev_enter/exit() to synchronize the labels and cache. 
36071354Seschrock */ 36081354Seschrock int 36099425SEric.Schrock@Sun.COM spa_vdev_set_common(spa_t *spa, uint64_t guid, const char *value, 36109425SEric.Schrock@Sun.COM boolean_t ispath) 36111354Seschrock { 36126643Seschrock vdev_t *vd; 36131354Seschrock uint64_t txg; 36141354Seschrock 36151354Seschrock txg = spa_vdev_enter(spa); 36161354Seschrock 36179425SEric.Schrock@Sun.COM if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) 36185450Sbrendan return (spa_vdev_exit(spa, NULL, txg, ENOENT)); 36191354Seschrock 36201585Sbonwick if (!vd->vdev_ops->vdev_op_leaf) 36211585Sbonwick return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 36221585Sbonwick 36239425SEric.Schrock@Sun.COM if (ispath) { 36249425SEric.Schrock@Sun.COM spa_strfree(vd->vdev_path); 36259425SEric.Schrock@Sun.COM vd->vdev_path = spa_strdup(value); 36269425SEric.Schrock@Sun.COM } else { 36279425SEric.Schrock@Sun.COM if (vd->vdev_fru != NULL) 36289425SEric.Schrock@Sun.COM spa_strfree(vd->vdev_fru); 36299425SEric.Schrock@Sun.COM vd->vdev_fru = spa_strdup(value); 36309425SEric.Schrock@Sun.COM } 36311354Seschrock 36321354Seschrock vdev_config_dirty(vd->vdev_top); 36331354Seschrock 36341354Seschrock return (spa_vdev_exit(spa, NULL, txg, 0)); 36351354Seschrock } 36361354Seschrock 36379425SEric.Schrock@Sun.COM int 36389425SEric.Schrock@Sun.COM spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath) 36399425SEric.Schrock@Sun.COM { 36409425SEric.Schrock@Sun.COM return (spa_vdev_set_common(spa, guid, newpath, B_TRUE)); 36419425SEric.Schrock@Sun.COM } 36429425SEric.Schrock@Sun.COM 36439425SEric.Schrock@Sun.COM int 36449425SEric.Schrock@Sun.COM spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru) 36459425SEric.Schrock@Sun.COM { 36469425SEric.Schrock@Sun.COM return (spa_vdev_set_common(spa, guid, newfru, B_FALSE)); 36479425SEric.Schrock@Sun.COM } 36489425SEric.Schrock@Sun.COM 36491354Seschrock /* 3650789Sahrens * ========================================================================== 
3651789Sahrens * SPA Scrubbing 3652789Sahrens * ========================================================================== 3653789Sahrens */ 3654789Sahrens 36557046Sahrens int 36567046Sahrens spa_scrub(spa_t *spa, pool_scrub_type_t type) 3657789Sahrens { 36587754SJeff.Bonwick@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); 36594808Sek110237 3660789Sahrens if ((uint_t)type >= POOL_SCRUB_TYPES) 3661789Sahrens return (ENOTSUP); 3662789Sahrens 3663789Sahrens /* 36647046Sahrens * If a resilver was requested, but there is no DTL on a 36657046Sahrens * writeable leaf device, we have nothing to do. 3666789Sahrens */ 36677046Sahrens if (type == POOL_SCRUB_RESILVER && 36687046Sahrens !vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) { 36697046Sahrens spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); 36701544Seschrock return (0); 36711544Seschrock } 3672789Sahrens 36737046Sahrens if (type == POOL_SCRUB_EVERYTHING && 36747046Sahrens spa->spa_dsl_pool->dp_scrub_func != SCRUB_FUNC_NONE && 36757046Sahrens spa->spa_dsl_pool->dp_scrub_isresilver) 36767046Sahrens return (EBUSY); 36777046Sahrens 36787046Sahrens if (type == POOL_SCRUB_EVERYTHING || type == POOL_SCRUB_RESILVER) { 36797046Sahrens return (dsl_pool_scrub_clean(spa->spa_dsl_pool)); 36807046Sahrens } else if (type == POOL_SCRUB_NONE) { 36817046Sahrens return (dsl_pool_scrub_cancel(spa->spa_dsl_pool)); 36821544Seschrock } else { 36837046Sahrens return (EINVAL); 36841544Seschrock } 3685789Sahrens } 3686789Sahrens 36871544Seschrock /* 36881544Seschrock * ========================================================================== 36891544Seschrock * SPA async task processing 36901544Seschrock * ========================================================================== 36911544Seschrock */ 36921544Seschrock 36931544Seschrock static void 36944451Seschrock spa_async_remove(spa_t *spa, vdev_t *vd) 3695789Sahrens { 36967361SBrendan.Gregg@Sun.COM if (vd->vdev_remove_wanted) { 36977361SBrendan.Gregg@Sun.COM 
vd->vdev_remove_wanted = 0; 36987361SBrendan.Gregg@Sun.COM vdev_set_state(vd, B_FALSE, VDEV_STATE_REMOVED, VDEV_AUX_NONE); 36997754SJeff.Bonwick@Sun.COM vdev_clear(spa, vd); 37007754SJeff.Bonwick@Sun.COM vdev_state_dirty(vd->vdev_top); 37011544Seschrock } 37027361SBrendan.Gregg@Sun.COM 37037754SJeff.Bonwick@Sun.COM for (int c = 0; c < vd->vdev_children; c++) 37047361SBrendan.Gregg@Sun.COM spa_async_remove(spa, vd->vdev_child[c]); 37051544Seschrock } 37061544Seschrock 37071544Seschrock static void 37087754SJeff.Bonwick@Sun.COM spa_async_probe(spa_t *spa, vdev_t *vd) 37097754SJeff.Bonwick@Sun.COM { 37107754SJeff.Bonwick@Sun.COM if (vd->vdev_probe_wanted) { 37117754SJeff.Bonwick@Sun.COM vd->vdev_probe_wanted = 0; 37127754SJeff.Bonwick@Sun.COM vdev_reopen(vd); /* vdev_open() does the actual probe */ 37137754SJeff.Bonwick@Sun.COM } 37147754SJeff.Bonwick@Sun.COM 37157754SJeff.Bonwick@Sun.COM for (int c = 0; c < vd->vdev_children; c++) 37167754SJeff.Bonwick@Sun.COM spa_async_probe(spa, vd->vdev_child[c]); 37177754SJeff.Bonwick@Sun.COM } 37187754SJeff.Bonwick@Sun.COM 37197754SJeff.Bonwick@Sun.COM static void 37201544Seschrock spa_async_thread(spa_t *spa) 37211544Seschrock { 37227754SJeff.Bonwick@Sun.COM int tasks; 37231544Seschrock 37241544Seschrock ASSERT(spa->spa_sync_on); 3725789Sahrens 37261544Seschrock mutex_enter(&spa->spa_async_lock); 37271544Seschrock tasks = spa->spa_async_tasks; 37281544Seschrock spa->spa_async_tasks = 0; 37291544Seschrock mutex_exit(&spa->spa_async_lock); 37301544Seschrock 37311544Seschrock /* 37321635Sbonwick * See if the config needs to be updated. 37331635Sbonwick */ 37341635Sbonwick if (tasks & SPA_ASYNC_CONFIG_UPDATE) { 37351635Sbonwick mutex_enter(&spa_namespace_lock); 37361635Sbonwick spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); 37371635Sbonwick mutex_exit(&spa_namespace_lock); 37381635Sbonwick } 37391635Sbonwick 37401635Sbonwick /* 37414451Seschrock * See if any devices need to be marked REMOVED. 
37421544Seschrock */ 37437754SJeff.Bonwick@Sun.COM if (tasks & SPA_ASYNC_REMOVE) { 37447754SJeff.Bonwick@Sun.COM spa_vdev_state_enter(spa); 37454451Seschrock spa_async_remove(spa, spa->spa_root_vdev); 37467754SJeff.Bonwick@Sun.COM for (int i = 0; i < spa->spa_l2cache.sav_count; i++) 37477361SBrendan.Gregg@Sun.COM spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i]); 37487754SJeff.Bonwick@Sun.COM for (int i = 0; i < spa->spa_spares.sav_count; i++) 37497361SBrendan.Gregg@Sun.COM spa_async_remove(spa, spa->spa_spares.sav_vdevs[i]); 37507754SJeff.Bonwick@Sun.COM (void) spa_vdev_state_exit(spa, NULL, 0); 37517754SJeff.Bonwick@Sun.COM } 37527754SJeff.Bonwick@Sun.COM 37537754SJeff.Bonwick@Sun.COM /* 37547754SJeff.Bonwick@Sun.COM * See if any devices need to be probed. 37557754SJeff.Bonwick@Sun.COM */ 37567754SJeff.Bonwick@Sun.COM if (tasks & SPA_ASYNC_PROBE) { 37577754SJeff.Bonwick@Sun.COM spa_vdev_state_enter(spa); 37587754SJeff.Bonwick@Sun.COM spa_async_probe(spa, spa->spa_root_vdev); 37597754SJeff.Bonwick@Sun.COM (void) spa_vdev_state_exit(spa, NULL, 0); 37604451Seschrock } 37611544Seschrock 37621544Seschrock /* 37631544Seschrock * If any devices are done replacing, detach them. 37641544Seschrock */ 37654451Seschrock if (tasks & SPA_ASYNC_RESILVER_DONE) 37664451Seschrock spa_vdev_resilver_done(spa); 3767789Sahrens 37681544Seschrock /* 37691544Seschrock * Kick off a resilver. 37701544Seschrock */ 37717046Sahrens if (tasks & SPA_ASYNC_RESILVER) 37727046Sahrens VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER) == 0); 37731544Seschrock 37741544Seschrock /* 37751544Seschrock * Let the world know that we're done. 
37761544Seschrock */ 37771544Seschrock mutex_enter(&spa->spa_async_lock); 37781544Seschrock spa->spa_async_thread = NULL; 37791544Seschrock cv_broadcast(&spa->spa_async_cv); 37801544Seschrock mutex_exit(&spa->spa_async_lock); 37811544Seschrock thread_exit(); 37821544Seschrock } 37831544Seschrock 37841544Seschrock void 37851544Seschrock spa_async_suspend(spa_t *spa) 37861544Seschrock { 37871544Seschrock mutex_enter(&spa->spa_async_lock); 37881544Seschrock spa->spa_async_suspended++; 37891544Seschrock while (spa->spa_async_thread != NULL) 37901544Seschrock cv_wait(&spa->spa_async_cv, &spa->spa_async_lock); 37911544Seschrock mutex_exit(&spa->spa_async_lock); 37921544Seschrock } 37931544Seschrock 37941544Seschrock void 37951544Seschrock spa_async_resume(spa_t *spa) 37961544Seschrock { 37971544Seschrock mutex_enter(&spa->spa_async_lock); 37981544Seschrock ASSERT(spa->spa_async_suspended != 0); 37991544Seschrock spa->spa_async_suspended--; 38001544Seschrock mutex_exit(&spa->spa_async_lock); 38011544Seschrock } 38021544Seschrock 38031544Seschrock static void 38041544Seschrock spa_async_dispatch(spa_t *spa) 38051544Seschrock { 38061544Seschrock mutex_enter(&spa->spa_async_lock); 38071544Seschrock if (spa->spa_async_tasks && !spa->spa_async_suspended && 38081635Sbonwick spa->spa_async_thread == NULL && 38091635Sbonwick rootdir != NULL && !vn_is_readonly(rootdir)) 38101544Seschrock spa->spa_async_thread = thread_create(NULL, 0, 38111544Seschrock spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri); 38121544Seschrock mutex_exit(&spa->spa_async_lock); 38131544Seschrock } 38141544Seschrock 38151544Seschrock void 38161544Seschrock spa_async_request(spa_t *spa, int task) 38171544Seschrock { 38181544Seschrock mutex_enter(&spa->spa_async_lock); 38191544Seschrock spa->spa_async_tasks |= task; 38201544Seschrock mutex_exit(&spa->spa_async_lock); 3821789Sahrens } 3822789Sahrens 3823789Sahrens /* 3824789Sahrens * ========================================================================== 
3825789Sahrens * SPA syncing routines 3826789Sahrens * ========================================================================== 3827789Sahrens */ 3828789Sahrens 3829789Sahrens static void 3830789Sahrens spa_sync_deferred_frees(spa_t *spa, uint64_t txg) 3831789Sahrens { 3832789Sahrens bplist_t *bpl = &spa->spa_sync_bplist; 3833789Sahrens dmu_tx_t *tx; 3834789Sahrens blkptr_t blk; 3835789Sahrens uint64_t itor = 0; 3836789Sahrens zio_t *zio; 3837789Sahrens int error; 3838789Sahrens uint8_t c = 1; 3839789Sahrens 38407754SJeff.Bonwick@Sun.COM zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); 38417754SJeff.Bonwick@Sun.COM 38427754SJeff.Bonwick@Sun.COM while (bplist_iterate(bpl, &itor, &blk) == 0) { 38437754SJeff.Bonwick@Sun.COM ASSERT(blk.blk_birth < txg); 38447754SJeff.Bonwick@Sun.COM zio_nowait(zio_free(zio, spa, txg, &blk, NULL, NULL, 38457754SJeff.Bonwick@Sun.COM ZIO_FLAG_MUSTSUCCEED)); 38467754SJeff.Bonwick@Sun.COM } 3847789Sahrens 3848789Sahrens error = zio_wait(zio); 3849789Sahrens ASSERT3U(error, ==, 0); 3850789Sahrens 3851789Sahrens tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); 3852789Sahrens bplist_vacate(bpl, tx); 3853789Sahrens 3854789Sahrens /* 3855789Sahrens * Pre-dirty the first block so we sync to convergence faster. 3856789Sahrens * (Usually only the first block is needed.) 
3857789Sahrens */ 3858789Sahrens dmu_write(spa->spa_meta_objset, spa->spa_sync_bplist_obj, 0, 1, &c, tx); 3859789Sahrens dmu_tx_commit(tx); 3860789Sahrens } 3861789Sahrens 3862789Sahrens static void 38632082Seschrock spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx) 38642082Seschrock { 38652082Seschrock char *packed = NULL; 38667497STim.Haley@Sun.COM size_t bufsize; 38672082Seschrock size_t nvsize = 0; 38682082Seschrock dmu_buf_t *db; 38692082Seschrock 38702082Seschrock VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0); 38712082Seschrock 38727497STim.Haley@Sun.COM /* 38737497STim.Haley@Sun.COM * Write full (SPA_CONFIG_BLOCKSIZE) blocks of configuration 38747497STim.Haley@Sun.COM * information. This avoids the dbuf_will_dirty() path and 38757497STim.Haley@Sun.COM * saves us a pre-read to get data we don't actually care about. 38767497STim.Haley@Sun.COM */ 38777497STim.Haley@Sun.COM bufsize = P2ROUNDUP(nvsize, SPA_CONFIG_BLOCKSIZE); 38787497STim.Haley@Sun.COM packed = kmem_alloc(bufsize, KM_SLEEP); 38792082Seschrock 38802082Seschrock VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR, 38812082Seschrock KM_SLEEP) == 0); 38827497STim.Haley@Sun.COM bzero(packed + nvsize, bufsize - nvsize); 38837497STim.Haley@Sun.COM 38847497STim.Haley@Sun.COM dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx); 38857497STim.Haley@Sun.COM 38867497STim.Haley@Sun.COM kmem_free(packed, bufsize); 38872082Seschrock 38882082Seschrock VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); 38892082Seschrock dmu_buf_will_dirty(db, tx); 38902082Seschrock *(uint64_t *)db->db_data = nvsize; 38912082Seschrock dmu_buf_rele(db, FTAG); 38922082Seschrock } 38932082Seschrock 38942082Seschrock static void 38955450Sbrendan spa_sync_aux_dev(spa_t *spa, spa_aux_vdev_t *sav, dmu_tx_t *tx, 38965450Sbrendan const char *config, const char *entry) 38972082Seschrock { 38982082Seschrock nvlist_t *nvroot; 38995450Sbrendan nvlist_t **list; 39002082Seschrock int i; 
39012082Seschrock 39025450Sbrendan if (!sav->sav_sync) 39032082Seschrock return; 39042082Seschrock 39052082Seschrock /* 39065450Sbrendan * Update the MOS nvlist describing the list of available devices. 39075450Sbrendan * spa_validate_aux() will have already made sure this nvlist is 39084451Seschrock * valid and the vdevs are labeled appropriately. 39092082Seschrock */ 39105450Sbrendan if (sav->sav_object == 0) { 39115450Sbrendan sav->sav_object = dmu_object_alloc(spa->spa_meta_objset, 39125450Sbrendan DMU_OT_PACKED_NVLIST, 1 << 14, DMU_OT_PACKED_NVLIST_SIZE, 39135450Sbrendan sizeof (uint64_t), tx); 39142082Seschrock VERIFY(zap_update(spa->spa_meta_objset, 39155450Sbrendan DMU_POOL_DIRECTORY_OBJECT, entry, sizeof (uint64_t), 1, 39165450Sbrendan &sav->sav_object, tx) == 0); 39172082Seschrock } 39182082Seschrock 39192082Seschrock VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); 39205450Sbrendan if (sav->sav_count == 0) { 39215450Sbrendan VERIFY(nvlist_add_nvlist_array(nvroot, config, NULL, 0) == 0); 39222082Seschrock } else { 39235450Sbrendan list = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); 39245450Sbrendan for (i = 0; i < sav->sav_count; i++) 39255450Sbrendan list[i] = vdev_config_generate(spa, sav->sav_vdevs[i], 39265450Sbrendan B_FALSE, B_FALSE, B_TRUE); 39275450Sbrendan VERIFY(nvlist_add_nvlist_array(nvroot, config, list, 39285450Sbrendan sav->sav_count) == 0); 39295450Sbrendan for (i = 0; i < sav->sav_count; i++) 39305450Sbrendan nvlist_free(list[i]); 39315450Sbrendan kmem_free(list, sav->sav_count * sizeof (void *)); 39322082Seschrock } 39332082Seschrock 39345450Sbrendan spa_sync_nvlist(spa, sav->sav_object, nvroot, tx); 39352926Sek110237 nvlist_free(nvroot); 39362082Seschrock 39375450Sbrendan sav->sav_sync = B_FALSE; 39382082Seschrock } 39392082Seschrock 39402082Seschrock static void 3941789Sahrens spa_sync_config_object(spa_t *spa, dmu_tx_t *tx) 3942789Sahrens { 3943789Sahrens nvlist_t *config; 3944789Sahrens 
39457754SJeff.Bonwick@Sun.COM if (list_is_empty(&spa->spa_config_dirty_list)) 3946789Sahrens return; 3947789Sahrens 39487754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 39497754SJeff.Bonwick@Sun.COM 39507754SJeff.Bonwick@Sun.COM config = spa_config_generate(spa, spa->spa_root_vdev, 39517754SJeff.Bonwick@Sun.COM dmu_tx_get_txg(tx), B_FALSE); 39527754SJeff.Bonwick@Sun.COM 39537754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_STATE, FTAG); 3954789Sahrens 39551635Sbonwick if (spa->spa_config_syncing) 39561635Sbonwick nvlist_free(spa->spa_config_syncing); 39571635Sbonwick spa->spa_config_syncing = config; 3958789Sahrens 39592082Seschrock spa_sync_nvlist(spa, spa->spa_config_object, config, tx); 3960789Sahrens } 3961789Sahrens 39625094Slling /* 39635094Slling * Set zpool properties. 39645094Slling */ 39653912Slling static void 39664543Smarks spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 39673912Slling { 39683912Slling spa_t *spa = arg1; 39695094Slling objset_t *mos = spa->spa_meta_objset; 39703912Slling nvlist_t *nvp = arg2; 39715094Slling nvpair_t *elem; 39724451Seschrock uint64_t intval; 39736643Seschrock char *strval; 39745094Slling zpool_prop_t prop; 39755094Slling const char *propname; 39765094Slling zprop_type_t proptype; 39775094Slling 39787754SJeff.Bonwick@Sun.COM mutex_enter(&spa->spa_props_lock); 39797754SJeff.Bonwick@Sun.COM 39805094Slling elem = NULL; 39815094Slling while ((elem = nvlist_next_nvpair(nvp, elem))) { 39825094Slling switch (prop = zpool_name_to_prop(nvpair_name(elem))) { 39835094Slling case ZPOOL_PROP_VERSION: 39845094Slling /* 39855094Slling * Only set version for non-zpool-creation cases 39865094Slling * (set/import). spa_create() needs special care 39875094Slling * for version setting. 
39885094Slling */ 39895094Slling if (tx->tx_txg != TXG_INITIAL) { 39905094Slling VERIFY(nvpair_value_uint64(elem, 39915094Slling &intval) == 0); 39925094Slling ASSERT(intval <= SPA_VERSION); 39935094Slling ASSERT(intval >= spa_version(spa)); 39945094Slling spa->spa_uberblock.ub_version = intval; 39955094Slling vdev_config_dirty(spa->spa_root_vdev); 39965094Slling } 39975094Slling break; 39985094Slling 39995094Slling case ZPOOL_PROP_ALTROOT: 40005094Slling /* 40015094Slling * 'altroot' is a non-persistent property. It should 40025094Slling * have been set temporarily at creation or import time. 40035094Slling */ 40045094Slling ASSERT(spa->spa_root != NULL); 40055094Slling break; 40065094Slling 40075363Seschrock case ZPOOL_PROP_CACHEFILE: 40085094Slling /* 40098525SEric.Schrock@Sun.COM * 'cachefile' is also a non-persisitent property. 40105094Slling */ 40114543Smarks break; 40125094Slling default: 40135094Slling /* 40145094Slling * Set pool property values in the poolprops mos object. 40155094Slling */ 40165094Slling if (spa->spa_pool_props_object == 0) { 40175094Slling objset_t *mos = spa->spa_meta_objset; 40185094Slling 40195094Slling VERIFY((spa->spa_pool_props_object = 40205094Slling zap_create(mos, DMU_OT_POOL_PROPS, 40215094Slling DMU_OT_NONE, 0, tx)) > 0); 40225094Slling 40235094Slling VERIFY(zap_update(mos, 40245094Slling DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS, 40255094Slling 8, 1, &spa->spa_pool_props_object, tx) 40265094Slling == 0); 40275094Slling } 40285094Slling 40295094Slling /* normalize the property name */ 40305094Slling propname = zpool_prop_to_name(prop); 40315094Slling proptype = zpool_prop_get_type(prop); 40325094Slling 40335094Slling if (nvpair_type(elem) == DATA_TYPE_STRING) { 40345094Slling ASSERT(proptype == PROP_TYPE_STRING); 40355094Slling VERIFY(nvpair_value_string(elem, &strval) == 0); 40365094Slling VERIFY(zap_update(mos, 40375094Slling spa->spa_pool_props_object, propname, 40385094Slling 1, strlen(strval) + 1, strval, tx) == 0); 
40395094Slling 40405094Slling } else if (nvpair_type(elem) == DATA_TYPE_UINT64) { 40415094Slling VERIFY(nvpair_value_uint64(elem, &intval) == 0); 40425094Slling 40435094Slling if (proptype == PROP_TYPE_INDEX) { 40445094Slling const char *unused; 40455094Slling VERIFY(zpool_prop_index_to_string( 40465094Slling prop, intval, &unused) == 0); 40475094Slling } 40485094Slling VERIFY(zap_update(mos, 40495094Slling spa->spa_pool_props_object, propname, 40505094Slling 8, 1, &intval, tx) == 0); 40515094Slling } else { 40525094Slling ASSERT(0); /* not allowed */ 40535094Slling } 40545094Slling 40555329Sgw25295 switch (prop) { 40565329Sgw25295 case ZPOOL_PROP_DELEGATION: 40575094Slling spa->spa_delegation = intval; 40585329Sgw25295 break; 40595329Sgw25295 case ZPOOL_PROP_BOOTFS: 40605094Slling spa->spa_bootfs = intval; 40615329Sgw25295 break; 40625329Sgw25295 case ZPOOL_PROP_FAILUREMODE: 40635329Sgw25295 spa->spa_failmode = intval; 40645329Sgw25295 break; 40655329Sgw25295 default: 40665329Sgw25295 break; 40675329Sgw25295 } 40683912Slling } 40695094Slling 40705094Slling /* log internal history if this is not a zpool create */ 40715094Slling if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY && 40725094Slling tx->tx_txg != TXG_INITIAL) { 40735094Slling spa_history_internal_log(LOG_POOL_PROPSET, 40745094Slling spa, tx, cr, "%s %lld %s", 40757754SJeff.Bonwick@Sun.COM nvpair_name(elem), intval, spa_name(spa)); 40765094Slling } 40773912Slling } 40787754SJeff.Bonwick@Sun.COM 40797754SJeff.Bonwick@Sun.COM mutex_exit(&spa->spa_props_lock); 40803912Slling } 40813912Slling 4082789Sahrens /* 4083789Sahrens * Sync the specified transaction group. New blocks may be dirtied as 4084789Sahrens * part of the process, so we iterate until it converges. 
4085789Sahrens */ 4086789Sahrens void 4087789Sahrens spa_sync(spa_t *spa, uint64_t txg) 4088789Sahrens { 4089789Sahrens dsl_pool_t *dp = spa->spa_dsl_pool; 4090789Sahrens objset_t *mos = spa->spa_meta_objset; 4091789Sahrens bplist_t *bpl = &spa->spa_sync_bplist; 40921635Sbonwick vdev_t *rvd = spa->spa_root_vdev; 4093789Sahrens vdev_t *vd; 4094789Sahrens dmu_tx_t *tx; 4095789Sahrens int dirty_vdevs; 40967754SJeff.Bonwick@Sun.COM int error; 4097789Sahrens 4098789Sahrens /* 4099789Sahrens * Lock out configuration changes. 4100789Sahrens */ 41017754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 4102789Sahrens 4103789Sahrens spa->spa_syncing_txg = txg; 4104789Sahrens spa->spa_sync_pass = 0; 4105789Sahrens 41067754SJeff.Bonwick@Sun.COM /* 41077754SJeff.Bonwick@Sun.COM * If there are any pending vdev state changes, convert them 41087754SJeff.Bonwick@Sun.COM * into config changes that go out with this transaction group. 41097754SJeff.Bonwick@Sun.COM */ 41107754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 41118241SJeff.Bonwick@Sun.COM while (list_head(&spa->spa_state_dirty_list) != NULL) { 41128241SJeff.Bonwick@Sun.COM /* 41138241SJeff.Bonwick@Sun.COM * We need the write lock here because, for aux vdevs, 41148241SJeff.Bonwick@Sun.COM * calling vdev_config_dirty() modifies sav_config. 41158241SJeff.Bonwick@Sun.COM * This is ugly and will become unnecessary when we 41168241SJeff.Bonwick@Sun.COM * eliminate the aux vdev wart by integrating all vdevs 41178241SJeff.Bonwick@Sun.COM * into the root vdev tree. 
41188241SJeff.Bonwick@Sun.COM */ 41198241SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); 41208241SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_WRITER); 41218241SJeff.Bonwick@Sun.COM while ((vd = list_head(&spa->spa_state_dirty_list)) != NULL) { 41228241SJeff.Bonwick@Sun.COM vdev_state_clean(vd); 41238241SJeff.Bonwick@Sun.COM vdev_config_dirty(vd); 41248241SJeff.Bonwick@Sun.COM } 41258241SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); 41268241SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER); 41277754SJeff.Bonwick@Sun.COM } 41287754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_STATE, FTAG); 41297754SJeff.Bonwick@Sun.COM 41301544Seschrock VERIFY(0 == bplist_open(bpl, mos, spa->spa_sync_bplist_obj)); 4131789Sahrens 41322082Seschrock tx = dmu_tx_create_assigned(dp, txg); 41332082Seschrock 41342082Seschrock /* 41354577Sahrens * If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg, 41362082Seschrock * set spa_deflate if we have no raid-z vdevs. 
41372082Seschrock */ 41384577Sahrens if (spa->spa_ubsync.ub_version < SPA_VERSION_RAIDZ_DEFLATE && 41394577Sahrens spa->spa_uberblock.ub_version >= SPA_VERSION_RAIDZ_DEFLATE) { 41402082Seschrock int i; 41412082Seschrock 41422082Seschrock for (i = 0; i < rvd->vdev_children; i++) { 41432082Seschrock vd = rvd->vdev_child[i]; 41442082Seschrock if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE) 41452082Seschrock break; 41462082Seschrock } 41472082Seschrock if (i == rvd->vdev_children) { 41482082Seschrock spa->spa_deflate = TRUE; 41492082Seschrock VERIFY(0 == zap_add(spa->spa_meta_objset, 41502082Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 41512082Seschrock sizeof (uint64_t), 1, &spa->spa_deflate, tx)); 41522082Seschrock } 41532082Seschrock } 41542082Seschrock 41557046Sahrens if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN && 41567046Sahrens spa->spa_uberblock.ub_version >= SPA_VERSION_ORIGIN) { 41577046Sahrens dsl_pool_create_origin(dp, tx); 41587046Sahrens 41597046Sahrens /* Keeping the origin open increases spa_minref */ 41607046Sahrens spa->spa_minref += 3; 41617046Sahrens } 41627046Sahrens 41637046Sahrens if (spa->spa_ubsync.ub_version < SPA_VERSION_NEXT_CLONES && 41647046Sahrens spa->spa_uberblock.ub_version >= SPA_VERSION_NEXT_CLONES) { 41657046Sahrens dsl_pool_upgrade_clones(dp, tx); 41667046Sahrens } 41677046Sahrens 4168789Sahrens /* 4169789Sahrens * If anything has changed in this txg, push the deferred frees 4170789Sahrens * from the previous txg. If not, leave them alone so that we 4171789Sahrens * don't generate work on an otherwise idle system. 4172789Sahrens */ 4173789Sahrens if (!txg_list_empty(&dp->dp_dirty_datasets, txg) || 41742329Sek110237 !txg_list_empty(&dp->dp_dirty_dirs, txg) || 41752329Sek110237 !txg_list_empty(&dp->dp_sync_tasks, txg)) 4176789Sahrens spa_sync_deferred_frees(spa, txg); 4177789Sahrens 4178789Sahrens /* 4179789Sahrens * Iterate to convergence. 
4180789Sahrens */ 4181789Sahrens do { 4182789Sahrens spa->spa_sync_pass++; 4183789Sahrens 4184789Sahrens spa_sync_config_object(spa, tx); 41855450Sbrendan spa_sync_aux_dev(spa, &spa->spa_spares, tx, 41865450Sbrendan ZPOOL_CONFIG_SPARES, DMU_POOL_SPARES); 41875450Sbrendan spa_sync_aux_dev(spa, &spa->spa_l2cache, tx, 41885450Sbrendan ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE); 41891544Seschrock spa_errlog_sync(spa, txg); 4190789Sahrens dsl_pool_sync(dp, txg); 4191789Sahrens 4192789Sahrens dirty_vdevs = 0; 4193789Sahrens while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg)) { 4194789Sahrens vdev_sync(vd, txg); 4195789Sahrens dirty_vdevs++; 4196789Sahrens } 4197789Sahrens 4198789Sahrens bplist_sync(bpl, tx); 4199789Sahrens } while (dirty_vdevs); 4200789Sahrens 4201789Sahrens bplist_close(bpl); 4202789Sahrens 4203789Sahrens dprintf("txg %llu passes %d\n", txg, spa->spa_sync_pass); 4204789Sahrens 4205789Sahrens /* 4206789Sahrens * Rewrite the vdev configuration (which includes the uberblock) 4207789Sahrens * to commit the transaction group. 42081635Sbonwick * 42095688Sbonwick * If there are no dirty vdevs, we sync the uberblock to a few 42105688Sbonwick * random top-level vdevs that are known to be visible in the 42117754SJeff.Bonwick@Sun.COM * config cache (see spa_vdev_add() for a complete description). 42127754SJeff.Bonwick@Sun.COM * If there *are* dirty vdevs, sync the uberblock to all vdevs. 4213789Sahrens */ 42147754SJeff.Bonwick@Sun.COM for (;;) { 42157754SJeff.Bonwick@Sun.COM /* 42167754SJeff.Bonwick@Sun.COM * We hold SCL_STATE to prevent vdev open/close/etc. 42177754SJeff.Bonwick@Sun.COM * while we're attempting to write the vdev labels. 
42187754SJeff.Bonwick@Sun.COM */ 42197754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 42207754SJeff.Bonwick@Sun.COM 42217754SJeff.Bonwick@Sun.COM if (list_is_empty(&spa->spa_config_dirty_list)) { 42227754SJeff.Bonwick@Sun.COM vdev_t *svd[SPA_DVAS_PER_BP]; 42237754SJeff.Bonwick@Sun.COM int svdcount = 0; 42247754SJeff.Bonwick@Sun.COM int children = rvd->vdev_children; 42257754SJeff.Bonwick@Sun.COM int c0 = spa_get_random(children); 42267754SJeff.Bonwick@Sun.COM int c; 42277754SJeff.Bonwick@Sun.COM 42287754SJeff.Bonwick@Sun.COM for (c = 0; c < children; c++) { 42297754SJeff.Bonwick@Sun.COM vd = rvd->vdev_child[(c0 + c) % children]; 42307754SJeff.Bonwick@Sun.COM if (vd->vdev_ms_array == 0 || vd->vdev_islog) 42317754SJeff.Bonwick@Sun.COM continue; 42327754SJeff.Bonwick@Sun.COM svd[svdcount++] = vd; 42337754SJeff.Bonwick@Sun.COM if (svdcount == SPA_DVAS_PER_BP) 42347754SJeff.Bonwick@Sun.COM break; 42357754SJeff.Bonwick@Sun.COM } 4236*9725SEric.Schrock@Sun.COM error = vdev_config_sync(svd, svdcount, txg, B_FALSE); 4237*9725SEric.Schrock@Sun.COM if (error != 0) 4238*9725SEric.Schrock@Sun.COM error = vdev_config_sync(svd, svdcount, txg, 4239*9725SEric.Schrock@Sun.COM B_TRUE); 42407754SJeff.Bonwick@Sun.COM } else { 42417754SJeff.Bonwick@Sun.COM error = vdev_config_sync(rvd->vdev_child, 4242*9725SEric.Schrock@Sun.COM rvd->vdev_children, txg, B_FALSE); 4243*9725SEric.Schrock@Sun.COM if (error != 0) 4244*9725SEric.Schrock@Sun.COM error = vdev_config_sync(rvd->vdev_child, 4245*9725SEric.Schrock@Sun.COM rvd->vdev_children, txg, B_TRUE); 42461635Sbonwick } 42477754SJeff.Bonwick@Sun.COM 42487754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_STATE, FTAG); 42497754SJeff.Bonwick@Sun.COM 42507754SJeff.Bonwick@Sun.COM if (error == 0) 42517754SJeff.Bonwick@Sun.COM break; 42527754SJeff.Bonwick@Sun.COM zio_suspend(spa, NULL); 42537754SJeff.Bonwick@Sun.COM zio_resume_wait(spa); 42541635Sbonwick } 42552082Seschrock dmu_tx_commit(tx); 42562082Seschrock 
42571635Sbonwick /* 42581635Sbonwick * Clear the dirty config list. 42591635Sbonwick */ 42607754SJeff.Bonwick@Sun.COM while ((vd = list_head(&spa->spa_config_dirty_list)) != NULL) 42611635Sbonwick vdev_config_clean(vd); 42621635Sbonwick 42631635Sbonwick /* 42641635Sbonwick * Now that the new config has synced transactionally, 42651635Sbonwick * let it become visible to the config cache. 42661635Sbonwick */ 42671635Sbonwick if (spa->spa_config_syncing != NULL) { 42681635Sbonwick spa_config_set(spa, spa->spa_config_syncing); 42691635Sbonwick spa->spa_config_txg = txg; 42701635Sbonwick spa->spa_config_syncing = NULL; 42711635Sbonwick } 4272789Sahrens 4273789Sahrens spa->spa_ubsync = spa->spa_uberblock; 4274789Sahrens 4275789Sahrens /* 4276789Sahrens * Clean up the ZIL records for the synced txg. 4277789Sahrens */ 4278789Sahrens dsl_pool_zil_clean(dp); 4279789Sahrens 4280789Sahrens /* 4281789Sahrens * Update usable space statistics. 4282789Sahrens */ 4283789Sahrens while (vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg))) 4284789Sahrens vdev_sync_done(vd, txg); 4285789Sahrens 4286789Sahrens /* 4287789Sahrens * It had better be the case that we didn't dirty anything 42882082Seschrock * since vdev_config_sync(). 4289789Sahrens */ 4290789Sahrens ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg)); 4291789Sahrens ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg)); 4292789Sahrens ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg)); 4293789Sahrens ASSERT(bpl->bpl_queue == NULL); 4294789Sahrens 42957754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_CONFIG, FTAG); 42961544Seschrock 42971544Seschrock /* 42981544Seschrock * If any async tasks have been requested, kick them off. 42991544Seschrock */ 43001544Seschrock spa_async_dispatch(spa); 4301789Sahrens } 4302789Sahrens 4303789Sahrens /* 4304789Sahrens * Sync all pools. 
We don't want to hold the namespace lock across these 4305789Sahrens * operations, so we take a reference on the spa_t and drop the lock during the 4306789Sahrens * sync. 4307789Sahrens */ 4308789Sahrens void 4309789Sahrens spa_sync_allpools(void) 4310789Sahrens { 4311789Sahrens spa_t *spa = NULL; 4312789Sahrens mutex_enter(&spa_namespace_lock); 4313789Sahrens while ((spa = spa_next(spa)) != NULL) { 43147754SJeff.Bonwick@Sun.COM if (spa_state(spa) != POOL_STATE_ACTIVE || spa_suspended(spa)) 4315789Sahrens continue; 4316789Sahrens spa_open_ref(spa, FTAG); 4317789Sahrens mutex_exit(&spa_namespace_lock); 4318789Sahrens txg_wait_synced(spa_get_dsl(spa), 0); 4319789Sahrens mutex_enter(&spa_namespace_lock); 4320789Sahrens spa_close(spa, FTAG); 4321789Sahrens } 4322789Sahrens mutex_exit(&spa_namespace_lock); 4323789Sahrens } 4324789Sahrens 4325789Sahrens /* 4326789Sahrens * ========================================================================== 4327789Sahrens * Miscellaneous routines 4328789Sahrens * ========================================================================== 4329789Sahrens */ 4330789Sahrens 4331789Sahrens /* 4332789Sahrens * Remove all pools in the system. 4333789Sahrens */ 4334789Sahrens void 4335789Sahrens spa_evict_all(void) 4336789Sahrens { 4337789Sahrens spa_t *spa; 4338789Sahrens 4339789Sahrens /* 4340789Sahrens * Remove all cached state. All pools should be closed now, 4341789Sahrens * so every spa in the AVL tree should be unreferenced. 4342789Sahrens */ 4343789Sahrens mutex_enter(&spa_namespace_lock); 4344789Sahrens while ((spa = spa_next(NULL)) != NULL) { 4345789Sahrens /* 43461544Seschrock * Stop async tasks. The async thread may need to detach 43471544Seschrock * a device that's been replaced, which requires grabbing 43481544Seschrock * spa_namespace_lock, so we must drop it here. 
4349789Sahrens */ 4350789Sahrens spa_open_ref(spa, FTAG); 4351789Sahrens mutex_exit(&spa_namespace_lock); 43521544Seschrock spa_async_suspend(spa); 43534808Sek110237 mutex_enter(&spa_namespace_lock); 4354789Sahrens spa_close(spa, FTAG); 4355789Sahrens 4356789Sahrens if (spa->spa_state != POOL_STATE_UNINITIALIZED) { 4357789Sahrens spa_unload(spa); 4358789Sahrens spa_deactivate(spa); 4359789Sahrens } 4360789Sahrens spa_remove(spa); 4361789Sahrens } 4362789Sahrens mutex_exit(&spa_namespace_lock); 4363789Sahrens } 43641544Seschrock 43651544Seschrock vdev_t * 43669425SEric.Schrock@Sun.COM spa_lookup_by_guid(spa_t *spa, uint64_t guid, boolean_t aux) 43671544Seschrock { 43686643Seschrock vdev_t *vd; 43696643Seschrock int i; 43706643Seschrock 43716643Seschrock if ((vd = vdev_lookup_by_guid(spa->spa_root_vdev, guid)) != NULL) 43726643Seschrock return (vd); 43736643Seschrock 43749425SEric.Schrock@Sun.COM if (aux) { 43756643Seschrock for (i = 0; i < spa->spa_l2cache.sav_count; i++) { 43766643Seschrock vd = spa->spa_l2cache.sav_vdevs[i]; 43776643Seschrock if (vd->vdev_guid == guid) 43786643Seschrock return (vd); 43796643Seschrock } 43809425SEric.Schrock@Sun.COM 43819425SEric.Schrock@Sun.COM for (i = 0; i < spa->spa_spares.sav_count; i++) { 43829425SEric.Schrock@Sun.COM vd = spa->spa_spares.sav_vdevs[i]; 43839425SEric.Schrock@Sun.COM if (vd->vdev_guid == guid) 43849425SEric.Schrock@Sun.COM return (vd); 43859425SEric.Schrock@Sun.COM } 43866643Seschrock } 43876643Seschrock 43886643Seschrock return (NULL); 43891544Seschrock } 43901760Seschrock 43911760Seschrock void 43925094Slling spa_upgrade(spa_t *spa, uint64_t version) 43931760Seschrock { 43947754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 43951760Seschrock 43961760Seschrock /* 43971760Seschrock * This should only be called for a non-faulted pool, and since a 43981760Seschrock * future version would result in an unopenable pool, this shouldn't be 43991760Seschrock * possible. 
44001760Seschrock */ 44014577Sahrens ASSERT(spa->spa_uberblock.ub_version <= SPA_VERSION); 44025094Slling ASSERT(version >= spa->spa_uberblock.ub_version); 44035094Slling 44045094Slling spa->spa_uberblock.ub_version = version; 44051760Seschrock vdev_config_dirty(spa->spa_root_vdev); 44061760Seschrock 44077754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 44082082Seschrock 44092082Seschrock txg_wait_synced(spa_get_dsl(spa), 0); 44101760Seschrock } 44112082Seschrock 44122082Seschrock boolean_t 44132082Seschrock spa_has_spare(spa_t *spa, uint64_t guid) 44142082Seschrock { 44152082Seschrock int i; 44163377Seschrock uint64_t spareguid; 44175450Sbrendan spa_aux_vdev_t *sav = &spa->spa_spares; 44185450Sbrendan 44195450Sbrendan for (i = 0; i < sav->sav_count; i++) 44205450Sbrendan if (sav->sav_vdevs[i]->vdev_guid == guid) 44212082Seschrock return (B_TRUE); 44222082Seschrock 44235450Sbrendan for (i = 0; i < sav->sav_npending; i++) { 44245450Sbrendan if (nvlist_lookup_uint64(sav->sav_pending[i], ZPOOL_CONFIG_GUID, 44255450Sbrendan &spareguid) == 0 && spareguid == guid) 44263377Seschrock return (B_TRUE); 44273377Seschrock } 44283377Seschrock 44292082Seschrock return (B_FALSE); 44302082Seschrock } 44313912Slling 44324451Seschrock /* 44337214Slling * Check if a pool has an active shared spare device. 
44347214Slling * Note: reference count of an active spare is 2, as a spare and as a replace 44357214Slling */ 44367214Slling static boolean_t 44377214Slling spa_has_active_shared_spare(spa_t *spa) 44387214Slling { 44397214Slling int i, refcnt; 44407214Slling uint64_t pool; 44417214Slling spa_aux_vdev_t *sav = &spa->spa_spares; 44427214Slling 44437214Slling for (i = 0; i < sav->sav_count; i++) { 44447214Slling if (spa_spare_exists(sav->sav_vdevs[i]->vdev_guid, &pool, 44457214Slling &refcnt) && pool != 0ULL && pool == spa_guid(spa) && 44467214Slling refcnt > 2) 44477214Slling return (B_TRUE); 44487214Slling } 44497214Slling 44507214Slling return (B_FALSE); 44517214Slling } 44527214Slling 44537214Slling /* 44544451Seschrock * Post a sysevent corresponding to the given event. The 'name' must be one of 44554451Seschrock * the event definitions in sys/sysevent/eventdefs.h. The payload will be 44564451Seschrock * filled in from the spa and (optionally) the vdev. This doesn't do anything 44574451Seschrock * in the userland libzpool, as we don't want consumers to misinterpret ztest 44584451Seschrock * or zdb as real changes. 
44594451Seschrock */ 44604451Seschrock void 44614451Seschrock spa_event_notify(spa_t *spa, vdev_t *vd, const char *name) 44624451Seschrock { 44634451Seschrock #ifdef _KERNEL 44644451Seschrock sysevent_t *ev; 44654451Seschrock sysevent_attr_list_t *attr = NULL; 44664451Seschrock sysevent_value_t value; 44674451Seschrock sysevent_id_t eid; 44684451Seschrock 44694451Seschrock ev = sysevent_alloc(EC_ZFS, (char *)name, SUNW_KERN_PUB "zfs", 44704451Seschrock SE_SLEEP); 44714451Seschrock 44724451Seschrock value.value_type = SE_DATA_TYPE_STRING; 44734451Seschrock value.value.sv_string = spa_name(spa); 44744451Seschrock if (sysevent_add_attr(&attr, ZFS_EV_POOL_NAME, &value, SE_SLEEP) != 0) 44754451Seschrock goto done; 44764451Seschrock 44774451Seschrock value.value_type = SE_DATA_TYPE_UINT64; 44784451Seschrock value.value.sv_uint64 = spa_guid(spa); 44794451Seschrock if (sysevent_add_attr(&attr, ZFS_EV_POOL_GUID, &value, SE_SLEEP) != 0) 44804451Seschrock goto done; 44814451Seschrock 44824451Seschrock if (vd) { 44834451Seschrock value.value_type = SE_DATA_TYPE_UINT64; 44844451Seschrock value.value.sv_uint64 = vd->vdev_guid; 44854451Seschrock if (sysevent_add_attr(&attr, ZFS_EV_VDEV_GUID, &value, 44864451Seschrock SE_SLEEP) != 0) 44874451Seschrock goto done; 44884451Seschrock 44894451Seschrock if (vd->vdev_path) { 44904451Seschrock value.value_type = SE_DATA_TYPE_STRING; 44914451Seschrock value.value.sv_string = vd->vdev_path; 44924451Seschrock if (sysevent_add_attr(&attr, ZFS_EV_VDEV_PATH, 44934451Seschrock &value, SE_SLEEP) != 0) 44944451Seschrock goto done; 44954451Seschrock } 44964451Seschrock } 44974451Seschrock 44985756Seschrock if (sysevent_attach_attributes(ev, attr) != 0) 44995756Seschrock goto done; 45005756Seschrock attr = NULL; 45015756Seschrock 45024451Seschrock (void) log_sysevent(ev, SE_SLEEP, &eid); 45034451Seschrock 45044451Seschrock done: 45054451Seschrock if (attr) 45064451Seschrock sysevent_free_attr(attr); 45074451Seschrock sysevent_free(ev); 
45084451Seschrock #endif 45094451Seschrock } 4510