1789Sahrens /* 2789Sahrens * CDDL HEADER START 3789Sahrens * 4789Sahrens * The contents of this file are subject to the terms of the 51544Seschrock * Common Development and Distribution License (the "License"). 61544Seschrock * You may not use this file except in compliance with the License. 7789Sahrens * 8789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9789Sahrens * or http://www.opensolaris.org/os/licensing. 10789Sahrens * See the License for the specific language governing permissions 11789Sahrens * and limitations under the License. 12789Sahrens * 13789Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15789Sahrens * If applicable, add the following below this CDDL HEADER, with the 16789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18789Sahrens * 19789Sahrens * CDDL HEADER END 20789Sahrens */ 212082Seschrock 22789Sahrens /* 238525SEric.Schrock@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24789Sahrens * Use is subject to license terms. 25789Sahrens */ 26789Sahrens 27789Sahrens /* 28789Sahrens * This file contains all the routines used when modifying on-disk SPA state. 29789Sahrens * This includes opening, importing, destroying, exporting a pool, and syncing a 30789Sahrens * pool. 
 */

#include <sys/zfs_context.h>
#include <sys/fm/fs/zfs.h>
#include <sys/spa_impl.h>
#include <sys/zio.h>
#include <sys/zio_checksum.h>
#include <sys/zio_compress.h>
#include <sys/dmu.h>
#include <sys/dmu_tx.h>
#include <sys/zap.h>
#include <sys/zil.h>
#include <sys/vdev_impl.h>
#include <sys/metaslab.h>
#include <sys/uberblock_impl.h>
#include <sys/txg.h>
#include <sys/avl.h>
#include <sys/dmu_traverse.h>
#include <sys/dmu_objset.h>
#include <sys/unique.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_synctask.h>
#include <sys/fs/zfs.h>
#include <sys/arc.h>
#include <sys/callb.h>
#include <sys/systeminfo.h>
#include <sys/sunddi.h>
#include <sys/spa_boot.h>

#ifdef _KERNEL
#include <sys/zone.h>
#endif	/* _KERNEL */

#include "zfs_prop.h"
#include "zfs_comutil.h"

/*
 * How the thread count of a zio taskq is to be interpreted.
 */
enum zti_modes {
	zti_mode_fixed,			/* value is # of threads (min 1) */
	zti_mode_online_percent,	/* value is % of online CPUs */
	zti_mode_tune,			/* fill from zio_taskq_tune_* */
	zti_nmodes			/* marker: number of modes above */
};

/* Shorthand initializers for the per-taskq {mode, value} pairs below. */
#define	ZTI_THREAD_FIX(n)	{ zti_mode_fixed, (n) }
#define	ZTI_THREAD_PCT(n)	{ zti_mode_online_percent, (n) }
#define	ZTI_THREAD_TUNE		{ zti_mode_tune, 0 }

#define	ZTI_THREAD_ONE		ZTI_THREAD_FIX(1)

/*
 * Per-zio-type taskq configuration: a name prefix plus one {mode, value}
 * pair for each taskq type (issue, intr).
 */
typedef struct zio_taskq_info {
	const char *zti_name;
	struct {
		enum zti_modes zti_mode;
		uint_t zti_value;
	} zti_nthreads[ZIO_TASKQ_TYPES];
} zio_taskq_info_t;

/* Suffixes appended to zti_name when the taskqs are created. */
static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = {
	"issue", "intr"
};

/*
 * Thread configuration for each zio type.  Reads and writes get the
 * tunable treatment on their heavy side (intr for reads, issue for
 * writes); everything else runs single-threaded.
 */
const zio_taskq_info_t zio_taskqs[ZIO_TYPES] = {
	/*			ISSUE			INTR */
	{ "spa_zio_null",	{ ZTI_THREAD_ONE,	ZTI_THREAD_ONE } },
	{ "spa_zio_read",	{ ZTI_THREAD_FIX(8),	ZTI_THREAD_TUNE } },
	{ "spa_zio_write",	{ ZTI_THREAD_TUNE,	ZTI_THREAD_FIX(8) } },
	{ "spa_zio_free",	{ ZTI_THREAD_ONE,	ZTI_THREAD_ONE } },
	{ "spa_zio_claim",	{ ZTI_THREAD_ONE,	ZTI_THREAD_ONE } },
	{ "spa_zio_ioctl",	{ ZTI_THREAD_ONE,	ZTI_THREAD_ONE } },
};

/*
 * Tunables consulted for entries marked ZTI_THREAD_TUNE.  Defaults to
 * sizing the taskq at 80% of the online CPUs.
 */
enum zti_modes zio_taskq_tune_mode = zti_mode_online_percent;
uint_t zio_taskq_tune_value = 80;	/* #threads = 80% of # online CPUs */

static void spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx);
static boolean_t spa_has_active_shared_spare(spa_t *spa);
/*
 * ==========================================================================
 * SPA properties routines
 * ==========================================================================
 */

/*
 * Add a (source=src, propname=propval) list to an nvlist.
 *
 * Builds a nested nvlist containing the property's source and value
 * (string if strval != NULL, otherwise the uint64 intval) and adds it
 * to 'nvl' under the property's canonical name.  The temporary nvlist
 * is freed before return; nvlist_add_nvlist() copies it into 'nvl'.
 */
static void
spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval,
    uint64_t intval, zprop_source_t src)
{
	const char *propname = zpool_prop_to_name(prop);
	nvlist_t *propval;

	VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0);

	if (strval != NULL)
		VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0);
	else
		VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0);

	VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0);
	nvlist_free(propval);
}
/*
 * Get property values from the spa configuration.
 *
 * Appends the config-derived properties (name, sizes, capacity, health,
 * version, guid, altroot, cachefile) to *nvp.  Caller must hold
 * spa_props_lock.  Size/capacity/health entries are only emitted once a
 * root vdev exists (i.e. the pool config has been loaded).
 */
static void
spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
{
	uint64_t size;
	uint64_t used;
	uint64_t cap, version;
	zprop_source_t src = ZPROP_SRC_NONE;
	spa_config_dirent_t *dp;

	ASSERT(MUTEX_HELD(&spa->spa_props_lock));

	if (spa->spa_root_vdev != NULL) {
		size = spa_get_space(spa);
		used = spa_get_alloc(spa);
		spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src);
		spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src);
		spa_prop_add_list(*nvp, ZPOOL_PROP_USED, NULL, used, src);
		spa_prop_add_list(*nvp, ZPOOL_PROP_AVAILABLE, NULL,
		    size - used, src);

		/* Guard against division by zero on an empty pool. */
		cap = (size == 0) ? 0 : (used * 100 / size);
		spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src);

		spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL,
		    spa->spa_root_vdev->vdev_state, src);

		/*
		 * Note: 'src' is deliberately re-used below; the GUID
		 * entry after this block inherits the version's source.
		 */
		version = spa_version(spa);
		if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION))
			src = ZPROP_SRC_DEFAULT;
		else
			src = ZPROP_SRC_LOCAL;
		spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src);
	}

	spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src);

	if (spa->spa_root != NULL)
		spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root,
		    0, ZPROP_SRC_LOCAL);

	/*
	 * Report a non-default cachefile setting: "none" if caching is
	 * disabled, or the path if it differs from spa_config_path.
	 */
	if ((dp = list_head(&spa->spa_config_list)) != NULL) {
		if (dp->scd_path == NULL) {
			spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE,
			    "none", 0, ZPROP_SRC_LOCAL);
		} else if (strcmp(dp->scd_path, spa_config_path) != 0) {
			spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE,
			    dp->scd_path, 0, ZPROP_SRC_LOCAL);
		}
	}
}
1955094Slling */ 1965094Slling int 1975094Slling spa_prop_get(spa_t *spa, nvlist_t **nvp) 1985094Slling { 1995094Slling zap_cursor_t zc; 2005094Slling zap_attribute_t za; 2015094Slling objset_t *mos = spa->spa_meta_objset; 2025094Slling int err; 2035094Slling 2045949Slling VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); 2055094Slling 2067754SJeff.Bonwick@Sun.COM mutex_enter(&spa->spa_props_lock); 2077754SJeff.Bonwick@Sun.COM 2085094Slling /* 2095094Slling * Get properties from the spa config. 2105094Slling */ 2115949Slling spa_prop_get_config(spa, nvp); 2125094Slling 2135094Slling /* If no pool property object, no more prop to get. */ 2145094Slling if (spa->spa_pool_props_object == 0) { 2155094Slling mutex_exit(&spa->spa_props_lock); 2165094Slling return (0); 2175094Slling } 2185094Slling 2195094Slling /* 2205094Slling * Get properties from the MOS pool property object. 2215094Slling */ 2225094Slling for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object); 2235094Slling (err = zap_cursor_retrieve(&zc, &za)) == 0; 2245094Slling zap_cursor_advance(&zc)) { 2255094Slling uint64_t intval = 0; 2265094Slling char *strval = NULL; 2275094Slling zprop_source_t src = ZPROP_SRC_DEFAULT; 2285094Slling zpool_prop_t prop; 2295094Slling 2305094Slling if ((prop = zpool_name_to_prop(za.za_name)) == ZPROP_INVAL) 2315094Slling continue; 2325094Slling 2335094Slling switch (za.za_integer_length) { 2345094Slling case 8: 2355094Slling /* integer property */ 2365094Slling if (za.za_first_integer != 2375094Slling zpool_prop_default_numeric(prop)) 2385094Slling src = ZPROP_SRC_LOCAL; 2395094Slling 2405094Slling if (prop == ZPOOL_PROP_BOOTFS) { 2415094Slling dsl_pool_t *dp; 2425094Slling dsl_dataset_t *ds = NULL; 2435094Slling 2445094Slling dp = spa_get_dsl(spa); 2455094Slling rw_enter(&dp->dp_config_rwlock, RW_READER); 2466689Smaybee if (err = dsl_dataset_hold_obj(dp, 2476689Smaybee za.za_first_integer, FTAG, &ds)) { 2485094Slling rw_exit(&dp->dp_config_rwlock); 2495094Slling 
break; 2505094Slling } 2515094Slling 2525094Slling strval = kmem_alloc( 2535094Slling MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, 2545094Slling KM_SLEEP); 2555094Slling dsl_dataset_name(ds, strval); 2566689Smaybee dsl_dataset_rele(ds, FTAG); 2575094Slling rw_exit(&dp->dp_config_rwlock); 2585094Slling } else { 2595094Slling strval = NULL; 2605094Slling intval = za.za_first_integer; 2615094Slling } 2625094Slling 2635949Slling spa_prop_add_list(*nvp, prop, strval, intval, src); 2645094Slling 2655094Slling if (strval != NULL) 2665094Slling kmem_free(strval, 2675094Slling MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); 2685094Slling 2695094Slling break; 2705094Slling 2715094Slling case 1: 2725094Slling /* string property */ 2735094Slling strval = kmem_alloc(za.za_num_integers, KM_SLEEP); 2745094Slling err = zap_lookup(mos, spa->spa_pool_props_object, 2755094Slling za.za_name, 1, za.za_num_integers, strval); 2765094Slling if (err) { 2775094Slling kmem_free(strval, za.za_num_integers); 2785094Slling break; 2795094Slling } 2805949Slling spa_prop_add_list(*nvp, prop, strval, 0, src); 2815094Slling kmem_free(strval, za.za_num_integers); 2825094Slling break; 2835094Slling 2845094Slling default: 2855094Slling break; 2865094Slling } 2875094Slling } 2885094Slling zap_cursor_fini(&zc); 2895094Slling mutex_exit(&spa->spa_props_lock); 2905094Slling out: 2915094Slling if (err && err != ENOENT) { 2925094Slling nvlist_free(*nvp); 2935949Slling *nvp = NULL; 2945094Slling return (err); 2955094Slling } 2965094Slling 2975094Slling return (0); 2985094Slling } 2995094Slling 3005094Slling /* 3015094Slling * Validate the given pool properties nvlist and modify the list 3025094Slling * for the property values to be set. 
/*
 * Validate the given pool properties nvlist and modify the list
 * for the property values to be set.
 *
 * Returns 0 if every property is acceptable.  As a side effect, a
 * bootfs value given by name is rewritten in 'props' as the dataset's
 * object number (see the reset_bootfs handling at the bottom).
 */
static int
spa_prop_validate(spa_t *spa, nvlist_t *props)
{
	nvpair_t *elem;
	int error = 0, reset_bootfs = 0;
	uint64_t objnum;

	elem = NULL;
	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
		zpool_prop_t prop;
		char *propname, *strval;
		uint64_t intval;
		objset_t *os;
		char *slash;

		propname = nvpair_name(elem);

		if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL)
			return (EINVAL);

		switch (prop) {
		case ZPOOL_PROP_VERSION:
			/* Version may only move forward, up to SPA_VERSION. */
			error = nvpair_value_uint64(elem, &intval);
			if (!error &&
			    (intval < spa_version(spa) || intval > SPA_VERSION))
				error = EINVAL;
			break;

		case ZPOOL_PROP_DELEGATION:
		case ZPOOL_PROP_AUTOREPLACE:
		case ZPOOL_PROP_LISTSNAPS:
			/* Boolean properties: only 0 or 1 allowed. */
			error = nvpair_value_uint64(elem, &intval);
			if (!error && intval > 1)
				error = EINVAL;
			break;

		case ZPOOL_PROP_BOOTFS:
			if (spa_version(spa) < SPA_VERSION_BOOTFS) {
				error = ENOTSUP;
				break;
			}

			/*
			 * Make sure the vdev config is bootable
			 */
			if (!vdev_is_bootable(spa->spa_root_vdev)) {
				error = ENOTSUP;
				break;
			}

			reset_bootfs = 1;

			error = nvpair_value_string(elem, &strval);

			if (!error) {
				uint64_t compress;

				/* Empty string clears bootfs to its default. */
				if (strval == NULL || strval[0] == '\0') {
					objnum = zpool_prop_default_numeric(
					    ZPOOL_PROP_BOOTFS);
					break;
				}

				if (error = dmu_objset_open(strval, DMU_OST_ZFS,
				    DS_MODE_USER | DS_MODE_READONLY, &os))
					break;

				/* We don't support gzip bootable datasets */
				if ((error = dsl_prop_get_integer(strval,
				    zfs_prop_to_name(ZFS_PROP_COMPRESSION),
				    &compress, NULL)) == 0 &&
				    !BOOTFS_COMPRESS_VALID(compress)) {
					error = ENOTSUP;
				} else {
					objnum = dmu_objset_id(os);
				}
				dmu_objset_close(os);
			}
			break;

		case ZPOOL_PROP_FAILUREMODE:
			error = nvpair_value_uint64(elem, &intval);
			if (!error && (intval < ZIO_FAILURE_MODE_WAIT ||
			    intval > ZIO_FAILURE_MODE_PANIC))
				error = EINVAL;

			/*
			 * This is a special case which only occurs when
			 * the pool has completely failed. This allows
			 * the user to change the in-core failmode property
			 * without syncing it out to disk (I/Os might
			 * currently be blocked). We do this by returning
			 * EIO to the caller (spa_prop_set) to trick it
			 * into thinking we encountered a property validation
			 * error.
			 */
			if (!error && spa_suspended(spa)) {
				spa->spa_failmode = intval;
				error = EIO;
			}
			break;

		case ZPOOL_PROP_CACHEFILE:
			if ((error = nvpair_value_string(elem, &strval)) != 0)
				break;

			/* Empty string and "none" are always acceptable. */
			if (strval[0] == '\0')
				break;

			if (strcmp(strval, "none") == 0)
				break;

			/* Otherwise it must be an absolute path ... */
			if (strval[0] != '/') {
				error = EINVAL;
				break;
			}

			/* ... whose final component is a real file name. */
			slash = strrchr(strval, '/');
			ASSERT(slash != NULL);

			if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
			    strcmp(slash, "/..") == 0)
				error = EINVAL;
			break;
		}

		if (error)
			break;
	}

	/*
	 * Replace the bootfs name the user gave us with the dataset's
	 * object number, which is what actually gets stored in the MOS.
	 */
	if (!error && reset_bootfs) {
		error = nvlist_remove(props,
		    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING);

		if (!error) {
			error = nvlist_add_uint64(props,
			    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum);
		}
	}

	return (error);
}

/*
 * Apply a new cachefile setting from 'nvp' (if present) by pushing a
 * new dirent onto spa_config_list; optionally schedules an async
 * config update so the change is written out.
 */
void
spa_configfile_set(spa_t *spa, nvlist_t *nvp, boolean_t need_sync)
{
	char *cachefile;
	spa_config_dirent_t *dp;

	if (nvlist_lookup_string(nvp, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE),
	    &cachefile) != 0)
		return;

	dp = kmem_alloc(sizeof (spa_config_dirent_t),
	    KM_SLEEP);

	/* "" means the default cache file; "none" disables caching. */
	if (cachefile[0] == '\0')
		dp->scd_path = spa_strdup(spa_config_path);
	else if (strcmp(cachefile, "none") == 0)
		dp->scd_path = NULL;
	else
		dp->scd_path = spa_strdup(cachefile);

	list_insert_head(&spa->spa_config_list, dp);
	if (need_sync)
		spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
}

/*
 * Set pool properties.  Validates 'nvp' first; properties other than
 * cachefile/altroot require a sync task to push them to disk.
 */
int
spa_prop_set(spa_t *spa, nvlist_t *nvp)
{
	int error;
	nvpair_t *elem;
	boolean_t need_sync = B_FALSE;
	zpool_prop_t prop;

	if ((error = spa_prop_validate(spa, nvp)) != 0)
		return (error);

	/*
	 * Decide whether anything in the list actually needs to be
	 * synced out; cachefile and altroot are in-core only.
	 */
	elem = NULL;
	while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) {
		if ((prop = zpool_name_to_prop(
		    nvpair_name(elem))) == ZPROP_INVAL)
			return (EINVAL);

		if (prop == ZPOOL_PROP_CACHEFILE || prop == ZPOOL_PROP_ALTROOT)
			continue;

		need_sync = B_TRUE;
		break;
	}

	if (need_sync)
		return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props,
		    spa, nvp, 3));
	else
		return (0);
}

/*
 * If the bootfs property value is dsobj, clear it.
 *
 * Called when the dataset the bootfs property points at is destroyed,
 * so the MOS doesn't keep a dangling reference.
 */
void
spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx)
{
	if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) {
		VERIFY(zap_remove(spa->spa_meta_objset,
		    spa->spa_pool_props_object,
		    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0);
		spa->spa_bootfs = 0;
	}
}

/*
 * ==========================================================================
 * SPA state manipulation (open/create/destroy/import/export)
 * ==========================================================================
 */

/*
 * AVL comparison function for the error lists, ordering entries by
 * their zbookmark.
 *
 * NOTE(review): this assumes bcmp() provides memcmp()-style signed
 * ordering (true of the Solaris implementation) — confirm on other
 * platforms, where bcmp() is only specified to return zero/non-zero.
 */
static int
spa_error_entry_compare(const void *a, const void *b)
{
	spa_error_entry_t *sa = (spa_error_entry_t *)a;
	spa_error_entry_t *sb = (spa_error_entry_t *)b;
	int ret;

	ret = bcmp(&sa->se_bookmark, &sb->se_bookmark,
	    sizeof (zbookmark_t));

	if (ret < 0)
		return (-1);
	else if (ret > 0)
		return (1);
	else
		return (0);
}
5441544Seschrock */ 5451544Seschrock void 5461544Seschrock spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub) 5471544Seschrock { 5481544Seschrock ASSERT(MUTEX_HELD(&spa->spa_errlist_lock)); 5491544Seschrock 5501544Seschrock bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t)); 5511544Seschrock bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t)); 5521544Seschrock 5531544Seschrock avl_create(&spa->spa_errlist_scrub, 5541544Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 5551544Seschrock offsetof(spa_error_entry_t, se_avl)); 5561544Seschrock avl_create(&spa->spa_errlist_last, 5571544Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 5581544Seschrock offsetof(spa_error_entry_t, se_avl)); 5591544Seschrock } 5601544Seschrock 561789Sahrens /* 562789Sahrens * Activate an uninitialized pool. 563789Sahrens */ 564789Sahrens static void 5658241SJeff.Bonwick@Sun.COM spa_activate(spa_t *spa, int mode) 566789Sahrens { 567789Sahrens ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); 568789Sahrens 569789Sahrens spa->spa_state = POOL_STATE_ACTIVE; 5708241SJeff.Bonwick@Sun.COM spa->spa_mode = mode; 571789Sahrens 5729480SGeorge.Wilson@Sun.COM spa->spa_normal_class = metaslab_class_create(zfs_metaslab_ops); 5739480SGeorge.Wilson@Sun.COM spa->spa_log_class = metaslab_class_create(zfs_metaslab_ops); 574789Sahrens 5757754SJeff.Bonwick@Sun.COM for (int t = 0; t < ZIO_TYPES; t++) { 576*9515SJonathan.Adams@Sun.COM const zio_taskq_info_t *ztip = &zio_taskqs[t]; 5777754SJeff.Bonwick@Sun.COM for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { 578*9515SJonathan.Adams@Sun.COM enum zti_modes mode = ztip->zti_nthreads[q].zti_mode; 579*9515SJonathan.Adams@Sun.COM uint_t value = ztip->zti_nthreads[q].zti_value; 580*9515SJonathan.Adams@Sun.COM char name[32]; 581*9515SJonathan.Adams@Sun.COM 582*9515SJonathan.Adams@Sun.COM (void) snprintf(name, sizeof (name), 583*9515SJonathan.Adams@Sun.COM "%s_%s", ztip->zti_name, zio_taskq_types[q]); 
584*9515SJonathan.Adams@Sun.COM 585*9515SJonathan.Adams@Sun.COM if (mode == zti_mode_tune) { 586*9515SJonathan.Adams@Sun.COM mode = zio_taskq_tune_mode; 587*9515SJonathan.Adams@Sun.COM value = zio_taskq_tune_value; 588*9515SJonathan.Adams@Sun.COM if (mode == zti_mode_tune) 589*9515SJonathan.Adams@Sun.COM mode = zti_mode_online_percent; 590*9515SJonathan.Adams@Sun.COM } 591*9515SJonathan.Adams@Sun.COM 592*9515SJonathan.Adams@Sun.COM switch (mode) { 593*9515SJonathan.Adams@Sun.COM case zti_mode_fixed: 594*9515SJonathan.Adams@Sun.COM ASSERT3U(value, >=, 1); 595*9515SJonathan.Adams@Sun.COM value = MAX(value, 1); 596*9515SJonathan.Adams@Sun.COM 597*9515SJonathan.Adams@Sun.COM spa->spa_zio_taskq[t][q] = taskq_create(name, 598*9515SJonathan.Adams@Sun.COM value, maxclsyspri, 50, INT_MAX, 599*9515SJonathan.Adams@Sun.COM TASKQ_PREPOPULATE); 600*9515SJonathan.Adams@Sun.COM break; 601*9515SJonathan.Adams@Sun.COM 602*9515SJonathan.Adams@Sun.COM case zti_mode_online_percent: 603*9515SJonathan.Adams@Sun.COM spa->spa_zio_taskq[t][q] = taskq_create(name, 604*9515SJonathan.Adams@Sun.COM value, maxclsyspri, 50, INT_MAX, 605*9515SJonathan.Adams@Sun.COM TASKQ_PREPOPULATE | TASKQ_THREADS_CPU_PCT); 606*9515SJonathan.Adams@Sun.COM break; 607*9515SJonathan.Adams@Sun.COM 608*9515SJonathan.Adams@Sun.COM case zti_mode_tune: 609*9515SJonathan.Adams@Sun.COM default: 610*9515SJonathan.Adams@Sun.COM panic("unrecognized mode for " 611*9515SJonathan.Adams@Sun.COM "zio_taskqs[%u]->zti_nthreads[%u] (%u:%u) " 612*9515SJonathan.Adams@Sun.COM "in spa_activate()", 613*9515SJonathan.Adams@Sun.COM t, q, mode, value); 614*9515SJonathan.Adams@Sun.COM break; 615*9515SJonathan.Adams@Sun.COM } 6167754SJeff.Bonwick@Sun.COM } 617789Sahrens } 618789Sahrens 6197754SJeff.Bonwick@Sun.COM list_create(&spa->spa_config_dirty_list, sizeof (vdev_t), 6207754SJeff.Bonwick@Sun.COM offsetof(vdev_t, vdev_config_dirty_node)); 6217754SJeff.Bonwick@Sun.COM list_create(&spa->spa_state_dirty_list, sizeof (vdev_t), 
6227754SJeff.Bonwick@Sun.COM offsetof(vdev_t, vdev_state_dirty_node)); 623789Sahrens 624789Sahrens txg_list_create(&spa->spa_vdev_txg_list, 625789Sahrens offsetof(struct vdev, vdev_txg_node)); 6261544Seschrock 6271544Seschrock avl_create(&spa->spa_errlist_scrub, 6281544Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 6291544Seschrock offsetof(spa_error_entry_t, se_avl)); 6301544Seschrock avl_create(&spa->spa_errlist_last, 6311544Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 6321544Seschrock offsetof(spa_error_entry_t, se_avl)); 633789Sahrens } 634789Sahrens 635789Sahrens /* 636789Sahrens * Opposite of spa_activate(). 637789Sahrens */ 638789Sahrens static void 639789Sahrens spa_deactivate(spa_t *spa) 640789Sahrens { 641789Sahrens ASSERT(spa->spa_sync_on == B_FALSE); 642789Sahrens ASSERT(spa->spa_dsl_pool == NULL); 643789Sahrens ASSERT(spa->spa_root_vdev == NULL); 644789Sahrens 645789Sahrens ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED); 646789Sahrens 647789Sahrens txg_list_destroy(&spa->spa_vdev_txg_list); 648789Sahrens 6497754SJeff.Bonwick@Sun.COM list_destroy(&spa->spa_config_dirty_list); 6507754SJeff.Bonwick@Sun.COM list_destroy(&spa->spa_state_dirty_list); 6517754SJeff.Bonwick@Sun.COM 6527754SJeff.Bonwick@Sun.COM for (int t = 0; t < ZIO_TYPES; t++) { 6537754SJeff.Bonwick@Sun.COM for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { 6547754SJeff.Bonwick@Sun.COM taskq_destroy(spa->spa_zio_taskq[t][q]); 6557754SJeff.Bonwick@Sun.COM spa->spa_zio_taskq[t][q] = NULL; 6567754SJeff.Bonwick@Sun.COM } 657789Sahrens } 658789Sahrens 659789Sahrens metaslab_class_destroy(spa->spa_normal_class); 660789Sahrens spa->spa_normal_class = NULL; 661789Sahrens 6624527Sperrin metaslab_class_destroy(spa->spa_log_class); 6634527Sperrin spa->spa_log_class = NULL; 6644527Sperrin 6651544Seschrock /* 6661544Seschrock * If this was part of an import or the open otherwise failed, we may 6671544Seschrock * still have errors left in the queues. Empty them just in case. 
6681544Seschrock */ 6691544Seschrock spa_errlog_drain(spa); 6701544Seschrock 6711544Seschrock avl_destroy(&spa->spa_errlist_scrub); 6721544Seschrock avl_destroy(&spa->spa_errlist_last); 6731544Seschrock 674789Sahrens spa->spa_state = POOL_STATE_UNINITIALIZED; 675789Sahrens } 676789Sahrens 677789Sahrens /* 678789Sahrens * Verify a pool configuration, and construct the vdev tree appropriately. This 679789Sahrens * will create all the necessary vdevs in the appropriate layout, with each vdev 680789Sahrens * in the CLOSED state. This will prep the pool before open/creation/import. 681789Sahrens * All vdev validation is done by the vdev_alloc() routine. 682789Sahrens */ 6832082Seschrock static int 6842082Seschrock spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, 6852082Seschrock uint_t id, int atype) 686789Sahrens { 687789Sahrens nvlist_t **child; 688789Sahrens uint_t c, children; 6892082Seschrock int error; 6902082Seschrock 6912082Seschrock if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0) 6922082Seschrock return (error); 6932082Seschrock 6942082Seschrock if ((*vdp)->vdev_ops->vdev_op_leaf) 6952082Seschrock return (0); 696789Sahrens 6977754SJeff.Bonwick@Sun.COM error = nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 6987754SJeff.Bonwick@Sun.COM &child, &children); 6997754SJeff.Bonwick@Sun.COM 7007754SJeff.Bonwick@Sun.COM if (error == ENOENT) 7017754SJeff.Bonwick@Sun.COM return (0); 7027754SJeff.Bonwick@Sun.COM 7037754SJeff.Bonwick@Sun.COM if (error) { 7042082Seschrock vdev_free(*vdp); 7052082Seschrock *vdp = NULL; 7062082Seschrock return (EINVAL); 707789Sahrens } 708789Sahrens 709789Sahrens for (c = 0; c < children; c++) { 7102082Seschrock vdev_t *vd; 7112082Seschrock if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c, 7122082Seschrock atype)) != 0) { 7132082Seschrock vdev_free(*vdp); 7142082Seschrock *vdp = NULL; 7152082Seschrock return (error); 716789Sahrens } 717789Sahrens } 718789Sahrens 7192082Seschrock 
	ASSERT(*vdp != NULL);

	return (0);
}

/*
 * Opposite of spa_load().
 *
 * Teardown order matters: stop async tasks and the sync thread, wait
 * for outstanding async zios, close the dsl pool, then (under SCL_ALL)
 * purge the l2cache and free the vdev trees and aux-device arrays.
 */
static void
spa_unload(spa_t *spa)
{
	int i;

	ASSERT(MUTEX_HELD(&spa_namespace_lock));

	/*
	 * Stop async tasks.
	 */
	spa_async_suspend(spa);

	/*
	 * Stop syncing.
	 */
	if (spa->spa_sync_on) {
		txg_sync_stop(spa->spa_dsl_pool);
		spa->spa_sync_on = B_FALSE;
	}

	/*
	 * Wait for any outstanding async I/O to complete.
	 */
	if (spa->spa_async_zio_root != NULL) {
		(void) zio_wait(spa->spa_async_zio_root);
		spa->spa_async_zio_root = NULL;
	}

	/*
	 * Close the dsl pool.
	 */
	if (spa->spa_dsl_pool) {
		dsl_pool_close(spa->spa_dsl_pool);
		spa->spa_dsl_pool = NULL;
	}

	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	/*
	 * Drop and purge level 2 cache
	 */
	spa_l2cache_drop(spa);

	/*
	 * Close all vdevs.
	 */
	if (spa->spa_root_vdev)
		vdev_free(spa->spa_root_vdev);
	ASSERT(spa->spa_root_vdev == NULL);

	/* Free the spare vdevs, their pointer array, and their config. */
	for (i = 0; i < spa->spa_spares.sav_count; i++)
		vdev_free(spa->spa_spares.sav_vdevs[i]);
	if (spa->spa_spares.sav_vdevs) {
		kmem_free(spa->spa_spares.sav_vdevs,
		    spa->spa_spares.sav_count * sizeof (void *));
		spa->spa_spares.sav_vdevs = NULL;
	}
	if (spa->spa_spares.sav_config) {
		nvlist_free(spa->spa_spares.sav_config);
		spa->spa_spares.sav_config = NULL;
	}
	spa->spa_spares.sav_count = 0;

	/* Same for the l2cache devices. */
	for (i = 0; i < spa->spa_l2cache.sav_count; i++)
		vdev_free(spa->spa_l2cache.sav_vdevs[i]);
	if (spa->spa_l2cache.sav_vdevs) {
		kmem_free(spa->spa_l2cache.sav_vdevs,
		    spa->spa_l2cache.sav_count * sizeof (void *));
		spa->spa_l2cache.sav_vdevs = NULL;
	}
	if (spa->spa_l2cache.sav_config) {
		nvlist_free(spa->spa_l2cache.sav_config);
		spa->spa_l2cache.sav_config = NULL;
	}
	spa->spa_l2cache.sav_count = 0;

	spa->spa_async_suspended = 0;

	spa_config_exit(spa, SCL_ALL, FTAG);
}

/*
 * Load (or re-load) the current list of vdevs describing the active spares for
 * this pool.  When this is called, we have some form of basic information in
 * 'spa_spares.sav_config'.  We parse this into vdevs, try to open them, and
 * then re-generate a more complete list including status information.
 */
/*
 * NOTE(review): spa_load_spares() rebuilds the spa_spares aux list from
 * spa_spares.sav_config.  It first frees the previous spare vdevs (undoing
 * spa_spare_add()/spa_spare_activate() via spa_lookup_by_guid() +
 * spa_spare_remove()), then parses each ZPOOL_CONFIG_SPARES nvlist entry
 * with spa_config_parse(VDEV_ALLOC_SPARE), opens and validates it, and
 * finally re-generates ZPOOL_CONFIG_SPARES in sav_config with full status.
 * Requires SCL_ALL held as writer (asserted below).  Each spare may be
 * represented by TWO vdev_t's: one on this list and one in the active
 * config when the spare is in use -- see the comment block in the body.
 */
8132082Seschrock */ 8142082Seschrock static void 8152082Seschrock spa_load_spares(spa_t *spa) 8162082Seschrock { 8172082Seschrock nvlist_t **spares; 8182082Seschrock uint_t nspares; 8192082Seschrock int i; 8203377Seschrock vdev_t *vd, *tvd; 8212082Seschrock 8227754SJeff.Bonwick@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 8237754SJeff.Bonwick@Sun.COM 8242082Seschrock /* 8252082Seschrock * First, close and free any existing spare vdevs. 8262082Seschrock */ 8275450Sbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) { 8285450Sbrendan vd = spa->spa_spares.sav_vdevs[i]; 8293377Seschrock 8303377Seschrock /* Undo the call to spa_activate() below */ 8316643Seschrock if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, 8326643Seschrock B_FALSE)) != NULL && tvd->vdev_isspare) 8333377Seschrock spa_spare_remove(tvd); 8343377Seschrock vdev_close(vd); 8353377Seschrock vdev_free(vd); 8362082Seschrock } 8373377Seschrock 8385450Sbrendan if (spa->spa_spares.sav_vdevs) 8395450Sbrendan kmem_free(spa->spa_spares.sav_vdevs, 8405450Sbrendan spa->spa_spares.sav_count * sizeof (void *)); 8415450Sbrendan 8425450Sbrendan if (spa->spa_spares.sav_config == NULL) 8432082Seschrock nspares = 0; 8442082Seschrock else 8455450Sbrendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 8462082Seschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 8472082Seschrock 8485450Sbrendan spa->spa_spares.sav_count = (int)nspares; 8495450Sbrendan spa->spa_spares.sav_vdevs = NULL; 8502082Seschrock 8512082Seschrock if (nspares == 0) 8522082Seschrock return; 8532082Seschrock 8542082Seschrock /* 8552082Seschrock * Construct the array of vdevs, opening them to get status in the 8563377Seschrock * process. For each spare, there is potentially two different vdev_t 8573377Seschrock * structures associated with it: one in the list of spares (used only 8583377Seschrock * for basic validation purposes) and one in the active vdev 8593377Seschrock * configuration (if it's spared in).
During this phase we open and 8603377Seschrock * validate each vdev on the spare list. If the vdev also exists in the 8613377Seschrock * active configuration, then we also mark this vdev as an active spare. 8622082Seschrock */ 8635450Sbrendan spa->spa_spares.sav_vdevs = kmem_alloc(nspares * sizeof (void *), 8645450Sbrendan KM_SLEEP); 8655450Sbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) { 8662082Seschrock VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0, 8672082Seschrock VDEV_ALLOC_SPARE) == 0); 8682082Seschrock ASSERT(vd != NULL); 8692082Seschrock 8705450Sbrendan spa->spa_spares.sav_vdevs[i] = vd; 8712082Seschrock 8726643Seschrock if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, 8736643Seschrock B_FALSE)) != NULL) { 8743377Seschrock if (!tvd->vdev_isspare) 8753377Seschrock spa_spare_add(tvd); 8763377Seschrock 8773377Seschrock /* 8783377Seschrock * We only mark the spare active if we were successfully 8793377Seschrock * able to load the vdev. Otherwise, importing a pool 8803377Seschrock * with a bad active spare would result in strange 8813377Seschrock * behavior, because multiple pool would think the spare 8823377Seschrock * is actively in use. 8833377Seschrock * 8843377Seschrock * There is a vulnerability here to an equally bizarre 8853377Seschrock * circumstance, where a dead active spare is later 8863377Seschrock * brought back to life (onlined or otherwise). Given 8873377Seschrock * the rarity of this scenario, and the extra complexity 8883377Seschrock * it adds, we ignore the possibility.
8893377Seschrock */ 8903377Seschrock if (!vdev_is_dead(tvd)) 8913377Seschrock spa_spare_activate(tvd); 8923377Seschrock } 8933377Seschrock 8947754SJeff.Bonwick@Sun.COM vd->vdev_top = vd; 8959425SEric.Schrock@Sun.COM vd->vdev_aux = &spa->spa_spares; 8967754SJeff.Bonwick@Sun.COM 8972082Seschrock if (vdev_open(vd) != 0) 8982082Seschrock continue; 8992082Seschrock 9005450Sbrendan if (vdev_validate_aux(vd) == 0) 9015450Sbrendan spa_spare_add(vd); 9022082Seschrock } 9032082Seschrock 9042082Seschrock /* 9052082Seschrock * Recompute the stashed list of spares, with status information 9062082Seschrock * this time. 9072082Seschrock */ 9085450Sbrendan VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, 9092082Seschrock DATA_TYPE_NVLIST_ARRAY) == 0); 9102082Seschrock 9115450Sbrendan spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *), 9125450Sbrendan KM_SLEEP); 9135450Sbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) 9145450Sbrendan spares[i] = vdev_config_generate(spa, 9155450Sbrendan spa->spa_spares.sav_vdevs[i], B_TRUE, B_TRUE, B_FALSE); 9165450Sbrendan VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, 9175450Sbrendan ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0); 9185450Sbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) 9192082Seschrock nvlist_free(spares[i]); 9205450Sbrendan kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *)); 9215450Sbrendan } 9225450Sbrendan 9235450Sbrendan /* 9245450Sbrendan * Load (or re-load) the current list of vdevs describing the active l2cache for 9255450Sbrendan * this pool. When this is called, we have some form of basic information in 9265450Sbrendan * 'spa_l2cache.sav_config'. We parse this into vdevs, try to open them, and 9275450Sbrendan * then re-generate a more complete list including status information. 9285450Sbrendan * Devices which are already active have their details maintained, and are 9295450Sbrendan * not re-opened.
/*
 * NOTE(review): this region holds four routines:
 *
 *  spa_load_l2cache() -- rebuild the L2ARC aux-vdev list from
 *    spa_l2cache.sav_config.  Vdevs whose guid matches an existing entry
 *    are retained (not re-opened); new entries are parsed
 *    (VDEV_ALLOC_L2CACHE), committed via spa_l2cache_add() even if open
 *    fails, and handed to l2arc_add_vdev() when alive; dropped old vdevs
 *    are removed from the ARC and closed.  Finally ZPOOL_CONFIG_L2CACHE
 *    is regenerated with status info.  Requires SCL_ALL as writer.
 *    The 'goto out' when sav_config == NULL is safe: sav_count was zeroed
 *    above, so the cleanup loop and kmem_free() at 'out' do nothing.
 *    vdev_get_rsize() presumably returns the raw device size used to size
 *    the L2ARC region past the label -- TODO confirm against vdev.c.
 *
 *  load_nvlist() -- read a packed nvlist object from the MOS: bonus
 *    buffer holds the packed size, dmu_read() fetches the bytes, and
 *    nvlist_unpack() materializes *value (left NULL on error).
 *
 *  spa_check_removed() -- recursively post ESC_ZFS_VDEV_CHECK sysevents
 *    and autoreplace requests for dead leaf vdevs.
 *
 *  spa_check_logs() -- verify (or clear) the intent-log chains via
 *    dmu_objset_find(); returns 1 and sets SPA_LOG_MISSING on failure,
 *    otherwise marks the state SPA_LOG_GOOD and returns 0.
 */
9305450Sbrendan */ 9315450Sbrendan static void 9325450Sbrendan spa_load_l2cache(spa_t *spa) 9335450Sbrendan { 9345450Sbrendan nvlist_t **l2cache; 9355450Sbrendan uint_t nl2cache; 9365450Sbrendan int i, j, oldnvdevs; 9376643Seschrock uint64_t guid, size; 9385450Sbrendan vdev_t *vd, **oldvdevs, **newvdevs; 9395450Sbrendan spa_aux_vdev_t *sav = &spa->spa_l2cache; 9405450Sbrendan 9417754SJeff.Bonwick@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 9427754SJeff.Bonwick@Sun.COM 9435450Sbrendan if (sav->sav_config != NULL) { 9445450Sbrendan VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, 9455450Sbrendan ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 9465450Sbrendan newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP); 9475450Sbrendan } else { 9485450Sbrendan nl2cache = 0; 9495450Sbrendan } 9505450Sbrendan 9515450Sbrendan oldvdevs = sav->sav_vdevs; 9525450Sbrendan oldnvdevs = sav->sav_count; 9535450Sbrendan sav->sav_vdevs = NULL; 9545450Sbrendan sav->sav_count = 0; 9555450Sbrendan 9565450Sbrendan /* 9575450Sbrendan * Process new nvlist of vdevs. 9585450Sbrendan */ 9595450Sbrendan for (i = 0; i < nl2cache; i++) { 9605450Sbrendan VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID, 9615450Sbrendan &guid) == 0); 9625450Sbrendan 9635450Sbrendan newvdevs[i] = NULL; 9645450Sbrendan for (j = 0; j < oldnvdevs; j++) { 9655450Sbrendan vd = oldvdevs[j]; 9665450Sbrendan if (vd != NULL && guid == vd->vdev_guid) { 9675450Sbrendan /* 9685450Sbrendan * Retain previous vdev for add/remove ops.
9695450Sbrendan */ 9705450Sbrendan newvdevs[i] = vd; 9715450Sbrendan oldvdevs[j] = NULL; 9725450Sbrendan break; 9735450Sbrendan } 9745450Sbrendan } 9755450Sbrendan 9765450Sbrendan if (newvdevs[i] == NULL) { 9775450Sbrendan /* 9785450Sbrendan * Create new vdev 9795450Sbrendan */ 9805450Sbrendan VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0, 9815450Sbrendan VDEV_ALLOC_L2CACHE) == 0); 9825450Sbrendan ASSERT(vd != NULL); 9835450Sbrendan newvdevs[i] = vd; 9845450Sbrendan 9855450Sbrendan /* 9865450Sbrendan * Commit this vdev as an l2cache device, 9875450Sbrendan * even if it fails to open. 9885450Sbrendan */ 9895450Sbrendan spa_l2cache_add(vd); 9905450Sbrendan 9916643Seschrock vd->vdev_top = vd; 9926643Seschrock vd->vdev_aux = sav; 9936643Seschrock 9946643Seschrock spa_l2cache_activate(vd); 9956643Seschrock 9965450Sbrendan if (vdev_open(vd) != 0) 9975450Sbrendan continue; 9985450Sbrendan 9995450Sbrendan (void) vdev_validate_aux(vd); 10005450Sbrendan 10015450Sbrendan if (!vdev_is_dead(vd)) { 10025450Sbrendan size = vdev_get_rsize(vd); 10036643Seschrock l2arc_add_vdev(spa, vd, 10046643Seschrock VDEV_LABEL_START_SIZE, 10056643Seschrock size - VDEV_LABEL_START_SIZE); 10065450Sbrendan } 10075450Sbrendan } 10085450Sbrendan } 10095450Sbrendan 10105450Sbrendan /* 10115450Sbrendan * Purge vdevs that were dropped 10125450Sbrendan */ 10135450Sbrendan for (i = 0; i < oldnvdevs; i++) { 10145450Sbrendan uint64_t pool; 10155450Sbrendan 10165450Sbrendan vd = oldvdevs[i]; 10175450Sbrendan if (vd != NULL) { 10188241SJeff.Bonwick@Sun.COM if (spa_l2cache_exists(vd->vdev_guid, &pool) && 10198241SJeff.Bonwick@Sun.COM pool != 0ULL && l2arc_vdev_present(vd)) 10205450Sbrendan l2arc_remove_vdev(vd); 10215450Sbrendan (void) vdev_close(vd); 10225450Sbrendan spa_l2cache_remove(vd); 10235450Sbrendan } 10245450Sbrendan } 10255450Sbrendan 10265450Sbrendan if (oldvdevs) 10275450Sbrendan kmem_free(oldvdevs, oldnvdevs * sizeof (void *)); 10285450Sbrendan 10295450Sbrendan if (sav->sav_config ==
NULL) 10305450Sbrendan goto out; 10315450Sbrendan 10325450Sbrendan sav->sav_vdevs = newvdevs; 10335450Sbrendan sav->sav_count = (int)nl2cache; 10345450Sbrendan 10355450Sbrendan /* 10365450Sbrendan * Recompute the stashed list of l2cache devices, with status 10375450Sbrendan * information this time. 10385450Sbrendan */ 10395450Sbrendan VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE, 10405450Sbrendan DATA_TYPE_NVLIST_ARRAY) == 0); 10415450Sbrendan 10425450Sbrendan l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); 10435450Sbrendan for (i = 0; i < sav->sav_count; i++) 10445450Sbrendan l2cache[i] = vdev_config_generate(spa, 10455450Sbrendan sav->sav_vdevs[i], B_TRUE, B_FALSE, B_TRUE); 10465450Sbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, 10475450Sbrendan ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0); 10485450Sbrendan out: 10495450Sbrendan for (i = 0; i < sav->sav_count; i++) 10505450Sbrendan nvlist_free(l2cache[i]); 10515450Sbrendan if (sav->sav_count) 10525450Sbrendan kmem_free(l2cache, sav->sav_count * sizeof (void *)); 10532082Seschrock } 10542082Seschrock 10552082Seschrock static int 10562082Seschrock load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value) 10572082Seschrock { 10582082Seschrock dmu_buf_t *db; 10592082Seschrock char *packed = NULL; 10602082Seschrock size_t nvsize = 0; 10612082Seschrock int error; 10622082Seschrock *value = NULL; 10632082Seschrock 10642082Seschrock VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); 10652082Seschrock nvsize = *(uint64_t *)db->db_data; 10662082Seschrock dmu_buf_rele(db, FTAG); 10672082Seschrock 10682082Seschrock packed = kmem_alloc(nvsize, KM_SLEEP); 10692082Seschrock 10699512SNeil.Perrin@Sun.COM error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed, 10709512SNeil.Perrin@Sun.COM DMU_READ_PREFETCH); 10712082Seschrock if (error == 0) 10722082Seschrock error = nvlist_unpack(packed, nvsize, value, 0); 10732082Seschrock kmem_free(packed, nvsize); 10742082Seschrock
10752082Seschrock return (error); 10762082Seschrock } 10772082Seschrock 10782082Seschrock /* 10794451Seschrock * Checks to see if the given vdev could not be opened, in which case we post a 10804451Seschrock * sysevent to notify the autoreplace code that the device has been removed. 10814451Seschrock */ 10824451Seschrock static void 10834451Seschrock spa_check_removed(vdev_t *vd) 10844451Seschrock { 10854451Seschrock int c; 10864451Seschrock 10874451Seschrock for (c = 0; c < vd->vdev_children; c++) 10884451Seschrock spa_check_removed(vd->vdev_child[c]); 10894451Seschrock 10904451Seschrock if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) { 10914451Seschrock zfs_post_autoreplace(vd->vdev_spa, vd); 10924451Seschrock spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK); 10934451Seschrock } 10944451Seschrock } 10954451Seschrock 10964451Seschrock /* 10977294Sperrin * Check for missing log devices 10987294Sperrin */ 10997294Sperrin int 11007294Sperrin spa_check_logs(spa_t *spa) 11017294Sperrin { 11027294Sperrin switch (spa->spa_log_state) { 11037294Sperrin case SPA_LOG_MISSING: 11047294Sperrin /* need to recheck in case slog has been restored */ 11057294Sperrin case SPA_LOG_UNKNOWN: 11067294Sperrin if (dmu_objset_find(spa->spa_name, zil_check_log_chain, NULL, 11077294Sperrin DS_FIND_CHILDREN)) { 11087294Sperrin spa->spa_log_state = SPA_LOG_MISSING; 11097294Sperrin return (1); 11107294Sperrin } 11117294Sperrin break; 11127294Sperrin 11137294Sperrin case SPA_LOG_CLEAR: 11147294Sperrin (void) dmu_objset_find(spa->spa_name, zil_clear_log_chain, NULL, 11157294Sperrin DS_FIND_CHILDREN); 11167294Sperrin break; 11177294Sperrin } 11187294Sperrin spa->spa_log_state = SPA_LOG_GOOD; 11197294Sperrin return (0); 11207294Sperrin } 11217294Sperrin 11227294Sperrin /* 1123789Sahrens * Load an existing storage pool, using the pool's builtin spa_config as a 11241544Seschrock * source of configuration information.
/*
 * NOTE(review): spa_load() -- the pool-open workhorse.  Visible sequence:
 * pull vdev tree + pool guid from 'config'; create the "godfather" async
 * zio root; under SCL_ALL as writer, spa_config_parse() the tree, then
 * vdev_open() and vdev_validate() it; pick the best uberblock
 * (vdev_uberblock_load); reject pools newer than SPA_VERSION or with a
 * mismatched vdev guid sum; dsl_pool_open(); read MOS objects via
 * zap_lookup()/load_nvlist() -- config, sync bplist, deflate flag, errlog
 * last/scrub, history, spares, l2cache, pool props; spa_check_logs();
 * vdev_load() + vdev_dtl_reassess(); and, if the pool is writable, claim
 * outstanding ZIL blocks in a single assigned txg, start txg sync, and
 * queue config-update/resilver async requests as needed.
 *
 * If 'mosconfig' is false the config is untrusted: the pool is reopened
 * FREAD (orig_mode restored later) and, once the real config is read from
 * the MOS, spa_load() recurses on it with mosconfig == B_TRUE.  The hostid
 * check in that path refuses pools last accessed by another system
 * (EBADF), except for root pools.
 *
 * Every failure funnels through 'out' after vdev_set_state(CANT_OPEN, ...)
 * so an FMA ereport ('ereport', default FM_EREPORT_ZFS_POOL) can be posted
 * for any error other than EBADF.  Returns 0 or an errno.
 * Caller must hold spa_namespace_lock (asserted below).
 */
1125789Sahrens */ 1126789Sahrens static int 11271544Seschrock spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig) 1128789Sahrens { 1129789Sahrens int error = 0; 1130789Sahrens nvlist_t *nvroot = NULL; 1131789Sahrens vdev_t *rvd; 1132789Sahrens uberblock_t *ub = &spa->spa_uberblock; 11331635Sbonwick uint64_t config_cache_txg = spa->spa_config_txg; 1134789Sahrens uint64_t pool_guid; 11352082Seschrock uint64_t version; 11364451Seschrock uint64_t autoreplace = 0; 11378241SJeff.Bonwick@Sun.COM int orig_mode = spa->spa_mode; 11387294Sperrin char *ereport = FM_EREPORT_ZFS_POOL; 1139789Sahrens 11408241SJeff.Bonwick@Sun.COM /* 11418241SJeff.Bonwick@Sun.COM * If this is an untrusted config, access the pool in read-only mode. 11428241SJeff.Bonwick@Sun.COM * This prevents things like resilvering recently removed devices. 11438241SJeff.Bonwick@Sun.COM */ 11448241SJeff.Bonwick@Sun.COM if (!mosconfig) 11458241SJeff.Bonwick@Sun.COM spa->spa_mode = FREAD; 11468241SJeff.Bonwick@Sun.COM 11477754SJeff.Bonwick@Sun.COM ASSERT(MUTEX_HELD(&spa_namespace_lock)); 11487754SJeff.Bonwick@Sun.COM 11491544Seschrock spa->spa_load_state = state; 11501635Sbonwick 1151789Sahrens if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) || 11521733Sbonwick nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) { 11531544Seschrock error = EINVAL; 11541544Seschrock goto out; 11551544Seschrock } 1156789Sahrens 11572082Seschrock /* 11582082Seschrock * Versioning wasn't explicitly added to the label until later, so if 11592082Seschrock * it's not present treat it as the initial version.
11602082Seschrock */ 11612082Seschrock if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) != 0) 11624577Sahrens version = SPA_VERSION_INITIAL; 11632082Seschrock 11641733Sbonwick (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, 11651733Sbonwick &spa->spa_config_txg); 11661733Sbonwick 11671635Sbonwick if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) && 11681544Seschrock spa_guid_exists(pool_guid, 0)) { 11691544Seschrock error = EEXIST; 11701544Seschrock goto out; 11711544Seschrock } 1172789Sahrens 11732174Seschrock spa->spa_load_guid = pool_guid; 11742174Seschrock 1175789Sahrens /* 11769234SGeorge.Wilson@Sun.COM * Create "The Godfather" zio to hold all async IOs 11779234SGeorge.Wilson@Sun.COM */ 11789234SGeorge.Wilson@Sun.COM if (spa->spa_async_zio_root == NULL) 11799234SGeorge.Wilson@Sun.COM spa->spa_async_zio_root = zio_root(spa, NULL, NULL, 11809234SGeorge.Wilson@Sun.COM ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | 11819234SGeorge.Wilson@Sun.COM ZIO_FLAG_GODFATHER); 11829234SGeorge.Wilson@Sun.COM 11839234SGeorge.Wilson@Sun.COM /* 11842082Seschrock * Parse the configuration into a vdev tree. We explicitly set the 11852082Seschrock * value that will be returned by spa_version() since parsing the 11862082Seschrock * configuration requires knowing the version number. 1187789Sahrens */ 11887754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 11892082Seschrock spa->spa_ubsync.ub_version = version; 11902082Seschrock error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_LOAD); 11917754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 1192789Sahrens 11932082Seschrock if (error != 0) 11941544Seschrock goto out; 1195789Sahrens 11961585Sbonwick ASSERT(spa->spa_root_vdev == rvd); 1197789Sahrens ASSERT(spa_guid(spa) == pool_guid); 1198789Sahrens 1199789Sahrens /* 1200789Sahrens * Try to open all vdevs, loading each label in the process.
1201789Sahrens */ 12027754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 12034070Smc142369 error = vdev_open(rvd); 12047754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 12054070Smc142369 if (error != 0) 12061544Seschrock goto out; 1207789Sahrens 1208789Sahrens /* 12099276SMark.Musante@Sun.COM * We need to validate the vdev labels against the configuration that 12109276SMark.Musante@Sun.COM * we have in hand, which is dependent on the setting of mosconfig. If 12119276SMark.Musante@Sun.COM * mosconfig is true then we're validating the vdev labels based on 12129276SMark.Musante@Sun.COM * that config. Otherwise, we're validating against the cached config 12139276SMark.Musante@Sun.COM * (zpool.cache) that was read when we loaded the zfs module, and then 12149276SMark.Musante@Sun.COM * later we will recursively call spa_load() and validate against 12159276SMark.Musante@Sun.COM * the vdev config. 12161986Seschrock */ 12179276SMark.Musante@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 12189276SMark.Musante@Sun.COM error = vdev_validate(rvd); 12199276SMark.Musante@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 12209276SMark.Musante@Sun.COM if (error != 0) 12219276SMark.Musante@Sun.COM goto out; 12221986Seschrock 12231986Seschrock if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { 12241986Seschrock error = ENXIO; 12251986Seschrock goto out; 12261986Seschrock } 12271986Seschrock 12281986Seschrock /* 1229789Sahrens * Find the best uberblock. 1230789Sahrens */ 12317754SJeff.Bonwick@Sun.COM vdev_uberblock_load(NULL, rvd, ub); 1232789Sahrens 1233789Sahrens /* 1234789Sahrens * If we weren't able to find a single valid uberblock, return failure.
1235789Sahrens */ 1236789Sahrens if (ub->ub_txg == 0) { 12371760Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 12381760Seschrock VDEV_AUX_CORRUPT_DATA); 12391544Seschrock error = ENXIO; 12401544Seschrock goto out; 12411544Seschrock } 12421544Seschrock 12431544Seschrock /* 12441544Seschrock * If the pool is newer than the code, we can't open it. 12451544Seschrock */ 12464577Sahrens if (ub->ub_version > SPA_VERSION) { 12471760Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 12481760Seschrock VDEV_AUX_VERSION_NEWER); 12491544Seschrock error = ENOTSUP; 12501544Seschrock goto out; 1251789Sahrens } 1252789Sahrens 1253789Sahrens /* 1254789Sahrens * If the vdev guid sum doesn't match the uberblock, we have an 1255789Sahrens * incomplete configuration. 1256789Sahrens */ 12571732Sbonwick if (rvd->vdev_guid_sum != ub->ub_guid_sum && mosconfig) { 12581544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 12591544Seschrock VDEV_AUX_BAD_GUID_SUM); 12601544Seschrock error = ENXIO; 12611544Seschrock goto out; 1262789Sahrens } 1263789Sahrens 1264789Sahrens /* 1265789Sahrens * Initialize internal SPA structures.
1266789Sahrens */ 1267789Sahrens spa->spa_state = POOL_STATE_ACTIVE; 1268789Sahrens spa->spa_ubsync = spa->spa_uberblock; 1269789Sahrens spa->spa_first_txg = spa_last_synced_txg(spa) + 1; 12701544Seschrock error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool); 12711544Seschrock if (error) { 12721544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 12731544Seschrock VDEV_AUX_CORRUPT_DATA); 12741544Seschrock goto out; 12751544Seschrock } 1276789Sahrens spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset; 1277789Sahrens 12781544Seschrock if (zap_lookup(spa->spa_meta_objset, 1279789Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, 12801544Seschrock sizeof (uint64_t), 1, &spa->spa_config_object) != 0) { 12811544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 12821544Seschrock VDEV_AUX_CORRUPT_DATA); 12831544Seschrock error = EIO; 12841544Seschrock goto out; 12851544Seschrock } 1286789Sahrens 1287789Sahrens if (!mosconfig) { 12882082Seschrock nvlist_t *newconfig; 12893975Sek110237 uint64_t hostid; 12902082Seschrock 12912082Seschrock if (load_nvlist(spa, spa->spa_config_object, &newconfig) != 0) { 12921544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 12931544Seschrock VDEV_AUX_CORRUPT_DATA); 12941544Seschrock error = EIO; 12951544Seschrock goto out; 12961544Seschrock } 1297789Sahrens 12987706SLin.Ling@Sun.COM if (!spa_is_root(spa) && nvlist_lookup_uint64(newconfig, 12997706SLin.Ling@Sun.COM ZPOOL_CONFIG_HOSTID, &hostid) == 0) { 13003975Sek110237 char *hostname; 13013975Sek110237 unsigned long myhostid = 0; 13023975Sek110237 13033975Sek110237 VERIFY(nvlist_lookup_string(newconfig, 13043975Sek110237 ZPOOL_CONFIG_HOSTNAME, &hostname) == 0); 13053975Sek110237 13068662SJordan.Vaughan@Sun.com #ifdef _KERNEL 13078662SJordan.Vaughan@Sun.com myhostid = zone_get_hostid(NULL); 13088662SJordan.Vaughan@Sun.com #else /* _KERNEL */ 13098662SJordan.Vaughan@Sun.com /* 13108662SJordan.Vaughan@Sun.com * We're emulating the system's
hostid in userland, so 13118662SJordan.Vaughan@Sun.com * we can't use zone_get_hostid(). 13128662SJordan.Vaughan@Sun.com */ 13133975Sek110237 (void) ddi_strtoul(hw_serial, NULL, 10, &myhostid); 13148662SJordan.Vaughan@Sun.com #endif /* _KERNEL */ 13154178Slling if (hostid != 0 && myhostid != 0 && 13168662SJordan.Vaughan@Sun.com hostid != myhostid) { 13173975Sek110237 cmn_err(CE_WARN, "pool '%s' could not be " 13183975Sek110237 "loaded as it was last accessed by " 13197706SLin.Ling@Sun.COM "another system (host: %s hostid: 0x%lx). " 13203975Sek110237 "See: http://www.sun.com/msg/ZFS-8000-EY", 13217754SJeff.Bonwick@Sun.COM spa_name(spa), hostname, 13223975Sek110237 (unsigned long)hostid); 13233975Sek110237 error = EBADF; 13243975Sek110237 goto out; 13253975Sek110237 } 13263975Sek110237 } 13273975Sek110237 1328789Sahrens spa_config_set(spa, newconfig); 1329789Sahrens spa_unload(spa); 1330789Sahrens spa_deactivate(spa); 13318241SJeff.Bonwick@Sun.COM spa_activate(spa, orig_mode); 1332789Sahrens 13331544Seschrock return (spa_load(spa, newconfig, state, B_TRUE)); 13341544Seschrock } 13351544Seschrock 13361544Seschrock if (zap_lookup(spa->spa_meta_objset, 13371544Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, 13381544Seschrock sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj) != 0) { 13391544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 13401544Seschrock VDEV_AUX_CORRUPT_DATA); 13411544Seschrock error = EIO; 13421544Seschrock goto out; 1343789Sahrens } 1344789Sahrens 13451544Seschrock /* 13462082Seschrock * Load the bit that tells us to use the new accounting function 13472082Seschrock * (raid-z deflation). If we have an older pool, this will not 13482082Seschrock * be present.
13492082Seschrock */ 13502082Seschrock error = zap_lookup(spa->spa_meta_objset, 13512082Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 13522082Seschrock sizeof (uint64_t), 1, &spa->spa_deflate); 13532082Seschrock if (error != 0 && error != ENOENT) { 13542082Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 13552082Seschrock VDEV_AUX_CORRUPT_DATA); 13562082Seschrock error = EIO; 13572082Seschrock goto out; 13582082Seschrock } 13592082Seschrock 13602082Seschrock /* 13611544Seschrock * Load the persistent error log. If we have an older pool, this will 13621544Seschrock * not be present. 13631544Seschrock */ 13641544Seschrock error = zap_lookup(spa->spa_meta_objset, 13651544Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_LAST, 13661544Seschrock sizeof (uint64_t), 1, &spa->spa_errlog_last); 13671807Sbonwick if (error != 0 && error != ENOENT) { 13681544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 13691544Seschrock VDEV_AUX_CORRUPT_DATA); 13701544Seschrock error = EIO; 13711544Seschrock goto out; 13721544Seschrock } 13731544Seschrock 13741544Seschrock error = zap_lookup(spa->spa_meta_objset, 13751544Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_SCRUB, 13761544Seschrock sizeof (uint64_t), 1, &spa->spa_errlog_scrub); 13771544Seschrock if (error != 0 && error != ENOENT) { 13781544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 13791544Seschrock VDEV_AUX_CORRUPT_DATA); 13801544Seschrock error = EIO; 13811544Seschrock goto out; 13821544Seschrock } 1383789Sahrens 1384789Sahrens /* 13852926Sek110237 * Load the history object. If we have an older pool, this 13862926Sek110237 * will not be present.
13872926Sek110237 */ 13882926Sek110237 error = zap_lookup(spa->spa_meta_objset, 13892926Sek110237 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_HISTORY, 13902926Sek110237 sizeof (uint64_t), 1, &spa->spa_history); 13912926Sek110237 if (error != 0 && error != ENOENT) { 13922926Sek110237 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 13932926Sek110237 VDEV_AUX_CORRUPT_DATA); 13942926Sek110237 error = EIO; 13952926Sek110237 goto out; 13962926Sek110237 } 13972926Sek110237 13982926Sek110237 /* 13992082Seschrock * Load any hot spares for this pool. 14002082Seschrock */ 14012082Seschrock error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 14025450Sbrendan DMU_POOL_SPARES, sizeof (uint64_t), 1, &spa->spa_spares.sav_object); 14032082Seschrock if (error != 0 && error != ENOENT) { 14042082Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 14052082Seschrock VDEV_AUX_CORRUPT_DATA); 14062082Seschrock error = EIO; 14072082Seschrock goto out; 14082082Seschrock } 14092082Seschrock if (error == 0) { 14104577Sahrens ASSERT(spa_version(spa) >= SPA_VERSION_SPARES); 14115450Sbrendan if (load_nvlist(spa, spa->spa_spares.sav_object, 14125450Sbrendan &spa->spa_spares.sav_config) != 0) { 14132082Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 14142082Seschrock VDEV_AUX_CORRUPT_DATA); 14152082Seschrock error = EIO; 14162082Seschrock goto out; 14172082Seschrock } 14182082Seschrock 14197754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 14202082Seschrock spa_load_spares(spa); 14217754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 14222082Seschrock } 14232082Seschrock 14245450Sbrendan /* 14255450Sbrendan * Load any level 2 ARC devices for this pool.
14265450Sbrendan */ 14275450Sbrendan error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 14285450Sbrendan DMU_POOL_L2CACHE, sizeof (uint64_t), 1, 14295450Sbrendan &spa->spa_l2cache.sav_object); 14305450Sbrendan if (error != 0 && error != ENOENT) { 14315450Sbrendan vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 14325450Sbrendan VDEV_AUX_CORRUPT_DATA); 14335450Sbrendan error = EIO; 14345450Sbrendan goto out; 14355450Sbrendan } 14365450Sbrendan if (error == 0) { 14375450Sbrendan ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE); 14385450Sbrendan if (load_nvlist(spa, spa->spa_l2cache.sav_object, 14395450Sbrendan &spa->spa_l2cache.sav_config) != 0) { 14405450Sbrendan vdev_set_state(rvd, B_TRUE, 14415450Sbrendan VDEV_STATE_CANT_OPEN, 14425450Sbrendan VDEV_AUX_CORRUPT_DATA); 14435450Sbrendan error = EIO; 14445450Sbrendan goto out; 14455450Sbrendan } 14465450Sbrendan 14477754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 14485450Sbrendan spa_load_l2cache(spa); 14497754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 14505450Sbrendan } 14515450Sbrendan 14527294Sperrin if (spa_check_logs(spa)) { 14537294Sperrin vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 14547294Sperrin VDEV_AUX_BAD_LOG); 14557294Sperrin error = ENXIO; 14567294Sperrin ereport = FM_EREPORT_ZFS_LOG_REPLAY; 14577294Sperrin goto out; 14587294Sperrin } 14597294Sperrin 14607294Sperrin 14615094Slling spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); 14624543Smarks 14633912Slling error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 14643912Slling DMU_POOL_PROPS, sizeof (uint64_t), 1, &spa->spa_pool_props_object); 14653912Slling 14663912Slling if (error && error != ENOENT) { 14673912Slling vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 14683912Slling VDEV_AUX_CORRUPT_DATA); 14693912Slling error = EIO; 14703912Slling goto out; 14713912Slling } 14723912Slling 14733912Slling if (error == 0) { 14743912Slling (void)
zap_lookup(spa->spa_meta_objset, 14753912Slling spa->spa_pool_props_object, 14764451Seschrock zpool_prop_to_name(ZPOOL_PROP_BOOTFS), 14773912Slling sizeof (uint64_t), 1, &spa->spa_bootfs); 14784451Seschrock (void) zap_lookup(spa->spa_meta_objset, 14794451Seschrock spa->spa_pool_props_object, 14804451Seschrock zpool_prop_to_name(ZPOOL_PROP_AUTOREPLACE), 14814451Seschrock sizeof (uint64_t), 1, &autoreplace); 14824543Smarks (void) zap_lookup(spa->spa_meta_objset, 14834543Smarks spa->spa_pool_props_object, 14844543Smarks zpool_prop_to_name(ZPOOL_PROP_DELEGATION), 14854543Smarks sizeof (uint64_t), 1, &spa->spa_delegation); 14865329Sgw25295 (void) zap_lookup(spa->spa_meta_objset, 14875329Sgw25295 spa->spa_pool_props_object, 14885329Sgw25295 zpool_prop_to_name(ZPOOL_PROP_FAILUREMODE), 14895329Sgw25295 sizeof (uint64_t), 1, &spa->spa_failmode); 14903912Slling } 14913912Slling 14922082Seschrock /* 14934451Seschrock * If the 'autoreplace' property is set, then post a resource notifying 14944451Seschrock * the ZFS DE that it should not issue any faults for unopenable 14954451Seschrock * devices. We also iterate over the vdevs, and post a sysevent for any 14964451Seschrock * unopenable vdevs so that the normal autoreplace handler can take 14974451Seschrock * over. 14984451Seschrock */ 14995756Seschrock if (autoreplace && state != SPA_LOAD_TRYIMPORT) 15004451Seschrock spa_check_removed(spa->spa_root_vdev); 15014451Seschrock 15024451Seschrock /* 15031986Seschrock * Load the vdev state for all toplevel vdevs. 1504789Sahrens */ 15051986Seschrock vdev_load(rvd); 1506789Sahrens 1507789Sahrens /* 1508789Sahrens * Propagate the leaf DTLs we just loaded all the way up the tree. 1509789Sahrens */ 15107754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1511789Sahrens vdev_dtl_reassess(rvd, 0, 0, B_FALSE); 15127754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 1513789Sahrens 1514789Sahrens /* 1515789Sahrens * Check the state of the root vdev.
If it can't be opened, it 1516789Sahrens * indicates one or more toplevel vdevs are faulted. 1517789Sahrens */ 15181544Seschrock if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { 15191544Seschrock error = ENXIO; 15201544Seschrock goto out; 15211544Seschrock } 1522789Sahrens 15238241SJeff.Bonwick@Sun.COM if (spa_writeable(spa)) { 15241635Sbonwick dmu_tx_t *tx; 15251635Sbonwick int need_update = B_FALSE; 15268241SJeff.Bonwick@Sun.COM 15278241SJeff.Bonwick@Sun.COM ASSERT(state != SPA_LOAD_TRYIMPORT); 15281601Sbonwick 15291635Sbonwick /* 15301635Sbonwick * Claim log blocks that haven't been committed yet. 15311635Sbonwick * This must all happen in a single txg. 15321635Sbonwick */ 15331601Sbonwick tx = dmu_tx_create_assigned(spa_get_dsl(spa), 1534789Sahrens spa_first_txg(spa)); 15357754SJeff.Bonwick@Sun.COM (void) dmu_objset_find(spa_name(spa), 15362417Sahrens zil_claim, tx, DS_FIND_CHILDREN); 1537789Sahrens dmu_tx_commit(tx); 1538789Sahrens 1539789Sahrens spa->spa_sync_on = B_TRUE; 1540789Sahrens txg_sync_start(spa->spa_dsl_pool); 1541789Sahrens 1542789Sahrens /* 1543789Sahrens * Wait for all claims to sync. 1544789Sahrens */ 1545789Sahrens txg_wait_synced(spa->spa_dsl_pool, 0); 15461585Sbonwick 15471585Sbonwick /* 15481635Sbonwick * If the config cache is stale, or we have uninitialized 15491635Sbonwick * metaslabs (see spa_vdev_add()), then update the config. 15501585Sbonwick */ 15511635Sbonwick if (config_cache_txg != spa->spa_config_txg || 15521635Sbonwick state == SPA_LOAD_IMPORT) 15531635Sbonwick need_update = B_TRUE; 15541635Sbonwick 15558241SJeff.Bonwick@Sun.COM for (int c = 0; c < rvd->vdev_children; c++) 15561635Sbonwick if (rvd->vdev_child[c]->vdev_ms_array == 0) 15571635Sbonwick need_update = B_TRUE; 15581585Sbonwick 15591585Sbonwick /* 15601635Sbonwick * Update the config cache asychronously in case we're the 15611635Sbonwick * root pool, in which case the config cache isn't writable yet.
15621585Sbonwick */ 15631635Sbonwick if (need_update) 15641635Sbonwick spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); 15658241SJeff.Bonwick@Sun.COM 15668241SJeff.Bonwick@Sun.COM /* 15678241SJeff.Bonwick@Sun.COM * Check all DTLs to see if anything needs resilvering. 15688241SJeff.Bonwick@Sun.COM */ 15698241SJeff.Bonwick@Sun.COM if (vdev_resilver_needed(rvd, NULL, NULL)) 15708241SJeff.Bonwick@Sun.COM spa_async_request(spa, SPA_ASYNC_RESILVER); 1571789Sahrens } 1572789Sahrens 15731544Seschrock error = 0; 15741544Seschrock out: 15757046Sahrens spa->spa_minref = refcount_count(&spa->spa_refcount); 15762082Seschrock if (error && error != EBADF) 15777294Sperrin zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0); 15781544Seschrock spa->spa_load_state = SPA_LOAD_NONE; 15791544Seschrock spa->spa_ena = 0; 15801544Seschrock 15811544Seschrock return (error); 1582789Sahrens } 1583789Sahrens 1584789Sahrens /* 1585789Sahrens * Pool Open/Import 1586789Sahrens * 1587789Sahrens * The import case is identical to an open except that the configuration is sent 1588789Sahrens * down from userland, instead of grabbed from the configuration cache. For the 1589789Sahrens * case of an open, the pool configuration will exist in the 15904451Seschrock * POOL_STATE_UNINITIALIZED state. 1591789Sahrens * 1592789Sahrens * The stats information (gen/count/ustats) is used to gather vdev statistics at 1593789Sahrens * the same time open the pool, without having to keep around the spa_t in some 1594789Sahrens * ambiguous state.
 */
/*
 * Common code for spa_open() and spa_get_stats(): look up the named pool,
 * loading it from its cached config if it is still POOL_STATE_UNINITIALIZED,
 * take a reference on it, and optionally generate its current config nvlist.
 * Returns 0 with *spapp held on success, or an errno (ENOENT, EBADF-derived
 * errors from spa_load(), ...) with *spapp set to NULL.
 */
static int
spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t **config)
{
	spa_t *spa;
	int error;
	int locked = B_FALSE;

	*spapp = NULL;

	/*
	 * As disgusting as this is, we need to support recursive calls to this
	 * function because dsl_dir_open() is called during spa_load(), and ends
	 * up calling spa_open() again. The real fix is to figure out how to
	 * avoid dsl_dir_open() calling this in the first place.
	 */
	if (mutex_owner(&spa_namespace_lock) != curthread) {
		mutex_enter(&spa_namespace_lock);
		locked = B_TRUE;
	}

	if ((spa = spa_lookup(pool)) == NULL) {
		if (locked)
			mutex_exit(&spa_namespace_lock);
		return (ENOENT);
	}
	if (spa->spa_state == POOL_STATE_UNINITIALIZED) {

		spa_activate(spa, spa_mode_global);

		error = spa_load(spa, spa->spa_config, SPA_LOAD_OPEN, B_FALSE);

		if (error == EBADF) {
			/*
			 * If vdev_validate() returns failure (indicated by
			 * EBADF), it indicates that one of the vdevs indicates
			 * that the pool has been exported or destroyed.  If
			 * this is the case, the config cache is out of sync and
			 * we should remove the pool from the namespace.
			 */
			spa_unload(spa);
			spa_deactivate(spa);
			spa_config_sync(spa, B_TRUE, B_TRUE);
			spa_remove(spa);
			if (locked)
				mutex_exit(&spa_namespace_lock);
			return (ENOENT);
		}

		if (error) {
			/*
			 * We can't open the pool, but we still have useful
			 * information: the state of each vdev after the
			 * attempted vdev_open().  Return this to the user.
			 */
			if (config != NULL && spa->spa_root_vdev != NULL)
				*config = spa_config_generate(spa, NULL, -1ULL,
				    B_TRUE);
			spa_unload(spa);
			spa_deactivate(spa);
			spa->spa_last_open_failed = B_TRUE;
			if (locked)
				mutex_exit(&spa_namespace_lock);
			*spapp = NULL;
			return (error);
		} else {
			spa->spa_last_open_failed = B_FALSE;
		}
	}

	spa_open_ref(spa, tag);

	if (locked)
		mutex_exit(&spa_namespace_lock);

	*spapp = spa;

	if (config != NULL)
		*config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);

	return (0);
}

/*
 * Open the named pool without generating a config nvlist.
 */
int
spa_open(const char *name, spa_t **spapp, void *tag)
{
	return (spa_open_common(name, spapp, tag, NULL));
}

/*
 * Lookup the given spa_t, incrementing the inject count in the process,
 * preventing it from being exported or destroyed.
 */
spa_t *
spa_inject_addref(char *name)
{
	spa_t *spa;

	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(name)) == NULL) {
		mutex_exit(&spa_namespace_lock);
		return (NULL);
	}
	spa->spa_inject_ref++;
	mutex_exit(&spa_namespace_lock);

	return (spa);
}

/*
 * Drop an injection reference obtained via spa_inject_addref().
 */
void
spa_inject_delref(spa_t *spa)
{
	mutex_enter(&spa_namespace_lock);
	spa->spa_inject_ref--;
	mutex_exit(&spa_namespace_lock);
}

/*
 * Add spares device information to the nvlist.
 */
static void
spa_add_spares(spa_t *spa, nvlist_t *config)
{
	nvlist_t **spares;
	uint_t i, nspares;
	nvlist_t *nvroot;
	uint64_t guid;
	vdev_stat_t *vs;
	uint_t vsc;
	uint64_t pool;

	/* Caller must hold the config lock as reader (see spa_get_stats()). */
	ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));

	if (spa->spa_spares.sav_count == 0)
		return;

	VERIFY(nvlist_lookup_nvlist(config,
	    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
	VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
	    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
	if (nspares != 0) {
		VERIFY(nvlist_add_nvlist_array(nvroot,
		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
		/*
		 * Re-lookup so 'spares' points at the copies now owned by
		 * 'nvroot'; the status updates below must land in the
		 * caller-visible config, not in sav_config.
		 */
		VERIFY(nvlist_lookup_nvlist_array(nvroot,
		    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);

		/*
		 * Go through and find any spares which have since been
		 * repurposed as an active spare.  If this is the case, update
		 * their status appropriately.
		 */
		for (i = 0; i < nspares; i++) {
			VERIFY(nvlist_lookup_uint64(spares[i],
			    ZPOOL_CONFIG_GUID, &guid) == 0);
			if (spa_spare_exists(guid, &pool, NULL) &&
			    pool != 0ULL) {
				VERIFY(nvlist_lookup_uint64_array(
				    spares[i], ZPOOL_CONFIG_STATS,
				    (uint64_t **)&vs, &vsc) == 0);
				vs->vs_state = VDEV_STATE_CANT_OPEN;
				vs->vs_aux = VDEV_AUX_SPARED;
			}
		}
	}
}

/*
 * Add l2cache device information to the nvlist, including vdev stats.
 */
static void
spa_add_l2cache(spa_t *spa, nvlist_t *config)
{
	nvlist_t **l2cache;
	uint_t i, j, nl2cache;
	nvlist_t *nvroot;
	uint64_t guid;
	vdev_t *vd;
	vdev_stat_t *vs;
	uint_t vsc;

	/* Caller must hold the config lock as reader (see spa_get_stats()). */
	ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));

	if (spa->spa_l2cache.sav_count == 0)
		return;

	VERIFY(nvlist_lookup_nvlist(config,
	    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
	VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config,
	    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
	if (nl2cache != 0) {
		VERIFY(nvlist_add_nvlist_array(nvroot,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
		VERIFY(nvlist_lookup_nvlist_array(nvroot,
		    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);

		/*
		 * Update level 2 cache device stats.
		 */

		for (i = 0; i < nl2cache; i++) {
			VERIFY(nvlist_lookup_uint64(l2cache[i],
			    ZPOOL_CONFIG_GUID, &guid) == 0);

			/* Match the config entry back to its in-core vdev. */
			vd = NULL;
			for (j = 0; j < spa->spa_l2cache.sav_count; j++) {
				if (guid ==
				    spa->spa_l2cache.sav_vdevs[j]->vdev_guid) {
					vd = spa->spa_l2cache.sav_vdevs[j];
					break;
				}
			}
			ASSERT(vd != NULL);

			VERIFY(nvlist_lookup_uint64_array(l2cache[i],
			    ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0);
			vdev_get_stats(vd, vs);
		}
	}
}

/*
 * Open the named pool and generate its config nvlist, augmented with the
 * error-log size, suspension state, and spare/l2cache device status.  Also
 * copies the pool's alternate root into 'altroot' (even for faulted pools).
 */
int
spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen)
{
	int error;
	spa_t *spa;

	*config = NULL;
	error = spa_open_common(name, &spa, FTAG, config);

	if (spa != NULL) {
		/*
		 * This still leaves a window of inconsistency where the spares
		 * or l2cache devices could change and the config would be
		 * self-inconsistent.
		 */
		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);

		if (*config != NULL) {
			VERIFY(nvlist_add_uint64(*config,
			    ZPOOL_CONFIG_ERRCOUNT,
			    spa_get_errlog_size(spa)) == 0);

			if (spa_suspended(spa))
				VERIFY(nvlist_add_uint64(*config,
				    ZPOOL_CONFIG_SUSPENDED,
				    spa->spa_failmode) == 0);

			spa_add_spares(spa, *config);
			spa_add_l2cache(spa, *config);
		}
	}

	/*
	 * We want to get the alternate root even for faulted pools, so we cheat
	 * and call spa_lookup() directly.
	 */
	if (altroot) {
		if (spa == NULL) {
			mutex_enter(&spa_namespace_lock);
			spa = spa_lookup(name);
			if (spa)
				spa_altroot(spa, altroot, buflen);
			else
				altroot[0] = '\0';
			spa = NULL;
			mutex_exit(&spa_namespace_lock);
		} else {
			spa_altroot(spa, altroot, buflen);
		}
	}

	if (spa != NULL) {
		spa_config_exit(spa, SCL_CONFIG, FTAG);
		spa_close(spa, FTAG);
	}

	return (error);
}

/*
 * Validate that the auxiliary device array is well formed.  We must have an
We must have an 18765450Sbrendan * array of nvlists, each which describes a valid leaf vdev. If this is an 18775450Sbrendan * import (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be 18785450Sbrendan * specified, as long as they are well-formed. 18792082Seschrock */ 18802082Seschrock static int 18815450Sbrendan spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode, 18825450Sbrendan spa_aux_vdev_t *sav, const char *config, uint64_t version, 18835450Sbrendan vdev_labeltype_t label) 18842082Seschrock { 18855450Sbrendan nvlist_t **dev; 18865450Sbrendan uint_t i, ndev; 18872082Seschrock vdev_t *vd; 18882082Seschrock int error; 18892082Seschrock 18907754SJeff.Bonwick@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 18917754SJeff.Bonwick@Sun.COM 18922082Seschrock /* 18935450Sbrendan * It's acceptable to have no devs specified. 18942082Seschrock */ 18955450Sbrendan if (nvlist_lookup_nvlist_array(nvroot, config, &dev, &ndev) != 0) 18962082Seschrock return (0); 18972082Seschrock 18985450Sbrendan if (ndev == 0) 18992082Seschrock return (EINVAL); 19002082Seschrock 19012082Seschrock /* 19025450Sbrendan * Make sure the pool is formatted with a version that supports this 19035450Sbrendan * device type. 19042082Seschrock */ 19055450Sbrendan if (spa_version(spa) < version) 19062082Seschrock return (ENOTSUP); 19072082Seschrock 19083377Seschrock /* 19095450Sbrendan * Set the pending device list so we correctly handle device in-use 19103377Seschrock * checking. 
19113377Seschrock */ 19125450Sbrendan sav->sav_pending = dev; 19135450Sbrendan sav->sav_npending = ndev; 19145450Sbrendan 19155450Sbrendan for (i = 0; i < ndev; i++) { 19165450Sbrendan if ((error = spa_config_parse(spa, &vd, dev[i], NULL, 0, 19172082Seschrock mode)) != 0) 19183377Seschrock goto out; 19192082Seschrock 19202082Seschrock if (!vd->vdev_ops->vdev_op_leaf) { 19212082Seschrock vdev_free(vd); 19223377Seschrock error = EINVAL; 19233377Seschrock goto out; 19242082Seschrock } 19252082Seschrock 19265450Sbrendan /* 19277754SJeff.Bonwick@Sun.COM * The L2ARC currently only supports disk devices in 19287754SJeff.Bonwick@Sun.COM * kernel context. For user-level testing, we allow it. 19295450Sbrendan */ 19307754SJeff.Bonwick@Sun.COM #ifdef _KERNEL 19315450Sbrendan if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) && 19325450Sbrendan strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) { 19335450Sbrendan error = ENOTBLK; 19345450Sbrendan goto out; 19355450Sbrendan } 19367754SJeff.Bonwick@Sun.COM #endif 19372082Seschrock vd->vdev_top = vd; 19383377Seschrock 19393377Seschrock if ((error = vdev_open(vd)) == 0 && 19405450Sbrendan (error = vdev_label_init(vd, crtxg, label)) == 0) { 19415450Sbrendan VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID, 19423377Seschrock vd->vdev_guid) == 0); 19432082Seschrock } 19442082Seschrock 19452082Seschrock vdev_free(vd); 19463377Seschrock 19475450Sbrendan if (error && 19485450Sbrendan (mode != VDEV_ALLOC_SPARE && mode != VDEV_ALLOC_L2CACHE)) 19493377Seschrock goto out; 19503377Seschrock else 19513377Seschrock error = 0; 19522082Seschrock } 19532082Seschrock 19543377Seschrock out: 19555450Sbrendan sav->sav_pending = NULL; 19565450Sbrendan sav->sav_npending = 0; 19573377Seschrock return (error); 19582082Seschrock } 19592082Seschrock 19605450Sbrendan static int 19615450Sbrendan spa_validate_aux(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode) 19625450Sbrendan { 19635450Sbrendan int error; 19645450Sbrendan 
19657754SJeff.Bonwick@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 19667754SJeff.Bonwick@Sun.COM 19675450Sbrendan if ((error = spa_validate_aux_devs(spa, nvroot, crtxg, mode, 19685450Sbrendan &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES, 19695450Sbrendan VDEV_LABEL_SPARE)) != 0) { 19705450Sbrendan return (error); 19715450Sbrendan } 19725450Sbrendan 19735450Sbrendan return (spa_validate_aux_devs(spa, nvroot, crtxg, mode, 19745450Sbrendan &spa->spa_l2cache, ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE, 19755450Sbrendan VDEV_LABEL_L2CACHE)); 19765450Sbrendan } 19775450Sbrendan 19785450Sbrendan static void 19795450Sbrendan spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs, 19805450Sbrendan const char *config) 19815450Sbrendan { 19825450Sbrendan int i; 19835450Sbrendan 19845450Sbrendan if (sav->sav_config != NULL) { 19855450Sbrendan nvlist_t **olddevs; 19865450Sbrendan uint_t oldndevs; 19875450Sbrendan nvlist_t **newdevs; 19885450Sbrendan 19895450Sbrendan /* 19905450Sbrendan * Generate new dev list by concatentating with the 19915450Sbrendan * current dev list. 
19925450Sbrendan */ 19935450Sbrendan VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config, 19945450Sbrendan &olddevs, &oldndevs) == 0); 19955450Sbrendan 19965450Sbrendan newdevs = kmem_alloc(sizeof (void *) * 19975450Sbrendan (ndevs + oldndevs), KM_SLEEP); 19985450Sbrendan for (i = 0; i < oldndevs; i++) 19995450Sbrendan VERIFY(nvlist_dup(olddevs[i], &newdevs[i], 20005450Sbrendan KM_SLEEP) == 0); 20015450Sbrendan for (i = 0; i < ndevs; i++) 20025450Sbrendan VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs], 20035450Sbrendan KM_SLEEP) == 0); 20045450Sbrendan 20055450Sbrendan VERIFY(nvlist_remove(sav->sav_config, config, 20065450Sbrendan DATA_TYPE_NVLIST_ARRAY) == 0); 20075450Sbrendan 20085450Sbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, 20095450Sbrendan config, newdevs, ndevs + oldndevs) == 0); 20105450Sbrendan for (i = 0; i < oldndevs + ndevs; i++) 20115450Sbrendan nvlist_free(newdevs[i]); 20125450Sbrendan kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *)); 20135450Sbrendan } else { 20145450Sbrendan /* 20155450Sbrendan * Generate a new dev list. 
20165450Sbrendan */ 20175450Sbrendan VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME, 20185450Sbrendan KM_SLEEP) == 0); 20195450Sbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, config, 20205450Sbrendan devs, ndevs) == 0); 20215450Sbrendan } 20225450Sbrendan } 20235450Sbrendan 20245450Sbrendan /* 20255450Sbrendan * Stop and drop level 2 ARC devices 20265450Sbrendan */ 20275450Sbrendan void 20285450Sbrendan spa_l2cache_drop(spa_t *spa) 20295450Sbrendan { 20305450Sbrendan vdev_t *vd; 20315450Sbrendan int i; 20325450Sbrendan spa_aux_vdev_t *sav = &spa->spa_l2cache; 20335450Sbrendan 20345450Sbrendan for (i = 0; i < sav->sav_count; i++) { 20355450Sbrendan uint64_t pool; 20365450Sbrendan 20375450Sbrendan vd = sav->sav_vdevs[i]; 20385450Sbrendan ASSERT(vd != NULL); 20395450Sbrendan 20408241SJeff.Bonwick@Sun.COM if (spa_l2cache_exists(vd->vdev_guid, &pool) && 20418241SJeff.Bonwick@Sun.COM pool != 0ULL && l2arc_vdev_present(vd)) 20425450Sbrendan l2arc_remove_vdev(vd); 20435450Sbrendan if (vd->vdev_isl2cache) 20445450Sbrendan spa_l2cache_remove(vd); 20455450Sbrendan vdev_clear_stats(vd); 20465450Sbrendan (void) vdev_close(vd); 20475450Sbrendan } 20485450Sbrendan } 20495450Sbrendan 20502082Seschrock /* 2051789Sahrens * Pool Creation 2052789Sahrens */ 2053789Sahrens int 20545094Slling spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, 20557184Stimh const char *history_str, nvlist_t *zplprops) 2056789Sahrens { 2057789Sahrens spa_t *spa; 20585094Slling char *altroot = NULL; 20591635Sbonwick vdev_t *rvd; 2060789Sahrens dsl_pool_t *dp; 2061789Sahrens dmu_tx_t *tx; 20622082Seschrock int c, error = 0; 2063789Sahrens uint64_t txg = TXG_INITIAL; 20645450Sbrendan nvlist_t **spares, **l2cache; 20655450Sbrendan uint_t nspares, nl2cache; 20665094Slling uint64_t version; 2067789Sahrens 2068789Sahrens /* 2069789Sahrens * If this pool already exists, return failure. 
2070789Sahrens */ 2071789Sahrens mutex_enter(&spa_namespace_lock); 2072789Sahrens if (spa_lookup(pool) != NULL) { 2073789Sahrens mutex_exit(&spa_namespace_lock); 2074789Sahrens return (EEXIST); 2075789Sahrens } 2076789Sahrens 2077789Sahrens /* 2078789Sahrens * Allocate a new spa_t structure. 2079789Sahrens */ 20805094Slling (void) nvlist_lookup_string(props, 20815094Slling zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); 20821635Sbonwick spa = spa_add(pool, altroot); 20838241SJeff.Bonwick@Sun.COM spa_activate(spa, spa_mode_global); 2084789Sahrens 2085789Sahrens spa->spa_uberblock.ub_txg = txg - 1; 20865094Slling 20875094Slling if (props && (error = spa_prop_validate(spa, props))) { 20885094Slling spa_unload(spa); 20895094Slling spa_deactivate(spa); 20905094Slling spa_remove(spa); 20916643Seschrock mutex_exit(&spa_namespace_lock); 20925094Slling return (error); 20935094Slling } 20945094Slling 20955094Slling if (nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), 20965094Slling &version) != 0) 20975094Slling version = SPA_VERSION; 20985094Slling ASSERT(version <= SPA_VERSION); 20995094Slling spa->spa_uberblock.ub_version = version; 2100789Sahrens spa->spa_ubsync = spa->spa_uberblock; 2101789Sahrens 21021635Sbonwick /* 21039234SGeorge.Wilson@Sun.COM * Create "The Godfather" zio to hold all async IOs 21049234SGeorge.Wilson@Sun.COM */ 21059234SGeorge.Wilson@Sun.COM if (spa->spa_async_zio_root == NULL) 21069234SGeorge.Wilson@Sun.COM spa->spa_async_zio_root = zio_root(spa, NULL, NULL, 21079234SGeorge.Wilson@Sun.COM ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | 21089234SGeorge.Wilson@Sun.COM ZIO_FLAG_GODFATHER); 21099234SGeorge.Wilson@Sun.COM 21109234SGeorge.Wilson@Sun.COM /* 21111635Sbonwick * Create the root vdev. 
21121635Sbonwick */ 21137754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 21141635Sbonwick 21152082Seschrock error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD); 21162082Seschrock 21172082Seschrock ASSERT(error != 0 || rvd != NULL); 21182082Seschrock ASSERT(error != 0 || spa->spa_root_vdev == rvd); 21192082Seschrock 21205913Sperrin if (error == 0 && !zfs_allocatable_devs(nvroot)) 21211635Sbonwick error = EINVAL; 21222082Seschrock 21232082Seschrock if (error == 0 && 21242082Seschrock (error = vdev_create(rvd, txg, B_FALSE)) == 0 && 21255450Sbrendan (error = spa_validate_aux(spa, nvroot, txg, 21262082Seschrock VDEV_ALLOC_ADD)) == 0) { 21272082Seschrock for (c = 0; c < rvd->vdev_children; c++) 21282082Seschrock vdev_init(rvd->vdev_child[c], txg); 21292082Seschrock vdev_config_dirty(rvd); 21301635Sbonwick } 21311635Sbonwick 21327754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 2133789Sahrens 21342082Seschrock if (error != 0) { 2135789Sahrens spa_unload(spa); 2136789Sahrens spa_deactivate(spa); 2137789Sahrens spa_remove(spa); 2138789Sahrens mutex_exit(&spa_namespace_lock); 2139789Sahrens return (error); 2140789Sahrens } 2141789Sahrens 21422082Seschrock /* 21432082Seschrock * Get the list of spares, if specified. 
21442082Seschrock */ 21452082Seschrock if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 21462082Seschrock &spares, &nspares) == 0) { 21475450Sbrendan VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME, 21482082Seschrock KM_SLEEP) == 0); 21495450Sbrendan VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, 21502082Seschrock ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 21517754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 21522082Seschrock spa_load_spares(spa); 21537754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 21545450Sbrendan spa->spa_spares.sav_sync = B_TRUE; 21555450Sbrendan } 21565450Sbrendan 21575450Sbrendan /* 21585450Sbrendan * Get the list of level 2 cache devices, if specified. 21595450Sbrendan */ 21605450Sbrendan if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, 21615450Sbrendan &l2cache, &nl2cache) == 0) { 21625450Sbrendan VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, 21635450Sbrendan NV_UNIQUE_NAME, KM_SLEEP) == 0); 21645450Sbrendan VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, 21655450Sbrendan ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); 21667754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 21675450Sbrendan spa_load_l2cache(spa); 21687754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 21695450Sbrendan spa->spa_l2cache.sav_sync = B_TRUE; 21702082Seschrock } 21712082Seschrock 21727184Stimh spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg); 2173789Sahrens spa->spa_meta_objset = dp->dp_meta_objset; 2174789Sahrens 2175789Sahrens tx = dmu_tx_create_assigned(dp, txg); 2176789Sahrens 2177789Sahrens /* 2178789Sahrens * Create the pool config object. 
2179789Sahrens */ 2180789Sahrens spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset, 21817497STim.Haley@Sun.COM DMU_OT_PACKED_NVLIST, SPA_CONFIG_BLOCKSIZE, 2182789Sahrens DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx); 2183789Sahrens 21841544Seschrock if (zap_add(spa->spa_meta_objset, 2185789Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, 21861544Seschrock sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) { 21871544Seschrock cmn_err(CE_PANIC, "failed to add pool config"); 21881544Seschrock } 2189789Sahrens 21905094Slling /* Newly created pools with the right version are always deflated. */ 21915094Slling if (version >= SPA_VERSION_RAIDZ_DEFLATE) { 21925094Slling spa->spa_deflate = TRUE; 21935094Slling if (zap_add(spa->spa_meta_objset, 21945094Slling DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 21955094Slling sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) { 21965094Slling cmn_err(CE_PANIC, "failed to add deflate"); 21975094Slling } 21982082Seschrock } 21992082Seschrock 2200789Sahrens /* 2201789Sahrens * Create the deferred-free bplist object. Turn off compression 2202789Sahrens * because sync-to-convergence takes longer if the blocksize 2203789Sahrens * keeps changing. 2204789Sahrens */ 2205789Sahrens spa->spa_sync_bplist_obj = bplist_create(spa->spa_meta_objset, 2206789Sahrens 1 << 14, tx); 2207789Sahrens dmu_object_set_compress(spa->spa_meta_objset, spa->spa_sync_bplist_obj, 2208789Sahrens ZIO_COMPRESS_OFF, tx); 2209789Sahrens 22101544Seschrock if (zap_add(spa->spa_meta_objset, 2211789Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, 22121544Seschrock sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj, tx) != 0) { 22131544Seschrock cmn_err(CE_PANIC, "failed to add bplist"); 22141544Seschrock } 2215789Sahrens 22162926Sek110237 /* 22172926Sek110237 * Create the pool's history object. 
22182926Sek110237 */ 22195094Slling if (version >= SPA_VERSION_ZPOOL_HISTORY) 22205094Slling spa_history_create_obj(spa, tx); 22215094Slling 22225094Slling /* 22235094Slling * Set pool properties. 22245094Slling */ 22255094Slling spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS); 22265094Slling spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); 22275329Sgw25295 spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE); 22288525SEric.Schrock@Sun.COM if (props != NULL) { 22298525SEric.Schrock@Sun.COM spa_configfile_set(spa, props, B_FALSE); 22305094Slling spa_sync_props(spa, props, CRED(), tx); 22318525SEric.Schrock@Sun.COM } 22322926Sek110237 2233789Sahrens dmu_tx_commit(tx); 2234789Sahrens 2235789Sahrens spa->spa_sync_on = B_TRUE; 2236789Sahrens txg_sync_start(spa->spa_dsl_pool); 2237789Sahrens 2238789Sahrens /* 2239789Sahrens * We explicitly wait for the first transaction to complete so that our 2240789Sahrens * bean counters are appropriately updated. 2241789Sahrens */ 2242789Sahrens txg_wait_synced(spa->spa_dsl_pool, txg); 2243789Sahrens 22446643Seschrock spa_config_sync(spa, B_FALSE, B_TRUE); 2245789Sahrens 22465094Slling if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL) 22474715Sek110237 (void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE); 22484715Sek110237 22498667SGeorge.Wilson@Sun.COM spa->spa_minref = refcount_count(&spa->spa_refcount); 22508667SGeorge.Wilson@Sun.COM 2251789Sahrens mutex_exit(&spa_namespace_lock); 2252789Sahrens 2253789Sahrens return (0); 2254789Sahrens } 2255789Sahrens 22566423Sgw25295 #ifdef _KERNEL 22576423Sgw25295 /* 22586423Sgw25295 * Build a "root" vdev for a top level vdev read in from a rootpool 22596423Sgw25295 * device label. 
22606423Sgw25295 */ 22616423Sgw25295 static void 22626423Sgw25295 spa_build_rootpool_config(nvlist_t *config) 22636423Sgw25295 { 22646423Sgw25295 nvlist_t *nvtop, *nvroot; 22656423Sgw25295 uint64_t pgid; 22666423Sgw25295 22676423Sgw25295 /* 22686423Sgw25295 * Add this top-level vdev to the child array. 22696423Sgw25295 */ 22706423Sgw25295 VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvtop) 22716423Sgw25295 == 0); 22726423Sgw25295 VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pgid) 22736423Sgw25295 == 0); 22746423Sgw25295 22756423Sgw25295 /* 22766423Sgw25295 * Put this pool's top-level vdevs into a root vdev. 22776423Sgw25295 */ 22786423Sgw25295 VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); 22796423Sgw25295 VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) 22806423Sgw25295 == 0); 22816423Sgw25295 VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0); 22826423Sgw25295 VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0); 22836423Sgw25295 VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 22846423Sgw25295 &nvtop, 1) == 0); 22856423Sgw25295 22866423Sgw25295 /* 22876423Sgw25295 * Replace the existing vdev_tree with the new root vdev in 22886423Sgw25295 * this pool's configuration (remove the old, add the new). 22896423Sgw25295 */ 22906423Sgw25295 VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); 22916423Sgw25295 nvlist_free(nvroot); 22926423Sgw25295 } 22936423Sgw25295 22946423Sgw25295 /* 22956423Sgw25295 * Get the root pool information from the root disk, then import the root pool 22966423Sgw25295 * during the system boot up time. 
22976423Sgw25295 */ 22987539SLin.Ling@Sun.COM extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **); 22997147Staylor 23007147Staylor int 23017147Staylor spa_check_rootconf(char *devpath, char *devid, nvlist_t **bestconf, 23026423Sgw25295 uint64_t *besttxg) 23036423Sgw25295 { 23046423Sgw25295 nvlist_t *config; 23056423Sgw25295 uint64_t txg; 23067539SLin.Ling@Sun.COM int error; 23077539SLin.Ling@Sun.COM 23087539SLin.Ling@Sun.COM if (error = vdev_disk_read_rootlabel(devpath, devid, &config)) 23097539SLin.Ling@Sun.COM return (error); 23106423Sgw25295 23116423Sgw25295 VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) == 0); 23126423Sgw25295 23137147Staylor if (bestconf != NULL) 23146423Sgw25295 *bestconf = config; 23157539SLin.Ling@Sun.COM else 23167539SLin.Ling@Sun.COM nvlist_free(config); 23177147Staylor *besttxg = txg; 23187147Staylor return (0); 23196423Sgw25295 } 23206423Sgw25295 23216423Sgw25295 boolean_t 23226423Sgw25295 spa_rootdev_validate(nvlist_t *nv) 23236423Sgw25295 { 23246423Sgw25295 uint64_t ival; 23256423Sgw25295 23266423Sgw25295 if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &ival) == 0 || 23276423Sgw25295 nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED, &ival) == 0 || 23286423Sgw25295 nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED, &ival) == 0) 23296423Sgw25295 return (B_FALSE); 23306423Sgw25295 23316423Sgw25295 return (B_TRUE); 23326423Sgw25295 } 23336423Sgw25295 23347147Staylor 23357147Staylor /* 23367147Staylor * Given the boot device's physical path or devid, check if the device 23377147Staylor * is in a valid state. If so, return the configuration from the vdev 23387147Staylor * label. 
23397147Staylor */ 23407147Staylor int 23417147Staylor spa_get_rootconf(char *devpath, char *devid, nvlist_t **bestconf) 23427147Staylor { 23437147Staylor nvlist_t *conf = NULL; 23447147Staylor uint64_t txg = 0; 23457147Staylor nvlist_t *nvtop, **child; 23467147Staylor char *type; 23477147Staylor char *bootpath = NULL; 23487147Staylor uint_t children, c; 23497147Staylor char *tmp; 23507539SLin.Ling@Sun.COM int error; 23517147Staylor 23527147Staylor if (devpath && ((tmp = strchr(devpath, ' ')) != NULL)) 23537147Staylor *tmp = '\0'; 23547539SLin.Ling@Sun.COM if (error = spa_check_rootconf(devpath, devid, &conf, &txg)) { 23557147Staylor cmn_err(CE_NOTE, "error reading device label"); 23567539SLin.Ling@Sun.COM return (error); 23577147Staylor } 23587147Staylor if (txg == 0) { 23597147Staylor cmn_err(CE_NOTE, "this device is detached"); 23607147Staylor nvlist_free(conf); 23617147Staylor return (EINVAL); 23627147Staylor } 23637147Staylor 23647147Staylor VERIFY(nvlist_lookup_nvlist(conf, ZPOOL_CONFIG_VDEV_TREE, 23657147Staylor &nvtop) == 0); 23667147Staylor VERIFY(nvlist_lookup_string(nvtop, ZPOOL_CONFIG_TYPE, &type) == 0); 23677147Staylor 23687147Staylor if (strcmp(type, VDEV_TYPE_DISK) == 0) { 23697147Staylor if (spa_rootdev_validate(nvtop)) { 23707147Staylor goto out; 23717147Staylor } else { 23727147Staylor nvlist_free(conf); 23737147Staylor return (EINVAL); 23747147Staylor } 23757147Staylor } 23767147Staylor 23777147Staylor ASSERT(strcmp(type, VDEV_TYPE_MIRROR) == 0); 23787147Staylor 23797147Staylor VERIFY(nvlist_lookup_nvlist_array(nvtop, ZPOOL_CONFIG_CHILDREN, 23807147Staylor &child, &children) == 0); 23817147Staylor 23827147Staylor /* 23837147Staylor * Go thru vdevs in the mirror to see if the given device 23847147Staylor * has the most recent txg. Only the device with the most 23857147Staylor * recent txg has valid information and should be booted. 
23867147Staylor */ 23877147Staylor for (c = 0; c < children; c++) { 23887147Staylor char *cdevid, *cpath; 23897147Staylor uint64_t tmptxg; 23907147Staylor 23918242SLin.Ling@Sun.COM cpath = NULL; 23928242SLin.Ling@Sun.COM cdevid = NULL; 23937147Staylor if (nvlist_lookup_string(child[c], ZPOOL_CONFIG_PHYS_PATH, 23948242SLin.Ling@Sun.COM &cpath) != 0 && nvlist_lookup_string(child[c], 23958242SLin.Ling@Sun.COM ZPOOL_CONFIG_DEVID, &cdevid) != 0) 23967147Staylor return (EINVAL); 23977687SLin.Ling@Sun.COM if ((spa_check_rootconf(cpath, cdevid, NULL, 23987687SLin.Ling@Sun.COM &tmptxg) == 0) && (tmptxg > txg)) { 23997147Staylor txg = tmptxg; 24007147Staylor VERIFY(nvlist_lookup_string(child[c], 24017147Staylor ZPOOL_CONFIG_PATH, &bootpath) == 0); 24027147Staylor } 24037147Staylor } 24047147Staylor 24057147Staylor /* Does the best device match the one we've booted from? */ 24067147Staylor if (bootpath) { 24077147Staylor cmn_err(CE_NOTE, "try booting from '%s'", bootpath); 24087147Staylor return (EINVAL); 24097147Staylor } 24107147Staylor out: 24117147Staylor *bestconf = conf; 24127147Staylor return (0); 24137147Staylor } 24147147Staylor 24156423Sgw25295 /* 24166423Sgw25295 * Import a root pool. 24176423Sgw25295 * 24187147Staylor * For x86. devpath_list will consist of devid and/or physpath name of 24197147Staylor * the vdev (e.g. "id1,sd@SSEAGATE..." or "/pci@1f,0/ide@d/disk@0,0:a"). 24207147Staylor * The GRUB "findroot" command will return the vdev we should boot. 24216423Sgw25295 * 24226423Sgw25295 * For Sparc, devpath_list consists the physpath name of the booting device 24236423Sgw25295 * no matter the rootpool is a single device pool or a mirrored pool. 24246423Sgw25295 * e.g. 
 * "/pci@1f,0/ide@d/disk@0,0:a"
 */
int
spa_import_rootpool(char *devpath, char *devid)
{
	nvlist_t *conf = NULL;
	char *pname;
	int error;
	spa_t *spa;

	/*
	 * Get the vdev pathname and configuration from the most
	 * recently updated vdev (highest txg).
	 */
	if (error = spa_get_rootconf(devpath, devid, &conf))
		goto msg_out;

	/*
	 * Add type "root" vdev to the config.
	 */
	spa_build_rootpool_config(conf);

	VERIFY(nvlist_lookup_string(conf, ZPOOL_CONFIG_POOL_NAME, &pname) == 0);

	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(pname)) != NULL) {
		/*
		 * Remove the existing root pool from the namespace so that we
		 * can replace it with the correct config we just read in.
		 */
		spa_remove(spa);
	}

	/* NULL altroot: the root pool is mounted at the real root. */
	spa = spa_add(pname, NULL);

	spa->spa_is_root = B_TRUE;
	/* spa_config owns its own copy; 'conf' is freed below. */
	VERIFY(nvlist_dup(conf, &spa->spa_config, 0) == 0);
	mutex_exit(&spa_namespace_lock);

	nvlist_free(conf);
	return (0);

msg_out:
	cmn_err(CE_NOTE, "\n"
	    " *************************************************** \n"
	    " * This device is not bootable! * \n"
	    " * It is either offlined or detached or faulted. * \n"
	    " * Please try to boot from a different device. * \n"
	    " *************************************************** ");

	return (error);
}
#endif

/*
 * Take a pool and insert it into the namespace as if it had been loaded at
 * boot.  The config is trusted verbatim: the pool is not opened or
 * validated here, only registered and written to the config cache.
 */
int
spa_import_verbatim(const char *pool, nvlist_t *config, nvlist_t *props)
{
	spa_t *spa;
	char *altroot = NULL;

	mutex_enter(&spa_namespace_lock);
	if (spa_lookup(pool) != NULL) {
		mutex_exit(&spa_namespace_lock);
		return (EEXIST);
	}

	/*
	 * 'props' may be NULL; the lookup then simply fails and altroot
	 * stays NULL.
	 */
	(void) nvlist_lookup_string(props,
	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
	spa = spa_add(pool, altroot);

	VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0);

	if (props != NULL)
		spa_configfile_set(spa, props, B_FALSE);

	spa_config_sync(spa, B_FALSE, B_TRUE);

	mutex_exit(&spa_namespace_lock);

	return (0);
}

/*
 * Import a non-root pool into
 * the system.
 */
int
spa_import(const char *pool, nvlist_t *config, nvlist_t *props)
{
	spa_t *spa;
	char *altroot = NULL;
	int error;
	nvlist_t *nvroot;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;

	/*
	 * If a pool with this name exists, return failure.
	 */
	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(pool)) != NULL) {
		mutex_exit(&spa_namespace_lock);
		return (EEXIST);
	}

	/*
	 * Create and initialize the spa structure.
	 */
	(void) nvlist_lookup_string(props,
	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
	spa = spa_add(pool, altroot);
	spa_activate(spa, spa_mode_global);

	/*
	 * Pass off the heavy lifting to spa_load().  Pass TRUE for mosconfig
	 * because the user-supplied config is actually the one to trust when
	 * doing an import.
	 */
	error = spa_load(spa, config, SPA_LOAD_IMPORT, B_TRUE);

	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	/*
	 * Toss any existing sparelist, as it doesn't have any validity
	 * anymore, and conflicts with spa_has_spare().
	 */
	if (spa->spa_spares.sav_config) {
		nvlist_free(spa->spa_spares.sav_config);
		spa->spa_spares.sav_config = NULL;
		spa_load_spares(spa);
	}
	if (spa->spa_l2cache.sav_config) {
		nvlist_free(spa->spa_l2cache.sav_config);
		spa->spa_l2cache.sav_config = NULL;
		spa_load_l2cache(spa);
	}

	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) == 0);
	/* Only validate aux devices if the load itself succeeded. */
	if (error == 0)
		error = spa_validate_aux(spa, nvroot, -1ULL,
		    VDEV_ALLOC_SPARE);
	if (error == 0)
		error = spa_validate_aux(spa, nvroot, -1ULL,
		    VDEV_ALLOC_L2CACHE);
	spa_config_exit(spa, SCL_ALL, FTAG);

	if (props != NULL)
		spa_configfile_set(spa, props, B_FALSE);

	/*
	 * On any failure (load, aux validation, or property set), tear the
	 * half-imported pool back down before returning.
	 */
	if (error != 0 || (props && spa_writeable(spa) &&
	    (error = spa_prop_set(spa, props)))) {
		spa_unload(spa);
		spa_deactivate(spa);
		spa_remove(spa);
		mutex_exit(&spa_namespace_lock);
		return (error);
	}

	/*
	 * Override any spares and level 2 cache devices as specified by
	 * the user, as these may have correct device names/devids, etc.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) == 0) {
		if (spa->spa_spares.sav_config)
			VERIFY(nvlist_remove(spa->spa_spares.sav_config,
			    ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0);
		else
			VERIFY(nvlist_alloc(&spa->spa_spares.sav_config,
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_spares(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_spares.sav_sync = B_TRUE;
	}
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
	    &l2cache, &nl2cache) == 0) {
		if (spa->spa_l2cache.sav_config)
			VERIFY(nvlist_remove(spa->spa_l2cache.sav_config,
			    ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0);
		else
			VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_l2cache(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	if (spa_writeable(spa)) {
		/*
		 * Update the config cache to include the newly-imported pool.
		 */
		spa_config_update_common(spa, SPA_CONFIG_UPDATE_POOL, B_FALSE);
	}

	mutex_exit(&spa_namespace_lock);

	return (0);
}


/*
 * This (illegal) pool name is used when temporarily importing a spa_t in order
 * to get the vdev stats associated with the imported devices.
 */
#define	TRYIMPORT_NAME	"$import"

nvlist_t *
spa_tryimport(nvlist_t *tryconfig)
{
	nvlist_t *config = NULL;
	char *poolname;
	spa_t *spa;
	uint64_t state;
	int error;

	if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname))
		return (NULL);

	if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state))
		return (NULL);

	/*
	 * Create and initialize the spa structure.  The '$' in
	 * TRYIMPORT_NAME keeps it from colliding with any legal pool name.
	 */
	mutex_enter(&spa_namespace_lock);
	spa = spa_add(TRYIMPORT_NAME, NULL);
	/* Read-only activation: a tryimport must never write to the pool. */
	spa_activate(spa, FREAD);

	/*
	 * Pass off the heavy lifting to spa_load().
	 * Pass TRUE for mosconfig because the user-supplied config
	 * is actually the one to trust when doing an import.
	 */
	error = spa_load(spa, tryconfig, SPA_LOAD_TRYIMPORT, B_TRUE);

	/*
	 * If 'tryconfig' was at least parsable, return the current config.
	 */
	if (spa->spa_root_vdev != NULL) {
		config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
		VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
		    poolname) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
		    state) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP,
		    spa->spa_uberblock.ub_timestamp) == 0);

		/*
		 * If the bootfs property exists on this pool then we
		 * copy it out so that external consumers can tell which
		 * pools are bootable.  EEXIST is tolerated here; presumably
		 * it means the pool already exists by another name --
		 * NOTE(review): confirm against spa_load() semantics.
		 */
		if ((!error || error == EEXIST) && spa->spa_bootfs) {
			char *tmpname = kmem_alloc(MAXPATHLEN, KM_SLEEP);

			/*
			 * We have to play games with the name since the
			 * pool was opened as TRYIMPORT_NAME.
			 */
			if (dsl_dsobj_to_dsname(spa_name(spa),
			    spa->spa_bootfs, tmpname) == 0) {
				char *cp;
				char *dsname = kmem_alloc(MAXPATHLEN, KM_SLEEP);

				/* Swap "$import" for the real pool name. */
				cp = strchr(tmpname, '/');
				if (cp == NULL) {
					(void) strlcpy(dsname, tmpname,
					    MAXPATHLEN);
				} else {
					(void) snprintf(dsname, MAXPATHLEN,
					    "%s/%s", poolname, ++cp);
				}
				VERIFY(nvlist_add_string(config,
				    ZPOOL_CONFIG_BOOTFS, dsname) == 0);
				kmem_free(dsname, MAXPATHLEN);
			}
			kmem_free(tmpname, MAXPATHLEN);
		}

		/*
		 * Add the list of hot spares and level 2 cache devices.
		 */
		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
		spa_add_spares(spa, config);
		spa_add_l2cache(spa, config);
		spa_config_exit(spa, SCL_CONFIG, FTAG);
	}

	/* The temporary spa is always torn down before returning. */
	spa_unload(spa);
	spa_deactivate(spa);
	spa_remove(spa);
	mutex_exit(&spa_namespace_lock);

	return (config);
}

/*
 * Pool export/destroy
 *
 * The act of destroying or exporting a pool is very simple. We make sure there
 * is no more pending I/O and any references to the pool are gone. Then, we
 * update the pool state and sync all the labels to disk, removing the
 * configuration from the cache afterwards. If the 'hardforce' flag is set, then
 * we don't sync the labels or remove the configuration cache.
 */
static int
spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
    boolean_t force, boolean_t hardforce)
{
	spa_t *spa;

	if (oldconfig)
		*oldconfig = NULL;

	if (!(spa_mode_global & FWRITE))
		return (EROFS);

	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(pool)) == NULL) {
		mutex_exit(&spa_namespace_lock);
		return (ENOENT);
	}

	/*
	 * Put a hold on the pool, drop the namespace lock, stop async tasks,
	 * reacquire the namespace lock, and see if we can export.
	 */
	spa_open_ref(spa, FTAG);
	mutex_exit(&spa_namespace_lock);
	spa_async_suspend(spa);
	mutex_enter(&spa_namespace_lock);
	spa_close(spa, FTAG);

	/*
	 * The pool will be in core if it's openable,
	 * in which case we can modify its state.
	 */
	if (spa->spa_state != POOL_STATE_UNINITIALIZED && spa->spa_sync_on) {
		/*
		 * Objsets may be open only because they're dirty, so we
		 * have to force it to sync before checking spa_refcnt.
		 */
		txg_wait_synced(spa->spa_dsl_pool, 0);

		/*
		 * A pool cannot be exported or destroyed if there are active
		 * references. If we are resetting a pool, allow references by
		 * fault injection handlers.
		 */
		if (!spa_refcount_zero(spa) ||
		    (spa->spa_inject_ref != 0 &&
		    new_state != POOL_STATE_UNINITIALIZED)) {
			spa_async_resume(spa);
			mutex_exit(&spa_namespace_lock);
			return (EBUSY);
		}

		/*
		 * A pool cannot be exported if it has an active shared spare.
		 * This is to prevent other pools stealing the active spare
		 * from an exported pool. At user's own will, such pool can
		 * be forcedly exported.
		 */
		if (!force && new_state == POOL_STATE_EXPORTED &&
		    spa_has_active_shared_spare(spa)) {
			spa_async_resume(spa);
			mutex_exit(&spa_namespace_lock);
			return (EXDEV);
		}

		/*
		 * We want this to be reflected on every label,
		 * so mark them all dirty.  spa_unload() will do the
		 * final sync that pushes these changes out.
		 */
		if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) {
			spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
			spa->spa_state = new_state;
			spa->spa_final_txg = spa_last_synced_txg(spa) + 1;
			vdev_config_dirty(spa->spa_root_vdev);
			spa_config_exit(spa, SCL_ALL, FTAG);
		}
	}

	spa_event_notify(spa, NULL, ESC_ZFS_POOL_DESTROY);

	if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
		spa_unload(spa);
		spa_deactivate(spa);
	}

	/* Hand the caller a copy of the final config, if requested. */
	if (oldconfig && spa->spa_config)
		VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0);

	if (new_state != POOL_STATE_UNINITIALIZED) {
		if (!hardforce)
			spa_config_sync(spa, B_TRUE, B_TRUE);
		spa_remove(spa);
	}
	mutex_exit(&spa_namespace_lock);

	return (0);
}

/*
 * Destroy a storage pool.
 */
int
spa_destroy(char *pool)
{
	return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL,
	    B_FALSE, B_FALSE));
}

/*
 * Export a storage pool.
 */
int
spa_export(char *pool, nvlist_t **oldconfig, boolean_t force,
    boolean_t hardforce)
{
	/*
	 * 'oldconfig', if non-NULL, receives a copy of the pool's final
	 * config.  'force' exports even with an active shared spare;
	 * 'hardforce' additionally skips the label sync and config-cache
	 * update (see spa_export_common()).
	 */
	return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig,
	    force, hardforce));
}

/*
 * Similar to spa_export(), this unloads the spa_t without actually removing it
 * from the namespace in any way.
 */
int
spa_reset(char *pool)
{
	return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL,
	    B_FALSE, B_FALSE));
}

/*
 * ==========================================================================
 * Device manipulation
 * ==========================================================================
 */

/*
 * Add a device to a storage pool.
 */
int
spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
{
	uint64_t txg;
	int error;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *vd, *tvd;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;

	txg = spa_vdev_enter(spa);

	if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0,
	    VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, NULL, txg, error));

	spa->spa_pending_vdev = vd;	/* spa_vdev_exit() will clear this */

	/* Spares and l2cache entries are optional in the caller's nvroot. */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
	    &nspares) != 0)
		nspares = 0;

	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache,
	    &nl2cache) != 0)
		nl2cache = 0;

	/* Reject a request that adds nothing at all. */
	if (vd->vdev_children == 0 && nspares == 0 && nl2cache == 0)
		return (spa_vdev_exit(spa, vd, txg, EINVAL));

	if (vd->vdev_children != 0 &&
	    (error = vdev_create(vd, txg, B_FALSE)) != 0)
		return (spa_vdev_exit(spa, vd, txg, error));

	/*
	 * We must validate the spares and l2cache devices after checking the
	 * children.  Otherwise, vdev_inuse() will blindly overwrite the spare.
	 */
	if ((error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, vd, txg, error));

	/*
	 * Transfer each new top-level vdev from vd to rvd.
	 */
	for (int c = 0; c < vd->vdev_children; c++) {
		tvd = vd->vdev_child[c];
		vdev_remove_child(vd, tvd);
		tvd->vdev_id = rvd->vdev_children;
		vdev_add_child(rvd, tvd);
		vdev_config_dirty(tvd);
	}

	if (nspares != 0) {
		spa_set_aux_vdevs(&spa->spa_spares, spares, nspares,
		    ZPOOL_CONFIG_SPARES);
		spa_load_spares(spa);
		spa->spa_spares.sav_sync = B_TRUE;
	}

	if (nl2cache != 0) {
		spa_set_aux_vdevs(&spa->spa_l2cache, l2cache, nl2cache,
		    ZPOOL_CONFIG_L2CACHE);
		spa_load_l2cache(spa);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	/*
	 * We have to be careful when adding new vdevs to an existing pool.
	 * If other threads start allocating from these vdevs before we
	 * sync the config cache, and we lose power, then upon reboot we may
	 * fail to open the pool because there are DVAs that the config cache
	 * can't translate.  Therefore, we first add the vdevs without
	 * initializing metaslabs; sync the config cache (via spa_vdev_exit());
	 * and then let spa_config_update() initialize the new metaslabs.
	 *
	 * spa_load() checks for added-but-not-initialized vdevs, so that
	 * if we lose power at any point in this sequence, the remaining
	 * steps will be completed the next time we load the pool.
	 */
	(void) spa_vdev_exit(spa, vd, txg, 0);

	mutex_enter(&spa_namespace_lock);
	spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
	mutex_exit(&spa_namespace_lock);

	return (0);
}

/*
 * Attach a device to a mirror.  The arguments are the path to any device
 * in the mirror, and the nvroot for the new device.  If the path specifies
 * a device that is not mirrored, we automatically insert the mirror vdev.
 *
 * If 'replacing' is specified, the new device is intended to replace the
 * existing device; in this case the two devices are made into their own
 * mirror using the 'replacing' vdev, which is functionally identical to
 * the mirror vdev (it actually reuses all the same ops) but has a few
 * extra rules: you can't attach to it after it's been created, and upon
 * completion of resilvering, the first disk (the one being replaced)
 * is automatically detached.
 */
/*
 * Attach a new leaf device ('nvroot', which must describe exactly one leaf
 * vdev) to the existing device identified by 'guid'.  If 'replacing' is set,
 * the new device replaces the old one (a 'replacing' or 'spare' vdev is
 * interposed); otherwise the old device is converted into (or joined to) a
 * mirror.
 *
 * Returns 0 on success, or an errno via spa_vdev_exit():
 *   ENODEV    - no device with the given guid
 *   ENOTSUP   - topology rule violated (non-leaf, spare/log rules, etc.)
 *   EINVAL    - nvroot is malformed or does not contain exactly one leaf
 *   EOVERFLOW - new device is too small
 *   EDOM      - new device has a stricter alignment (ashift) requirement
 *
 * All on-disk state is synchronized by the spa_vdev_enter()/spa_vdev_exit()
 * protocol; every error path must leave through spa_vdev_exit().
 */
int
spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
{
	uint64_t txg, open_txg;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd;
	vdev_ops_t *pvops;
	dmu_tx_t *tx;
	char *oldvdpath, *newvdpath;
	int newvd_isspare;
	int error;

	txg = spa_vdev_enter(spa);

	oldvd = spa_lookup_by_guid(spa, guid, B_FALSE);

	if (oldvd == NULL)
		return (spa_vdev_exit(spa, NULL, txg, ENODEV));

	/* Only leaf devices can be attached to / replaced. */
	if (!oldvd->vdev_ops->vdev_op_leaf)
		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));

	pvd = oldvd->vdev_parent;

	if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0,
	    VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, NULL, txg, EINVAL));

	/* The caller must supply exactly one new leaf device. */
	if (newrootvd->vdev_children != 1)
		return (spa_vdev_exit(spa, newrootvd, txg, EINVAL));

	newvd = newrootvd->vdev_child[0];

	if (!newvd->vdev_ops->vdev_op_leaf)
		return (spa_vdev_exit(spa, newrootvd, txg, EINVAL));

	if ((error = vdev_create(newrootvd, txg, replacing)) != 0)
		return (spa_vdev_exit(spa, newrootvd, txg, error));

	/*
	 * Spares can't replace logs
	 */
	if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare)
		return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));

	if (!replacing) {
		/*
		 * For attach, the only allowable parent is a mirror or the root
		 * vdev.
		 */
		if (pvd->vdev_ops != &vdev_mirror_ops &&
		    pvd->vdev_ops != &vdev_root_ops)
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));

		pvops = &vdev_mirror_ops;
	} else {
		/*
		 * Active hot spares can only be replaced by inactive hot
		 * spares.
		 */
		if (pvd->vdev_ops == &vdev_spare_ops &&
		    pvd->vdev_child[1] == oldvd &&
		    !spa_has_spare(spa, newvd->vdev_guid))
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));

		/*
		 * If the source is a hot spare, and the parent isn't already a
		 * spare, then we want to create a new hot spare.  Otherwise, we
		 * want to create a replacing vdev.  The user is not allowed to
		 * attach to a spared vdev child unless the 'isspare' state is
		 * the same (spare replaces spare, non-spare replaces
		 * non-spare).
		 */
		if (pvd->vdev_ops == &vdev_replacing_ops)
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
		else if (pvd->vdev_ops == &vdev_spare_ops &&
		    newvd->vdev_isspare != oldvd->vdev_isspare)
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
		else if (pvd->vdev_ops != &vdev_spare_ops &&
		    newvd->vdev_isspare)
			pvops = &vdev_spare_ops;
		else
			pvops = &vdev_replacing_ops;
	}

	/*
	 * Compare the new device size with the replaceable/attachable
	 * device size.
	 */
	if (newvd->vdev_psize < vdev_get_rsize(oldvd))
		return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW));

	/*
	 * The new device cannot have a higher alignment requirement
	 * than the top-level vdev.
	 */
	if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift)
		return (spa_vdev_exit(spa, newrootvd, txg, EDOM));

	/*
	 * If this is an in-place replacement, update oldvd's path and devid
	 * to make it distinguishable from newvd, and unopenable from now on.
	 *
	 * NOTE(review): this assumes both vdev_path pointers are non-NULL
	 * for leaf vdevs here -- TODO confirm against vdev allocation paths.
	 */
	if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) {
		spa_strfree(oldvd->vdev_path);
		/* + 5 covers the appended "/old" plus the NUL terminator. */
		oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5,
		    KM_SLEEP);
		(void) sprintf(oldvd->vdev_path, "%s/%s",
		    newvd->vdev_path, "old");
		if (oldvd->vdev_devid != NULL) {
			spa_strfree(oldvd->vdev_devid);
			oldvd->vdev_devid = NULL;
		}
	}

	/*
	 * If the parent is not a mirror, or if we're replacing, insert the new
	 * mirror/replacing/spare vdev above oldvd.
	 */
	if (pvd->vdev_ops != pvops)
		pvd = vdev_add_parent(oldvd, pvops);

	ASSERT(pvd->vdev_top->vdev_parent == rvd);
	ASSERT(pvd->vdev_ops == pvops);
	ASSERT(oldvd->vdev_parent == pvd);

	/*
	 * Extract the new device from its root and add it to pvd.
	 */
	vdev_remove_child(newrootvd, newvd);
	newvd->vdev_id = pvd->vdev_children;
	vdev_add_child(pvd, newvd);

	/*
	 * If newvd is smaller than oldvd, but larger than its rsize,
	 * the addition of newvd may have decreased our parent's asize.
	 */
	pvd->vdev_asize = MIN(pvd->vdev_asize, newvd->vdev_asize);

	tvd = newvd->vdev_top;
	ASSERT(pvd->vdev_top == tvd);
	ASSERT(tvd->vdev_parent == rvd);

	vdev_config_dirty(tvd);

	/*
	 * Set newvd's DTL to [TXG_INITIAL, open_txg].  It will propagate
	 * upward when spa_vdev_exit() calls vdev_dtl_reassess().
	 */
	open_txg = txg + TXG_CONCURRENT_STATES - 1;

	vdev_dtl_dirty(newvd, DTL_MISSING,
	    TXG_INITIAL, open_txg - TXG_INITIAL + 1);

	if (newvd->vdev_isspare) {
		spa_spare_activate(newvd);
		spa_event_notify(spa, newvd, ESC_ZFS_VDEV_SPARE);
	}

	/*
	 * Copy out the strings we need for the history log before
	 * spa_vdev_exit() drops the locks protecting the vdevs.
	 */
	oldvdpath = spa_strdup(oldvd->vdev_path);
	newvdpath = spa_strdup(newvd->vdev_path);
	newvd_isspare = newvd->vdev_isspare;

	/*
	 * Mark newvd's DTL dirty in this txg.
	 */
	vdev_dirty(tvd, VDD_DTL, newvd, txg);

	(void) spa_vdev_exit(spa, newrootvd, open_txg, 0);

	/*
	 * Record the attach in the pool history; best-effort -- if the
	 * tx cannot be assigned, the event is simply not logged.
	 */
	tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
	if (dmu_tx_assign(tx, TXG_WAIT) == 0) {
		spa_history_internal_log(LOG_POOL_VDEV_ATTACH, spa, tx,
		    CRED(), "%s vdev=%s %s vdev=%s",
		    replacing && newvd_isspare ? "spare in" :
		    replacing ? "replace" : "attach", newvdpath,
		    replacing ? "for" : "to", oldvdpath);
		dmu_tx_commit(tx);
	} else {
		dmu_tx_abort(tx);
	}

	spa_strfree(oldvdpath);
	spa_strfree(newvdpath);

	/*
	 * Kick off a resilver to update newvd.
	 */
	VERIFY3U(spa_scrub(spa, POOL_SCRUB_RESILVER), ==, 0);

	return (0);
}

/*
 * Detach a device from a mirror or replacing vdev.
 * If 'replace_done' is specified, only detach if the parent
 * is a replacing vdev.
 */
int
spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
{
	uint64_t txg;
	int error;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *vd, *pvd, *cvd, *tvd;
	boolean_t unspare = B_FALSE;
	uint64_t unspare_guid;
	size_t len;

	txg = spa_vdev_enter(spa);

	vd = spa_lookup_by_guid(spa, guid, B_FALSE);

	if (vd == NULL)
		return (spa_vdev_exit(spa, NULL, txg, ENODEV));

	/* Only leaf devices can be detached. */
	if (!vd->vdev_ops->vdev_op_leaf)
		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));

	pvd = vd->vdev_parent;

	/*
	 * If the parent/child relationship is not as expected, don't do it.
	 * Consider M(A,R(B,C)) -- that is, a mirror of A with a replacing
	 * vdev that's replacing B with C.  The user's intent in replacing
	 * is to go from M(A,B) to M(A,C).  If the user decides to cancel
	 * the replace by detaching C, the expected behavior is to end up
	 * M(A,B).  But suppose that right after deciding to detach C,
	 * the replacement of B completes.  We would have M(A,C), and then
	 * ask to detach C, which would leave us with just A -- not what
	 * the user wanted.  To prevent this, we make sure that the
	 * parent/child relationship hasn't changed -- in this example,
	 * that C's parent is still the replacing vdev R.
	 */
	if (pvd->vdev_guid != pguid && pguid != 0)
		return (spa_vdev_exit(spa, NULL, txg, EBUSY));

	/*
	 * If replace_done is specified, only remove this device if it's
	 * the first child of a replacing vdev.  For the 'spare' vdev, either
	 * disk can be removed.
	 */
	if (replace_done) {
		if (pvd->vdev_ops == &vdev_replacing_ops) {
			if (vd->vdev_id != 0)
				return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
		} else if (pvd->vdev_ops != &vdev_spare_ops) {
			return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
		}
	}

	ASSERT(pvd->vdev_ops != &vdev_spare_ops ||
	    spa_version(spa) >= SPA_VERSION_SPARES);

	/*
	 * Only mirror, replacing, and spare vdevs support detach.
	 */
	if (pvd->vdev_ops != &vdev_replacing_ops &&
	    pvd->vdev_ops != &vdev_mirror_ops &&
	    pvd->vdev_ops != &vdev_spare_ops)
		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));

	/*
	 * If this device has the only valid copy of some data,
	 * we cannot safely detach it.
	 */
	if (vdev_dtl_required(vd))
		return (spa_vdev_exit(spa, NULL, txg, EBUSY));

	ASSERT(pvd->vdev_children >= 2);

	/*
	 * If we are detaching the second disk from a replacing vdev, then
	 * check to see if we changed the original vdev's path to have "/old"
	 * at the end in spa_vdev_attach().  If so, undo that change now.
	 */
	if (pvd->vdev_ops == &vdev_replacing_ops && vd->vdev_id == 1 &&
	    pvd->vdev_child[0]->vdev_path != NULL &&
	    pvd->vdev_child[1]->vdev_path != NULL) {
		ASSERT(pvd->vdev_child[1] == vd);
		cvd = pvd->vdev_child[0];
		len = strlen(vd->vdev_path);
		if (strncmp(cvd->vdev_path, vd->vdev_path, len) == 0 &&
		    strcmp(cvd->vdev_path + len, "/old") == 0) {
			spa_strfree(cvd->vdev_path);
			cvd->vdev_path = spa_strdup(vd->vdev_path);
		}
	}

	/*
	 * If we are detaching the original disk from a spare, then it implies
	 * that the spare should become a real disk, and be removed from the
	 * active spare list for the pool.
	 */
	if (pvd->vdev_ops == &vdev_spare_ops &&
	    vd->vdev_id == 0 && pvd->vdev_child[1]->vdev_isspare)
		unspare = B_TRUE;

	/*
	 * Erase the disk labels so the disk can be used for other things.
	 * This must be done after all other error cases are handled,
	 * but before we disembowel vd (so we can still do I/O to it).
	 * But if we can't do it, don't treat the error as fatal --
	 * it may be that the unwritability of the disk is the reason
	 * it's being detached!
	 */
	error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE);

	/*
	 * Remove vd from its parent and compact the parent's children.
	 */
	vdev_remove_child(pvd, vd);
	vdev_compact_children(pvd);

	/*
	 * Remember one of the remaining children so we can get tvd below.
	 */
	cvd = pvd->vdev_child[0];

	/*
	 * If we need to remove the remaining child from the list of hot spares,
	 * do it now, marking the vdev as no longer a spare in the process.
	 * We must do this before vdev_remove_parent(), because that can
	 * change the GUID if it creates a new toplevel GUID.  For a similar
	 * reason, we must remove the spare now, in the same txg as the detach;
	 * otherwise someone could attach a new sibling, change the GUID, and
	 * the subsequent attempt to spa_vdev_remove(unspare_guid) would fail.
	 */
	if (unspare) {
		ASSERT(cvd->vdev_isspare);
		spa_spare_remove(cvd);
		unspare_guid = cvd->vdev_guid;
		(void) spa_vdev_remove(spa, unspare_guid, B_TRUE);
	}

	/*
	 * If the parent mirror/replacing vdev only has one child,
	 * the parent is no longer needed.  Remove it from the tree.
	 */
	if (pvd->vdev_children == 1)
		vdev_remove_parent(cvd);

	/*
	 * We don't set tvd until now because the parent we just removed
	 * may have been the previous top-level vdev.
	 */
	tvd = cvd->vdev_top;
	ASSERT(tvd->vdev_parent == rvd);

	/*
	 * Reevaluate the parent vdev state.
	 */
	vdev_propagate_state(cvd);

	/*
	 * If the device we just detached was smaller than the others, it may be
	 * possible to add metaslabs (i.e. grow the pool).  vdev_metaslab_init()
	 * can't fail because the existing metaslabs are already in core, so
	 * there's nothing to read from disk.
	 */
	VERIFY(vdev_metaslab_init(tvd, txg) == 0);

	vdev_config_dirty(tvd);

	/*
	 * Mark vd's DTL as dirty in this txg.  vdev_dtl_sync() will see that
	 * vd->vdev_detached is set and free vd's DTL object in syncing context.
	 * But first make sure we're not on any *other* txg's DTL list, to
	 * prevent vd from being accessed after it's freed.
	 */
	for (int t = 0; t < TXG_SIZE; t++)
		(void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t);
	vd->vdev_detached = B_TRUE;
	vdev_dirty(tvd, VDD_DTL, vd, txg);

	spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE);

	error = spa_vdev_exit(spa, vd, txg, 0);

	/*
	 * If this was the removal of the original device in a hot spare vdev,
	 * then we want to go through and remove the device from the hot spare
	 * list of every other pool.
	 *
	 * Note the lock juggling: the namespace lock is dropped around each
	 * spa_vdev_remove() call (which takes pool locks of its own), with
	 * an open ref pinning the pool across the gap.
	 */
	if (unspare) {
		spa_t *myspa = spa;
		spa = NULL;
		mutex_enter(&spa_namespace_lock);
		while ((spa = spa_next(spa)) != NULL) {
			if (spa->spa_state != POOL_STATE_ACTIVE)
				continue;
			if (spa == myspa)
				continue;
			spa_open_ref(spa, FTAG);
			mutex_exit(&spa_namespace_lock);
			(void) spa_vdev_remove(spa, unspare_guid, B_TRUE);
			mutex_enter(&spa_namespace_lock);
			spa_close(spa, FTAG);
		}
		mutex_exit(&spa_namespace_lock);
	}

	return (error);
}

/*
 * Find the nvlist in the 'nvpp' array whose ZPOOL_CONFIG_GUID matches
 * 'target_guid', or NULL if no entry matches.  Every entry is required
 * (VERIFY) to carry a guid.
 */
static nvlist_t *
spa_nvlist_lookup_by_guid(nvlist_t **nvpp, int count, uint64_t target_guid)
{
	for (int i = 0; i < count; i++) {
		uint64_t guid;

		VERIFY(nvlist_lookup_uint64(nvpp[i], ZPOOL_CONFIG_GUID,
		    &guid) == 0);

		if (guid == target_guid)
			return (nvpp[i]);
	}

	return (NULL);
}

/*
 * Replace the 'name' nvlist array in 'config' with a copy of 'dev' that
 * omits 'dev_to_remove'.  The caller guarantees dev_to_remove is present
 * in dev[] exactly once (so the new array has count - 1 entries).
 *
 * NOTE(review): when count == 1, newdev stays NULL and an empty (0-length)
 * array is written -- presumably nvlist_add_nvlist_array accepts that;
 * TODO confirm.
 */
static void
spa_vdev_remove_aux(nvlist_t *config, char *name, nvlist_t **dev, int count,
    nvlist_t *dev_to_remove)
{
	nvlist_t **newdev = NULL;

	if (count > 1)
		newdev = kmem_alloc((count - 1) * sizeof (void *), KM_SLEEP);

	for (int i = 0, j = 0; i < count; i++) {
		if (dev[i] == dev_to_remove)
			continue;
		VERIFY(nvlist_dup(dev[i], &newdev[j++], KM_SLEEP) == 0);
	}

	VERIFY(nvlist_remove(config, name, DATA_TYPE_NVLIST_ARRAY) == 0);
	VERIFY(nvlist_add_nvlist_array(config, name, newdev, count - 1) == 0);

	for (int i = 0; i < count - 1; i++)
		nvlist_free(newdev[i]);

	if (count > 1)
		kmem_free(newdev, (count - 1) * sizeof (void *));
}

/*
 * Remove a device from the pool.  Currently, this supports removing only hot
 * spares and level 2 ARC devices.
 */
int
spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
{
	vdev_t *vd;
	nvlist_t **spares, **l2cache, *nv;
	uint_t nspares, nl2cache;
	uint64_t txg = 0;
	int error = 0;
	/*
	 * If the caller already holds the namespace lock (e.g. the cross-pool
	 * unspare path in spa_vdev_detach()), skip the spa_vdev_enter()/exit()
	 * protocol to avoid self-deadlock.
	 */
	boolean_t locked = MUTEX_HELD(&spa_namespace_lock);

	if (!locked)
		txg = spa_vdev_enter(spa);

	vd = spa_lookup_by_guid(spa, guid, B_FALSE);

	if (spa->spa_spares.sav_vdevs != NULL &&
	    nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
	    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0 &&
	    (nv = spa_nvlist_lookup_by_guid(spares, nspares, guid)) != NULL) {
		/*
		 * Only remove the hot spare if it's not currently in use
		 * in this pool.
		 */
		if (vd == NULL || unspare) {
			spa_vdev_remove_aux(spa->spa_spares.sav_config,
			    ZPOOL_CONFIG_SPARES, spares, nspares, nv);
			spa_load_spares(spa);
			spa->spa_spares.sav_sync = B_TRUE;
		} else {
			error = EBUSY;
		}
	} else if (spa->spa_l2cache.sav_vdevs != NULL &&
	    nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config,
	    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0 &&
	    (nv = spa_nvlist_lookup_by_guid(l2cache, nl2cache, guid)) != NULL) {
		/*
		 * Cache devices can always be removed.
		 */
		spa_vdev_remove_aux(spa->spa_l2cache.sav_config,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache, nv);
		spa_load_l2cache(spa);
		spa->spa_l2cache.sav_sync = B_TRUE;
	} else if (vd != NULL) {
		/*
		 * Normal vdevs cannot be removed (yet).
		 */
		error = ENOTSUP;
	} else {
		/*
		 * There is no vdev of any kind with the specified guid.
		 */
		error = ENOENT;
	}

	if (!locked)
		return (spa_vdev_exit(spa, NULL, txg, error));

	return (error);
}

/*
 * Find any device that's done replacing, or a vdev marked 'unspare' that's
 * current spared, so we can detach it.  Walks the tree rooted at 'vd'
 * depth-first and returns the old (detachable) device, or NULL.
 */
static vdev_t *
spa_vdev_resilver_done_hunt(vdev_t *vd)
{
	vdev_t *newvd, *oldvd;
	int c;

	for (c = 0; c < vd->vdev_children; c++) {
		oldvd = spa_vdev_resilver_done_hunt(vd->vdev_child[c]);
		if (oldvd != NULL)
			return (oldvd);
	}

	/*
	 * Check for a completed replacement.
	 */
	if (vd->vdev_ops == &vdev_replacing_ops && vd->vdev_children == 2) {
		oldvd = vd->vdev_child[0];
		newvd = vd->vdev_child[1];

		if (vdev_dtl_empty(newvd, DTL_MISSING) &&
		    !vdev_dtl_required(oldvd))
			return (oldvd);
	}

	/*
	 * Check for a completed resilver with the 'unspare' flag set.
	 */
	if (vd->vdev_ops == &vdev_spare_ops && vd->vdev_children == 2) {
		newvd = vd->vdev_child[0];
		oldvd = vd->vdev_child[1];

		if (newvd->vdev_unspare &&
		    vdev_dtl_empty(newvd, DTL_MISSING) &&
		    !vdev_dtl_required(oldvd)) {
			newvd->vdev_unspare = 0;
			return (oldvd);
		}
	}

	return (NULL);
}

/*
 * Detach every device spa_vdev_resilver_done_hunt() says is finished.
 * SCL_ALL is dropped around each spa_vdev_detach() call (which must take
 * the locks itself) and re-acquired before re-scanning; detach failures
 * simply abort the sweep.  Called from the async thread after a resilver
 * completes.
 */
static void
spa_vdev_resilver_done(spa_t *spa)
{
	vdev_t *vd, *pvd, *ppvd;
	uint64_t guid, sguid, pguid, ppguid;

	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	while ((vd = spa_vdev_resilver_done_hunt(spa->spa_root_vdev)) != NULL) {
		pvd = vd->vdev_parent;
		ppvd = pvd->vdev_parent;
		guid = vd->vdev_guid;
		pguid = pvd->vdev_guid;
		ppguid = ppvd->vdev_guid;
		sguid = 0;
		/*
		 * If we have just finished replacing a hot spared device, then
		 * we need to detach the parent's first child (the original hot
		 * spare) as well.
		 */
		if (ppvd->vdev_ops == &vdev_spare_ops && pvd->vdev_id == 0) {
			ASSERT(pvd->vdev_ops == &vdev_replacing_ops);
			ASSERT(ppvd->vdev_children == 2);
			sguid = ppvd->vdev_child[1]->vdev_guid;
		}
		spa_config_exit(spa, SCL_ALL, FTAG);
		if (spa_vdev_detach(spa, guid, pguid, B_TRUE) != 0)
			return;
		if (sguid && spa_vdev_detach(spa, sguid, ppguid, B_TRUE) != 0)
			return;
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	}

	spa_config_exit(spa, SCL_ALL, FTAG);
}

/*
 * Update the stored path or FRU for this vdev.  Dirty the vdev configuration,
 * relying on spa_vdev_enter/exit() to synchronize the labels and cache.
35741354Seschrock */ 35751354Seschrock int 35769425SEric.Schrock@Sun.COM spa_vdev_set_common(spa_t *spa, uint64_t guid, const char *value, 35779425SEric.Schrock@Sun.COM boolean_t ispath) 35781354Seschrock { 35796643Seschrock vdev_t *vd; 35801354Seschrock uint64_t txg; 35811354Seschrock 35821354Seschrock txg = spa_vdev_enter(spa); 35831354Seschrock 35849425SEric.Schrock@Sun.COM if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) 35855450Sbrendan return (spa_vdev_exit(spa, NULL, txg, ENOENT)); 35861354Seschrock 35871585Sbonwick if (!vd->vdev_ops->vdev_op_leaf) 35881585Sbonwick return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 35891585Sbonwick 35909425SEric.Schrock@Sun.COM if (ispath) { 35919425SEric.Schrock@Sun.COM spa_strfree(vd->vdev_path); 35929425SEric.Schrock@Sun.COM vd->vdev_path = spa_strdup(value); 35939425SEric.Schrock@Sun.COM } else { 35949425SEric.Schrock@Sun.COM if (vd->vdev_fru != NULL) 35959425SEric.Schrock@Sun.COM spa_strfree(vd->vdev_fru); 35969425SEric.Schrock@Sun.COM vd->vdev_fru = spa_strdup(value); 35979425SEric.Schrock@Sun.COM } 35981354Seschrock 35991354Seschrock vdev_config_dirty(vd->vdev_top); 36001354Seschrock 36011354Seschrock return (spa_vdev_exit(spa, NULL, txg, 0)); 36021354Seschrock } 36031354Seschrock 36049425SEric.Schrock@Sun.COM int 36059425SEric.Schrock@Sun.COM spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath) 36069425SEric.Schrock@Sun.COM { 36079425SEric.Schrock@Sun.COM return (spa_vdev_set_common(spa, guid, newpath, B_TRUE)); 36089425SEric.Schrock@Sun.COM } 36099425SEric.Schrock@Sun.COM 36109425SEric.Schrock@Sun.COM int 36119425SEric.Schrock@Sun.COM spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru) 36129425SEric.Schrock@Sun.COM { 36139425SEric.Schrock@Sun.COM return (spa_vdev_set_common(spa, guid, newfru, B_FALSE)); 36149425SEric.Schrock@Sun.COM } 36159425SEric.Schrock@Sun.COM 36161354Seschrock /* 3617789Sahrens * ========================================================================== 
 * SPA Scrubbing
 * ==========================================================================
 */

/*
 * Start, restart, or cancel a scrub or resilver on 'spa'.
 * Returns 0 on success, ENOTSUP for an unknown type, EBUSY if a full
 * scrub is requested while a resilver is in progress, or an error from
 * the dsl_pool scrub routines.  Must be called without SCL_ALL held.
 */
int
spa_scrub(spa_t *spa, pool_scrub_type_t type)
{
	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);

	if ((uint_t)type >= POOL_SCRUB_TYPES)
		return (ENOTSUP);

	/*
	 * If a resilver was requested, but there is no DTL on a
	 * writeable leaf device, we have nothing to do.
	 */
	if (type == POOL_SCRUB_RESILVER &&
	    !vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) {
		spa_async_request(spa, SPA_ASYNC_RESILVER_DONE);
		return (0);
	}

	/* A full scrub cannot preempt an in-progress resilver. */
	if (type == POOL_SCRUB_EVERYTHING &&
	    spa->spa_dsl_pool->dp_scrub_func != SCRUB_FUNC_NONE &&
	    spa->spa_dsl_pool->dp_scrub_isresilver)
		return (EBUSY);

	if (type == POOL_SCRUB_EVERYTHING || type == POOL_SCRUB_RESILVER) {
		return (dsl_pool_scrub_clean(spa->spa_dsl_pool));
	} else if (type == POOL_SCRUB_NONE) {
		return (dsl_pool_scrub_cancel(spa->spa_dsl_pool));
	} else {
		return (EINVAL);
	}
}

/*
 * ==========================================================================
 * SPA async task processing
 * ==========================================================================
 */

/*
 * Recursively mark as REMOVED every vdev under 'vd' whose
 * vdev_remove_wanted flag is set, clearing the flag and dirtying the
 * top-level vdev's state.
 */
static void
spa_async_remove(spa_t *spa, vdev_t *vd)
{
	if (vd->vdev_remove_wanted) {
		vd->vdev_remove_wanted = 0;
		vdev_set_state(vd, B_FALSE, VDEV_STATE_REMOVED, VDEV_AUX_NONE);
		vdev_clear(spa, vd);
		vdev_state_dirty(vd->vdev_top);
	}

	for (int c = 0; c < vd->vdev_children; c++)
		spa_async_remove(spa, vd->vdev_child[c]);
}

/*
 * Recursively reopen (and thereby probe) every vdev under 'vd' whose
 * vdev_probe_wanted flag is set.
 */
static void
spa_async_probe(spa_t *spa, vdev_t *vd)
{
	if (vd->vdev_probe_wanted) {
		vd->vdev_probe_wanted = 0;
		vdev_reopen(vd);	/* vdev_open() does the actual probe */
	}

	for (int c = 0; c < vd->vdev_children; c++)
		spa_async_probe(spa, vd->vdev_child[c]);
}

/*
 * Body of the per-pool async worker thread: atomically snapshot and clear
 * the pending task mask, service each requested task, then announce
 * completion and exit.  Tasks requested while this thread runs remain
 * pending for the next dispatch.
 */
static void
spa_async_thread(spa_t *spa)
{
	int tasks;

	ASSERT(spa->spa_sync_on);

	mutex_enter(&spa->spa_async_lock);
	tasks = spa->spa_async_tasks;
	spa->spa_async_tasks = 0;
	mutex_exit(&spa->spa_async_lock);

	/*
	 * See if the config needs to be updated.
	 */
	if (tasks & SPA_ASYNC_CONFIG_UPDATE) {
		mutex_enter(&spa_namespace_lock);
		spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
		mutex_exit(&spa_namespace_lock);
	}

	/*
	 * See if any devices need to be marked REMOVED.
	 * The main tree plus both aux trees (l2cache, spares) are scanned.
	 */
	if (tasks & SPA_ASYNC_REMOVE) {
		spa_vdev_state_enter(spa);
		spa_async_remove(spa, spa->spa_root_vdev);
		for (int i = 0; i < spa->spa_l2cache.sav_count; i++)
			spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i]);
		for (int i = 0; i < spa->spa_spares.sav_count; i++)
			spa_async_remove(spa, spa->spa_spares.sav_vdevs[i]);
		(void) spa_vdev_state_exit(spa, NULL, 0);
	}

	/*
	 * See if any devices need to be probed.
	 */
	if (tasks & SPA_ASYNC_PROBE) {
		spa_vdev_state_enter(spa);
		spa_async_probe(spa, spa->spa_root_vdev);
		(void) spa_vdev_state_exit(spa, NULL, 0);
	}

	/*
	 * If any devices are done replacing, detach them.
	 */
	if (tasks & SPA_ASYNC_RESILVER_DONE)
		spa_vdev_resilver_done(spa);

	/*
	 * Kick off a resilver.
	 */
	if (tasks & SPA_ASYNC_RESILVER)
		VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER) == 0);

	/*
	 * Let the world know that we're done.
	 */
	mutex_enter(&spa->spa_async_lock);
	spa->spa_async_thread = NULL;
	cv_broadcast(&spa->spa_async_cv);
	mutex_exit(&spa->spa_async_lock);
	thread_exit();
}

/*
 * Block new async work and wait for any running async thread to finish.
 * Suspensions nest (counted); pair each call with spa_async_resume().
 */
void
spa_async_suspend(spa_t *spa)
{
	mutex_enter(&spa->spa_async_lock);
	spa->spa_async_suspended++;
	while (spa->spa_async_thread != NULL)
		cv_wait(&spa->spa_async_cv, &spa->spa_async_lock);
	mutex_exit(&spa->spa_async_lock);
}

/*
 * Undo one level of spa_async_suspend().
 */
void
spa_async_resume(spa_t *spa)
{
	mutex_enter(&spa->spa_async_lock);
	ASSERT(spa->spa_async_suspended != 0);
	spa->spa_async_suspended--;
	mutex_exit(&spa->spa_async_lock);
}

/*
 * Spawn the async worker if there is pending work, no suspension, no
 * thread already running, and the root filesystem is writable.
 */
static void
spa_async_dispatch(spa_t *spa)
{
	mutex_enter(&spa->spa_async_lock);
	if (spa->spa_async_tasks && !spa->spa_async_suspended &&
	    spa->spa_async_thread == NULL &&
	    rootdir != NULL && !vn_is_readonly(rootdir))
		spa->spa_async_thread = thread_create(NULL, 0,
		    spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri);
	mutex_exit(&spa->spa_async_lock);
}

/*
 * Request one or more SPA_ASYNC_* tasks; they run when the async thread
 * is next dispatched.
 */
void
spa_async_request(spa_t *spa, int task)
{
	mutex_enter(&spa->spa_async_lock);
	spa->spa_async_tasks |= task;
	mutex_exit(&spa->spa_async_lock);
}

/*
 * ==========================================================================
 * SPA syncing routines
 * ==========================================================================
 */

/*
 * Free everything on the pool-wide deferred-free bplist as of 'txg',
 * then vacate the list within this txg's assigned transaction.
 * NOTE(review): the root zio is created CANFAIL while each child free
 * is MUSTSUCCEED; zio_wait() is nonetheless asserted to return 0.
 */
static void
spa_sync_deferred_frees(spa_t *spa, uint64_t txg)
{
	bplist_t *bpl = &spa->spa_sync_bplist;
	dmu_tx_t *tx;
	blkptr_t blk;
	uint64_t itor = 0;
	zio_t *zio;
	int error;
	uint8_t c = 1;

	zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL);

	while (bplist_iterate(bpl, &itor, &blk) == 0) {
		/* A deferred free must have been born in an earlier txg. */
		ASSERT(blk.blk_birth < txg);
		zio_nowait(zio_free(zio, spa, txg, &blk, NULL, NULL,
		    ZIO_FLAG_MUSTSUCCEED));
	}

	error = zio_wait(zio);
	ASSERT3U(error, ==, 0);

	tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg);
	bplist_vacate(bpl, tx);

	/*
	 * Pre-dirty the first block so we sync to convergence faster.
	 * (Usually only the first block is needed.)
	 */
	dmu_write(spa->spa_meta_objset, spa->spa_sync_bplist_obj, 0, 1, &c, tx);
	dmu_tx_commit(tx);
}

/*
 * Pack 'nv' (XDR encoding) into packed-nvlist object 'obj' in the MOS,
 * padding the write out to a SPA_CONFIG_BLOCKSIZE multiple, and record
 * the packed size in the object's bonus buffer.
 */
static void
spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx)
{
	char *packed = NULL;
	size_t bufsize;
	size_t nvsize = 0;
	dmu_buf_t *db;

	VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0);

	/*
	 * Write full (SPA_CONFIG_BLOCKSIZE) blocks of configuration
	 * information.  This avoids the dbuf_will_dirty() path and
	 * saves us a pre-read to get data we don't actually care about.
	 */
	bufsize = P2ROUNDUP(nvsize, SPA_CONFIG_BLOCKSIZE);
	packed = kmem_alloc(bufsize, KM_SLEEP);

	VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR,
	    KM_SLEEP) == 0);
	/* Zero the padding between the packed nvlist and the block end. */
	bzero(packed + nvsize, bufsize - nvsize);

	dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx);

	kmem_free(packed, bufsize);

	/* The bonus buffer holds the true (unpadded) packed size. */
	VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
	dmu_buf_will_dirty(db, tx);
	*(uint64_t *)db->db_data = nvsize;
	dmu_buf_rele(db, FTAG);
}

/*
 * Sync an auxiliary vdev array (spares or l2cache) to the MOS: create
 * its packed-nvlist object on first use, link it into the pool
 * directory under 'entry', and write an nvlist whose 'config'-named
 * array holds the aux vdev configs.  No-op unless sav->sav_sync is
 * set; clears sav_sync on completion.
 */
static void
spa_sync_aux_dev(spa_t *spa, spa_aux_vdev_t *sav, dmu_tx_t *tx,
    const char *config, const char *entry)
{
	nvlist_t *nvroot;
	nvlist_t **list;
	int i;

	if (!sav->sav_sync)
		return;

	/*
	 * Update the MOS nvlist describing the list of available devices.
	 * spa_validate_aux() will have already made sure this nvlist is
	 * valid and the vdevs are labeled appropriately.
	 */
	if (sav->sav_object == 0) {
		sav->sav_object = dmu_object_alloc(spa->spa_meta_objset,
		    DMU_OT_PACKED_NVLIST, 1 << 14, DMU_OT_PACKED_NVLIST_SIZE,
		    sizeof (uint64_t), tx);
		VERIFY(zap_update(spa->spa_meta_objset,
		    DMU_POOL_DIRECTORY_OBJECT, entry, sizeof (uint64_t), 1,
		    &sav->sav_object, tx) == 0);
	}

	VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	if (sav->sav_count == 0) {
		/* An empty array records "no aux devices of this kind". */
		VERIFY(nvlist_add_nvlist_array(nvroot, config, NULL, 0) == 0);
	} else {
		list = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP);
		for (i = 0; i < sav->sav_count; i++)
			list[i] = vdev_config_generate(spa, sav->sav_vdevs[i],
			    B_FALSE, B_FALSE, B_TRUE);
		VERIFY(nvlist_add_nvlist_array(nvroot, config, list,
		    sav->sav_count) == 0);
		for (i = 0; i < sav->sav_count; i++)
			nvlist_free(list[i]);
		kmem_free(list, sav->sav_count * sizeof (void *));
	}

	spa_sync_nvlist(spa, sav->sav_object, nvroot, tx);
	nvlist_free(nvroot);

	sav->sav_sync = B_FALSE;
}

/*
 * Write the dirty pool configuration to the MOS config object, keeping
 * a reference in spa_config_syncing until the txg commits (spa_sync()
 * then makes it visible to the config cache).
 */
static void
spa_sync_config_object(spa_t *spa, dmu_tx_t *tx)
{
	nvlist_t *config;

39127754SJeff.Bonwick@Sun.COM if (list_is_empty(&spa->spa_config_dirty_list)) 3913789Sahrens return; 3914789Sahrens 39157754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 39167754SJeff.Bonwick@Sun.COM 39177754SJeff.Bonwick@Sun.COM config = spa_config_generate(spa, spa->spa_root_vdev, 39187754SJeff.Bonwick@Sun.COM dmu_tx_get_txg(tx), B_FALSE); 39197754SJeff.Bonwick@Sun.COM 39207754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_STATE, FTAG); 3921789Sahrens 39221635Sbonwick if (spa->spa_config_syncing) 39231635Sbonwick nvlist_free(spa->spa_config_syncing); 39241635Sbonwick spa->spa_config_syncing = config; 3925789Sahrens 39262082Seschrock spa_sync_nvlist(spa, spa->spa_config_object, config, tx); 3927789Sahrens } 3928789Sahrens 39295094Slling /* 39305094Slling * Set zpool properties. 39315094Slling */ 39323912Slling static void 39334543Smarks spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 39343912Slling { 39353912Slling spa_t *spa = arg1; 39365094Slling objset_t *mos = spa->spa_meta_objset; 39373912Slling nvlist_t *nvp = arg2; 39385094Slling nvpair_t *elem; 39394451Seschrock uint64_t intval; 39406643Seschrock char *strval; 39415094Slling zpool_prop_t prop; 39425094Slling const char *propname; 39435094Slling zprop_type_t proptype; 39445094Slling 39457754SJeff.Bonwick@Sun.COM mutex_enter(&spa->spa_props_lock); 39467754SJeff.Bonwick@Sun.COM 39475094Slling elem = NULL; 39485094Slling while ((elem = nvlist_next_nvpair(nvp, elem))) { 39495094Slling switch (prop = zpool_name_to_prop(nvpair_name(elem))) { 39505094Slling case ZPOOL_PROP_VERSION: 39515094Slling /* 39525094Slling * Only set version for non-zpool-creation cases 39535094Slling * (set/import). spa_create() needs special care 39545094Slling * for version setting. 
39555094Slling */ 39565094Slling if (tx->tx_txg != TXG_INITIAL) { 39575094Slling VERIFY(nvpair_value_uint64(elem, 39585094Slling &intval) == 0); 39595094Slling ASSERT(intval <= SPA_VERSION); 39605094Slling ASSERT(intval >= spa_version(spa)); 39615094Slling spa->spa_uberblock.ub_version = intval; 39625094Slling vdev_config_dirty(spa->spa_root_vdev); 39635094Slling } 39645094Slling break; 39655094Slling 39665094Slling case ZPOOL_PROP_ALTROOT: 39675094Slling /* 39685094Slling * 'altroot' is a non-persistent property. It should 39695094Slling * have been set temporarily at creation or import time. 39705094Slling */ 39715094Slling ASSERT(spa->spa_root != NULL); 39725094Slling break; 39735094Slling 39745363Seschrock case ZPOOL_PROP_CACHEFILE: 39755094Slling /* 39768525SEric.Schrock@Sun.COM * 'cachefile' is also a non-persisitent property. 39775094Slling */ 39784543Smarks break; 39795094Slling default: 39805094Slling /* 39815094Slling * Set pool property values in the poolprops mos object. 39825094Slling */ 39835094Slling if (spa->spa_pool_props_object == 0) { 39845094Slling objset_t *mos = spa->spa_meta_objset; 39855094Slling 39865094Slling VERIFY((spa->spa_pool_props_object = 39875094Slling zap_create(mos, DMU_OT_POOL_PROPS, 39885094Slling DMU_OT_NONE, 0, tx)) > 0); 39895094Slling 39905094Slling VERIFY(zap_update(mos, 39915094Slling DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS, 39925094Slling 8, 1, &spa->spa_pool_props_object, tx) 39935094Slling == 0); 39945094Slling } 39955094Slling 39965094Slling /* normalize the property name */ 39975094Slling propname = zpool_prop_to_name(prop); 39985094Slling proptype = zpool_prop_get_type(prop); 39995094Slling 40005094Slling if (nvpair_type(elem) == DATA_TYPE_STRING) { 40015094Slling ASSERT(proptype == PROP_TYPE_STRING); 40025094Slling VERIFY(nvpair_value_string(elem, &strval) == 0); 40035094Slling VERIFY(zap_update(mos, 40045094Slling spa->spa_pool_props_object, propname, 40055094Slling 1, strlen(strval) + 1, strval, tx) == 0); 
40065094Slling 40075094Slling } else if (nvpair_type(elem) == DATA_TYPE_UINT64) { 40085094Slling VERIFY(nvpair_value_uint64(elem, &intval) == 0); 40095094Slling 40105094Slling if (proptype == PROP_TYPE_INDEX) { 40115094Slling const char *unused; 40125094Slling VERIFY(zpool_prop_index_to_string( 40135094Slling prop, intval, &unused) == 0); 40145094Slling } 40155094Slling VERIFY(zap_update(mos, 40165094Slling spa->spa_pool_props_object, propname, 40175094Slling 8, 1, &intval, tx) == 0); 40185094Slling } else { 40195094Slling ASSERT(0); /* not allowed */ 40205094Slling } 40215094Slling 40225329Sgw25295 switch (prop) { 40235329Sgw25295 case ZPOOL_PROP_DELEGATION: 40245094Slling spa->spa_delegation = intval; 40255329Sgw25295 break; 40265329Sgw25295 case ZPOOL_PROP_BOOTFS: 40275094Slling spa->spa_bootfs = intval; 40285329Sgw25295 break; 40295329Sgw25295 case ZPOOL_PROP_FAILUREMODE: 40305329Sgw25295 spa->spa_failmode = intval; 40315329Sgw25295 break; 40325329Sgw25295 default: 40335329Sgw25295 break; 40345329Sgw25295 } 40353912Slling } 40365094Slling 40375094Slling /* log internal history if this is not a zpool create */ 40385094Slling if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY && 40395094Slling tx->tx_txg != TXG_INITIAL) { 40405094Slling spa_history_internal_log(LOG_POOL_PROPSET, 40415094Slling spa, tx, cr, "%s %lld %s", 40427754SJeff.Bonwick@Sun.COM nvpair_name(elem), intval, spa_name(spa)); 40435094Slling } 40443912Slling } 40457754SJeff.Bonwick@Sun.COM 40467754SJeff.Bonwick@Sun.COM mutex_exit(&spa->spa_props_lock); 40473912Slling } 40483912Slling 4049789Sahrens /* 4050789Sahrens * Sync the specified transaction group. New blocks may be dirtied as 4051789Sahrens * part of the process, so we iterate until it converges. 
 */
void
spa_sync(spa_t *spa, uint64_t txg)
{
	dsl_pool_t *dp = spa->spa_dsl_pool;
	objset_t *mos = spa->spa_meta_objset;
	bplist_t *bpl = &spa->spa_sync_bplist;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *vd;
	dmu_tx_t *tx;
	int dirty_vdevs;
	int error;

	/*
	 * Lock out configuration changes.
	 */
	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);

	spa->spa_syncing_txg = txg;
	spa->spa_sync_pass = 0;

	/*
	 * If there are any pending vdev state changes, convert them
	 * into config changes that go out with this transaction group.
	 */
	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
	while (list_head(&spa->spa_state_dirty_list) != NULL) {
		/*
		 * We need the write lock here because, for aux vdevs,
		 * calling vdev_config_dirty() modifies sav_config.
		 * This is ugly and will become unnecessary when we
		 * eliminate the aux vdev wart by integrating all vdevs
		 * into the root vdev tree.
		 */
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_WRITER);
		while ((vd = list_head(&spa->spa_state_dirty_list)) != NULL) {
			vdev_state_clean(vd);
			vdev_config_dirty(vd);
		}
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);
	}
	spa_config_exit(spa, SCL_STATE, FTAG);

	/* Open the MOS object backing the pool's deferred-free list. */
	VERIFY(0 == bplist_open(bpl, mos, spa->spa_sync_bplist_obj));

	tx = dmu_tx_create_assigned(dp, txg);

	/*
	 * If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg,
	 * set spa_deflate if we have no raid-z vdevs.
	 */
	if (spa->spa_ubsync.ub_version < SPA_VERSION_RAIDZ_DEFLATE &&
	    spa->spa_uberblock.ub_version >= SPA_VERSION_RAIDZ_DEFLATE) {
		int i;

		for (i = 0; i < rvd->vdev_children; i++) {
			vd = rvd->vdev_child[i];
			if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE)
				break;
		}
		if (i == rvd->vdev_children) {
			spa->spa_deflate = TRUE;
			VERIFY(0 == zap_add(spa->spa_meta_objset,
			    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
			    sizeof (uint64_t), 1, &spa->spa_deflate, tx));
		}
	}

	/* One-shot feature upgrades, keyed off the version crossing. */
	if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN &&
	    spa->spa_uberblock.ub_version >= SPA_VERSION_ORIGIN) {
		dsl_pool_create_origin(dp, tx);

		/* Keeping the origin open increases spa_minref */
		spa->spa_minref += 3;
	}

	if (spa->spa_ubsync.ub_version < SPA_VERSION_NEXT_CLONES &&
	    spa->spa_uberblock.ub_version >= SPA_VERSION_NEXT_CLONES) {
		dsl_pool_upgrade_clones(dp, tx);
	}

	/*
	 * If anything has changed in this txg, push the deferred frees
	 * from the previous txg.  If not, leave them alone so that we
	 * don't generate work on an otherwise idle system.
	 */
	if (!txg_list_empty(&dp->dp_dirty_datasets, txg) ||
	    !txg_list_empty(&dp->dp_dirty_dirs, txg) ||
	    !txg_list_empty(&dp->dp_sync_tasks, txg))
		spa_sync_deferred_frees(spa, txg);

	/*
	 * Iterate to convergence: each pass syncs config, aux devs,
	 * error log, the DSL pool and any dirty vdevs; stop once a
	 * pass dirties no vdevs.
	 */
	do {
		spa->spa_sync_pass++;

		spa_sync_config_object(spa, tx);
		spa_sync_aux_dev(spa, &spa->spa_spares, tx,
		    ZPOOL_CONFIG_SPARES, DMU_POOL_SPARES);
		spa_sync_aux_dev(spa, &spa->spa_l2cache, tx,
		    ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE);
		spa_errlog_sync(spa, txg);
		dsl_pool_sync(dp, txg);

		dirty_vdevs = 0;
		while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg)) {
			vdev_sync(vd, txg);
			dirty_vdevs++;
		}

		bplist_sync(bpl, tx);
	} while (dirty_vdevs);

	bplist_close(bpl);

	dprintf("txg %llu passes %d\n", txg, spa->spa_sync_pass);

	/*
	 * Rewrite the vdev configuration (which includes the uberblock)
	 * to commit the transaction group.
	 *
	 * If there are no dirty vdevs, we sync the uberblock to a few
	 * random top-level vdevs that are known to be visible in the
	 * config cache (see spa_vdev_add() for a complete description).
	 * If there *are* dirty vdevs, sync the uberblock to all vdevs.
	 */
	for (;;) {
		/*
		 * We hold SCL_STATE to prevent vdev open/close/etc.
		 * while we're attempting to write the vdev labels.
		 */
		spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);

		if (list_is_empty(&spa->spa_config_dirty_list)) {
			vdev_t *svd[SPA_DVAS_PER_BP];
			int svdcount = 0;
			int children = rvd->vdev_children;
			int c0 = spa_get_random(children);
			int c;

			/*
			 * Pick up to SPA_DVAS_PER_BP top-level vdevs,
			 * starting from a random child; skip holes
			 * (no metaslab array) and log devices.
			 */
			for (c = 0; c < children; c++) {
				vd = rvd->vdev_child[(c0 + c) % children];
				if (vd->vdev_ms_array == 0 || vd->vdev_islog)
					continue;
				svd[svdcount++] = vd;
				if (svdcount == SPA_DVAS_PER_BP)
					break;
			}
			error = vdev_config_sync(svd, svdcount, txg);
		} else {
			error = vdev_config_sync(rvd->vdev_child,
			    rvd->vdev_children, txg);
		}

		spa_config_exit(spa, SCL_STATE, FTAG);

		if (error == 0)
			break;
		/* Label write failed: suspend all I/O, wait, retry. */
		zio_suspend(spa, NULL);
		zio_resume_wait(spa);
	}
	dmu_tx_commit(tx);

	/*
	 * Clear the dirty config list.
	 */
	while ((vd = list_head(&spa->spa_config_dirty_list)) != NULL)
		vdev_config_clean(vd);

	/*
	 * Now that the new config has synced transactionally,
	 * let it become visible to the config cache.
	 */
	if (spa->spa_config_syncing != NULL) {
		spa_config_set(spa, spa->spa_config_syncing);
		spa->spa_config_txg = txg;
		spa->spa_config_syncing = NULL;
	}

	/* spa_ubsync now reflects the uberblock we just committed. */
	spa->spa_ubsync = spa->spa_uberblock;

	/*
	 * Clean up the ZIL records for the synced txg.
	 */
	dsl_pool_zil_clean(dp);

	/*
	 * Update usable space statistics.
	 */
	while (vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)))
		vdev_sync_done(vd, txg);

	/*
	 * It had better be the case that we didn't dirty anything
	 * since vdev_config_sync().
	 */
	ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg));
	ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg));
	ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg));
	ASSERT(bpl->bpl_queue == NULL);

	spa_config_exit(spa, SCL_CONFIG, FTAG);

	/*
	 * If any async tasks have been requested, kick them off.
	 */
	spa_async_dispatch(spa);
}

/*
 * Sync all pools.  We don't want to hold the namespace lock across these
 * operations, so we take a reference on the spa_t and drop the lock during the
 * sync.
 */
void
spa_sync_allpools(void)
{
	spa_t *spa = NULL;
	mutex_enter(&spa_namespace_lock);
	while ((spa = spa_next(spa)) != NULL) {
		/* Skip inactive pools and pools with suspended I/O. */
		if (spa_state(spa) != POOL_STATE_ACTIVE || spa_suspended(spa))
			continue;
		spa_open_ref(spa, FTAG);
		mutex_exit(&spa_namespace_lock);
		txg_wait_synced(spa_get_dsl(spa), 0);
		mutex_enter(&spa_namespace_lock);
		spa_close(spa, FTAG);
	}
	mutex_exit(&spa_namespace_lock);
}

/*
 * ==========================================================================
 * Miscellaneous routines
 * ==========================================================================
 */

/*
 * Remove all pools in the system.
 */
void
spa_evict_all(void)
{
	spa_t *spa;

	/*
	 * Remove all cached state.  All pools should be closed now,
	 * so every spa in the AVL tree should be unreferenced.
	 */
	mutex_enter(&spa_namespace_lock);
	while ((spa = spa_next(NULL)) != NULL) {
		/*
		 * Stop async tasks.  The async thread may need to detach
		 * a device that's been replaced, which requires grabbing
		 * spa_namespace_lock, so we must drop it here.
		 */
		spa_open_ref(spa, FTAG);
		mutex_exit(&spa_namespace_lock);
		spa_async_suspend(spa);
		mutex_enter(&spa_namespace_lock);
		spa_close(spa, FTAG);

		if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
			spa_unload(spa);
			spa_deactivate(spa);
		}
		spa_remove(spa);
	}
	mutex_exit(&spa_namespace_lock);
}

/*
 * Find a vdev in this pool by guid: the root vdev tree is searched
 * first; if 'aux' is set, the l2cache and spare aux arrays are also
 * searched.  Returns NULL if no vdev has the given guid.
 */
vdev_t *
spa_lookup_by_guid(spa_t *spa, uint64_t guid, boolean_t aux)
{
	vdev_t *vd;
	int i;

	if ((vd = vdev_lookup_by_guid(spa->spa_root_vdev, guid)) != NULL)
		return (vd);

	if (aux) {
		for (i = 0; i < spa->spa_l2cache.sav_count; i++) {
			vd = spa->spa_l2cache.sav_vdevs[i];
			if (vd->vdev_guid == guid)
				return (vd);
		}

		for (i = 0; i < spa->spa_spares.sav_count; i++) {
			vd = spa->spa_spares.sav_vdevs[i];
			if (vd->vdev_guid == guid)
				return (vd);
		}
	}

	return (NULL);
}

/*
 * Raise the pool's on-disk version to 'version', dirty the config so
 * the change is written out, and wait for it to sync.
 */
void
spa_upgrade(spa_t *spa, uint64_t version)
{
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	/*
	 * This should only be called for a non-faulted pool, and since a
	 * future version would result in an unopenable pool, this shouldn't be
	 * possible.
	 */
	ASSERT(spa->spa_uberblock.ub_version <= SPA_VERSION);
	ASSERT(version >= spa->spa_uberblock.ub_version);

	spa->spa_uberblock.ub_version = version;
	vdev_config_dirty(spa->spa_root_vdev);

	spa_config_exit(spa, SCL_ALL, FTAG);

	txg_wait_synced(spa_get_dsl(spa), 0);
}

/*
 * Returns B_TRUE if 'guid' matches one of this pool's configured
 * spares, or a spare that is still pending addition (sav_pending).
 */
boolean_t
spa_has_spare(spa_t *spa, uint64_t guid)
{
	int i;
	uint64_t spareguid;
	spa_aux_vdev_t *sav = &spa->spa_spares;

	for (i = 0; i < sav->sav_count; i++)
		if (sav->sav_vdevs[i]->vdev_guid == guid)
			return (B_TRUE);

	for (i = 0; i < sav->sav_npending; i++) {
		if (nvlist_lookup_uint64(sav->sav_pending[i], ZPOOL_CONFIG_GUID,
		    &spareguid) == 0 && spareguid == guid)
			return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * Check if a pool has an active shared spare device.
 * Note: reference count of an active spare is 2, as a spare and as a replace
 */
static boolean_t
spa_has_active_shared_spare(spa_t *spa)
{
	int i, refcnt;
	uint64_t pool;
	spa_aux_vdev_t *sav = &spa->spa_spares;

	for (i = 0; i < sav->sav_count; i++) {
		/*
		 * refcnt > 2 means the spare is in use by another pool
		 * in addition to being configured here (see note above).
		 */
		if (spa_spare_exists(sav->sav_vdevs[i]->vdev_guid, &pool,
		    &refcnt) && pool != 0ULL && pool == spa_guid(spa) &&
		    refcnt > 2)
			return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * Post a sysevent corresponding to the given event.  The 'name' must be one of
 * the event definitions in sys/sysevent/eventdefs.h.  The payload will be
 * filled in from the spa and (optionally) the vdev.  This doesn't do anything
 * in the userland libzpool, as we don't want consumers to misinterpret ztest
 * or zdb as real changes.
 */
void
spa_event_notify(spa_t *spa, vdev_t *vd, const char *name)
{
#ifdef _KERNEL
	sysevent_t *ev;
	sysevent_attr_list_t *attr = NULL;
	sysevent_value_t value;
	sysevent_id_t eid;

	ev = sysevent_alloc(EC_ZFS, (char *)name, SUNW_KERN_PUB "zfs",
	    SE_SLEEP);

	value.value_type = SE_DATA_TYPE_STRING;
	value.value.sv_string = spa_name(spa);
	if (sysevent_add_attr(&attr, ZFS_EV_POOL_NAME, &value, SE_SLEEP) != 0)
		goto done;

	value.value_type = SE_DATA_TYPE_UINT64;
	value.value.sv_uint64 = spa_guid(spa);
	if (sysevent_add_attr(&attr, ZFS_EV_POOL_GUID, &value, SE_SLEEP) != 0)
		goto done;

	/* The vdev is optional; include its guid and path when given. */
	if (vd) {
		value.value_type = SE_DATA_TYPE_UINT64;
		value.value.sv_uint64 = vd->vdev_guid;
		if (sysevent_add_attr(&attr, ZFS_EV_VDEV_GUID, &value,
		    SE_SLEEP) != 0)
			goto done;

		if (vd->vdev_path) {
			value.value_type = SE_DATA_TYPE_STRING;
			value.value.sv_string = vd->vdev_path;
			if (sysevent_add_attr(&attr, ZFS_EV_VDEV_PATH,
			    &value, SE_SLEEP) != 0)
				goto done;
		}
	}

	if (sysevent_attach_attributes(ev, attr) != 0)
		goto done;
	/* Clear attr so the done: path doesn't free what ev now references. */
	attr = NULL;

	(void) log_sysevent(ev, SE_SLEEP, &eid);

done:
	if (attr)
		sysevent_free_attr(attr);
	sysevent_free(ev);
#endif
}