1789Sahrens /* 2789Sahrens * CDDL HEADER START 3789Sahrens * 4789Sahrens * The contents of this file are subject to the terms of the 51544Seschrock * Common Development and Distribution License (the "License"). 61544Seschrock * You may not use this file except in compliance with the License. 7789Sahrens * 8789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9789Sahrens * or http://www.opensolaris.org/os/licensing. 10789Sahrens * See the License for the specific language governing permissions 11789Sahrens * and limitations under the License. 12789Sahrens * 13789Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15789Sahrens * If applicable, add the following below this CDDL HEADER, with the 16789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18789Sahrens * 19789Sahrens * CDDL HEADER END 20789Sahrens */ 212082Seschrock 22789Sahrens /* 238525SEric.Schrock@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24789Sahrens * Use is subject to license terms. 25789Sahrens */ 26789Sahrens 27789Sahrens /* 28789Sahrens * This file contains all the routines used when modifying on-disk SPA state. 29789Sahrens * This includes opening, importing, destroying, exporting a pool, and syncing a 30789Sahrens * pool. 
31789Sahrens */ 32789Sahrens 33789Sahrens #include <sys/zfs_context.h> 341544Seschrock #include <sys/fm/fs/zfs.h> 35789Sahrens #include <sys/spa_impl.h> 36789Sahrens #include <sys/zio.h> 37789Sahrens #include <sys/zio_checksum.h> 38789Sahrens #include <sys/zio_compress.h> 39789Sahrens #include <sys/dmu.h> 40789Sahrens #include <sys/dmu_tx.h> 41789Sahrens #include <sys/zap.h> 42789Sahrens #include <sys/zil.h> 43789Sahrens #include <sys/vdev_impl.h> 44789Sahrens #include <sys/metaslab.h> 4510594SGeorge.Wilson@Sun.COM #include <sys/metaslab_impl.h> 46789Sahrens #include <sys/uberblock_impl.h> 47789Sahrens #include <sys/txg.h> 48789Sahrens #include <sys/avl.h> 49789Sahrens #include <sys/dmu_traverse.h> 503912Slling #include <sys/dmu_objset.h> 51789Sahrens #include <sys/unique.h> 52789Sahrens #include <sys/dsl_pool.h> 533912Slling #include <sys/dsl_dataset.h> 54789Sahrens #include <sys/dsl_dir.h> 55789Sahrens #include <sys/dsl_prop.h> 563912Slling #include <sys/dsl_synctask.h> 57789Sahrens #include <sys/fs/zfs.h> 585450Sbrendan #include <sys/arc.h> 59789Sahrens #include <sys/callb.h> 603975Sek110237 #include <sys/systeminfo.h> 613975Sek110237 #include <sys/sunddi.h> 626423Sgw25295 #include <sys/spa_boot.h> 639816SGeorge.Wilson@Sun.COM #include <sys/zfs_ioctl.h> 64789Sahrens 658662SJordan.Vaughan@Sun.com #ifdef _KERNEL 668662SJordan.Vaughan@Sun.com #include <sys/zone.h> 67*10822SJack.Meng@Sun.COM #include <sys/bootprops.h> 688662SJordan.Vaughan@Sun.com #endif /* _KERNEL */ 698662SJordan.Vaughan@Sun.com 705094Slling #include "zfs_prop.h" 715913Sperrin #include "zfs_comutil.h" 725094Slling 739515SJonathan.Adams@Sun.COM enum zti_modes { 749515SJonathan.Adams@Sun.COM zti_mode_fixed, /* value is # of threads (min 1) */ 759515SJonathan.Adams@Sun.COM zti_mode_online_percent, /* value is % of online CPUs */ 769515SJonathan.Adams@Sun.COM zti_mode_tune, /* fill from zio_taskq_tune_* */ 779515SJonathan.Adams@Sun.COM zti_nmodes 787754SJeff.Bonwick@Sun.COM }; 792986Sek110237 
809515SJonathan.Adams@Sun.COM #define ZTI_THREAD_FIX(n) { zti_mode_fixed, (n) } 819515SJonathan.Adams@Sun.COM #define ZTI_THREAD_PCT(n) { zti_mode_online_percent, (n) } 829515SJonathan.Adams@Sun.COM #define ZTI_THREAD_TUNE { zti_mode_tune, 0 } 839515SJonathan.Adams@Sun.COM 849515SJonathan.Adams@Sun.COM #define ZTI_THREAD_ONE ZTI_THREAD_FIX(1) 859515SJonathan.Adams@Sun.COM 869515SJonathan.Adams@Sun.COM typedef struct zio_taskq_info { 879515SJonathan.Adams@Sun.COM const char *zti_name; 889515SJonathan.Adams@Sun.COM struct { 899515SJonathan.Adams@Sun.COM enum zti_modes zti_mode; 909515SJonathan.Adams@Sun.COM uint_t zti_value; 919515SJonathan.Adams@Sun.COM } zti_nthreads[ZIO_TASKQ_TYPES]; 929515SJonathan.Adams@Sun.COM } zio_taskq_info_t; 939515SJonathan.Adams@Sun.COM 949515SJonathan.Adams@Sun.COM static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = { 959515SJonathan.Adams@Sun.COM "issue", "intr" 969515SJonathan.Adams@Sun.COM }; 979515SJonathan.Adams@Sun.COM 989515SJonathan.Adams@Sun.COM const zio_taskq_info_t zio_taskqs[ZIO_TYPES] = { 999515SJonathan.Adams@Sun.COM /* ISSUE INTR */ 1009515SJonathan.Adams@Sun.COM { "spa_zio_null", { ZTI_THREAD_ONE, ZTI_THREAD_ONE } }, 1019515SJonathan.Adams@Sun.COM { "spa_zio_read", { ZTI_THREAD_FIX(8), ZTI_THREAD_TUNE } }, 1029515SJonathan.Adams@Sun.COM { "spa_zio_write", { ZTI_THREAD_TUNE, ZTI_THREAD_FIX(8) } }, 1039515SJonathan.Adams@Sun.COM { "spa_zio_free", { ZTI_THREAD_ONE, ZTI_THREAD_ONE } }, 1049515SJonathan.Adams@Sun.COM { "spa_zio_claim", { ZTI_THREAD_ONE, ZTI_THREAD_ONE } }, 1059515SJonathan.Adams@Sun.COM { "spa_zio_ioctl", { ZTI_THREAD_ONE, ZTI_THREAD_ONE } }, 1069515SJonathan.Adams@Sun.COM }; 1079515SJonathan.Adams@Sun.COM 1089515SJonathan.Adams@Sun.COM enum zti_modes zio_taskq_tune_mode = zti_mode_online_percent; 1099515SJonathan.Adams@Sun.COM uint_t zio_taskq_tune_value = 80; /* #threads = 80% of # online CPUs */ 1109515SJonathan.Adams@Sun.COM 1115094Slling static void spa_sync_props(void *arg1, void *arg2, cred_t 
*cr, dmu_tx_t *tx); 1127214Slling static boolean_t spa_has_active_shared_spare(spa_t *spa); 1135094Slling 1145094Slling /* 1155094Slling * ========================================================================== 1165094Slling * SPA properties routines 1175094Slling * ========================================================================== 1185094Slling */ 1195094Slling 1205094Slling /* 1215094Slling * Add a (source=src, propname=propval) list to an nvlist. 1225094Slling */ 1235949Slling static void 1245094Slling spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval, 1255094Slling uint64_t intval, zprop_source_t src) 1265094Slling { 1275094Slling const char *propname = zpool_prop_to_name(prop); 1285094Slling nvlist_t *propval; 1295949Slling 1305949Slling VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); 1315949Slling VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0); 1325949Slling 1335949Slling if (strval != NULL) 1345949Slling VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0); 1355949Slling else 1365949Slling VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0); 1375949Slling 1385949Slling VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0); 1395094Slling nvlist_free(propval); 1405094Slling } 1415094Slling 1425094Slling /* 1435094Slling * Get property values from the spa configuration. 
1445094Slling */ 1455949Slling static void 1465094Slling spa_prop_get_config(spa_t *spa, nvlist_t **nvp) 1475094Slling { 1488525SEric.Schrock@Sun.COM uint64_t size; 1498525SEric.Schrock@Sun.COM uint64_t used; 1505094Slling uint64_t cap, version; 1515094Slling zprop_source_t src = ZPROP_SRC_NONE; 1526643Seschrock spa_config_dirent_t *dp; 1535094Slling 1547754SJeff.Bonwick@Sun.COM ASSERT(MUTEX_HELD(&spa->spa_props_lock)); 1557754SJeff.Bonwick@Sun.COM 1568525SEric.Schrock@Sun.COM if (spa->spa_root_vdev != NULL) { 1578525SEric.Schrock@Sun.COM size = spa_get_space(spa); 1588525SEric.Schrock@Sun.COM used = spa_get_alloc(spa); 1598525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src); 1608525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src); 1618525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_USED, NULL, used, src); 1628525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_AVAILABLE, NULL, 1638525SEric.Schrock@Sun.COM size - used, src); 1648525SEric.Schrock@Sun.COM 1658525SEric.Schrock@Sun.COM cap = (size == 0) ? 
0 : (used * 100 / size); 1668525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src); 1678525SEric.Schrock@Sun.COM 1688525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL, 1698525SEric.Schrock@Sun.COM spa->spa_root_vdev->vdev_state, src); 1708525SEric.Schrock@Sun.COM 1718525SEric.Schrock@Sun.COM version = spa_version(spa); 1728525SEric.Schrock@Sun.COM if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) 1738525SEric.Schrock@Sun.COM src = ZPROP_SRC_DEFAULT; 1748525SEric.Schrock@Sun.COM else 1758525SEric.Schrock@Sun.COM src = ZPROP_SRC_LOCAL; 1768525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src); 1778525SEric.Schrock@Sun.COM } 1785949Slling 1795949Slling spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src); 1805949Slling 1815949Slling if (spa->spa_root != NULL) 1825949Slling spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root, 1835949Slling 0, ZPROP_SRC_LOCAL); 1845094Slling 1856643Seschrock if ((dp = list_head(&spa->spa_config_list)) != NULL) { 1866643Seschrock if (dp->scd_path == NULL) { 1875949Slling spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 1886643Seschrock "none", 0, ZPROP_SRC_LOCAL); 1896643Seschrock } else if (strcmp(dp->scd_path, spa_config_path) != 0) { 1905949Slling spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 1916643Seschrock dp->scd_path, 0, ZPROP_SRC_LOCAL); 1925363Seschrock } 1935363Seschrock } 1945094Slling } 1955094Slling 1965094Slling /* 1975094Slling * Get zpool property values. 
1985094Slling */ 1995094Slling int 2005094Slling spa_prop_get(spa_t *spa, nvlist_t **nvp) 2015094Slling { 2025094Slling zap_cursor_t zc; 2035094Slling zap_attribute_t za; 2045094Slling objset_t *mos = spa->spa_meta_objset; 2055094Slling int err; 2065094Slling 2075949Slling VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); 2085094Slling 2097754SJeff.Bonwick@Sun.COM mutex_enter(&spa->spa_props_lock); 2107754SJeff.Bonwick@Sun.COM 2115094Slling /* 2125094Slling * Get properties from the spa config. 2135094Slling */ 2145949Slling spa_prop_get_config(spa, nvp); 2155094Slling 2165094Slling /* If no pool property object, no more prop to get. */ 2175094Slling if (spa->spa_pool_props_object == 0) { 2185094Slling mutex_exit(&spa->spa_props_lock); 2195094Slling return (0); 2205094Slling } 2215094Slling 2225094Slling /* 2235094Slling * Get properties from the MOS pool property object. 2245094Slling */ 2255094Slling for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object); 2265094Slling (err = zap_cursor_retrieve(&zc, &za)) == 0; 2275094Slling zap_cursor_advance(&zc)) { 2285094Slling uint64_t intval = 0; 2295094Slling char *strval = NULL; 2305094Slling zprop_source_t src = ZPROP_SRC_DEFAULT; 2315094Slling zpool_prop_t prop; 2325094Slling 2335094Slling if ((prop = zpool_name_to_prop(za.za_name)) == ZPROP_INVAL) 2345094Slling continue; 2355094Slling 2365094Slling switch (za.za_integer_length) { 2375094Slling case 8: 2385094Slling /* integer property */ 2395094Slling if (za.za_first_integer != 2405094Slling zpool_prop_default_numeric(prop)) 2415094Slling src = ZPROP_SRC_LOCAL; 2425094Slling 2435094Slling if (prop == ZPOOL_PROP_BOOTFS) { 2445094Slling dsl_pool_t *dp; 2455094Slling dsl_dataset_t *ds = NULL; 2465094Slling 2475094Slling dp = spa_get_dsl(spa); 2485094Slling rw_enter(&dp->dp_config_rwlock, RW_READER); 2496689Smaybee if (err = dsl_dataset_hold_obj(dp, 2506689Smaybee za.za_first_integer, FTAG, &ds)) { 2515094Slling rw_exit(&dp->dp_config_rwlock); 2525094Slling 
break; 2535094Slling } 2545094Slling 2555094Slling strval = kmem_alloc( 2565094Slling MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, 2575094Slling KM_SLEEP); 2585094Slling dsl_dataset_name(ds, strval); 2596689Smaybee dsl_dataset_rele(ds, FTAG); 2605094Slling rw_exit(&dp->dp_config_rwlock); 2615094Slling } else { 2625094Slling strval = NULL; 2635094Slling intval = za.za_first_integer; 2645094Slling } 2655094Slling 2665949Slling spa_prop_add_list(*nvp, prop, strval, intval, src); 2675094Slling 2685094Slling if (strval != NULL) 2695094Slling kmem_free(strval, 2705094Slling MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); 2715094Slling 2725094Slling break; 2735094Slling 2745094Slling case 1: 2755094Slling /* string property */ 2765094Slling strval = kmem_alloc(za.za_num_integers, KM_SLEEP); 2775094Slling err = zap_lookup(mos, spa->spa_pool_props_object, 2785094Slling za.za_name, 1, za.za_num_integers, strval); 2795094Slling if (err) { 2805094Slling kmem_free(strval, za.za_num_integers); 2815094Slling break; 2825094Slling } 2835949Slling spa_prop_add_list(*nvp, prop, strval, 0, src); 2845094Slling kmem_free(strval, za.za_num_integers); 2855094Slling break; 2865094Slling 2875094Slling default: 2885094Slling break; 2895094Slling } 2905094Slling } 2915094Slling zap_cursor_fini(&zc); 2925094Slling mutex_exit(&spa->spa_props_lock); 2935094Slling out: 2945094Slling if (err && err != ENOENT) { 2955094Slling nvlist_free(*nvp); 2965949Slling *nvp = NULL; 2975094Slling return (err); 2985094Slling } 2995094Slling 3005094Slling return (0); 3015094Slling } 3025094Slling 3035094Slling /* 3045094Slling * Validate the given pool properties nvlist and modify the list 3055094Slling * for the property values to be set. 
3065094Slling */ 3075094Slling static int 3085094Slling spa_prop_validate(spa_t *spa, nvlist_t *props) 3095094Slling { 3105094Slling nvpair_t *elem; 3115094Slling int error = 0, reset_bootfs = 0; 3125094Slling uint64_t objnum; 3135094Slling 3145094Slling elem = NULL; 3155094Slling while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { 3165094Slling zpool_prop_t prop; 3175094Slling char *propname, *strval; 3185094Slling uint64_t intval; 3195094Slling objset_t *os; 3205363Seschrock char *slash; 3215094Slling 3225094Slling propname = nvpair_name(elem); 3235094Slling 3245094Slling if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) 3255094Slling return (EINVAL); 3265094Slling 3275094Slling switch (prop) { 3285094Slling case ZPOOL_PROP_VERSION: 3295094Slling error = nvpair_value_uint64(elem, &intval); 3305094Slling if (!error && 3315094Slling (intval < spa_version(spa) || intval > SPA_VERSION)) 3325094Slling error = EINVAL; 3335094Slling break; 3345094Slling 3355094Slling case ZPOOL_PROP_DELEGATION: 3365094Slling case ZPOOL_PROP_AUTOREPLACE: 3377538SRichard.Morris@Sun.COM case ZPOOL_PROP_LISTSNAPS: 3389816SGeorge.Wilson@Sun.COM case ZPOOL_PROP_AUTOEXPAND: 3395094Slling error = nvpair_value_uint64(elem, &intval); 3405094Slling if (!error && intval > 1) 3415094Slling error = EINVAL; 3425094Slling break; 3435094Slling 3445094Slling case ZPOOL_PROP_BOOTFS: 3459630SJeff.Bonwick@Sun.COM /* 3469630SJeff.Bonwick@Sun.COM * If the pool version is less than SPA_VERSION_BOOTFS, 3479630SJeff.Bonwick@Sun.COM * or the pool is still being created (version == 0), 3489630SJeff.Bonwick@Sun.COM * the bootfs property cannot be set. 
3499630SJeff.Bonwick@Sun.COM */ 3505094Slling if (spa_version(spa) < SPA_VERSION_BOOTFS) { 3515094Slling error = ENOTSUP; 3525094Slling break; 3535094Slling } 3545094Slling 3555094Slling /* 3567042Sgw25295 * Make sure the vdev config is bootable 3575094Slling */ 3587042Sgw25295 if (!vdev_is_bootable(spa->spa_root_vdev)) { 3595094Slling error = ENOTSUP; 3605094Slling break; 3615094Slling } 3625094Slling 3635094Slling reset_bootfs = 1; 3645094Slling 3655094Slling error = nvpair_value_string(elem, &strval); 3665094Slling 3675094Slling if (!error) { 3687042Sgw25295 uint64_t compress; 3697042Sgw25295 3705094Slling if (strval == NULL || strval[0] == '\0') { 3715094Slling objnum = zpool_prop_default_numeric( 3725094Slling ZPOOL_PROP_BOOTFS); 3735094Slling break; 3745094Slling } 3755094Slling 37610298SMatthew.Ahrens@Sun.COM if (error = dmu_objset_hold(strval, FTAG, &os)) 3775094Slling break; 3787042Sgw25295 37910298SMatthew.Ahrens@Sun.COM /* Must be ZPL and not gzip compressed. */ 38010298SMatthew.Ahrens@Sun.COM 38110298SMatthew.Ahrens@Sun.COM if (dmu_objset_type(os) != DMU_OST_ZFS) { 38210298SMatthew.Ahrens@Sun.COM error = ENOTSUP; 38310298SMatthew.Ahrens@Sun.COM } else if ((error = dsl_prop_get_integer(strval, 3847042Sgw25295 zfs_prop_to_name(ZFS_PROP_COMPRESSION), 3857042Sgw25295 &compress, NULL)) == 0 && 3867042Sgw25295 !BOOTFS_COMPRESS_VALID(compress)) { 3877042Sgw25295 error = ENOTSUP; 3887042Sgw25295 } else { 3897042Sgw25295 objnum = dmu_objset_id(os); 3907042Sgw25295 } 39110298SMatthew.Ahrens@Sun.COM dmu_objset_rele(os, FTAG); 3925094Slling } 3935094Slling break; 3947754SJeff.Bonwick@Sun.COM 3955329Sgw25295 case ZPOOL_PROP_FAILUREMODE: 3965329Sgw25295 error = nvpair_value_uint64(elem, &intval); 3975329Sgw25295 if (!error && (intval < ZIO_FAILURE_MODE_WAIT || 3985329Sgw25295 intval > ZIO_FAILURE_MODE_PANIC)) 3995329Sgw25295 error = EINVAL; 4005329Sgw25295 4015329Sgw25295 /* 4025329Sgw25295 * This is a special case which only occurs when 4035329Sgw25295 * the pool 
has completely failed. This allows 4045329Sgw25295 * the user to change the in-core failmode property 4055329Sgw25295 * without syncing it out to disk (I/Os might 4065329Sgw25295 * currently be blocked). We do this by returning 4075329Sgw25295 * EIO to the caller (spa_prop_set) to trick it 4085329Sgw25295 * into thinking we encountered a property validation 4095329Sgw25295 * error. 4105329Sgw25295 */ 4117754SJeff.Bonwick@Sun.COM if (!error && spa_suspended(spa)) { 4125329Sgw25295 spa->spa_failmode = intval; 4135329Sgw25295 error = EIO; 4145329Sgw25295 } 4155329Sgw25295 break; 4165363Seschrock 4175363Seschrock case ZPOOL_PROP_CACHEFILE: 4185363Seschrock if ((error = nvpair_value_string(elem, &strval)) != 0) 4195363Seschrock break; 4205363Seschrock 4215363Seschrock if (strval[0] == '\0') 4225363Seschrock break; 4235363Seschrock 4245363Seschrock if (strcmp(strval, "none") == 0) 4255363Seschrock break; 4265363Seschrock 4275363Seschrock if (strval[0] != '/') { 4285363Seschrock error = EINVAL; 4295363Seschrock break; 4305363Seschrock } 4315363Seschrock 4325363Seschrock slash = strrchr(strval, '/'); 4335363Seschrock ASSERT(slash != NULL); 4345363Seschrock 4355363Seschrock if (slash[1] == '\0' || strcmp(slash, "/.") == 0 || 4365363Seschrock strcmp(slash, "/..") == 0) 4375363Seschrock error = EINVAL; 4385363Seschrock break; 4395094Slling } 4405094Slling 4415094Slling if (error) 4425094Slling break; 4435094Slling } 4445094Slling 4455094Slling if (!error && reset_bootfs) { 4465094Slling error = nvlist_remove(props, 4475094Slling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING); 4485094Slling 4495094Slling if (!error) { 4505094Slling error = nvlist_add_uint64(props, 4515094Slling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum); 4525094Slling } 4535094Slling } 4545094Slling 4555094Slling return (error); 4565094Slling } 4575094Slling 4588525SEric.Schrock@Sun.COM void 4598525SEric.Schrock@Sun.COM spa_configfile_set(spa_t *spa, nvlist_t *nvp, boolean_t need_sync) 
4608525SEric.Schrock@Sun.COM { 4618525SEric.Schrock@Sun.COM char *cachefile; 4628525SEric.Schrock@Sun.COM spa_config_dirent_t *dp; 4638525SEric.Schrock@Sun.COM 4648525SEric.Schrock@Sun.COM if (nvlist_lookup_string(nvp, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), 4658525SEric.Schrock@Sun.COM &cachefile) != 0) 4668525SEric.Schrock@Sun.COM return; 4678525SEric.Schrock@Sun.COM 4688525SEric.Schrock@Sun.COM dp = kmem_alloc(sizeof (spa_config_dirent_t), 4698525SEric.Schrock@Sun.COM KM_SLEEP); 4708525SEric.Schrock@Sun.COM 4718525SEric.Schrock@Sun.COM if (cachefile[0] == '\0') 4728525SEric.Schrock@Sun.COM dp->scd_path = spa_strdup(spa_config_path); 4738525SEric.Schrock@Sun.COM else if (strcmp(cachefile, "none") == 0) 4748525SEric.Schrock@Sun.COM dp->scd_path = NULL; 4758525SEric.Schrock@Sun.COM else 4768525SEric.Schrock@Sun.COM dp->scd_path = spa_strdup(cachefile); 4778525SEric.Schrock@Sun.COM 4788525SEric.Schrock@Sun.COM list_insert_head(&spa->spa_config_list, dp); 4798525SEric.Schrock@Sun.COM if (need_sync) 4808525SEric.Schrock@Sun.COM spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); 4818525SEric.Schrock@Sun.COM } 4828525SEric.Schrock@Sun.COM 4835094Slling int 4845094Slling spa_prop_set(spa_t *spa, nvlist_t *nvp) 4855094Slling { 4865094Slling int error; 4878525SEric.Schrock@Sun.COM nvpair_t *elem; 4888525SEric.Schrock@Sun.COM boolean_t need_sync = B_FALSE; 4898525SEric.Schrock@Sun.COM zpool_prop_t prop; 4905094Slling 4915094Slling if ((error = spa_prop_validate(spa, nvp)) != 0) 4925094Slling return (error); 4935094Slling 4948525SEric.Schrock@Sun.COM elem = NULL; 4958525SEric.Schrock@Sun.COM while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) { 4968525SEric.Schrock@Sun.COM if ((prop = zpool_name_to_prop( 4978525SEric.Schrock@Sun.COM nvpair_name(elem))) == ZPROP_INVAL) 4988525SEric.Schrock@Sun.COM return (EINVAL); 4998525SEric.Schrock@Sun.COM 5008525SEric.Schrock@Sun.COM if (prop == ZPOOL_PROP_CACHEFILE || prop == ZPOOL_PROP_ALTROOT) 5018525SEric.Schrock@Sun.COM continue; 
5028525SEric.Schrock@Sun.COM 5038525SEric.Schrock@Sun.COM need_sync = B_TRUE; 5048525SEric.Schrock@Sun.COM break; 5058525SEric.Schrock@Sun.COM } 5068525SEric.Schrock@Sun.COM 5078525SEric.Schrock@Sun.COM if (need_sync) 5088525SEric.Schrock@Sun.COM return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props, 5098525SEric.Schrock@Sun.COM spa, nvp, 3)); 5108525SEric.Schrock@Sun.COM else 5118525SEric.Schrock@Sun.COM return (0); 5125094Slling } 5135094Slling 5145094Slling /* 5155094Slling * If the bootfs property value is dsobj, clear it. 5165094Slling */ 5175094Slling void 5185094Slling spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx) 5195094Slling { 5205094Slling if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) { 5215094Slling VERIFY(zap_remove(spa->spa_meta_objset, 5225094Slling spa->spa_pool_props_object, 5235094Slling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0); 5245094Slling spa->spa_bootfs = 0; 5255094Slling } 5265094Slling } 5275094Slling 528789Sahrens /* 529789Sahrens * ========================================================================== 530789Sahrens * SPA state manipulation (open/create/destroy/import/export) 531789Sahrens * ========================================================================== 532789Sahrens */ 533789Sahrens 5341544Seschrock static int 5351544Seschrock spa_error_entry_compare(const void *a, const void *b) 5361544Seschrock { 5371544Seschrock spa_error_entry_t *sa = (spa_error_entry_t *)a; 5381544Seschrock spa_error_entry_t *sb = (spa_error_entry_t *)b; 5391544Seschrock int ret; 5401544Seschrock 5411544Seschrock ret = bcmp(&sa->se_bookmark, &sb->se_bookmark, 5421544Seschrock sizeof (zbookmark_t)); 5431544Seschrock 5441544Seschrock if (ret < 0) 5451544Seschrock return (-1); 5461544Seschrock else if (ret > 0) 5471544Seschrock return (1); 5481544Seschrock else 5491544Seschrock return (0); 5501544Seschrock } 5511544Seschrock 5521544Seschrock /* 5531544Seschrock * Utility function which retrieves 
copies of the current logs and 5541544Seschrock * re-initializes them in the process. 5551544Seschrock */ 5561544Seschrock void 5571544Seschrock spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub) 5581544Seschrock { 5591544Seschrock ASSERT(MUTEX_HELD(&spa->spa_errlist_lock)); 5601544Seschrock 5611544Seschrock bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t)); 5621544Seschrock bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t)); 5631544Seschrock 5641544Seschrock avl_create(&spa->spa_errlist_scrub, 5651544Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 5661544Seschrock offsetof(spa_error_entry_t, se_avl)); 5671544Seschrock avl_create(&spa->spa_errlist_last, 5681544Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 5691544Seschrock offsetof(spa_error_entry_t, se_avl)); 5701544Seschrock } 5711544Seschrock 572789Sahrens /* 573789Sahrens * Activate an uninitialized pool. 574789Sahrens */ 575789Sahrens static void 5768241SJeff.Bonwick@Sun.COM spa_activate(spa_t *spa, int mode) 577789Sahrens { 578789Sahrens ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); 579789Sahrens 580789Sahrens spa->spa_state = POOL_STATE_ACTIVE; 5818241SJeff.Bonwick@Sun.COM spa->spa_mode = mode; 582789Sahrens 58310594SGeorge.Wilson@Sun.COM spa->spa_normal_class = metaslab_class_create(spa, zfs_metaslab_ops); 58410594SGeorge.Wilson@Sun.COM spa->spa_log_class = metaslab_class_create(spa, zfs_metaslab_ops); 585789Sahrens 5867754SJeff.Bonwick@Sun.COM for (int t = 0; t < ZIO_TYPES; t++) { 5879515SJonathan.Adams@Sun.COM const zio_taskq_info_t *ztip = &zio_taskqs[t]; 5887754SJeff.Bonwick@Sun.COM for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { 5899515SJonathan.Adams@Sun.COM enum zti_modes mode = ztip->zti_nthreads[q].zti_mode; 5909515SJonathan.Adams@Sun.COM uint_t value = ztip->zti_nthreads[q].zti_value; 5919515SJonathan.Adams@Sun.COM char name[32]; 5929515SJonathan.Adams@Sun.COM 5939515SJonathan.Adams@Sun.COM (void) snprintf(name, sizeof (name), 
5949515SJonathan.Adams@Sun.COM "%s_%s", ztip->zti_name, zio_taskq_types[q]); 5959515SJonathan.Adams@Sun.COM 5969515SJonathan.Adams@Sun.COM if (mode == zti_mode_tune) { 5979515SJonathan.Adams@Sun.COM mode = zio_taskq_tune_mode; 5989515SJonathan.Adams@Sun.COM value = zio_taskq_tune_value; 5999515SJonathan.Adams@Sun.COM if (mode == zti_mode_tune) 6009515SJonathan.Adams@Sun.COM mode = zti_mode_online_percent; 6019515SJonathan.Adams@Sun.COM } 6029515SJonathan.Adams@Sun.COM 6039515SJonathan.Adams@Sun.COM switch (mode) { 6049515SJonathan.Adams@Sun.COM case zti_mode_fixed: 6059515SJonathan.Adams@Sun.COM ASSERT3U(value, >=, 1); 6069515SJonathan.Adams@Sun.COM value = MAX(value, 1); 6079515SJonathan.Adams@Sun.COM 6089515SJonathan.Adams@Sun.COM spa->spa_zio_taskq[t][q] = taskq_create(name, 6099515SJonathan.Adams@Sun.COM value, maxclsyspri, 50, INT_MAX, 6109515SJonathan.Adams@Sun.COM TASKQ_PREPOPULATE); 6119515SJonathan.Adams@Sun.COM break; 6129515SJonathan.Adams@Sun.COM 6139515SJonathan.Adams@Sun.COM case zti_mode_online_percent: 6149515SJonathan.Adams@Sun.COM spa->spa_zio_taskq[t][q] = taskq_create(name, 6159515SJonathan.Adams@Sun.COM value, maxclsyspri, 50, INT_MAX, 6169515SJonathan.Adams@Sun.COM TASKQ_PREPOPULATE | TASKQ_THREADS_CPU_PCT); 6179515SJonathan.Adams@Sun.COM break; 6189515SJonathan.Adams@Sun.COM 6199515SJonathan.Adams@Sun.COM case zti_mode_tune: 6209515SJonathan.Adams@Sun.COM default: 6219515SJonathan.Adams@Sun.COM panic("unrecognized mode for " 6229515SJonathan.Adams@Sun.COM "zio_taskqs[%u]->zti_nthreads[%u] (%u:%u) " 6239515SJonathan.Adams@Sun.COM "in spa_activate()", 6249515SJonathan.Adams@Sun.COM t, q, mode, value); 6259515SJonathan.Adams@Sun.COM break; 6269515SJonathan.Adams@Sun.COM } 6277754SJeff.Bonwick@Sun.COM } 628789Sahrens } 629789Sahrens 6307754SJeff.Bonwick@Sun.COM list_create(&spa->spa_config_dirty_list, sizeof (vdev_t), 6317754SJeff.Bonwick@Sun.COM offsetof(vdev_t, vdev_config_dirty_node)); 6327754SJeff.Bonwick@Sun.COM 
list_create(&spa->spa_state_dirty_list, sizeof (vdev_t), 6337754SJeff.Bonwick@Sun.COM offsetof(vdev_t, vdev_state_dirty_node)); 634789Sahrens 635789Sahrens txg_list_create(&spa->spa_vdev_txg_list, 636789Sahrens offsetof(struct vdev, vdev_txg_node)); 6371544Seschrock 6381544Seschrock avl_create(&spa->spa_errlist_scrub, 6391544Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 6401544Seschrock offsetof(spa_error_entry_t, se_avl)); 6411544Seschrock avl_create(&spa->spa_errlist_last, 6421544Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 6431544Seschrock offsetof(spa_error_entry_t, se_avl)); 644789Sahrens } 645789Sahrens 646789Sahrens /* 647789Sahrens * Opposite of spa_activate(). 648789Sahrens */ 649789Sahrens static void 650789Sahrens spa_deactivate(spa_t *spa) 651789Sahrens { 652789Sahrens ASSERT(spa->spa_sync_on == B_FALSE); 653789Sahrens ASSERT(spa->spa_dsl_pool == NULL); 654789Sahrens ASSERT(spa->spa_root_vdev == NULL); 6559630SJeff.Bonwick@Sun.COM ASSERT(spa->spa_async_zio_root == NULL); 656789Sahrens ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED); 657789Sahrens 658789Sahrens txg_list_destroy(&spa->spa_vdev_txg_list); 659789Sahrens 6607754SJeff.Bonwick@Sun.COM list_destroy(&spa->spa_config_dirty_list); 6617754SJeff.Bonwick@Sun.COM list_destroy(&spa->spa_state_dirty_list); 6627754SJeff.Bonwick@Sun.COM 6637754SJeff.Bonwick@Sun.COM for (int t = 0; t < ZIO_TYPES; t++) { 6647754SJeff.Bonwick@Sun.COM for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { 6657754SJeff.Bonwick@Sun.COM taskq_destroy(spa->spa_zio_taskq[t][q]); 6667754SJeff.Bonwick@Sun.COM spa->spa_zio_taskq[t][q] = NULL; 6677754SJeff.Bonwick@Sun.COM } 668789Sahrens } 669789Sahrens 670789Sahrens metaslab_class_destroy(spa->spa_normal_class); 671789Sahrens spa->spa_normal_class = NULL; 672789Sahrens 6734527Sperrin metaslab_class_destroy(spa->spa_log_class); 6744527Sperrin spa->spa_log_class = NULL; 6754527Sperrin 6761544Seschrock /* 6771544Seschrock * If this was part of an import or the 
 open otherwise failed, we may
	 * still have errors left in the queues.  Empty them just in case.
	 */
	spa_errlog_drain(spa);

	avl_destroy(&spa->spa_errlist_scrub);
	avl_destroy(&spa->spa_errlist_last);

	spa->spa_state = POOL_STATE_UNINITIALIZED;
}

/*
 * Verify a pool configuration, and construct the vdev tree appropriately.  This
 * will create all the necessary vdevs in the appropriate layout, with each vdev
 * in the CLOSED state.  This will prep the pool before open/creation/import.
 * All vdev validation is done by the vdev_alloc() routine.
 *
 * On success, *vdp points at the newly constructed (sub)tree and 0 is
 * returned.  On failure, *vdp is set to NULL, anything already built under
 * it is freed, and a non-zero errno-style value is returned.
 */
static int
spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent,
    uint_t id, int atype)
{
	nvlist_t **child;
	uint_t children;
	int error;

	/* Allocate (and validate) the vdev described by this nvlist node. */
	if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0)
		return (error);

	/* Leaf vdevs have no children; nothing more to parse. */
	if ((*vdp)->vdev_ops->vdev_op_leaf)
		return (0);

	error = nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &child, &children);

	/* An interior vdev with no child array is acceptable as-is. */
	if (error == ENOENT)
		return (0);

	/* Any other lookup failure means the config nvlist is malformed. */
	if (error) {
		vdev_free(*vdp);
		*vdp = NULL;
		return (EINVAL);
	}

	/*
	 * Recursively parse each child; on any failure tear down the entire
	 * subtree built so far (vdev_free() is recursive).
	 */
	for (int c = 0; c < children; c++) {
		vdev_t *vd;
		if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c,
		    atype)) != 0) {
			vdev_free(*vdp);
			*vdp = NULL;
			return (error);
		}
	}

	ASSERT(*vdp != NULL);

	return (0);
}

/*
 * Opposite of spa_load().
 */
static void
spa_unload(spa_t *spa)
{
	int i;

	ASSERT(MUTEX_HELD(&spa_namespace_lock));

	/*
	 * Stop async tasks.
	 */
	spa_async_suspend(spa);

	/*
	 * Stop syncing.
	 */
	if (spa->spa_sync_on) {
		txg_sync_stop(spa->spa_dsl_pool);
		spa->spa_sync_on = B_FALSE;
	}

	/*
	 * Wait for any outstanding async I/O to complete.
	 */
	if (spa->spa_async_zio_root != NULL) {
		(void) zio_wait(spa->spa_async_zio_root);
		spa->spa_async_zio_root = NULL;
	}

	/*
	 * Close the dsl pool.
	 */
	if (spa->spa_dsl_pool) {
		dsl_pool_close(spa->spa_dsl_pool);
		spa->spa_dsl_pool = NULL;
	}

	/*
	 * The remaining teardown (vdev trees and aux device lists) is done
	 * with all config locks held for writing.
	 */
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	/*
	 * Drop and purge level 2 cache
	 */
	spa_l2cache_drop(spa);

	/*
	 * Close all vdevs.
	 */
	if (spa->spa_root_vdev)
		vdev_free(spa->spa_root_vdev);
	ASSERT(spa->spa_root_vdev == NULL);

	/* Free the hot-spare vdevs, their pointer array, and their config. */
	for (i = 0; i < spa->spa_spares.sav_count; i++)
		vdev_free(spa->spa_spares.sav_vdevs[i]);
	if (spa->spa_spares.sav_vdevs) {
		kmem_free(spa->spa_spares.sav_vdevs,
		    spa->spa_spares.sav_count * sizeof (void *));
		spa->spa_spares.sav_vdevs = NULL;
	}
	if (spa->spa_spares.sav_config) {
		nvlist_free(spa->spa_spares.sav_config);
		spa->spa_spares.sav_config = NULL;
	}
	spa->spa_spares.sav_count = 0;

	/* Same teardown for the l2cache (L2ARC) aux devices. */
	for (i = 0; i < spa->spa_l2cache.sav_count; i++)
		vdev_free(spa->spa_l2cache.sav_vdevs[i]);
	if (spa->spa_l2cache.sav_vdevs) {
		kmem_free(spa->spa_l2cache.sav_vdevs,
		    spa->spa_l2cache.sav_count * sizeof (void *));
		spa->spa_l2cache.sav_vdevs = NULL;
	}
	if (spa->spa_l2cache.sav_config) {
		nvlist_free(spa->spa_l2cache.sav_config);
		spa->spa_l2cache.sav_config = NULL;
	}
	spa->spa_l2cache.sav_count = 0;

	spa->spa_async_suspended = 0;

	spa_config_exit(spa, SCL_ALL, FTAG);
}

/*
 * Load (or re-load) the current list of vdevs describing the active spares for
 * this pool.  When this is called, we have some form of basic information in
 * 'spa_spares.sav_config'.  We parse this into vdevs, try to open them, and
 * then re-generate a more complete list including status information.
 */
static void
spa_load_spares(spa_t *spa)
{
	nvlist_t **spares;
	uint_t nspares;
	int i;
	vdev_t *vd, *tvd;

	/* Caller must hold all config locks as writer. */
	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);

	/*
	 * First, close and free any existing spare vdevs.
	 */
	for (i = 0; i < spa->spa_spares.sav_count; i++) {
		vd = spa->spa_spares.sav_vdevs[i];

		/* Undo the call to spa_activate() below */
		if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid,
		    B_FALSE)) != NULL && tvd->vdev_isspare)
			spa_spare_remove(tvd);
		vdev_close(vd);
		vdev_free(vd);
	}

	if (spa->spa_spares.sav_vdevs)
		kmem_free(spa->spa_spares.sav_vdevs,
		    spa->spa_spares.sav_count * sizeof (void *));

	if (spa->spa_spares.sav_config == NULL)
		nspares = 0;
	else
		VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
		    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);

	spa->spa_spares.sav_count = (int)nspares;
	spa->spa_spares.sav_vdevs = NULL;

	if (nspares == 0)
		return;

	/*
	 * Construct the array of vdevs, opening them to get status in the
	 * process.   For each spare, there is potentially two different vdev_t
	 * structures associated with it: one in the list of spares (used only
	 * for basic validation purposes) and one in the active vdev
	 * configuration (if it's spared in).  During this phase we open and
	 * validate each vdev on the spare list.  If the vdev also exists in the
	 * active configuration, then we also mark this vdev as an active spare.
	 */
	spa->spa_spares.sav_vdevs = kmem_alloc(nspares * sizeof (void *),
	    KM_SLEEP);
	for (i = 0; i < spa->spa_spares.sav_count; i++) {
		VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0,
		    VDEV_ALLOC_SPARE) == 0);
		ASSERT(vd != NULL);

		spa->spa_spares.sav_vdevs[i] = vd;

		if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid,
		    B_FALSE)) != NULL) {
			if (!tvd->vdev_isspare)
				spa_spare_add(tvd);

			/*
			 * We only mark the spare active if we were successfully
			 * able to load the vdev.  Otherwise, importing a pool
			 * with a bad active spare would result in strange
			 * behavior, because multiple pool would think the spare
			 * is actively in use.
			 *
			 * There is a vulnerability here to an equally bizarre
			 * circumstance, where a dead active spare is later
			 * brought back to life (onlined or otherwise).  Given
			 * the rarity of this scenario, and the extra complexity
			 * it adds, we ignore the possibility.
			 */
			if (!vdev_is_dead(tvd))
				spa_spare_activate(tvd);
		}

		/* A spare is its own top-level vdev, owned by the spares sav. */
		vd->vdev_top = vd;
		vd->vdev_aux = &spa->spa_spares;

		if (vdev_open(vd) != 0)
			continue;

		if (vdev_validate_aux(vd) == 0)
			spa_spare_add(vd);
	}

	/*
	 * Recompute the stashed list of spares, with status information
	 * this time.
	 */
	VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES,
	    DATA_TYPE_NVLIST_ARRAY) == 0);

	spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *),
	    KM_SLEEP);
	for (i = 0; i < spa->spa_spares.sav_count; i++)
		spares[i] = vdev_config_generate(spa,
		    spa->spa_spares.sav_vdevs[i], B_TRUE, B_TRUE, B_FALSE);
	VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
	    ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0);
	/* nvlist_add_nvlist_array() copies, so free our temporary list. */
	for (i = 0; i < spa->spa_spares.sav_count; i++)
		nvlist_free(spares[i]);
	kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *));
}

/*
 * Load (or re-load) the current list of vdevs describing the active l2cache for
 * this pool.  When this is called, we have some form of basic information in
 * 'spa_l2cache.sav_config'.  We parse this into vdevs, try to open them, and
 * then re-generate a more complete list including status information.
 * Devices which are already active have their details maintained, and are
 * not re-opened.
 */
static void
spa_load_l2cache(spa_t *spa)
{
	nvlist_t **l2cache;
	uint_t nl2cache;
	int i, j, oldnvdevs;
	uint64_t guid;
	vdev_t *vd, **oldvdevs, **newvdevs;
	spa_aux_vdev_t *sav = &spa->spa_l2cache;

	/* Caller must hold all config locks as writer. */
	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);

	if (sav->sav_config != NULL) {
		VERIFY(nvlist_lookup_nvlist_array(sav->sav_config,
		    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
		newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP);
	} else {
		nl2cache = 0;
	}

	/* Detach the current list so we can diff old against new. */
	oldvdevs = sav->sav_vdevs;
	oldnvdevs = sav->sav_count;
	sav->sav_vdevs = NULL;
	sav->sav_count = 0;

	/*
	 * Process new nvlist of vdevs.
	 */
	for (i = 0; i < nl2cache; i++) {
		VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID,
		    &guid) == 0);

		newvdevs[i] = NULL;
		for (j = 0; j < oldnvdevs; j++) {
			vd = oldvdevs[j];
			if (vd != NULL && guid == vd->vdev_guid) {
				/*
				 * Retain previous vdev for add/remove ops.
				 */
				newvdevs[i] = vd;
				oldvdevs[j] = NULL;
				break;
			}
		}

		if (newvdevs[i] == NULL) {
			/*
			 * Create new vdev
			 */
			VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0,
			    VDEV_ALLOC_L2CACHE) == 0);
			ASSERT(vd != NULL);
			newvdevs[i] = vd;

			/*
			 * Commit this vdev as an l2cache device,
			 * even if it fails to open.
			 */
			spa_l2cache_add(vd);

			vd->vdev_top = vd;
			vd->vdev_aux = sav;

			spa_l2cache_activate(vd);

			if (vdev_open(vd) != 0)
				continue;

			(void) vdev_validate_aux(vd);

			/* Only hand healthy devices to the L2ARC. */
			if (!vdev_is_dead(vd))
				l2arc_add_vdev(spa, vd);
		}
	}

	/*
	 * Purge vdevs that were dropped
	 */
	for (i = 0; i < oldnvdevs; i++) {
		uint64_t pool;

		vd = oldvdevs[i];
		if (vd != NULL) {
			if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
			    pool != 0ULL && l2arc_vdev_present(vd))
				l2arc_remove_vdev(vd);
			(void) vdev_close(vd);
			spa_l2cache_remove(vd);
		}
	}

	if (oldvdevs)
		kmem_free(oldvdevs, oldnvdevs * sizeof (void *));

	/*
	 * With no config there is nothing to regenerate; sav_count is 0 here
	 * so the cleanup loops below the label are no-ops.
	 */
	if (sav->sav_config == NULL)
		goto out;

	sav->sav_vdevs = newvdevs;
	sav->sav_count = (int)nl2cache;

	/*
	 * Recompute the stashed list of l2cache devices, with status
	 * information this time.
	 */
	VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE,
	    DATA_TYPE_NVLIST_ARRAY) == 0);

	l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP);
	for (i = 0; i < sav->sav_count; i++)
		l2cache[i] = vdev_config_generate(spa,
		    sav->sav_vdevs[i], B_TRUE, B_FALSE, B_TRUE);
	VERIFY(nvlist_add_nvlist_array(sav->sav_config,
	    ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0);
out:
	for (i = 0; i < sav->sav_count; i++)
		nvlist_free(l2cache[i]);
	if (sav->sav_count)
		kmem_free(l2cache, sav->sav_count * sizeof (void *));
}

/*
 * Read a packed nvlist from the MOS object 'obj' and unpack it into *value.
 * The object's bonus buffer holds the packed size.  On success *value is an
 * nvlist owned by the caller; on failure *value is NULL and the dmu_read()
 * or nvlist_unpack() error is returned.
 */
static int
load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value)
{
	dmu_buf_t *db;
	char *packed = NULL;
	size_t nvsize = 0;
	int error;
	*value = NULL;

	VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
	nvsize = *(uint64_t *)db->db_data;
	dmu_buf_rele(db, FTAG);

	packed = kmem_alloc(nvsize, KM_SLEEP);
	error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed,
	    DMU_READ_PREFETCH);
	if (error == 0)
		error = nvlist_unpack(packed, nvsize, value, 0);
	kmem_free(packed, nvsize);

	return (error);
}

/*
 * Checks
to see if the given vdev could not be opened, in which case we post a
 * sysevent to notify the autoreplace code that the device has been removed.
 */
static void
spa_check_removed(vdev_t *vd)
{
	/* Walk the whole subtree; only leaves can actually be "removed". */
	for (int c = 0; c < vd->vdev_children; c++)
		spa_check_removed(vd->vdev_child[c]);

	if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) {
		zfs_post_autoreplace(vd->vdev_spa, vd);
		spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK);
	}
}

/*
 * Load the slog device state from the config object since it's possible
 * that the label does not contain the most up-to-date information.
 */
void
spa_load_log_state(spa_t *spa, nvlist_t *nv)
{
	vdev_t *ovd, *rvd = spa->spa_root_vdev;

	/*
	 * Load the original root vdev tree from the passed config.
	 */
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	VERIFY(spa_config_parse(spa, &ovd, nv, NULL, 0, VDEV_ALLOC_LOAD) == 0);

	/*
	 * Copy log-device state from the parsed tree into the matching
	 * (same index) children of the live root vdev.
	 */
	for (int c = 0; c < rvd->vdev_children; c++) {
		vdev_t *cvd = rvd->vdev_child[c];
		if (cvd->vdev_islog)
			vdev_load_log_state(cvd, ovd->vdev_child[c]);
	}
	vdev_free(ovd);
	spa_config_exit(spa, SCL_ALL, FTAG);
}

/*
 * Check for missing log devices.  Returns non-zero (and records
 * SPA_LOG_MISSING in spa_log_state) if any log chain cannot be verified;
 * returns 0 otherwise.
 */
int
spa_check_logs(spa_t *spa)
{
	switch (spa->spa_log_state) {
	case SPA_LOG_MISSING:
		/* need to recheck in case slog has been restored */
		/* FALLTHROUGH */
	case SPA_LOG_UNKNOWN:
		if (dmu_objset_find(spa->spa_name, zil_check_log_chain, NULL,
		    DS_FIND_CHILDREN)) {
			spa->spa_log_state = SPA_LOG_MISSING;
			return (1);
		}
		break;
	}
	return (0);
}

/*
 * Run spa_check_removed() over every device in an aux vdev list
 * (hot spares or l2cache).
 */
static void
spa_aux_check_removed(spa_aux_vdev_t *sav)
{
	int i;

	for (i = 0; i < sav->sav_count; i++)
		spa_check_removed(sav->sav_vdevs[i]);
}

/*
 * Load an existing storage pool, using the pool's builtin spa_config as a
 * source of
configuration information. 1157789Sahrens */ 1158789Sahrens static int 11591544Seschrock spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig) 1160789Sahrens { 1161789Sahrens int error = 0; 116210594SGeorge.Wilson@Sun.COM nvlist_t *nvconfig, *nvroot = NULL; 1163789Sahrens vdev_t *rvd; 1164789Sahrens uberblock_t *ub = &spa->spa_uberblock; 11651635Sbonwick uint64_t config_cache_txg = spa->spa_config_txg; 1166789Sahrens uint64_t pool_guid; 11672082Seschrock uint64_t version; 11684451Seschrock uint64_t autoreplace = 0; 11698241SJeff.Bonwick@Sun.COM int orig_mode = spa->spa_mode; 11707294Sperrin char *ereport = FM_EREPORT_ZFS_POOL; 1171789Sahrens 11728241SJeff.Bonwick@Sun.COM /* 11738241SJeff.Bonwick@Sun.COM * If this is an untrusted config, access the pool in read-only mode. 11748241SJeff.Bonwick@Sun.COM * This prevents things like resilvering recently removed devices. 11758241SJeff.Bonwick@Sun.COM */ 11768241SJeff.Bonwick@Sun.COM if (!mosconfig) 11778241SJeff.Bonwick@Sun.COM spa->spa_mode = FREAD; 11788241SJeff.Bonwick@Sun.COM 11797754SJeff.Bonwick@Sun.COM ASSERT(MUTEX_HELD(&spa_namespace_lock)); 11807754SJeff.Bonwick@Sun.COM 11811544Seschrock spa->spa_load_state = state; 11821635Sbonwick 1183789Sahrens if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) || 11841733Sbonwick nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) { 11851544Seschrock error = EINVAL; 11861544Seschrock goto out; 11871544Seschrock } 1188789Sahrens 11892082Seschrock /* 11902082Seschrock * Versioning wasn't explicitly added to the label until later, so if 11912082Seschrock * it's not present treat it as the initial version. 
11922082Seschrock */ 11932082Seschrock if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) != 0) 11944577Sahrens version = SPA_VERSION_INITIAL; 11952082Seschrock 11961733Sbonwick (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, 11971733Sbonwick &spa->spa_config_txg); 11981733Sbonwick 11991635Sbonwick if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) && 12001544Seschrock spa_guid_exists(pool_guid, 0)) { 12011544Seschrock error = EEXIST; 12021544Seschrock goto out; 12031544Seschrock } 1204789Sahrens 12052174Seschrock spa->spa_load_guid = pool_guid; 12062174Seschrock 1207789Sahrens /* 12089234SGeorge.Wilson@Sun.COM * Create "The Godfather" zio to hold all async IOs 12099234SGeorge.Wilson@Sun.COM */ 12109630SJeff.Bonwick@Sun.COM spa->spa_async_zio_root = zio_root(spa, NULL, NULL, 12119630SJeff.Bonwick@Sun.COM ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); 12129234SGeorge.Wilson@Sun.COM 12139234SGeorge.Wilson@Sun.COM /* 12142082Seschrock * Parse the configuration into a vdev tree. We explicitly set the 12152082Seschrock * value that will be returned by spa_version() since parsing the 12162082Seschrock * configuration requires knowing the version number. 1217789Sahrens */ 12187754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 12192082Seschrock spa->spa_ubsync.ub_version = version; 12202082Seschrock error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_LOAD); 12217754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 1222789Sahrens 12232082Seschrock if (error != 0) 12241544Seschrock goto out; 1225789Sahrens 12261585Sbonwick ASSERT(spa->spa_root_vdev == rvd); 1227789Sahrens ASSERT(spa_guid(spa) == pool_guid); 1228789Sahrens 1229789Sahrens /* 1230789Sahrens * Try to open all vdevs, loading each label in the process. 
1231789Sahrens */ 12327754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 12334070Smc142369 error = vdev_open(rvd); 12347754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 12354070Smc142369 if (error != 0) 12361544Seschrock goto out; 1237789Sahrens 1238789Sahrens /* 12399276SMark.Musante@Sun.COM * We need to validate the vdev labels against the configuration that 12409276SMark.Musante@Sun.COM * we have in hand, which is dependent on the setting of mosconfig. If 12419276SMark.Musante@Sun.COM * mosconfig is true then we're validating the vdev labels based on 12429276SMark.Musante@Sun.COM * that config. Otherwise, we're validating against the cached config 12439276SMark.Musante@Sun.COM * (zpool.cache) that was read when we loaded the zfs module, and then 12449276SMark.Musante@Sun.COM * later we will recursively call spa_load() and validate against 12459276SMark.Musante@Sun.COM * the vdev config. 12461986Seschrock */ 12479276SMark.Musante@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 12489276SMark.Musante@Sun.COM error = vdev_validate(rvd); 12499276SMark.Musante@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 12509276SMark.Musante@Sun.COM if (error != 0) 12519276SMark.Musante@Sun.COM goto out; 12521986Seschrock 12531986Seschrock if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { 12541986Seschrock error = ENXIO; 12551986Seschrock goto out; 12561986Seschrock } 12571986Seschrock 12581986Seschrock /* 1259789Sahrens * Find the best uberblock. 1260789Sahrens */ 12617754SJeff.Bonwick@Sun.COM vdev_uberblock_load(NULL, rvd, ub); 1262789Sahrens 1263789Sahrens /* 1264789Sahrens * If we weren't able to find a single valid uberblock, return failure. 
1265789Sahrens */ 1266789Sahrens if (ub->ub_txg == 0) { 12671760Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 12681760Seschrock VDEV_AUX_CORRUPT_DATA); 12691544Seschrock error = ENXIO; 12701544Seschrock goto out; 12711544Seschrock } 12721544Seschrock 12731544Seschrock /* 12741544Seschrock * If the pool is newer than the code, we can't open it. 12751544Seschrock */ 12764577Sahrens if (ub->ub_version > SPA_VERSION) { 12771760Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 12781760Seschrock VDEV_AUX_VERSION_NEWER); 12791544Seschrock error = ENOTSUP; 12801544Seschrock goto out; 1281789Sahrens } 1282789Sahrens 1283789Sahrens /* 1284789Sahrens * If the vdev guid sum doesn't match the uberblock, we have an 1285789Sahrens * incomplete configuration. 1286789Sahrens */ 12871732Sbonwick if (rvd->vdev_guid_sum != ub->ub_guid_sum && mosconfig) { 12881544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 12891544Seschrock VDEV_AUX_BAD_GUID_SUM); 12901544Seschrock error = ENXIO; 12911544Seschrock goto out; 1292789Sahrens } 1293789Sahrens 1294789Sahrens /* 1295789Sahrens * Initialize internal SPA structures. 
1296789Sahrens */ 1297789Sahrens spa->spa_state = POOL_STATE_ACTIVE; 1298789Sahrens spa->spa_ubsync = spa->spa_uberblock; 1299789Sahrens spa->spa_first_txg = spa_last_synced_txg(spa) + 1; 13001544Seschrock error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool); 13011544Seschrock if (error) { 13021544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 13031544Seschrock VDEV_AUX_CORRUPT_DATA); 13041544Seschrock goto out; 13051544Seschrock } 1306789Sahrens spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset; 1307789Sahrens 13081544Seschrock if (zap_lookup(spa->spa_meta_objset, 1309789Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, 13101544Seschrock sizeof (uint64_t), 1, &spa->spa_config_object) != 0) { 13111544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 13121544Seschrock VDEV_AUX_CORRUPT_DATA); 13131544Seschrock error = EIO; 13141544Seschrock goto out; 13151544Seschrock } 1316789Sahrens 131710594SGeorge.Wilson@Sun.COM if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0) { 131810594SGeorge.Wilson@Sun.COM vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 131910594SGeorge.Wilson@Sun.COM VDEV_AUX_CORRUPT_DATA); 132010594SGeorge.Wilson@Sun.COM error = EIO; 132110594SGeorge.Wilson@Sun.COM goto out; 132210594SGeorge.Wilson@Sun.COM } 132310594SGeorge.Wilson@Sun.COM 1324789Sahrens if (!mosconfig) { 13253975Sek110237 uint64_t hostid; 13262082Seschrock 132710594SGeorge.Wilson@Sun.COM if (!spa_is_root(spa) && nvlist_lookup_uint64(nvconfig, 13287706SLin.Ling@Sun.COM ZPOOL_CONFIG_HOSTID, &hostid) == 0) { 13293975Sek110237 char *hostname; 13303975Sek110237 unsigned long myhostid = 0; 13313975Sek110237 133210594SGeorge.Wilson@Sun.COM VERIFY(nvlist_lookup_string(nvconfig, 13333975Sek110237 ZPOOL_CONFIG_HOSTNAME, &hostname) == 0); 13343975Sek110237 13358662SJordan.Vaughan@Sun.com #ifdef _KERNEL 13368662SJordan.Vaughan@Sun.com myhostid = zone_get_hostid(NULL); 13378662SJordan.Vaughan@Sun.com #else /* _KERNEL */ 
13388662SJordan.Vaughan@Sun.com /* 13398662SJordan.Vaughan@Sun.com * We're emulating the system's hostid in userland, so 13408662SJordan.Vaughan@Sun.com * we can't use zone_get_hostid(). 13418662SJordan.Vaughan@Sun.com */ 13423975Sek110237 (void) ddi_strtoul(hw_serial, NULL, 10, &myhostid); 13438662SJordan.Vaughan@Sun.com #endif /* _KERNEL */ 13444178Slling if (hostid != 0 && myhostid != 0 && 13458662SJordan.Vaughan@Sun.com hostid != myhostid) { 13463975Sek110237 cmn_err(CE_WARN, "pool '%s' could not be " 13473975Sek110237 "loaded as it was last accessed by " 13487706SLin.Ling@Sun.COM "another system (host: %s hostid: 0x%lx). " 13493975Sek110237 "See: http://www.sun.com/msg/ZFS-8000-EY", 13507754SJeff.Bonwick@Sun.COM spa_name(spa), hostname, 13513975Sek110237 (unsigned long)hostid); 13523975Sek110237 error = EBADF; 13533975Sek110237 goto out; 13543975Sek110237 } 13553975Sek110237 } 13563975Sek110237 135710594SGeorge.Wilson@Sun.COM spa_config_set(spa, nvconfig); 1358789Sahrens spa_unload(spa); 1359789Sahrens spa_deactivate(spa); 13608241SJeff.Bonwick@Sun.COM spa_activate(spa, orig_mode); 1361789Sahrens 136210594SGeorge.Wilson@Sun.COM return (spa_load(spa, nvconfig, state, B_TRUE)); 13631544Seschrock } 13641544Seschrock 13651544Seschrock if (zap_lookup(spa->spa_meta_objset, 13661544Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, 13671544Seschrock sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj) != 0) { 13681544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 13691544Seschrock VDEV_AUX_CORRUPT_DATA); 13701544Seschrock error = EIO; 13711544Seschrock goto out; 1372789Sahrens } 1373789Sahrens 13741544Seschrock /* 13752082Seschrock * Load the bit that tells us to use the new accounting function 13762082Seschrock * (raid-z deflation). If we have an older pool, this will not 13772082Seschrock * be present. 
13782082Seschrock */ 13792082Seschrock error = zap_lookup(spa->spa_meta_objset, 13802082Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 13812082Seschrock sizeof (uint64_t), 1, &spa->spa_deflate); 13822082Seschrock if (error != 0 && error != ENOENT) { 13832082Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 13842082Seschrock VDEV_AUX_CORRUPT_DATA); 13852082Seschrock error = EIO; 13862082Seschrock goto out; 13872082Seschrock } 13882082Seschrock 13892082Seschrock /* 13901544Seschrock * Load the persistent error log. If we have an older pool, this will 13911544Seschrock * not be present. 13921544Seschrock */ 13931544Seschrock error = zap_lookup(spa->spa_meta_objset, 13941544Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_LAST, 13951544Seschrock sizeof (uint64_t), 1, &spa->spa_errlog_last); 13961807Sbonwick if (error != 0 && error != ENOENT) { 13971544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 13981544Seschrock VDEV_AUX_CORRUPT_DATA); 13991544Seschrock error = EIO; 14001544Seschrock goto out; 14011544Seschrock } 14021544Seschrock 14031544Seschrock error = zap_lookup(spa->spa_meta_objset, 14041544Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_SCRUB, 14051544Seschrock sizeof (uint64_t), 1, &spa->spa_errlog_scrub); 14061544Seschrock if (error != 0 && error != ENOENT) { 14071544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 14081544Seschrock VDEV_AUX_CORRUPT_DATA); 14091544Seschrock error = EIO; 14101544Seschrock goto out; 14111544Seschrock } 1412789Sahrens 1413789Sahrens /* 14142926Sek110237 * Load the history object. If we have an older pool, this 14152926Sek110237 * will not be present. 
14162926Sek110237 */ 14172926Sek110237 error = zap_lookup(spa->spa_meta_objset, 14182926Sek110237 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_HISTORY, 14192926Sek110237 sizeof (uint64_t), 1, &spa->spa_history); 14202926Sek110237 if (error != 0 && error != ENOENT) { 14212926Sek110237 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 14222926Sek110237 VDEV_AUX_CORRUPT_DATA); 14232926Sek110237 error = EIO; 14242926Sek110237 goto out; 14252926Sek110237 } 14262926Sek110237 14272926Sek110237 /* 14282082Seschrock * Load any hot spares for this pool. 14292082Seschrock */ 14302082Seschrock error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 14315450Sbrendan DMU_POOL_SPARES, sizeof (uint64_t), 1, &spa->spa_spares.sav_object); 14322082Seschrock if (error != 0 && error != ENOENT) { 14332082Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 14342082Seschrock VDEV_AUX_CORRUPT_DATA); 14352082Seschrock error = EIO; 14362082Seschrock goto out; 14372082Seschrock } 14382082Seschrock if (error == 0) { 14394577Sahrens ASSERT(spa_version(spa) >= SPA_VERSION_SPARES); 14405450Sbrendan if (load_nvlist(spa, spa->spa_spares.sav_object, 14415450Sbrendan &spa->spa_spares.sav_config) != 0) { 14422082Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 14432082Seschrock VDEV_AUX_CORRUPT_DATA); 14442082Seschrock error = EIO; 14452082Seschrock goto out; 14462082Seschrock } 14472082Seschrock 14487754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 14492082Seschrock spa_load_spares(spa); 14507754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 14512082Seschrock } 14522082Seschrock 14535450Sbrendan /* 14545450Sbrendan * Load any level 2 ARC devices for this pool. 
14555450Sbrendan */ 14565450Sbrendan error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 14575450Sbrendan DMU_POOL_L2CACHE, sizeof (uint64_t), 1, 14585450Sbrendan &spa->spa_l2cache.sav_object); 14595450Sbrendan if (error != 0 && error != ENOENT) { 14605450Sbrendan vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 14615450Sbrendan VDEV_AUX_CORRUPT_DATA); 14625450Sbrendan error = EIO; 14635450Sbrendan goto out; 14645450Sbrendan } 14655450Sbrendan if (error == 0) { 14665450Sbrendan ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE); 14675450Sbrendan if (load_nvlist(spa, spa->spa_l2cache.sav_object, 14685450Sbrendan &spa->spa_l2cache.sav_config) != 0) { 14695450Sbrendan vdev_set_state(rvd, B_TRUE, 14705450Sbrendan VDEV_STATE_CANT_OPEN, 14715450Sbrendan VDEV_AUX_CORRUPT_DATA); 14725450Sbrendan error = EIO; 14735450Sbrendan goto out; 14745450Sbrendan } 14755450Sbrendan 14767754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 14775450Sbrendan spa_load_l2cache(spa); 14787754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 14795450Sbrendan } 14805450Sbrendan 148110594SGeorge.Wilson@Sun.COM VERIFY(nvlist_lookup_nvlist(nvconfig, ZPOOL_CONFIG_VDEV_TREE, 148210594SGeorge.Wilson@Sun.COM &nvroot) == 0); 148310594SGeorge.Wilson@Sun.COM spa_load_log_state(spa, nvroot); 148410594SGeorge.Wilson@Sun.COM nvlist_free(nvconfig); 14859701SGeorge.Wilson@Sun.COM 14867294Sperrin if (spa_check_logs(spa)) { 14877294Sperrin vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 14887294Sperrin VDEV_AUX_BAD_LOG); 14897294Sperrin error = ENXIO; 14907294Sperrin ereport = FM_EREPORT_ZFS_LOG_REPLAY; 14917294Sperrin goto out; 14927294Sperrin } 14937294Sperrin 14947294Sperrin 14955094Slling spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); 14964543Smarks 14973912Slling error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 14983912Slling DMU_POOL_PROPS, sizeof (uint64_t), 1, &spa->spa_pool_props_object); 14993912Slling 
15003912Slling if (error && error != ENOENT) { 15013912Slling vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 15023912Slling VDEV_AUX_CORRUPT_DATA); 15033912Slling error = EIO; 15043912Slling goto out; 15053912Slling } 15063912Slling 15073912Slling if (error == 0) { 15083912Slling (void) zap_lookup(spa->spa_meta_objset, 15093912Slling spa->spa_pool_props_object, 15104451Seschrock zpool_prop_to_name(ZPOOL_PROP_BOOTFS), 15113912Slling sizeof (uint64_t), 1, &spa->spa_bootfs); 15124451Seschrock (void) zap_lookup(spa->spa_meta_objset, 15134451Seschrock spa->spa_pool_props_object, 15144451Seschrock zpool_prop_to_name(ZPOOL_PROP_AUTOREPLACE), 15154451Seschrock sizeof (uint64_t), 1, &autoreplace); 151610672SEric.Schrock@Sun.COM spa->spa_autoreplace = (autoreplace != 0); 15174543Smarks (void) zap_lookup(spa->spa_meta_objset, 15184543Smarks spa->spa_pool_props_object, 15194543Smarks zpool_prop_to_name(ZPOOL_PROP_DELEGATION), 15204543Smarks sizeof (uint64_t), 1, &spa->spa_delegation); 15215329Sgw25295 (void) zap_lookup(spa->spa_meta_objset, 15225329Sgw25295 spa->spa_pool_props_object, 15235329Sgw25295 zpool_prop_to_name(ZPOOL_PROP_FAILUREMODE), 15245329Sgw25295 sizeof (uint64_t), 1, &spa->spa_failmode); 15259816SGeorge.Wilson@Sun.COM (void) zap_lookup(spa->spa_meta_objset, 15269816SGeorge.Wilson@Sun.COM spa->spa_pool_props_object, 15279816SGeorge.Wilson@Sun.COM zpool_prop_to_name(ZPOOL_PROP_AUTOEXPAND), 15289816SGeorge.Wilson@Sun.COM sizeof (uint64_t), 1, &spa->spa_autoexpand); 15293912Slling } 15303912Slling 15312082Seschrock /* 15324451Seschrock * If the 'autoreplace' property is set, then post a resource notifying 15334451Seschrock * the ZFS DE that it should not issue any faults for unopenable 15344451Seschrock * devices. We also iterate over the vdevs, and post a sysevent for any 15354451Seschrock * unopenable vdevs so that the normal autoreplace handler can take 15364451Seschrock * over. 
15374451Seschrock */ 153810672SEric.Schrock@Sun.COM if (spa->spa_autoreplace && state != SPA_LOAD_TRYIMPORT) { 15394451Seschrock spa_check_removed(spa->spa_root_vdev); 154010672SEric.Schrock@Sun.COM /* 154110672SEric.Schrock@Sun.COM * For the import case, this is done in spa_import(), because 154210672SEric.Schrock@Sun.COM * at this point we're using the spare definitions from 154310672SEric.Schrock@Sun.COM * the MOS config, not necessarily from the userland config. 154410672SEric.Schrock@Sun.COM */ 154510672SEric.Schrock@Sun.COM if (state != SPA_LOAD_IMPORT) { 154610672SEric.Schrock@Sun.COM spa_aux_check_removed(&spa->spa_spares); 154710672SEric.Schrock@Sun.COM spa_aux_check_removed(&spa->spa_l2cache); 154810672SEric.Schrock@Sun.COM } 154910672SEric.Schrock@Sun.COM } 15504451Seschrock 15514451Seschrock /* 15521986Seschrock * Load the vdev state for all toplevel vdevs. 1553789Sahrens */ 15541986Seschrock vdev_load(rvd); 1555789Sahrens 1556789Sahrens /* 1557789Sahrens * Propagate the leaf DTLs we just loaded all the way up the tree. 1558789Sahrens */ 15597754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1560789Sahrens vdev_dtl_reassess(rvd, 0, 0, B_FALSE); 15617754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 1562789Sahrens 1563789Sahrens /* 1564789Sahrens * Check the state of the root vdev. If it can't be opened, it 1565789Sahrens * indicates one or more toplevel vdevs are faulted. 1566789Sahrens */ 15671544Seschrock if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { 15681544Seschrock error = ENXIO; 15691544Seschrock goto out; 15701544Seschrock } 1571789Sahrens 15728241SJeff.Bonwick@Sun.COM if (spa_writeable(spa)) { 15731635Sbonwick dmu_tx_t *tx; 15741635Sbonwick int need_update = B_FALSE; 15758241SJeff.Bonwick@Sun.COM 15768241SJeff.Bonwick@Sun.COM ASSERT(state != SPA_LOAD_TRYIMPORT); 15771601Sbonwick 15781635Sbonwick /* 15791635Sbonwick * Claim log blocks that haven't been committed yet. 
15801635Sbonwick * This must all happen in a single txg. 15811635Sbonwick */ 15821601Sbonwick tx = dmu_tx_create_assigned(spa_get_dsl(spa), 1583789Sahrens spa_first_txg(spa)); 15847754SJeff.Bonwick@Sun.COM (void) dmu_objset_find(spa_name(spa), 15852417Sahrens zil_claim, tx, DS_FIND_CHILDREN); 1586789Sahrens dmu_tx_commit(tx); 1587789Sahrens 15889701SGeorge.Wilson@Sun.COM spa->spa_log_state = SPA_LOG_GOOD; 1589789Sahrens spa->spa_sync_on = B_TRUE; 1590789Sahrens txg_sync_start(spa->spa_dsl_pool); 1591789Sahrens 1592789Sahrens /* 1593789Sahrens * Wait for all claims to sync. 1594789Sahrens */ 1595789Sahrens txg_wait_synced(spa->spa_dsl_pool, 0); 15961585Sbonwick 15971585Sbonwick /* 15981635Sbonwick * If the config cache is stale, or we have uninitialized 15991635Sbonwick * metaslabs (see spa_vdev_add()), then update the config. 160010100SLin.Ling@Sun.COM * 160110100SLin.Ling@Sun.COM * If spa_load_verbatim is true, trust the current 160210100SLin.Ling@Sun.COM * in-core spa_config and update the disk labels. 16031585Sbonwick */ 16041635Sbonwick if (config_cache_txg != spa->spa_config_txg || 160510100SLin.Ling@Sun.COM state == SPA_LOAD_IMPORT || spa->spa_load_verbatim) 16061635Sbonwick need_update = B_TRUE; 16071635Sbonwick 16088241SJeff.Bonwick@Sun.COM for (int c = 0; c < rvd->vdev_children; c++) 16091635Sbonwick if (rvd->vdev_child[c]->vdev_ms_array == 0) 16101635Sbonwick need_update = B_TRUE; 16111585Sbonwick 16121585Sbonwick /* 16131635Sbonwick * Update the config cache asychronously in case we're the 16141635Sbonwick * root pool, in which case the config cache isn't writable yet. 16151585Sbonwick */ 16161635Sbonwick if (need_update) 16171635Sbonwick spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); 16188241SJeff.Bonwick@Sun.COM 16198241SJeff.Bonwick@Sun.COM /* 16208241SJeff.Bonwick@Sun.COM * Check all DTLs to see if anything needs resilvering. 
16218241SJeff.Bonwick@Sun.COM */ 16228241SJeff.Bonwick@Sun.COM if (vdev_resilver_needed(rvd, NULL, NULL)) 16238241SJeff.Bonwick@Sun.COM spa_async_request(spa, SPA_ASYNC_RESILVER); 162410298SMatthew.Ahrens@Sun.COM 162510298SMatthew.Ahrens@Sun.COM /* 162610298SMatthew.Ahrens@Sun.COM * Delete any inconsistent datasets. 162710298SMatthew.Ahrens@Sun.COM */ 162810298SMatthew.Ahrens@Sun.COM (void) dmu_objset_find(spa_name(spa), 162910298SMatthew.Ahrens@Sun.COM dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN); 163010342Schris.kirby@sun.com 163110342Schris.kirby@sun.com /* 163210342Schris.kirby@sun.com * Clean up any stale temporary dataset userrefs. 163310342Schris.kirby@sun.com */ 163410342Schris.kirby@sun.com dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool); 1635789Sahrens } 1636789Sahrens 16371544Seschrock error = 0; 16381544Seschrock out: 16397046Sahrens spa->spa_minref = refcount_count(&spa->spa_refcount); 16402082Seschrock if (error && error != EBADF) 16417294Sperrin zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0); 16421544Seschrock spa->spa_load_state = SPA_LOAD_NONE; 16431544Seschrock spa->spa_ena = 0; 16441544Seschrock 16451544Seschrock return (error); 1646789Sahrens } 1647789Sahrens 1648789Sahrens /* 1649789Sahrens * Pool Open/Import 1650789Sahrens * 1651789Sahrens * The import case is identical to an open except that the configuration is sent 1652789Sahrens * down from userland, instead of grabbed from the configuration cache. For the 1653789Sahrens * case of an open, the pool configuration will exist in the 16544451Seschrock * POOL_STATE_UNINITIALIZED state. 1655789Sahrens * 1656789Sahrens * The stats information (gen/count/ustats) is used to gather vdev statistics at 1657789Sahrens * the same time open the pool, without having to keep around the spa_t in some 1658789Sahrens * ambiguous state. 
1659789Sahrens */ 1660789Sahrens static int 1661789Sahrens spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t **config) 1662789Sahrens { 1663789Sahrens spa_t *spa; 1664789Sahrens int error; 1665789Sahrens int locked = B_FALSE; 1666789Sahrens 1667789Sahrens *spapp = NULL; 1668789Sahrens 1669789Sahrens /* 1670789Sahrens * As disgusting as this is, we need to support recursive calls to this 1671789Sahrens * function because dsl_dir_open() is called during spa_load(), and ends 1672789Sahrens * up calling spa_open() again. The real fix is to figure out how to 1673789Sahrens * avoid dsl_dir_open() calling this in the first place. 1674789Sahrens */ 1675789Sahrens if (mutex_owner(&spa_namespace_lock) != curthread) { 1676789Sahrens mutex_enter(&spa_namespace_lock); 1677789Sahrens locked = B_TRUE; 1678789Sahrens } 1679789Sahrens 1680789Sahrens if ((spa = spa_lookup(pool)) == NULL) { 1681789Sahrens if (locked) 1682789Sahrens mutex_exit(&spa_namespace_lock); 1683789Sahrens return (ENOENT); 1684789Sahrens } 1685789Sahrens if (spa->spa_state == POOL_STATE_UNINITIALIZED) { 1686789Sahrens 16878241SJeff.Bonwick@Sun.COM spa_activate(spa, spa_mode_global); 1688789Sahrens 16891635Sbonwick error = spa_load(spa, spa->spa_config, SPA_LOAD_OPEN, B_FALSE); 1690789Sahrens 1691789Sahrens if (error == EBADF) { 1692789Sahrens /* 16931986Seschrock * If vdev_validate() returns failure (indicated by 16941986Seschrock * EBADF), it indicates that one of the vdevs indicates 16951986Seschrock * that the pool has been exported or destroyed. If 16961986Seschrock * this is the case, the config cache is out of sync and 16971986Seschrock * we should remove the pool from the namespace. 
1698789Sahrens */ 1699789Sahrens spa_unload(spa); 1700789Sahrens spa_deactivate(spa); 17016643Seschrock spa_config_sync(spa, B_TRUE, B_TRUE); 1702789Sahrens spa_remove(spa); 1703789Sahrens if (locked) 1704789Sahrens mutex_exit(&spa_namespace_lock); 1705789Sahrens return (ENOENT); 17061544Seschrock } 17071544Seschrock 17081544Seschrock if (error) { 1709789Sahrens /* 1710789Sahrens * We can't open the pool, but we still have useful 1711789Sahrens * information: the state of each vdev after the 1712789Sahrens * attempted vdev_open(). Return this to the user. 1713789Sahrens */ 17147754SJeff.Bonwick@Sun.COM if (config != NULL && spa->spa_root_vdev != NULL) 1715789Sahrens *config = spa_config_generate(spa, NULL, -1ULL, 1716789Sahrens B_TRUE); 1717789Sahrens spa_unload(spa); 1718789Sahrens spa_deactivate(spa); 17191544Seschrock spa->spa_last_open_failed = B_TRUE; 1720789Sahrens if (locked) 1721789Sahrens mutex_exit(&spa_namespace_lock); 1722789Sahrens *spapp = NULL; 1723789Sahrens return (error); 17241544Seschrock } else { 17251544Seschrock spa->spa_last_open_failed = B_FALSE; 1726789Sahrens } 1727789Sahrens } 1728789Sahrens 1729789Sahrens spa_open_ref(spa, tag); 17304451Seschrock 1731789Sahrens if (locked) 1732789Sahrens mutex_exit(&spa_namespace_lock); 1733789Sahrens 1734789Sahrens *spapp = spa; 1735789Sahrens 17367754SJeff.Bonwick@Sun.COM if (config != NULL) 1737789Sahrens *config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); 1738789Sahrens 1739789Sahrens return (0); 1740789Sahrens } 1741789Sahrens 1742789Sahrens int 1743789Sahrens spa_open(const char *name, spa_t **spapp, void *tag) 1744789Sahrens { 1745789Sahrens return (spa_open_common(name, spapp, tag, NULL)); 1746789Sahrens } 1747789Sahrens 17481544Seschrock /* 17491544Seschrock * Lookup the given spa_t, incrementing the inject count in the process, 17501544Seschrock * preventing it from being exported or destroyed. 
17511544Seschrock */ 17521544Seschrock spa_t * 17531544Seschrock spa_inject_addref(char *name) 17541544Seschrock { 17551544Seschrock spa_t *spa; 17561544Seschrock 17571544Seschrock mutex_enter(&spa_namespace_lock); 17581544Seschrock if ((spa = spa_lookup(name)) == NULL) { 17591544Seschrock mutex_exit(&spa_namespace_lock); 17601544Seschrock return (NULL); 17611544Seschrock } 17621544Seschrock spa->spa_inject_ref++; 17631544Seschrock mutex_exit(&spa_namespace_lock); 17641544Seschrock 17651544Seschrock return (spa); 17661544Seschrock } 17671544Seschrock 17681544Seschrock void 17691544Seschrock spa_inject_delref(spa_t *spa) 17701544Seschrock { 17711544Seschrock mutex_enter(&spa_namespace_lock); 17721544Seschrock spa->spa_inject_ref--; 17731544Seschrock mutex_exit(&spa_namespace_lock); 17741544Seschrock } 17751544Seschrock 17765450Sbrendan /* 17775450Sbrendan * Add spares device information to the nvlist. 17785450Sbrendan */ 17792082Seschrock static void 17802082Seschrock spa_add_spares(spa_t *spa, nvlist_t *config) 17812082Seschrock { 17822082Seschrock nvlist_t **spares; 17832082Seschrock uint_t i, nspares; 17842082Seschrock nvlist_t *nvroot; 17852082Seschrock uint64_t guid; 17862082Seschrock vdev_stat_t *vs; 17872082Seschrock uint_t vsc; 17883377Seschrock uint64_t pool; 17892082Seschrock 17909425SEric.Schrock@Sun.COM ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); 17919425SEric.Schrock@Sun.COM 17925450Sbrendan if (spa->spa_spares.sav_count == 0) 17932082Seschrock return; 17942082Seschrock 17952082Seschrock VERIFY(nvlist_lookup_nvlist(config, 17962082Seschrock ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 17975450Sbrendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 17982082Seschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 17992082Seschrock if (nspares != 0) { 18002082Seschrock VERIFY(nvlist_add_nvlist_array(nvroot, 18012082Seschrock ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 18022082Seschrock VERIFY(nvlist_lookup_nvlist_array(nvroot, 
18032082Seschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 18042082Seschrock 18052082Seschrock /* 18062082Seschrock * Go through and find any spares which have since been 18072082Seschrock * repurposed as an active spare. If this is the case, update 18082082Seschrock * their status appropriately. 18092082Seschrock */ 18102082Seschrock for (i = 0; i < nspares; i++) { 18112082Seschrock VERIFY(nvlist_lookup_uint64(spares[i], 18122082Seschrock ZPOOL_CONFIG_GUID, &guid) == 0); 18137214Slling if (spa_spare_exists(guid, &pool, NULL) && 18147214Slling pool != 0ULL) { 18152082Seschrock VERIFY(nvlist_lookup_uint64_array( 18162082Seschrock spares[i], ZPOOL_CONFIG_STATS, 18172082Seschrock (uint64_t **)&vs, &vsc) == 0); 18182082Seschrock vs->vs_state = VDEV_STATE_CANT_OPEN; 18192082Seschrock vs->vs_aux = VDEV_AUX_SPARED; 18202082Seschrock } 18212082Seschrock } 18222082Seschrock } 18232082Seschrock } 18242082Seschrock 18255450Sbrendan /* 18265450Sbrendan * Add l2cache device information to the nvlist, including vdev stats. 
18275450Sbrendan */ 18285450Sbrendan static void 18295450Sbrendan spa_add_l2cache(spa_t *spa, nvlist_t *config) 18305450Sbrendan { 18315450Sbrendan nvlist_t **l2cache; 18325450Sbrendan uint_t i, j, nl2cache; 18335450Sbrendan nvlist_t *nvroot; 18345450Sbrendan uint64_t guid; 18355450Sbrendan vdev_t *vd; 18365450Sbrendan vdev_stat_t *vs; 18375450Sbrendan uint_t vsc; 18385450Sbrendan 18399425SEric.Schrock@Sun.COM ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); 18409425SEric.Schrock@Sun.COM 18415450Sbrendan if (spa->spa_l2cache.sav_count == 0) 18425450Sbrendan return; 18435450Sbrendan 18445450Sbrendan VERIFY(nvlist_lookup_nvlist(config, 18455450Sbrendan ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 18465450Sbrendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, 18475450Sbrendan ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 18485450Sbrendan if (nl2cache != 0) { 18495450Sbrendan VERIFY(nvlist_add_nvlist_array(nvroot, 18505450Sbrendan ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); 18515450Sbrendan VERIFY(nvlist_lookup_nvlist_array(nvroot, 18525450Sbrendan ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 18535450Sbrendan 18545450Sbrendan /* 18555450Sbrendan * Update level 2 cache device stats. 
18565450Sbrendan */ 18575450Sbrendan 18585450Sbrendan for (i = 0; i < nl2cache; i++) { 18595450Sbrendan VERIFY(nvlist_lookup_uint64(l2cache[i], 18605450Sbrendan ZPOOL_CONFIG_GUID, &guid) == 0); 18615450Sbrendan 18625450Sbrendan vd = NULL; 18635450Sbrendan for (j = 0; j < spa->spa_l2cache.sav_count; j++) { 18645450Sbrendan if (guid == 18655450Sbrendan spa->spa_l2cache.sav_vdevs[j]->vdev_guid) { 18665450Sbrendan vd = spa->spa_l2cache.sav_vdevs[j]; 18675450Sbrendan break; 18685450Sbrendan } 18695450Sbrendan } 18705450Sbrendan ASSERT(vd != NULL); 18715450Sbrendan 18725450Sbrendan VERIFY(nvlist_lookup_uint64_array(l2cache[i], 18735450Sbrendan ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0); 18745450Sbrendan vdev_get_stats(vd, vs); 18755450Sbrendan } 18765450Sbrendan } 18775450Sbrendan } 18785450Sbrendan 1879789Sahrens int 18801544Seschrock spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen) 1881789Sahrens { 1882789Sahrens int error; 1883789Sahrens spa_t *spa; 1884789Sahrens 1885789Sahrens *config = NULL; 1886789Sahrens error = spa_open_common(name, &spa, FTAG, config); 1887789Sahrens 18889425SEric.Schrock@Sun.COM if (spa != NULL) { 18899425SEric.Schrock@Sun.COM /* 18909425SEric.Schrock@Sun.COM * This still leaves a window of inconsistency where the spares 18919425SEric.Schrock@Sun.COM * or l2cache devices could change and the config would be 18929425SEric.Schrock@Sun.COM * self-inconsistent. 
18939425SEric.Schrock@Sun.COM */ 18949425SEric.Schrock@Sun.COM spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 18959425SEric.Schrock@Sun.COM 18969425SEric.Schrock@Sun.COM if (*config != NULL) { 18977754SJeff.Bonwick@Sun.COM VERIFY(nvlist_add_uint64(*config, 18989425SEric.Schrock@Sun.COM ZPOOL_CONFIG_ERRCOUNT, 18999425SEric.Schrock@Sun.COM spa_get_errlog_size(spa)) == 0); 19009425SEric.Schrock@Sun.COM 19019425SEric.Schrock@Sun.COM if (spa_suspended(spa)) 19029425SEric.Schrock@Sun.COM VERIFY(nvlist_add_uint64(*config, 19039425SEric.Schrock@Sun.COM ZPOOL_CONFIG_SUSPENDED, 19049425SEric.Schrock@Sun.COM spa->spa_failmode) == 0); 19059425SEric.Schrock@Sun.COM 19069425SEric.Schrock@Sun.COM spa_add_spares(spa, *config); 19079425SEric.Schrock@Sun.COM spa_add_l2cache(spa, *config); 19089425SEric.Schrock@Sun.COM } 19092082Seschrock } 19102082Seschrock 19111544Seschrock /* 19121544Seschrock * We want to get the alternate root even for faulted pools, so we cheat 19131544Seschrock * and call spa_lookup() directly. 19141544Seschrock */ 19151544Seschrock if (altroot) { 19161544Seschrock if (spa == NULL) { 19171544Seschrock mutex_enter(&spa_namespace_lock); 19181544Seschrock spa = spa_lookup(name); 19191544Seschrock if (spa) 19201544Seschrock spa_altroot(spa, altroot, buflen); 19211544Seschrock else 19221544Seschrock altroot[0] = '\0'; 19231544Seschrock spa = NULL; 19241544Seschrock mutex_exit(&spa_namespace_lock); 19251544Seschrock } else { 19261544Seschrock spa_altroot(spa, altroot, buflen); 19271544Seschrock } 19281544Seschrock } 19291544Seschrock 19309425SEric.Schrock@Sun.COM if (spa != NULL) { 19319425SEric.Schrock@Sun.COM spa_config_exit(spa, SCL_CONFIG, FTAG); 1932789Sahrens spa_close(spa, FTAG); 19339425SEric.Schrock@Sun.COM } 1934789Sahrens 1935789Sahrens return (error); 1936789Sahrens } 1937789Sahrens 1938789Sahrens /* 19395450Sbrendan * Validate that the auxiliary device array is well formed. 
We must have an 19405450Sbrendan * array of nvlists, each which describes a valid leaf vdev. If this is an 19415450Sbrendan * import (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be 19425450Sbrendan * specified, as long as they are well-formed. 19432082Seschrock */ 19442082Seschrock static int 19455450Sbrendan spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode, 19465450Sbrendan spa_aux_vdev_t *sav, const char *config, uint64_t version, 19475450Sbrendan vdev_labeltype_t label) 19482082Seschrock { 19495450Sbrendan nvlist_t **dev; 19505450Sbrendan uint_t i, ndev; 19512082Seschrock vdev_t *vd; 19522082Seschrock int error; 19532082Seschrock 19547754SJeff.Bonwick@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 19557754SJeff.Bonwick@Sun.COM 19562082Seschrock /* 19575450Sbrendan * It's acceptable to have no devs specified. 19582082Seschrock */ 19595450Sbrendan if (nvlist_lookup_nvlist_array(nvroot, config, &dev, &ndev) != 0) 19602082Seschrock return (0); 19612082Seschrock 19625450Sbrendan if (ndev == 0) 19632082Seschrock return (EINVAL); 19642082Seschrock 19652082Seschrock /* 19665450Sbrendan * Make sure the pool is formatted with a version that supports this 19675450Sbrendan * device type. 19682082Seschrock */ 19695450Sbrendan if (spa_version(spa) < version) 19702082Seschrock return (ENOTSUP); 19712082Seschrock 19723377Seschrock /* 19735450Sbrendan * Set the pending device list so we correctly handle device in-use 19743377Seschrock * checking. 
19753377Seschrock */ 19765450Sbrendan sav->sav_pending = dev; 19775450Sbrendan sav->sav_npending = ndev; 19785450Sbrendan 19795450Sbrendan for (i = 0; i < ndev; i++) { 19805450Sbrendan if ((error = spa_config_parse(spa, &vd, dev[i], NULL, 0, 19812082Seschrock mode)) != 0) 19823377Seschrock goto out; 19832082Seschrock 19842082Seschrock if (!vd->vdev_ops->vdev_op_leaf) { 19852082Seschrock vdev_free(vd); 19863377Seschrock error = EINVAL; 19873377Seschrock goto out; 19882082Seschrock } 19892082Seschrock 19905450Sbrendan /* 19917754SJeff.Bonwick@Sun.COM * The L2ARC currently only supports disk devices in 19927754SJeff.Bonwick@Sun.COM * kernel context. For user-level testing, we allow it. 19935450Sbrendan */ 19947754SJeff.Bonwick@Sun.COM #ifdef _KERNEL 19955450Sbrendan if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) && 19965450Sbrendan strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) { 19975450Sbrendan error = ENOTBLK; 19985450Sbrendan goto out; 19995450Sbrendan } 20007754SJeff.Bonwick@Sun.COM #endif 20012082Seschrock vd->vdev_top = vd; 20023377Seschrock 20033377Seschrock if ((error = vdev_open(vd)) == 0 && 20045450Sbrendan (error = vdev_label_init(vd, crtxg, label)) == 0) { 20055450Sbrendan VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID, 20063377Seschrock vd->vdev_guid) == 0); 20072082Seschrock } 20082082Seschrock 20092082Seschrock vdev_free(vd); 20103377Seschrock 20115450Sbrendan if (error && 20125450Sbrendan (mode != VDEV_ALLOC_SPARE && mode != VDEV_ALLOC_L2CACHE)) 20133377Seschrock goto out; 20143377Seschrock else 20153377Seschrock error = 0; 20162082Seschrock } 20172082Seschrock 20183377Seschrock out: 20195450Sbrendan sav->sav_pending = NULL; 20205450Sbrendan sav->sav_npending = 0; 20213377Seschrock return (error); 20222082Seschrock } 20232082Seschrock 20245450Sbrendan static int 20255450Sbrendan spa_validate_aux(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode) 20265450Sbrendan { 20275450Sbrendan int error; 20285450Sbrendan 
20297754SJeff.Bonwick@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 20307754SJeff.Bonwick@Sun.COM 20315450Sbrendan if ((error = spa_validate_aux_devs(spa, nvroot, crtxg, mode, 20325450Sbrendan &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES, 20335450Sbrendan VDEV_LABEL_SPARE)) != 0) { 20345450Sbrendan return (error); 20355450Sbrendan } 20365450Sbrendan 20375450Sbrendan return (spa_validate_aux_devs(spa, nvroot, crtxg, mode, 20385450Sbrendan &spa->spa_l2cache, ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE, 20395450Sbrendan VDEV_LABEL_L2CACHE)); 20405450Sbrendan } 20415450Sbrendan 20425450Sbrendan static void 20435450Sbrendan spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs, 20445450Sbrendan const char *config) 20455450Sbrendan { 20465450Sbrendan int i; 20475450Sbrendan 20485450Sbrendan if (sav->sav_config != NULL) { 20495450Sbrendan nvlist_t **olddevs; 20505450Sbrendan uint_t oldndevs; 20515450Sbrendan nvlist_t **newdevs; 20525450Sbrendan 20535450Sbrendan /* 20545450Sbrendan * Generate new dev list by concatentating with the 20555450Sbrendan * current dev list. 
20565450Sbrendan */ 20575450Sbrendan VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config, 20585450Sbrendan &olddevs, &oldndevs) == 0); 20595450Sbrendan 20605450Sbrendan newdevs = kmem_alloc(sizeof (void *) * 20615450Sbrendan (ndevs + oldndevs), KM_SLEEP); 20625450Sbrendan for (i = 0; i < oldndevs; i++) 20635450Sbrendan VERIFY(nvlist_dup(olddevs[i], &newdevs[i], 20645450Sbrendan KM_SLEEP) == 0); 20655450Sbrendan for (i = 0; i < ndevs; i++) 20665450Sbrendan VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs], 20675450Sbrendan KM_SLEEP) == 0); 20685450Sbrendan 20695450Sbrendan VERIFY(nvlist_remove(sav->sav_config, config, 20705450Sbrendan DATA_TYPE_NVLIST_ARRAY) == 0); 20715450Sbrendan 20725450Sbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, 20735450Sbrendan config, newdevs, ndevs + oldndevs) == 0); 20745450Sbrendan for (i = 0; i < oldndevs + ndevs; i++) 20755450Sbrendan nvlist_free(newdevs[i]); 20765450Sbrendan kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *)); 20775450Sbrendan } else { 20785450Sbrendan /* 20795450Sbrendan * Generate a new dev list. 
20805450Sbrendan */ 20815450Sbrendan VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME, 20825450Sbrendan KM_SLEEP) == 0); 20835450Sbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, config, 20845450Sbrendan devs, ndevs) == 0); 20855450Sbrendan } 20865450Sbrendan } 20875450Sbrendan 20885450Sbrendan /* 20895450Sbrendan * Stop and drop level 2 ARC devices 20905450Sbrendan */ 20915450Sbrendan void 20925450Sbrendan spa_l2cache_drop(spa_t *spa) 20935450Sbrendan { 20945450Sbrendan vdev_t *vd; 20955450Sbrendan int i; 20965450Sbrendan spa_aux_vdev_t *sav = &spa->spa_l2cache; 20975450Sbrendan 20985450Sbrendan for (i = 0; i < sav->sav_count; i++) { 20995450Sbrendan uint64_t pool; 21005450Sbrendan 21015450Sbrendan vd = sav->sav_vdevs[i]; 21025450Sbrendan ASSERT(vd != NULL); 21035450Sbrendan 21048241SJeff.Bonwick@Sun.COM if (spa_l2cache_exists(vd->vdev_guid, &pool) && 21058241SJeff.Bonwick@Sun.COM pool != 0ULL && l2arc_vdev_present(vd)) 21065450Sbrendan l2arc_remove_vdev(vd); 21075450Sbrendan if (vd->vdev_isl2cache) 21085450Sbrendan spa_l2cache_remove(vd); 21095450Sbrendan vdev_clear_stats(vd); 21105450Sbrendan (void) vdev_close(vd); 21115450Sbrendan } 21125450Sbrendan } 21135450Sbrendan 21142082Seschrock /* 2115789Sahrens * Pool Creation 2116789Sahrens */ 2117789Sahrens int 21185094Slling spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, 21197184Stimh const char *history_str, nvlist_t *zplprops) 2120789Sahrens { 2121789Sahrens spa_t *spa; 21225094Slling char *altroot = NULL; 21231635Sbonwick vdev_t *rvd; 2124789Sahrens dsl_pool_t *dp; 2125789Sahrens dmu_tx_t *tx; 21269816SGeorge.Wilson@Sun.COM int error = 0; 2127789Sahrens uint64_t txg = TXG_INITIAL; 21285450Sbrendan nvlist_t **spares, **l2cache; 21295450Sbrendan uint_t nspares, nl2cache; 21305094Slling uint64_t version; 2131789Sahrens 2132789Sahrens /* 2133789Sahrens * If this pool already exists, return failure. 
2134789Sahrens */ 2135789Sahrens mutex_enter(&spa_namespace_lock); 2136789Sahrens if (spa_lookup(pool) != NULL) { 2137789Sahrens mutex_exit(&spa_namespace_lock); 2138789Sahrens return (EEXIST); 2139789Sahrens } 2140789Sahrens 2141789Sahrens /* 2142789Sahrens * Allocate a new spa_t structure. 2143789Sahrens */ 21445094Slling (void) nvlist_lookup_string(props, 21455094Slling zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); 21461635Sbonwick spa = spa_add(pool, altroot); 21478241SJeff.Bonwick@Sun.COM spa_activate(spa, spa_mode_global); 2148789Sahrens 2149789Sahrens spa->spa_uberblock.ub_txg = txg - 1; 21505094Slling 21515094Slling if (props && (error = spa_prop_validate(spa, props))) { 21525094Slling spa_deactivate(spa); 21535094Slling spa_remove(spa); 21546643Seschrock mutex_exit(&spa_namespace_lock); 21555094Slling return (error); 21565094Slling } 21575094Slling 21585094Slling if (nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), 21595094Slling &version) != 0) 21605094Slling version = SPA_VERSION; 21615094Slling ASSERT(version <= SPA_VERSION); 21625094Slling spa->spa_uberblock.ub_version = version; 2163789Sahrens spa->spa_ubsync = spa->spa_uberblock; 2164789Sahrens 21651635Sbonwick /* 21669234SGeorge.Wilson@Sun.COM * Create "The Godfather" zio to hold all async IOs 21679234SGeorge.Wilson@Sun.COM */ 21689630SJeff.Bonwick@Sun.COM spa->spa_async_zio_root = zio_root(spa, NULL, NULL, 21699630SJeff.Bonwick@Sun.COM ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); 21709234SGeorge.Wilson@Sun.COM 21719234SGeorge.Wilson@Sun.COM /* 21721635Sbonwick * Create the root vdev. 
21731635Sbonwick */ 21747754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 21751635Sbonwick 21762082Seschrock error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD); 21772082Seschrock 21782082Seschrock ASSERT(error != 0 || rvd != NULL); 21792082Seschrock ASSERT(error != 0 || spa->spa_root_vdev == rvd); 21802082Seschrock 21815913Sperrin if (error == 0 && !zfs_allocatable_devs(nvroot)) 21821635Sbonwick error = EINVAL; 21832082Seschrock 21842082Seschrock if (error == 0 && 21852082Seschrock (error = vdev_create(rvd, txg, B_FALSE)) == 0 && 21865450Sbrendan (error = spa_validate_aux(spa, nvroot, txg, 21872082Seschrock VDEV_ALLOC_ADD)) == 0) { 21889816SGeorge.Wilson@Sun.COM for (int c = 0; c < rvd->vdev_children; c++) { 21899816SGeorge.Wilson@Sun.COM vdev_metaslab_set_size(rvd->vdev_child[c]); 21909816SGeorge.Wilson@Sun.COM vdev_expand(rvd->vdev_child[c], txg); 21919816SGeorge.Wilson@Sun.COM } 21921635Sbonwick } 21931635Sbonwick 21947754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 2195789Sahrens 21962082Seschrock if (error != 0) { 2197789Sahrens spa_unload(spa); 2198789Sahrens spa_deactivate(spa); 2199789Sahrens spa_remove(spa); 2200789Sahrens mutex_exit(&spa_namespace_lock); 2201789Sahrens return (error); 2202789Sahrens } 2203789Sahrens 22042082Seschrock /* 22052082Seschrock * Get the list of spares, if specified. 
22062082Seschrock */ 22072082Seschrock if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 22082082Seschrock &spares, &nspares) == 0) { 22095450Sbrendan VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME, 22102082Seschrock KM_SLEEP) == 0); 22115450Sbrendan VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, 22122082Seschrock ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 22137754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 22142082Seschrock spa_load_spares(spa); 22157754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 22165450Sbrendan spa->spa_spares.sav_sync = B_TRUE; 22175450Sbrendan } 22185450Sbrendan 22195450Sbrendan /* 22205450Sbrendan * Get the list of level 2 cache devices, if specified. 22215450Sbrendan */ 22225450Sbrendan if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, 22235450Sbrendan &l2cache, &nl2cache) == 0) { 22245450Sbrendan VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, 22255450Sbrendan NV_UNIQUE_NAME, KM_SLEEP) == 0); 22265450Sbrendan VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, 22275450Sbrendan ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); 22287754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 22295450Sbrendan spa_load_l2cache(spa); 22307754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 22315450Sbrendan spa->spa_l2cache.sav_sync = B_TRUE; 22322082Seschrock } 22332082Seschrock 22347184Stimh spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg); 2235789Sahrens spa->spa_meta_objset = dp->dp_meta_objset; 2236789Sahrens 2237789Sahrens tx = dmu_tx_create_assigned(dp, txg); 2238789Sahrens 2239789Sahrens /* 2240789Sahrens * Create the pool config object. 
2241789Sahrens */ 2242789Sahrens spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset, 22437497STim.Haley@Sun.COM DMU_OT_PACKED_NVLIST, SPA_CONFIG_BLOCKSIZE, 2244789Sahrens DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx); 2245789Sahrens 22461544Seschrock if (zap_add(spa->spa_meta_objset, 2247789Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, 22481544Seschrock sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) { 22491544Seschrock cmn_err(CE_PANIC, "failed to add pool config"); 22501544Seschrock } 2251789Sahrens 22525094Slling /* Newly created pools with the right version are always deflated. */ 22535094Slling if (version >= SPA_VERSION_RAIDZ_DEFLATE) { 22545094Slling spa->spa_deflate = TRUE; 22555094Slling if (zap_add(spa->spa_meta_objset, 22565094Slling DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 22575094Slling sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) { 22585094Slling cmn_err(CE_PANIC, "failed to add deflate"); 22595094Slling } 22602082Seschrock } 22612082Seschrock 2262789Sahrens /* 2263789Sahrens * Create the deferred-free bplist object. Turn off compression 2264789Sahrens * because sync-to-convergence takes longer if the blocksize 2265789Sahrens * keeps changing. 2266789Sahrens */ 2267789Sahrens spa->spa_sync_bplist_obj = bplist_create(spa->spa_meta_objset, 2268789Sahrens 1 << 14, tx); 2269789Sahrens dmu_object_set_compress(spa->spa_meta_objset, spa->spa_sync_bplist_obj, 2270789Sahrens ZIO_COMPRESS_OFF, tx); 2271789Sahrens 22721544Seschrock if (zap_add(spa->spa_meta_objset, 2273789Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, 22741544Seschrock sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj, tx) != 0) { 22751544Seschrock cmn_err(CE_PANIC, "failed to add bplist"); 22761544Seschrock } 2277789Sahrens 22782926Sek110237 /* 22792926Sek110237 * Create the pool's history object. 
22802926Sek110237 */ 22815094Slling if (version >= SPA_VERSION_ZPOOL_HISTORY) 22825094Slling spa_history_create_obj(spa, tx); 22835094Slling 22845094Slling /* 22855094Slling * Set pool properties. 22865094Slling */ 22875094Slling spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS); 22885094Slling spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); 22895329Sgw25295 spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE); 22909816SGeorge.Wilson@Sun.COM spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND); 22918525SEric.Schrock@Sun.COM if (props != NULL) { 22928525SEric.Schrock@Sun.COM spa_configfile_set(spa, props, B_FALSE); 22935094Slling spa_sync_props(spa, props, CRED(), tx); 22948525SEric.Schrock@Sun.COM } 22952926Sek110237 2296789Sahrens dmu_tx_commit(tx); 2297789Sahrens 2298789Sahrens spa->spa_sync_on = B_TRUE; 2299789Sahrens txg_sync_start(spa->spa_dsl_pool); 2300789Sahrens 2301789Sahrens /* 2302789Sahrens * We explicitly wait for the first transaction to complete so that our 2303789Sahrens * bean counters are appropriately updated. 2304789Sahrens */ 2305789Sahrens txg_wait_synced(spa->spa_dsl_pool, txg); 2306789Sahrens 23076643Seschrock spa_config_sync(spa, B_FALSE, B_TRUE); 2308789Sahrens 23095094Slling if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL) 23104715Sek110237 (void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE); 23119946SMark.Musante@Sun.COM spa_history_log_version(spa, LOG_POOL_CREATE); 23124715Sek110237 23138667SGeorge.Wilson@Sun.COM spa->spa_minref = refcount_count(&spa->spa_refcount); 23148667SGeorge.Wilson@Sun.COM 2315789Sahrens mutex_exit(&spa_namespace_lock); 2316789Sahrens 2317789Sahrens return (0); 2318789Sahrens } 2319789Sahrens 23206423Sgw25295 #ifdef _KERNEL 23216423Sgw25295 /* 23229790SLin.Ling@Sun.COM * Get the root pool information from the root disk, then import the root pool 23239790SLin.Ling@Sun.COM * during the system boot up time. 
 */
extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **);

/*
 * Read the pool label off the given boot device and massage it into an
 * importable pool configuration: the device's own top-level vdev becomes
 * the single child of a synthetic root vdev that replaces the label's
 * original vdev tree.
 *
 * On success, returns the config nvlist (caller frees it — see
 * spa_import_rootpool) and stores the device's vdev guid in *guid.
 * Returns NULL if the label cannot be read.
 */
static nvlist_t *
spa_generate_rootconf(char *devpath, char *devid, uint64_t *guid)
{
	nvlist_t *config;
	nvlist_t *nvtop, *nvroot;
	uint64_t pgid;

	if (vdev_disk_read_rootlabel(devpath, devid, &config) != 0)
		return (NULL);

	/*
	 * Add this top-level vdev to the child array.
	 */
	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvtop) == 0);
	VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
	    &pgid) == 0);
	VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, guid) == 0);

	/*
	 * Put this pool's top-level vdevs into a root vdev.
	 */
	VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
	    VDEV_TYPE_ROOT) == 0);
	VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0);
	VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0);
	VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
	    &nvtop, 1) == 0);

	/*
	 * Replace the existing vdev_tree with the new root vdev in
	 * this pool's configuration (remove the old, add the new).
	 */
	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
	nvlist_free(nvroot);
	return (config);
}

/*
 * Walk the vdev tree and see if we can find a device with "better"
 * configuration. A configuration is "better" if the label on that
 * device has a more recent txg.
 *
 * Recurses over all children first, then examines this vdev if it is a
 * leaf.  *avd and *txg track the best candidate found so far; a leaf
 * whose root label cannot be read is simply skipped.
 */
static void
spa_alt_rootvdev(vdev_t *vd, vdev_t **avd, uint64_t *txg)
{
	for (int c = 0; c < vd->vdev_children; c++)
		spa_alt_rootvdev(vd->vdev_child[c], avd, txg);

	if (vd->vdev_ops->vdev_op_leaf) {
		nvlist_t *label;
		uint64_t label_txg;

		if (vdev_disk_read_rootlabel(vd->vdev_physpath, vd->vdev_devid,
		    &label) != 0)
			return;

		VERIFY(nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_TXG,
		    &label_txg) == 0);

		/*
		 * Do we have a better boot device?
		 */
		if (label_txg > *txg) {
			*txg = label_txg;
			*avd = vd;
		}
		nvlist_free(label);
	}
}

/*
 * Import a root pool.
 *
 * For x86. devpath_list will consist of devid and/or physpath name of
 * the vdev (e.g. "id1,sd@SSEAGATE..." or "/pci@1f,0/ide@d/disk@0,0:a").
 * The GRUB "findroot" command will return the vdev we should boot.
 *
 * For Sparc, devpath_list consists the physpath name of the booting device
 * no matter the rootpool is a single device pool or a mirrored pool.
 * e.g.
 *	"/pci@1f,0/ide@d/disk@0,0:a"
 *
 * Returns 0 on success; EIO if no label can be read from the device,
 * ENOENT if the boot vdev guid is not found in the parsed tree, EINVAL
 * if a "better" (more recent txg) or active-spare device should be used
 * for booting instead, or the error from spa_config_parse().
 */
int
spa_import_rootpool(char *devpath, char *devid)
{
	spa_t *spa;
	vdev_t *rvd, *bvd, *avd = NULL;
	nvlist_t *config, *nvtop;
	uint64_t guid, txg;
	char *pname;
	int error;

	/*
	 * Read the label from the boot device and generate a configuration.
	 */
	config = spa_generate_rootconf(devpath, devid, &guid);
#if defined(_OBP) && defined(_KERNEL)
	if (config == NULL) {
		if (strstr(devpath, "/iscsi/ssd") != NULL) {
			/*
			 * iscsi boot: rewrite devpath to the physical
			 * path of the iSCSI boot device, then retry.
			 */
			get_iscsi_bootpath_phy(devpath);
			config = spa_generate_rootconf(devpath, devid, &guid);
		}
	}
#endif
	if (config == NULL) {
		cmn_err(CE_NOTE, "Can not read the pool label from '%s'",
		    devpath);
		return (EIO);
	}

	VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
	    &pname) == 0);
	VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);

	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(pname)) != NULL) {
		/*
		 * Remove the existing root pool from the namespace so that we
		 * can replace it with the correct config we just read in.
		 */
		spa_remove(spa);
	}

	spa = spa_add(pname, NULL);
	spa->spa_is_root = B_TRUE;
	spa->spa_load_verbatim = B_TRUE;

	/*
	 * Build up a vdev tree based on the boot device's label config.
	 */
	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvtop) == 0);
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	error = spa_config_parse(spa, &rvd, nvtop, NULL, 0,
	    VDEV_ALLOC_ROOTPOOL);
	spa_config_exit(spa, SCL_ALL, FTAG);
	if (error) {
		mutex_exit(&spa_namespace_lock);
		nvlist_free(config);
		cmn_err(CE_NOTE, "Can not parse the config for pool '%s'",
		    pname);
		return (error);
	}

	/*
	 * Get the boot vdev.
	 */
	if ((bvd = vdev_lookup_by_guid(rvd, guid)) == NULL) {
		cmn_err(CE_NOTE, "Can not find the boot vdev for guid %llu",
		    (u_longlong_t)guid);
		error = ENOENT;
		goto out;
	}

	/*
	 * Determine if there is a better boot device.
	 */
	avd = bvd;
	spa_alt_rootvdev(rvd, &avd, &txg);
	if (avd != bvd) {
		cmn_err(CE_NOTE, "The boot device is 'degraded'. Please "
		    "try booting from '%s'", avd->vdev_path);
		error = EINVAL;
		goto out;
	}

	/*
	 * If the boot device is part of a spare vdev then ensure that
	 * we're booting off the active spare.
	 */
	if (bvd->vdev_parent->vdev_ops == &vdev_spare_ops &&
	    !bvd->vdev_isspare) {
		cmn_err(CE_NOTE, "The boot device is currently spared. Please "
		    "try booting from '%s'",
		    bvd->vdev_parent->vdev_child[1]->vdev_path);
		error = EINVAL;
		goto out;
	}

	VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0);
	error = 0;
	spa_history_log_version(spa, LOG_POOL_IMPORT);
out:
	/*
	 * Common exit path: the parsed vdev tree was only needed for
	 * validation above, so always free it (under the config lock)
	 * before dropping the namespace lock.
	 */
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	vdev_free(rvd);
	spa_config_exit(spa, SCL_ALL, FTAG);
	mutex_exit(&spa_namespace_lock);

	nvlist_free(config);
	return (error);
}

#endif

/*
 * Take a pool and insert it into the namespace as if it had been loaded at
 * boot.
 *
 * The supplied config is trusted verbatim (spa_load_verbatim is set) and
 * duplicated into the new spa_t; the config cache is synced before the
 * namespace lock is dropped.  Returns 0 on success or EEXIST if a pool
 * with this name already exists.
 */
int
spa_import_verbatim(const char *pool, nvlist_t *config, nvlist_t *props)
{
	spa_t *spa;
	char *altroot = NULL;

	mutex_enter(&spa_namespace_lock);
	if (spa_lookup(pool) != NULL) {
		mutex_exit(&spa_namespace_lock);
		return (EEXIST);
	}

	/*
	 * An altroot property, if given, determines the root under which
	 * the pool's datasets are mounted.
	 */
	(void) nvlist_lookup_string(props,
	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
	spa = spa_add(pool, altroot);

	spa->spa_load_verbatim = B_TRUE;

	VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0);

	if (props != NULL)
		spa_configfile_set(spa, props, B_FALSE);

	spa_config_sync(spa, B_FALSE, B_TRUE);

	mutex_exit(&spa_namespace_lock);
	spa_history_log_version(spa, LOG_POOL_IMPORT);

	return (0);
}

/*
 * Import a non-root pool into the system.
 *
 * Returns 0 on success; EEXIST if a pool with this name already exists,
 * or the error from spa_load()/spa_validate_aux()/spa_prop_set().  On
 * failure after the spa was created, the spa is unloaded, deactivated
 * and removed from the namespace before returning.
 */
int
spa_import(const char *pool, nvlist_t *config, nvlist_t *props)
{
	spa_t *spa;
	char *altroot = NULL;
	int error;
	nvlist_t *nvroot;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;

	/*
	 * If a pool with this name exists, return failure.
	 */
	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(pool)) != NULL) {
		mutex_exit(&spa_namespace_lock);
		return (EEXIST);
	}

	/*
	 * Create and initialize the spa structure.
	 */
	(void) nvlist_lookup_string(props,
	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
	spa = spa_add(pool, altroot);
	spa_activate(spa, spa_mode_global);

	/*
	 * Don't start async tasks until we know everything is healthy.
	 */
	spa_async_suspend(spa);

	/*
	 * Pass off the heavy lifting to spa_load().  Pass TRUE for mosconfig
	 * because the user-supplied config is actually the one to trust when
	 * doing an import.
	 */
	error = spa_load(spa, config, SPA_LOAD_IMPORT, B_TRUE);

	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	/*
	 * Toss any existing sparelist, as it doesn't have any validity
	 * anymore, and conflicts with spa_has_spare().
	 */
	if (spa->spa_spares.sav_config) {
		nvlist_free(spa->spa_spares.sav_config);
		spa->spa_spares.sav_config = NULL;
		spa_load_spares(spa);
	}
	if (spa->spa_l2cache.sav_config) {
		nvlist_free(spa->spa_l2cache.sav_config);
		spa->spa_l2cache.sav_config = NULL;
		spa_load_l2cache(spa);
	}

	/*
	 * Validate the user-supplied spare and l2cache devices even if
	 * spa_load() failed, so the first error (if any) is preserved.
	 */
	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) == 0);
	if (error == 0)
		error = spa_validate_aux(spa, nvroot, -1ULL,
		    VDEV_ALLOC_SPARE);
	if (error == 0)
		error = spa_validate_aux(spa, nvroot, -1ULL,
		    VDEV_ALLOC_L2CACHE);
	spa_config_exit(spa, SCL_ALL, FTAG);

	if (props != NULL)
		spa_configfile_set(spa, props, B_FALSE);

	if (error != 0 || (props && spa_writeable(spa) &&
	    (error = spa_prop_set(spa, props)))) {
		/* Tear down the partially-imported pool on any failure. */
		spa_unload(spa);
		spa_deactivate(spa);
		spa_remove(spa);
		mutex_exit(&spa_namespace_lock);
		return (error);
	}

	spa_async_resume(spa);

	/*
	 * Override any spares and level 2 cache devices as specified by
	 * the user, as these may have correct device names/devids, etc.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) == 0) {
		if (spa->spa_spares.sav_config)
			VERIFY(nvlist_remove(spa->spa_spares.sav_config,
			    ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0);
		else
			VERIFY(nvlist_alloc(&spa->spa_spares.sav_config,
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_spares(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_spares.sav_sync = B_TRUE;
	}
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
	    &l2cache, &nl2cache) == 0) {
		if (spa->spa_l2cache.sav_config)
			VERIFY(nvlist_remove(spa->spa_l2cache.sav_config,
			    ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0);
		else
			VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_l2cache(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	/*
	 * Check for any removed devices.
	 */
	if (spa->spa_autoreplace) {
		spa_aux_check_removed(&spa->spa_spares);
		spa_aux_check_removed(&spa->spa_l2cache);
	}

	if (spa_writeable(spa)) {
		/*
		 * Update the config cache to include the newly-imported pool.
		 */
		spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
	}

	/*
	 * It's possible that the pool was expanded while it was exported.
	 * We kick off an async task to handle this for us.
	 */
	spa_async_request(spa, SPA_ASYNC_AUTOEXPAND);

	mutex_exit(&spa_namespace_lock);
	spa_history_log_version(spa, LOG_POOL_IMPORT);

	return (0);
}


/*
 * This (illegal) pool name is used when temporarily importing a spa_t in order
 * to get the vdev stats associated with the imported devices.
 */
#define	TRYIMPORT_NAME	"$import"

/*
 * Do a trial import of the given config under the reserved name
 * TRYIMPORT_NAME and return the resulting config nvlist (with pool name,
 * state, timestamp, bootfs, spares and l2cache added), or NULL if the
 * config lacks a pool name or state.  The temporary spa is always torn
 * down before returning.
 */
nvlist_t *
spa_tryimport(nvlist_t *tryconfig)
{
	nvlist_t *config = NULL;
	char *poolname;
	spa_t *spa;
	uint64_t state;
	int error;

	if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname))
		return (NULL);

	if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state))
		return (NULL);

	/*
	 * Create and initialize the spa structure.
	 */
	mutex_enter(&spa_namespace_lock);
	spa = spa_add(TRYIMPORT_NAME, NULL);
	spa_activate(spa, FREAD);

	/*
	 * Pass off the heavy lifting to spa_load().
	 * Pass TRUE for mosconfig because the user-supplied config
	 * is actually the one to trust when doing an import.
	 */
	error = spa_load(spa, tryconfig, SPA_LOAD_TRYIMPORT, B_TRUE);

	/*
	 * If 'tryconfig' was at least parsable, return the current config.
	 */
	if (spa->spa_root_vdev != NULL) {
		config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
		VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
		    poolname) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
		    state) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP,
		    spa->spa_uberblock.ub_timestamp) == 0);

		/*
		 * If the bootfs property exists on this pool then we
		 * copy it out so that external consumers can tell which
		 * pools are bootable.
		 */
		if ((!error || error == EEXIST) && spa->spa_bootfs) {
			char *tmpname = kmem_alloc(MAXPATHLEN, KM_SLEEP);

			/*
			 * We have to play games with the name since the
			 * pool was opened as TRYIMPORT_NAME.
			 */
			if (dsl_dsobj_to_dsname(spa_name(spa),
			    spa->spa_bootfs, tmpname) == 0) {
				char *cp;
				char *dsname = kmem_alloc(MAXPATHLEN, KM_SLEEP);

				/*
				 * Swap the leading "$import" component for
				 * the real pool name before exporting.
				 */
				cp = strchr(tmpname, '/');
				if (cp == NULL) {
					(void) strlcpy(dsname, tmpname,
					    MAXPATHLEN);
				} else {
					(void) snprintf(dsname, MAXPATHLEN,
					    "%s/%s", poolname, ++cp);
				}
				VERIFY(nvlist_add_string(config,
				    ZPOOL_CONFIG_BOOTFS, dsname) == 0);
				kmem_free(dsname, MAXPATHLEN);
			}
			kmem_free(tmpname, MAXPATHLEN);
		}

		/*
		 * Add the list of hot spares and level 2 cache devices.
		 */
		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
		spa_add_spares(spa, config);
		spa_add_l2cache(spa, config);
		spa_config_exit(spa, SCL_CONFIG, FTAG);
	}

	/* The trial import is always torn down before returning. */
	spa_unload(spa);
	spa_deactivate(spa);
	spa_remove(spa);
	mutex_exit(&spa_namespace_lock);

	return (config);
}

/*
 * Pool export/destroy
 *
 * The act of destroying or exporting a pool is very simple. We make sure there
 * is no more pending I/O and any references to the pool are gone. Then, we
 * update the pool state and sync all the labels to disk, removing the
 * configuration from the cache afterwards. If the 'hardforce' flag is set, then
 * we don't sync the labels or remove the configuration cache.
 *
 * Returns 0 on success; EROFS if the pool subsystem is read-only, ENOENT
 * if no such pool exists, EBUSY if there are active references, or EXDEV
 * if the pool has an active shared spare and 'force' is not set.
 */
static int
spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
    boolean_t force, boolean_t hardforce)
{
	spa_t *spa;

	if (oldconfig)
		*oldconfig = NULL;

	if (!(spa_mode_global & FWRITE))
		return (EROFS);

	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(pool)) == NULL) {
		mutex_exit(&spa_namespace_lock);
		return (ENOENT);
	}

	/*
	 * Put a hold on the pool, drop the namespace lock, stop async tasks,
	 * reacquire the namespace lock, and see if we can export.
	 */
	spa_open_ref(spa, FTAG);
	mutex_exit(&spa_namespace_lock);
	spa_async_suspend(spa);
	mutex_enter(&spa_namespace_lock);
	spa_close(spa, FTAG);

	/*
	 * The pool will be in core if it's openable,
	 * in which case we can modify its state.
	 */
	if (spa->spa_state != POOL_STATE_UNINITIALIZED && spa->spa_sync_on) {
		/*
		 * Objsets may be open only because they're dirty, so we
		 * have to force it to sync before checking spa_refcnt.
		 */
		txg_wait_synced(spa->spa_dsl_pool, 0);

		/*
		 * A pool cannot be exported or destroyed if there are active
		 * references.  If we are resetting a pool, allow references by
		 * fault injection handlers.
		 */
		if (!spa_refcount_zero(spa) ||
		    (spa->spa_inject_ref != 0 &&
		    new_state != POOL_STATE_UNINITIALIZED)) {
			spa_async_resume(spa);
			mutex_exit(&spa_namespace_lock);
			return (EBUSY);
		}

		/*
		 * A pool cannot be exported if it has an active shared spare.
		 * This is to prevent other pools stealing the active spare
		 * from an exported pool. At user's own will, such pool can
		 * be forcedly exported.
		 */
		if (!force && new_state == POOL_STATE_EXPORTED &&
		    spa_has_active_shared_spare(spa)) {
			spa_async_resume(spa);
			mutex_exit(&spa_namespace_lock);
			return (EXDEV);
		}

		/*
		 * We want this to be reflected on every label,
		 * so mark them all dirty.  spa_unload() will do the
		 * final sync that pushes these changes out.
		 */
		if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) {
			spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
			spa->spa_state = new_state;
			spa->spa_final_txg = spa_last_synced_txg(spa) + 1;
			vdev_config_dirty(spa->spa_root_vdev);
			spa_config_exit(spa, SCL_ALL, FTAG);
		}
	}

	spa_event_notify(spa, NULL, ESC_ZFS_POOL_DESTROY);

	if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
		spa_unload(spa);
		spa_deactivate(spa);
	}

	/* Hand the departing pool's config back to the caller, if asked. */
	if (oldconfig && spa->spa_config)
		VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0);

	if (new_state != POOL_STATE_UNINITIALIZED) {
		if (!hardforce)
			spa_config_sync(spa, B_TRUE, B_TRUE);
		spa_remove(spa);
	}
	mutex_exit(&spa_namespace_lock);

	return (0);
}

/*
 * Destroy a storage pool.
 */
int
spa_destroy(char *pool)
{
	return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL,
	    B_FALSE, B_FALSE));
}

/*
 * Export a storage pool.
 */
int
spa_export(char *pool, nvlist_t **oldconfig, boolean_t force,
    boolean_t hardforce)
{
	return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig,
	    force, hardforce));
}

/*
 * Similar to spa_export(), this unloads the spa_t without actually removing it
 * from the namespace in any way.
 */
int
spa_reset(char *pool)
{
	return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL,
	    B_FALSE, B_FALSE));
}

/*
 * ==========================================================================
 * Device manipulation
 * ==========================================================================
 */

/*
 * Add a device to a storage pool.
 *
 * 'nvroot' describes the new top-level vdevs and/or spare and l2cache
 * devices to add.  Returns 0 on success or the first error from
 * spa_config_parse(), vdev_create() or spa_validate_aux(), routed
 * through spa_vdev_exit() so the vdev transaction is always closed.
 */
int
spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
{
	uint64_t txg, id;
	int error;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *vd, *tvd;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;

	txg = spa_vdev_enter(spa);

	if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0,
	    VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, NULL, txg, error));

	spa->spa_pending_vdev = vd;	/* spa_vdev_exit() will clear this */

	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
	    &nspares) != 0)
		nspares = 0;

	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache,
	    &nl2cache) != 0)
		nl2cache = 0;

	/* The request must add at least one vdev, spare or cache device. */
	if (vd->vdev_children == 0 && nspares == 0 && nl2cache == 0)
		return (spa_vdev_exit(spa, vd, txg, EINVAL));

	if (vd->vdev_children != 0 &&
	    (error = vdev_create(vd, txg, B_FALSE)) != 0)
		return (spa_vdev_exit(spa, vd, txg, error));

	/*
	 * We must validate the spares and l2cache devices after checking the
	 * children.  Otherwise, vdev_inuse() will blindly overwrite the spare.
	 */
	if ((error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, vd, txg, error));

	/*
	 * Transfer each new top-level vdev from vd to rvd.
	 */
	for (int c = 0; c < vd->vdev_children; c++) {

		/*
		 * Set the vdev id to the first hole, if one exists.
		 */
		for (id = 0; id < rvd->vdev_children; id++) {
			if (rvd->vdev_child[id]->vdev_ishole) {
				vdev_free(rvd->vdev_child[id]);
				break;
			}
		}
		tvd = vd->vdev_child[c];
		vdev_remove_child(vd, tvd);
		tvd->vdev_id = id;
		vdev_add_child(rvd, tvd);
		vdev_config_dirty(tvd);
	}

	if (nspares != 0) {
		spa_set_aux_vdevs(&spa->spa_spares, spares, nspares,
		    ZPOOL_CONFIG_SPARES);
		spa_load_spares(spa);
		spa->spa_spares.sav_sync = B_TRUE;
	}

	if (nl2cache != 0) {
		spa_set_aux_vdevs(&spa->spa_l2cache, l2cache, nl2cache,
		    ZPOOL_CONFIG_L2CACHE);
		spa_load_l2cache(spa);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	/*
	 * We have to be careful when adding new vdevs to an existing pool.
	 * If other threads start allocating from these vdevs before we
	 * sync the config cache, and we lose power, then upon reboot we may
	 * fail to open the pool because there are DVAs that the config cache
	 * can't translate.  Therefore, we first add the vdevs without
	 * initializing metaslabs; sync the config cache (via spa_vdev_exit());
	 * and then let spa_config_update() initialize the new metaslabs.
	 *
	 * spa_load() checks for added-but-not-initialized vdevs, so that
	 * if we lose power at any point in this sequence, the remaining
	 * steps will be completed the next time we load the pool.
	 */
	(void) spa_vdev_exit(spa, vd, txg, 0);

	mutex_enter(&spa_namespace_lock);
	spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
	mutex_exit(&spa_namespace_lock);

	return (0);
}

/*
 * Attach a device to a mirror.  The arguments are the path to any device
 * in the mirror, and the nvroot for the new device.  If the path specifies
 * a device that is not mirrored, we automatically insert the mirror vdev.
 *
 * If 'replacing' is specified, the new device is intended to replace the
 * existing device; in this case the two devices are made into their own
 * mirror using the 'replacing' vdev, which is functionally identical to
 * the mirror vdev (it actually reuses all the same ops) but has a few
 * extra rules: you can't attach to it after it's been created, and upon
 * completion of resilvering, the first disk (the one being replaced)
 */
int
spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
{
	uint64_t txg, open_txg;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd;
	vdev_ops_t *pvops;
	char *oldvdpath, *newvdpath;
	int newvd_isspare;
	int error;

	txg = spa_vdev_enter(spa);

	/* 'guid' must name an existing leaf vdev in this pool. */
	oldvd = spa_lookup_by_guid(spa, guid, B_FALSE);

	if (oldvd == NULL)
		return (spa_vdev_exit(spa, NULL, txg, ENODEV));

	if (!oldvd->vdev_ops->vdev_op_leaf)
		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));

	pvd = oldvd->vdev_parent;

	/*
	 * NOTE(review): the error code from spa_config_parse() is discarded
	 * here and EINVAL is returned instead -- confirm this is intentional.
	 */
	if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0,
	    VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, NULL, txg, EINVAL));

	/* The caller must supply exactly one new leaf device. */
	if (newrootvd->vdev_children != 1)
		return (spa_vdev_exit(spa, newrootvd, txg, EINVAL));

	newvd = newrootvd->vdev_child[0];

	if (!newvd->vdev_ops->vdev_op_leaf)
		return (spa_vdev_exit(spa, newrootvd, txg, EINVAL));

	if ((error = vdev_create(newrootvd, txg, replacing)) != 0)
		return (spa_vdev_exit(spa, newrootvd, txg, error));

	/*
	 * Spares can't replace logs
	 */
	if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare)
		return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));

	if (!replacing) {
		/*
		 * For attach, the only allowable parent is a mirror or the root
		 * vdev.
		 */
		if (pvd->vdev_ops != &vdev_mirror_ops &&
		    pvd->vdev_ops != &vdev_root_ops)
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));

		pvops = &vdev_mirror_ops;
	} else {
		/*
		 * Active hot spares can only be replaced by inactive hot
		 * spares.
		 */
		if (pvd->vdev_ops == &vdev_spare_ops &&
		    pvd->vdev_child[1] == oldvd &&
		    !spa_has_spare(spa, newvd->vdev_guid))
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));

		/*
		 * If the source is a hot spare, and the parent isn't already a
		 * spare, then we want to create a new hot spare.  Otherwise, we
		 * want to create a replacing vdev.  The user is not allowed to
		 * attach to a spared vdev child unless the 'isspare' state is
		 * the same (spare replaces spare, non-spare replaces
		 * non-spare).
		 */
		if (pvd->vdev_ops == &vdev_replacing_ops)
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
		else if (pvd->vdev_ops == &vdev_spare_ops &&
		    newvd->vdev_isspare != oldvd->vdev_isspare)
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
		else if (pvd->vdev_ops != &vdev_spare_ops &&
		    newvd->vdev_isspare)
			pvops = &vdev_spare_ops;
		else
			pvops = &vdev_replacing_ops;
	}

	/*
	 * Make sure the new device is big enough.
	 */
	if (newvd->vdev_asize < vdev_get_min_asize(oldvd))
		return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW));

	/*
	 * The new device cannot have a higher alignment requirement
	 * than the top-level vdev.
	 */
	if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift)
		return (spa_vdev_exit(spa, newrootvd, txg, EDOM));

	/*
	 * If this is an in-place replacement, update oldvd's path and devid
	 * to make it distinguishable from newvd, and unopenable from now on.
	 */
	if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) {
		spa_strfree(oldvd->vdev_path);
		/* +5 covers the appended "/old" plus the NUL terminator. */
		oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5,
		    KM_SLEEP);
		(void) sprintf(oldvd->vdev_path, "%s/%s",
		    newvd->vdev_path, "old");
		if (oldvd->vdev_devid != NULL) {
			spa_strfree(oldvd->vdev_devid);
			oldvd->vdev_devid = NULL;
		}
	}

	/*
	 * If the parent is not a mirror, or if we're replacing, insert the new
	 * mirror/replacing/spare vdev above oldvd.
	 */
	if (pvd->vdev_ops != pvops)
		pvd = vdev_add_parent(oldvd, pvops);

	ASSERT(pvd->vdev_top->vdev_parent == rvd);
	ASSERT(pvd->vdev_ops == pvops);
	ASSERT(oldvd->vdev_parent == pvd);

	/*
	 * Extract the new device from its root and add it to pvd.
	 */
	vdev_remove_child(newrootvd, newvd);
	newvd->vdev_id = pvd->vdev_children;
	newvd->vdev_crtxg = oldvd->vdev_crtxg;
	vdev_add_child(pvd, newvd);

	tvd = newvd->vdev_top;
	ASSERT(pvd->vdev_top == tvd);
	ASSERT(tvd->vdev_parent == rvd);

	vdev_config_dirty(tvd);

	/*
	 * Set newvd's DTL to [TXG_INITIAL, open_txg].  It will propagate
	 * upward when spa_vdev_exit() calls vdev_dtl_reassess().
	 */
	open_txg = txg + TXG_CONCURRENT_STATES - 1;

	vdev_dtl_dirty(newvd, DTL_MISSING,
	    TXG_INITIAL, open_txg - TXG_INITIAL + 1);

	if (newvd->vdev_isspare) {
		spa_spare_activate(newvd);
		spa_event_notify(spa, newvd, ESC_ZFS_VDEV_SPARE);
	}

	/*
	 * Capture strings for the history log now, since the vdevs may be
	 * freed by spa_vdev_exit() below.
	 */
	oldvdpath = spa_strdup(oldvd->vdev_path);
	newvdpath = spa_strdup(newvd->vdev_path);
	newvd_isspare = newvd->vdev_isspare;

	/*
	 * Mark newvd's DTL dirty in this txg.
	 */
	vdev_dirty(tvd, VDD_DTL, newvd, txg);

	(void) spa_vdev_exit(spa, newrootvd, open_txg, 0);

	spa_history_internal_log(LOG_POOL_VDEV_ATTACH, spa, NULL,
	    CRED(), "%s vdev=%s %s vdev=%s",
	    replacing && newvd_isspare ? "spare in" :
	    replacing ? "replace" : "attach", newvdpath,
	    replacing ? "for" : "to", oldvdpath);

	spa_strfree(oldvdpath);
	spa_strfree(newvdpath);

	/*
	 * Kick off a resilver to update newvd.
	 */
	VERIFY3U(spa_scrub(spa, POOL_SCRUB_RESILVER), ==, 0);

	return (0);
}

/*
 * Detach a device from a mirror or replacing vdev.
 * If 'replace_done' is specified, only detach if the parent
 * is a replacing vdev.
 */
int
spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
{
	uint64_t txg;
	int error;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *vd, *pvd, *cvd, *tvd;
	boolean_t unspare = B_FALSE;
	uint64_t unspare_guid;
	size_t len;

	txg = spa_vdev_enter(spa);

	vd = spa_lookup_by_guid(spa, guid, B_FALSE);

	if (vd == NULL)
		return (spa_vdev_exit(spa, NULL, txg, ENODEV));

	if (!vd->vdev_ops->vdev_op_leaf)
		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));

	pvd = vd->vdev_parent;

	/*
	 * If the parent/child relationship is not as expected, don't do it.
	 * Consider M(A,R(B,C)) -- that is, a mirror of A with a replacing
	 * vdev that's replacing B with C.  The user's intent in replacing
	 * is to go from M(A,B) to M(A,C).
	 * If the user decides to cancel
	 * the replace by detaching C, the expected behavior is to end up
	 * M(A,B).  But suppose that right after deciding to detach C,
	 * the replacement of B completes.  We would have M(A,C), and then
	 * ask to detach C, which would leave us with just A -- not what
	 * the user wanted.  To prevent this, we make sure that the
	 * parent/child relationship hasn't changed -- in this example,
	 * that C's parent is still the replacing vdev R.
	 */
	if (pvd->vdev_guid != pguid && pguid != 0)
		return (spa_vdev_exit(spa, NULL, txg, EBUSY));

	/*
	 * If replace_done is specified, only remove this device if it's
	 * the first child of a replacing vdev.  For the 'spare' vdev, either
	 * disk can be removed.
	 */
	if (replace_done) {
		if (pvd->vdev_ops == &vdev_replacing_ops) {
			if (vd->vdev_id != 0)
				return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
		} else if (pvd->vdev_ops != &vdev_spare_ops) {
			return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
		}
	}

	ASSERT(pvd->vdev_ops != &vdev_spare_ops ||
	    spa_version(spa) >= SPA_VERSION_SPARES);

	/*
	 * Only mirror, replacing, and spare vdevs support detach.
	 */
	if (pvd->vdev_ops != &vdev_replacing_ops &&
	    pvd->vdev_ops != &vdev_mirror_ops &&
	    pvd->vdev_ops != &vdev_spare_ops)
		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));

	/*
	 * If this device has the only valid copy of some data,
	 * we cannot safely detach it.
	 */
	if (vdev_dtl_required(vd))
		return (spa_vdev_exit(spa, NULL, txg, EBUSY));

	ASSERT(pvd->vdev_children >= 2);

	/*
	 * If we are detaching the second disk from a replacing vdev, then
	 * check to see if we changed the original vdev's path to have "/old"
	 * at the end in spa_vdev_attach().  If so, undo that change now.
	 */
	if (pvd->vdev_ops == &vdev_replacing_ops && vd->vdev_id == 1 &&
	    pvd->vdev_child[0]->vdev_path != NULL &&
	    pvd->vdev_child[1]->vdev_path != NULL) {
		ASSERT(pvd->vdev_child[1] == vd);
		cvd = pvd->vdev_child[0];
		len = strlen(vd->vdev_path);
		if (strncmp(cvd->vdev_path, vd->vdev_path, len) == 0 &&
		    strcmp(cvd->vdev_path + len, "/old") == 0) {
			spa_strfree(cvd->vdev_path);
			cvd->vdev_path = spa_strdup(vd->vdev_path);
		}
	}

	/*
	 * If we are detaching the original disk from a spare, then it implies
	 * that the spare should become a real disk, and be removed from the
	 * active spare list for the pool.
	 */
	if (pvd->vdev_ops == &vdev_spare_ops &&
	    vd->vdev_id == 0 && pvd->vdev_child[1]->vdev_isspare)
		unspare = B_TRUE;

	/*
	 * Erase the disk labels so the disk can be used for other things.
	 * This must be done after all other error cases are handled,
	 * but before we disembowel vd (so we can still do I/O to it).
	 * But if we can't do it, don't treat the error as fatal --
	 * it may be that the unwritability of the disk is the reason
	 * it's being detached!
	 */
	error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE);

	/*
	 * Remove vd from its parent and compact the parent's children.
	 */
	vdev_remove_child(pvd, vd);
	vdev_compact_children(pvd);

	/*
	 * Remember one of the remaining children so we can get tvd below.
	 */
	cvd = pvd->vdev_child[0];

	/*
	 * If we need to remove the remaining child from the list of hot spares,
	 * do it now, marking the vdev as no longer a spare in the process.
	 * We must do this before vdev_remove_parent(), because that can
	 * change the GUID if it creates a new toplevel GUID.  For a similar
	 * reason, we must remove the spare now, in the same txg as the detach;
	 * otherwise someone could attach a new sibling, change the GUID, and
	 * the subsequent attempt to spa_vdev_remove(unspare_guid) would fail.
	 */
	if (unspare) {
		ASSERT(cvd->vdev_isspare);
		spa_spare_remove(cvd);
		unspare_guid = cvd->vdev_guid;
		(void) spa_vdev_remove(spa, unspare_guid, B_TRUE);
	}

	/*
	 * If the parent mirror/replacing vdev only has one child,
	 * the parent is no longer needed.  Remove it from the tree.
	 */
	if (pvd->vdev_children == 1)
		vdev_remove_parent(cvd);

	/*
	 * We don't set tvd until now because the parent we just removed
	 * may have been the previous top-level vdev.
	 */
	tvd = cvd->vdev_top;
	ASSERT(tvd->vdev_parent == rvd);

	/*
	 * Reevaluate the parent vdev state.
	 */
	vdev_propagate_state(cvd);

	/*
	 * If the 'autoexpand' property is set on the pool then automatically
	 * try to expand the size of the pool. For example if the device we
	 * just detached was smaller than the others, it may be possible to
	 * add metaslabs (i.e. grow the pool).  We need to reopen the vdev
	 * first so that we can obtain the updated sizes of the leaf vdevs.
	 */
	if (spa->spa_autoexpand) {
		vdev_reopen(tvd);
		vdev_expand(tvd, txg);
	}

	vdev_config_dirty(tvd);

	/*
	 * Mark vd's DTL as dirty in this txg.  vdev_dtl_sync() will see that
	 * vd->vdev_detached is set and free vd's DTL object in syncing context.
	 * But first make sure we're not on any *other* txg's DTL list, to
	 * prevent vd from being accessed after it's freed.
	 */
	for (int t = 0; t < TXG_SIZE; t++)
		(void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t);
	vd->vdev_detached = B_TRUE;
	vdev_dirty(tvd, VDD_DTL, vd, txg);

	spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE);

	error = spa_vdev_exit(spa, vd, txg, 0);

	/*
	 * If this was the removal of the original device in a hot spare vdev,
	 * then we want to go through and remove the device from the hot spare
	 * list of every other pool.
	 */
	if (unspare) {
		spa_t *myspa = spa;
		spa = NULL;
		mutex_enter(&spa_namespace_lock);
		while ((spa = spa_next(spa)) != NULL) {
			if (spa->spa_state != POOL_STATE_ACTIVE)
				continue;
			if (spa == myspa)
				continue;
			/*
			 * Hold a reference so the pool can't go away while
			 * we drop the namespace lock to do the removal.
			 */
			spa_open_ref(spa, FTAG);
			mutex_exit(&spa_namespace_lock);
			(void) spa_vdev_remove(spa, unspare_guid, B_TRUE);
			mutex_enter(&spa_namespace_lock);
			spa_close(spa, FTAG);
		}
		mutex_exit(&spa_namespace_lock);
	}

	return (error);
}

/*
 * Look up the nvlist in nvpp[0..count-1] whose ZPOOL_CONFIG_GUID matches
 * target_guid; returns NULL if no entry matches.
 */
static nvlist_t *
spa_nvlist_lookup_by_guid(nvlist_t **nvpp, int count, uint64_t target_guid)
{
	for (int i = 0; i < count; i++) {
		uint64_t guid;

		VERIFY(nvlist_lookup_uint64(nvpp[i], ZPOOL_CONFIG_GUID,
		    &guid) == 0);

		if (guid == target_guid)
			return (nvpp[i]);
	}

	return (NULL);
}

/*
 * Remove 'dev_to_remove' from the dev[0..count-1] array and store the
 * resulting (count - 1)-element array back into 'config' under 'name'.
 * NOTE(review): when count == 1, 'newdev' stays NULL and an empty array
 * is stored -- presumably the nvlist API accepts (NULL, 0); confirm.
 */
static void
spa_vdev_remove_aux(nvlist_t *config, char *name, nvlist_t **dev, int count,
    nvlist_t *dev_to_remove)
{
	nvlist_t **newdev = NULL;

	if (count > 1)
		newdev = kmem_alloc((count - 1) * sizeof (void *), KM_SLEEP);

	/* Copy every entry except the one being removed. */
	for (int i = 0, j = 0; i < count; i++) {
		if (dev[i] == dev_to_remove)
			continue;
		VERIFY(nvlist_dup(dev[i], &newdev[j++], KM_SLEEP) == 0);
	}

	VERIFY(nvlist_remove(config, name, DATA_TYPE_NVLIST_ARRAY) == 0);
	VERIFY(nvlist_add_nvlist_array(config, name, newdev, count - 1) == 0);

	/* The config now owns duplicates; free our temporary copies. */
	for (int i = 0; i < count - 1; i++)
		nvlist_free(newdev[i]);

	if (count > 1)
		kmem_free(newdev, (count - 1) * sizeof (void *));
}

/*
 * Removing a device from the vdev namespace requires several steps
 * and can take a significant amount of time.
 * As a result we use
 * the spa_vdev_config_[enter/exit] functions which allow us to
 * grab and release the spa_config_lock while still holding the namespace
 * lock.  During each step the configuration is synced out.
 */

/*
 * Initial phase of device removal - stop future allocations from this device.
 * Caller must hold spa_namespace_lock and all of SCL_ALL as writer.
 */
void
spa_vdev_remove_start(spa_t *spa, vdev_t *vd)
{
	metaslab_group_t *mg = vd->vdev_mg;

	ASSERT(MUTEX_HELD(&spa_namespace_lock));
	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);

	/*
	 * Remove our vdev from the allocatable vdevs.
	 * mg may be NULL if the vdev never initialized its metaslabs.
	 */
	if (mg)
		metaslab_class_remove(mg->mg_class, mg);
}

/*
 * Evacuate the device.
 */
int
spa_vdev_remove_evacuate(spa_t *spa, vdev_t *vd)
{
	uint64_t txg;
	int error;

	/* Called with the namespace lock held but NOT the config lock. */
	ASSERT(MUTEX_HELD(&spa_namespace_lock));
	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);

	/*
	 * Evacuate the device.  We don't hold the config lock as writer
	 * since we need to do I/O but we do keep the
	 * spa_namespace_lock held.  Once this completes the device
	 * should no longer have any blocks allocated on it.
	 */
	if (vd->vdev_islog) {
		/*
		 * Evacuate the device.
		 * NOTE(review): assignment-in-if without an explicit
		 * comparison, and the inner 'txg' shadows the outer one.
		 */
		if (error = dmu_objset_find(spa_name(spa),
		    zil_vdev_offline, NULL, DS_FIND_CHILDREN)) {
			uint64_t txg;

			/*
			 * Evacuation failed: put the metaslab group back
			 * into the log class so allocations resume.
			 */
			txg = spa_vdev_config_enter(spa);
			metaslab_class_add(spa->spa_log_class,
			    vd->vdev_mg);
			return (spa_vdev_exit(spa, NULL, txg, error));
		}
		txg_wait_synced(spa_get_dsl(spa), 0);
	}

	/*
	 * Remove any remaining MOS metadata associated with the device.
	 */
	txg = spa_vdev_config_enter(spa);
	vd->vdev_removing = B_TRUE;
	vdev_dirty(vd, 0, NULL, txg);
	vdev_config_dirty(vd);
	spa_vdev_config_exit(spa, NULL, txg, 0, FTAG);

	return (0);
}

/*
 * Complete the removal by cleaning up the namespace.
 */
void
spa_vdev_remove_done(spa_t *spa, vdev_t *vd)
{
	vdev_t *rvd = spa->spa_root_vdev;
	metaslab_group_t *mg = vd->vdev_mg;
	uint64_t id = vd->vdev_id;
	boolean_t last_vdev = (id == (rvd->vdev_children - 1));

	ASSERT(MUTEX_HELD(&spa_namespace_lock));
	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);

	(void) vdev_label_init(vd, 0, VDEV_LABEL_REMOVE);
	vdev_free(vd);

	/*
	 * It's possible that another thread is trying to do a spa_vdev_add()
	 * at the same time we're trying to remove it.  As a result the
	 * added vdev may not have initialized its metaslabs yet.
	 */
	if (mg != NULL)
		metaslab_group_destroy(mg);

	/*
	 * If this wasn't the last child, leave a hole vdev in the freed
	 * slot so the remaining vdev ids stay stable.
	 */
	if (last_vdev) {
		vdev_compact_children(rvd);
	} else {
		vd = vdev_alloc_common(spa, id, 0, &vdev_hole_ops);
		vdev_add_child(rvd, vd);
	}
	vdev_config_dirty(rvd);

	/*
	 * Reassess the health of our root vdev.
	 */
	vdev_reopen(rvd);
}

/*
 * Remove a device from the pool.  Currently, this supports removing only hot
 * spares, slogs, and level 2 ARC devices.
 */
int
spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
{
	vdev_t *vd;
	nvlist_t **spares, **l2cache, *nv;
	uint64_t txg = 0;
	uint_t nspares, nl2cache;
	int error = 0;
	/*
	 * If the caller already holds spa_namespace_lock (e.g. the recursive
	 * call from spa_vdev_detach()), skip the enter/exit transaction.
	 */
	boolean_t locked = MUTEX_HELD(&spa_namespace_lock);

	if (!locked)
		txg = spa_vdev_enter(spa);

	vd = spa_lookup_by_guid(spa, guid, B_FALSE);

	if (spa->spa_spares.sav_vdevs != NULL &&
	    nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
	    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0 &&
	    (nv = spa_nvlist_lookup_by_guid(spares, nspares, guid)) != NULL) {
		/*
		 * Only remove the hot spare if it's not currently in use
		 * in this pool.
		 */
		if (vd == NULL || unspare) {
			spa_vdev_remove_aux(spa->spa_spares.sav_config,
			    ZPOOL_CONFIG_SPARES, spares, nspares, nv);
			spa_load_spares(spa);
			spa->spa_spares.sav_sync = B_TRUE;
		} else {
			error = EBUSY;
		}
	} else if (spa->spa_l2cache.sav_vdevs != NULL &&
	    nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config,
	    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0 &&
	    (nv = spa_nvlist_lookup_by_guid(l2cache, nl2cache, guid)) != NULL) {
		/*
		 * Cache devices can always be removed.
		 */
		spa_vdev_remove_aux(spa->spa_l2cache.sav_config,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache, nv);
		spa_load_l2cache(spa);
		spa->spa_l2cache.sav_sync = B_TRUE;
	} else if (vd != NULL && vd->vdev_islog) {
		ASSERT(!locked);

		/*
		 * XXX - Once we have bp-rewrite this should
		 * become the common case.
		 */

		/*
		 * 1. Stop allocations
		 * 2. Evacuate the device (i.e. kill off stubby and
		 *    metadata) and wait for it to complete (i.e. sync).
		 * 3. Cleanup the vdev namespace.
		 */
		spa_vdev_remove_start(spa, vd);

		/*
		 * Drop the config lock (but not the namespace lock) around
		 * the evacuation, which needs to do I/O.
		 */
		spa_vdev_config_exit(spa, NULL, txg, 0, FTAG);
		if ((error = spa_vdev_remove_evacuate(spa, vd)) != 0)
			return (error);
		txg = spa_vdev_config_enter(spa);

		spa_vdev_remove_done(spa, vd);

	} else if (vd != NULL) {
		/*
		 * Normal vdevs cannot be removed (yet).
		 */
		error = ENOTSUP;
	} else {
		/*
		 * There is no vdev of any kind with the specified guid.
		 */
		error = ENOENT;
	}

	if (!locked)
		return (spa_vdev_exit(spa, NULL, txg, error));

	return (error);
}

/*
 * Find any device that's done replacing, or a vdev marked 'unspare' that's
 * currently spared, so we can detach it.
 */
static vdev_t *
spa_vdev_resilver_done_hunt(vdev_t *vd)
{
	vdev_t *newvd, *oldvd;

	/* Depth-first search of the subtree rooted at vd. */
	for (int c = 0; c < vd->vdev_children; c++) {
		oldvd = spa_vdev_resilver_done_hunt(vd->vdev_child[c]);
		if (oldvd != NULL)
			return (oldvd);
	}

	/*
	 * Check for a completed replacement.
37114451Seschrock */ 3712789Sahrens if (vd->vdev_ops == &vdev_replacing_ops && vd->vdev_children == 2) { 37131544Seschrock oldvd = vd->vdev_child[0]; 37141544Seschrock newvd = vd->vdev_child[1]; 3715789Sahrens 37168241SJeff.Bonwick@Sun.COM if (vdev_dtl_empty(newvd, DTL_MISSING) && 37178241SJeff.Bonwick@Sun.COM !vdev_dtl_required(oldvd)) 37181544Seschrock return (oldvd); 37191544Seschrock } 3720789Sahrens 37214451Seschrock /* 37224451Seschrock * Check for a completed resilver with the 'unspare' flag set. 37234451Seschrock */ 37244451Seschrock if (vd->vdev_ops == &vdev_spare_ops && vd->vdev_children == 2) { 37254451Seschrock newvd = vd->vdev_child[0]; 37264451Seschrock oldvd = vd->vdev_child[1]; 37274451Seschrock 37284451Seschrock if (newvd->vdev_unspare && 37298241SJeff.Bonwick@Sun.COM vdev_dtl_empty(newvd, DTL_MISSING) && 37308241SJeff.Bonwick@Sun.COM !vdev_dtl_required(oldvd)) { 37314451Seschrock newvd->vdev_unspare = 0; 37324451Seschrock return (oldvd); 37334451Seschrock } 37344451Seschrock } 37354451Seschrock 37361544Seschrock return (NULL); 3737789Sahrens } 3738789Sahrens 37391544Seschrock static void 37404451Seschrock spa_vdev_resilver_done(spa_t *spa) 3741789Sahrens { 37428241SJeff.Bonwick@Sun.COM vdev_t *vd, *pvd, *ppvd; 37438241SJeff.Bonwick@Sun.COM uint64_t guid, sguid, pguid, ppguid; 37448241SJeff.Bonwick@Sun.COM 37458241SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 3746789Sahrens 37474451Seschrock while ((vd = spa_vdev_resilver_done_hunt(spa->spa_root_vdev)) != NULL) { 37488241SJeff.Bonwick@Sun.COM pvd = vd->vdev_parent; 37498241SJeff.Bonwick@Sun.COM ppvd = pvd->vdev_parent; 37501544Seschrock guid = vd->vdev_guid; 37518241SJeff.Bonwick@Sun.COM pguid = pvd->vdev_guid; 37528241SJeff.Bonwick@Sun.COM ppguid = ppvd->vdev_guid; 37538241SJeff.Bonwick@Sun.COM sguid = 0; 37542082Seschrock /* 37552082Seschrock * If we have just finished replacing a hot spared device, then 37562082Seschrock * we need to detach the parent's first child (the 
original hot 37572082Seschrock * spare) as well. 37582082Seschrock */ 37598241SJeff.Bonwick@Sun.COM if (ppvd->vdev_ops == &vdev_spare_ops && pvd->vdev_id == 0) { 37602082Seschrock ASSERT(pvd->vdev_ops == &vdev_replacing_ops); 37618241SJeff.Bonwick@Sun.COM ASSERT(ppvd->vdev_children == 2); 37628241SJeff.Bonwick@Sun.COM sguid = ppvd->vdev_child[1]->vdev_guid; 37632082Seschrock } 37648241SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 37658241SJeff.Bonwick@Sun.COM if (spa_vdev_detach(spa, guid, pguid, B_TRUE) != 0) 37661544Seschrock return; 37678241SJeff.Bonwick@Sun.COM if (sguid && spa_vdev_detach(spa, sguid, ppguid, B_TRUE) != 0) 37682082Seschrock return; 37698241SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 3770789Sahrens } 3771789Sahrens 37728241SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 3773789Sahrens } 3774789Sahrens 3775789Sahrens /* 37769425SEric.Schrock@Sun.COM * Update the stored path or FRU for this vdev. Dirty the vdev configuration, 37779425SEric.Schrock@Sun.COM * relying on spa_vdev_enter/exit() to synchronize the labels and cache. 
37781354Seschrock */ 37791354Seschrock int 37809425SEric.Schrock@Sun.COM spa_vdev_set_common(spa_t *spa, uint64_t guid, const char *value, 37819425SEric.Schrock@Sun.COM boolean_t ispath) 37821354Seschrock { 37836643Seschrock vdev_t *vd; 37841354Seschrock uint64_t txg; 37851354Seschrock 37861354Seschrock txg = spa_vdev_enter(spa); 37871354Seschrock 37889425SEric.Schrock@Sun.COM if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) 37895450Sbrendan return (spa_vdev_exit(spa, NULL, txg, ENOENT)); 37901354Seschrock 37911585Sbonwick if (!vd->vdev_ops->vdev_op_leaf) 37921585Sbonwick return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 37931585Sbonwick 37949425SEric.Schrock@Sun.COM if (ispath) { 37959425SEric.Schrock@Sun.COM spa_strfree(vd->vdev_path); 37969425SEric.Schrock@Sun.COM vd->vdev_path = spa_strdup(value); 37979425SEric.Schrock@Sun.COM } else { 37989425SEric.Schrock@Sun.COM if (vd->vdev_fru != NULL) 37999425SEric.Schrock@Sun.COM spa_strfree(vd->vdev_fru); 38009425SEric.Schrock@Sun.COM vd->vdev_fru = spa_strdup(value); 38019425SEric.Schrock@Sun.COM } 38021354Seschrock 38031354Seschrock vdev_config_dirty(vd->vdev_top); 38041354Seschrock 38051354Seschrock return (spa_vdev_exit(spa, NULL, txg, 0)); 38061354Seschrock } 38071354Seschrock 38089425SEric.Schrock@Sun.COM int 38099425SEric.Schrock@Sun.COM spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath) 38109425SEric.Schrock@Sun.COM { 38119425SEric.Schrock@Sun.COM return (spa_vdev_set_common(spa, guid, newpath, B_TRUE)); 38129425SEric.Schrock@Sun.COM } 38139425SEric.Schrock@Sun.COM 38149425SEric.Schrock@Sun.COM int 38159425SEric.Schrock@Sun.COM spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru) 38169425SEric.Schrock@Sun.COM { 38179425SEric.Schrock@Sun.COM return (spa_vdev_set_common(spa, guid, newfru, B_FALSE)); 38189425SEric.Schrock@Sun.COM } 38199425SEric.Schrock@Sun.COM 38201354Seschrock /* 3821789Sahrens * ========================================================================== 
3822789Sahrens * SPA Scrubbing 3823789Sahrens * ========================================================================== 3824789Sahrens */ 3825789Sahrens 38267046Sahrens int 38277046Sahrens spa_scrub(spa_t *spa, pool_scrub_type_t type) 3828789Sahrens { 38297754SJeff.Bonwick@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); 38304808Sek110237 3831789Sahrens if ((uint_t)type >= POOL_SCRUB_TYPES) 3832789Sahrens return (ENOTSUP); 3833789Sahrens 3834789Sahrens /* 38357046Sahrens * If a resilver was requested, but there is no DTL on a 38367046Sahrens * writeable leaf device, we have nothing to do. 3837789Sahrens */ 38387046Sahrens if (type == POOL_SCRUB_RESILVER && 38397046Sahrens !vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) { 38407046Sahrens spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); 38411544Seschrock return (0); 38421544Seschrock } 3843789Sahrens 38447046Sahrens if (type == POOL_SCRUB_EVERYTHING && 38457046Sahrens spa->spa_dsl_pool->dp_scrub_func != SCRUB_FUNC_NONE && 38467046Sahrens spa->spa_dsl_pool->dp_scrub_isresilver) 38477046Sahrens return (EBUSY); 38487046Sahrens 38497046Sahrens if (type == POOL_SCRUB_EVERYTHING || type == POOL_SCRUB_RESILVER) { 38507046Sahrens return (dsl_pool_scrub_clean(spa->spa_dsl_pool)); 38517046Sahrens } else if (type == POOL_SCRUB_NONE) { 38527046Sahrens return (dsl_pool_scrub_cancel(spa->spa_dsl_pool)); 38531544Seschrock } else { 38547046Sahrens return (EINVAL); 38551544Seschrock } 3856789Sahrens } 3857789Sahrens 38581544Seschrock /* 38591544Seschrock * ========================================================================== 38601544Seschrock * SPA async task processing 38611544Seschrock * ========================================================================== 38621544Seschrock */ 38631544Seschrock 38641544Seschrock static void 38654451Seschrock spa_async_remove(spa_t *spa, vdev_t *vd) 3866789Sahrens { 38677361SBrendan.Gregg@Sun.COM if (vd->vdev_remove_wanted) { 38687361SBrendan.Gregg@Sun.COM 
vd->vdev_remove_wanted = 0; 38697361SBrendan.Gregg@Sun.COM vdev_set_state(vd, B_FALSE, VDEV_STATE_REMOVED, VDEV_AUX_NONE); 387010575SEric.Schrock@Sun.COM 387110575SEric.Schrock@Sun.COM /* 387210575SEric.Schrock@Sun.COM * We want to clear the stats, but we don't want to do a full 387310575SEric.Schrock@Sun.COM * vdev_clear() as that will cause us to throw away 387410575SEric.Schrock@Sun.COM * degraded/faulted state as well as attempt to reopen the 387510575SEric.Schrock@Sun.COM * device, all of which is a waste. 387610575SEric.Schrock@Sun.COM */ 387710575SEric.Schrock@Sun.COM vd->vdev_stat.vs_read_errors = 0; 387810575SEric.Schrock@Sun.COM vd->vdev_stat.vs_write_errors = 0; 387910575SEric.Schrock@Sun.COM vd->vdev_stat.vs_checksum_errors = 0; 388010575SEric.Schrock@Sun.COM 38817754SJeff.Bonwick@Sun.COM vdev_state_dirty(vd->vdev_top); 38821544Seschrock } 38837361SBrendan.Gregg@Sun.COM 38847754SJeff.Bonwick@Sun.COM for (int c = 0; c < vd->vdev_children; c++) 38857361SBrendan.Gregg@Sun.COM spa_async_remove(spa, vd->vdev_child[c]); 38861544Seschrock } 38871544Seschrock 38881544Seschrock static void 38897754SJeff.Bonwick@Sun.COM spa_async_probe(spa_t *spa, vdev_t *vd) 38907754SJeff.Bonwick@Sun.COM { 38917754SJeff.Bonwick@Sun.COM if (vd->vdev_probe_wanted) { 38927754SJeff.Bonwick@Sun.COM vd->vdev_probe_wanted = 0; 38937754SJeff.Bonwick@Sun.COM vdev_reopen(vd); /* vdev_open() does the actual probe */ 38947754SJeff.Bonwick@Sun.COM } 38957754SJeff.Bonwick@Sun.COM 38967754SJeff.Bonwick@Sun.COM for (int c = 0; c < vd->vdev_children; c++) 38977754SJeff.Bonwick@Sun.COM spa_async_probe(spa, vd->vdev_child[c]); 38987754SJeff.Bonwick@Sun.COM } 38997754SJeff.Bonwick@Sun.COM 39007754SJeff.Bonwick@Sun.COM static void 39019816SGeorge.Wilson@Sun.COM spa_async_autoexpand(spa_t *spa, vdev_t *vd) 39029816SGeorge.Wilson@Sun.COM { 39039816SGeorge.Wilson@Sun.COM sysevent_id_t eid; 39049816SGeorge.Wilson@Sun.COM nvlist_t *attr; 39059816SGeorge.Wilson@Sun.COM char *physpath; 
39069816SGeorge.Wilson@Sun.COM 39079816SGeorge.Wilson@Sun.COM if (!spa->spa_autoexpand) 39089816SGeorge.Wilson@Sun.COM return; 39099816SGeorge.Wilson@Sun.COM 39109816SGeorge.Wilson@Sun.COM for (int c = 0; c < vd->vdev_children; c++) { 39119816SGeorge.Wilson@Sun.COM vdev_t *cvd = vd->vdev_child[c]; 39129816SGeorge.Wilson@Sun.COM spa_async_autoexpand(spa, cvd); 39139816SGeorge.Wilson@Sun.COM } 39149816SGeorge.Wilson@Sun.COM 39159816SGeorge.Wilson@Sun.COM if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_physpath == NULL) 39169816SGeorge.Wilson@Sun.COM return; 39179816SGeorge.Wilson@Sun.COM 39189816SGeorge.Wilson@Sun.COM physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 39199816SGeorge.Wilson@Sun.COM (void) snprintf(physpath, MAXPATHLEN, "/devices%s", vd->vdev_physpath); 39209816SGeorge.Wilson@Sun.COM 39219816SGeorge.Wilson@Sun.COM VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0); 39229816SGeorge.Wilson@Sun.COM VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0); 39239816SGeorge.Wilson@Sun.COM 39249816SGeorge.Wilson@Sun.COM (void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS, 39259816SGeorge.Wilson@Sun.COM ESC_DEV_DLE, attr, &eid, DDI_SLEEP); 39269816SGeorge.Wilson@Sun.COM 39279816SGeorge.Wilson@Sun.COM nvlist_free(attr); 39289816SGeorge.Wilson@Sun.COM kmem_free(physpath, MAXPATHLEN); 39299816SGeorge.Wilson@Sun.COM } 39309816SGeorge.Wilson@Sun.COM 39319816SGeorge.Wilson@Sun.COM static void 39321544Seschrock spa_async_thread(spa_t *spa) 39331544Seschrock { 39347754SJeff.Bonwick@Sun.COM int tasks; 39351544Seschrock 39361544Seschrock ASSERT(spa->spa_sync_on); 3937789Sahrens 39381544Seschrock mutex_enter(&spa->spa_async_lock); 39391544Seschrock tasks = spa->spa_async_tasks; 39401544Seschrock spa->spa_async_tasks = 0; 39411544Seschrock mutex_exit(&spa->spa_async_lock); 39421544Seschrock 39431544Seschrock /* 39441635Sbonwick * See if the config needs to be updated. 
39451635Sbonwick */ 39461635Sbonwick if (tasks & SPA_ASYNC_CONFIG_UPDATE) { 39479816SGeorge.Wilson@Sun.COM uint64_t oldsz, space_update; 39489816SGeorge.Wilson@Sun.COM 39491635Sbonwick mutex_enter(&spa_namespace_lock); 39509816SGeorge.Wilson@Sun.COM oldsz = spa_get_space(spa); 39511635Sbonwick spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); 39529816SGeorge.Wilson@Sun.COM space_update = spa_get_space(spa) - oldsz; 39531635Sbonwick mutex_exit(&spa_namespace_lock); 39549816SGeorge.Wilson@Sun.COM 39559816SGeorge.Wilson@Sun.COM /* 39569816SGeorge.Wilson@Sun.COM * If the pool grew as a result of the config update, 39579816SGeorge.Wilson@Sun.COM * then log an internal history event. 39589816SGeorge.Wilson@Sun.COM */ 39599816SGeorge.Wilson@Sun.COM if (space_update) { 39609946SMark.Musante@Sun.COM spa_history_internal_log(LOG_POOL_VDEV_ONLINE, 39619946SMark.Musante@Sun.COM spa, NULL, CRED(), 39629946SMark.Musante@Sun.COM "pool '%s' size: %llu(+%llu)", 39639946SMark.Musante@Sun.COM spa_name(spa), spa_get_space(spa), 39649946SMark.Musante@Sun.COM space_update); 39659816SGeorge.Wilson@Sun.COM } 39661635Sbonwick } 39671635Sbonwick 39681635Sbonwick /* 39694451Seschrock * See if any devices need to be marked REMOVED. 
39701544Seschrock */ 39717754SJeff.Bonwick@Sun.COM if (tasks & SPA_ASYNC_REMOVE) { 397210685SGeorge.Wilson@Sun.COM spa_vdev_state_enter(spa, SCL_NONE); 39734451Seschrock spa_async_remove(spa, spa->spa_root_vdev); 39747754SJeff.Bonwick@Sun.COM for (int i = 0; i < spa->spa_l2cache.sav_count; i++) 39757361SBrendan.Gregg@Sun.COM spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i]); 39767754SJeff.Bonwick@Sun.COM for (int i = 0; i < spa->spa_spares.sav_count; i++) 39777361SBrendan.Gregg@Sun.COM spa_async_remove(spa, spa->spa_spares.sav_vdevs[i]); 39787754SJeff.Bonwick@Sun.COM (void) spa_vdev_state_exit(spa, NULL, 0); 39797754SJeff.Bonwick@Sun.COM } 39807754SJeff.Bonwick@Sun.COM 39819816SGeorge.Wilson@Sun.COM if ((tasks & SPA_ASYNC_AUTOEXPAND) && !spa_suspended(spa)) { 39829816SGeorge.Wilson@Sun.COM spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 39839816SGeorge.Wilson@Sun.COM spa_async_autoexpand(spa, spa->spa_root_vdev); 39849816SGeorge.Wilson@Sun.COM spa_config_exit(spa, SCL_CONFIG, FTAG); 39859816SGeorge.Wilson@Sun.COM } 39869816SGeorge.Wilson@Sun.COM 39877754SJeff.Bonwick@Sun.COM /* 39887754SJeff.Bonwick@Sun.COM * See if any devices need to be probed. 39897754SJeff.Bonwick@Sun.COM */ 39907754SJeff.Bonwick@Sun.COM if (tasks & SPA_ASYNC_PROBE) { 399110685SGeorge.Wilson@Sun.COM spa_vdev_state_enter(spa, SCL_NONE); 39927754SJeff.Bonwick@Sun.COM spa_async_probe(spa, spa->spa_root_vdev); 39937754SJeff.Bonwick@Sun.COM (void) spa_vdev_state_exit(spa, NULL, 0); 39944451Seschrock } 39951544Seschrock 39961544Seschrock /* 39971544Seschrock * If any devices are done replacing, detach them. 39981544Seschrock */ 39994451Seschrock if (tasks & SPA_ASYNC_RESILVER_DONE) 40004451Seschrock spa_vdev_resilver_done(spa); 4001789Sahrens 40021544Seschrock /* 40031544Seschrock * Kick off a resilver. 
40041544Seschrock */ 40057046Sahrens if (tasks & SPA_ASYNC_RESILVER) 40067046Sahrens VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER) == 0); 40071544Seschrock 40081544Seschrock /* 40091544Seschrock * Let the world know that we're done. 40101544Seschrock */ 40111544Seschrock mutex_enter(&spa->spa_async_lock); 40121544Seschrock spa->spa_async_thread = NULL; 40131544Seschrock cv_broadcast(&spa->spa_async_cv); 40141544Seschrock mutex_exit(&spa->spa_async_lock); 40151544Seschrock thread_exit(); 40161544Seschrock } 40171544Seschrock 40181544Seschrock void 40191544Seschrock spa_async_suspend(spa_t *spa) 40201544Seschrock { 40211544Seschrock mutex_enter(&spa->spa_async_lock); 40221544Seschrock spa->spa_async_suspended++; 40231544Seschrock while (spa->spa_async_thread != NULL) 40241544Seschrock cv_wait(&spa->spa_async_cv, &spa->spa_async_lock); 40251544Seschrock mutex_exit(&spa->spa_async_lock); 40261544Seschrock } 40271544Seschrock 40281544Seschrock void 40291544Seschrock spa_async_resume(spa_t *spa) 40301544Seschrock { 40311544Seschrock mutex_enter(&spa->spa_async_lock); 40321544Seschrock ASSERT(spa->spa_async_suspended != 0); 40331544Seschrock spa->spa_async_suspended--; 40341544Seschrock mutex_exit(&spa->spa_async_lock); 40351544Seschrock } 40361544Seschrock 40371544Seschrock static void 40381544Seschrock spa_async_dispatch(spa_t *spa) 40391544Seschrock { 40401544Seschrock mutex_enter(&spa->spa_async_lock); 40411544Seschrock if (spa->spa_async_tasks && !spa->spa_async_suspended && 40421635Sbonwick spa->spa_async_thread == NULL && 40431635Sbonwick rootdir != NULL && !vn_is_readonly(rootdir)) 40441544Seschrock spa->spa_async_thread = thread_create(NULL, 0, 40451544Seschrock spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri); 40461544Seschrock mutex_exit(&spa->spa_async_lock); 40471544Seschrock } 40481544Seschrock 40491544Seschrock void 40501544Seschrock spa_async_request(spa_t *spa, int task) 40511544Seschrock { 40521544Seschrock mutex_enter(&spa->spa_async_lock); 
40531544Seschrock spa->spa_async_tasks |= task; 40541544Seschrock mutex_exit(&spa->spa_async_lock); 4055789Sahrens } 4056789Sahrens 4057789Sahrens /* 4058789Sahrens * ========================================================================== 4059789Sahrens * SPA syncing routines 4060789Sahrens * ========================================================================== 4061789Sahrens */ 4062789Sahrens 4063789Sahrens static void 4064789Sahrens spa_sync_deferred_frees(spa_t *spa, uint64_t txg) 4065789Sahrens { 4066789Sahrens bplist_t *bpl = &spa->spa_sync_bplist; 4067789Sahrens dmu_tx_t *tx; 4068789Sahrens blkptr_t blk; 4069789Sahrens uint64_t itor = 0; 4070789Sahrens zio_t *zio; 4071789Sahrens int error; 4072789Sahrens uint8_t c = 1; 4073789Sahrens 40747754SJeff.Bonwick@Sun.COM zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); 40757754SJeff.Bonwick@Sun.COM 40767754SJeff.Bonwick@Sun.COM while (bplist_iterate(bpl, &itor, &blk) == 0) { 40777754SJeff.Bonwick@Sun.COM ASSERT(blk.blk_birth < txg); 40787754SJeff.Bonwick@Sun.COM zio_nowait(zio_free(zio, spa, txg, &blk, NULL, NULL, 40797754SJeff.Bonwick@Sun.COM ZIO_FLAG_MUSTSUCCEED)); 40807754SJeff.Bonwick@Sun.COM } 4081789Sahrens 4082789Sahrens error = zio_wait(zio); 4083789Sahrens ASSERT3U(error, ==, 0); 4084789Sahrens 4085789Sahrens tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); 4086789Sahrens bplist_vacate(bpl, tx); 4087789Sahrens 4088789Sahrens /* 4089789Sahrens * Pre-dirty the first block so we sync to convergence faster. 4090789Sahrens * (Usually only the first block is needed.) 
4091789Sahrens */ 4092789Sahrens dmu_write(spa->spa_meta_objset, spa->spa_sync_bplist_obj, 0, 1, &c, tx); 4093789Sahrens dmu_tx_commit(tx); 4094789Sahrens } 4095789Sahrens 4096789Sahrens static void 40972082Seschrock spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx) 40982082Seschrock { 40992082Seschrock char *packed = NULL; 41007497STim.Haley@Sun.COM size_t bufsize; 41012082Seschrock size_t nvsize = 0; 41022082Seschrock dmu_buf_t *db; 41032082Seschrock 41042082Seschrock VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0); 41052082Seschrock 41067497STim.Haley@Sun.COM /* 41077497STim.Haley@Sun.COM * Write full (SPA_CONFIG_BLOCKSIZE) blocks of configuration 41087497STim.Haley@Sun.COM * information. This avoids the dbuf_will_dirty() path and 41097497STim.Haley@Sun.COM * saves us a pre-read to get data we don't actually care about. 41107497STim.Haley@Sun.COM */ 41117497STim.Haley@Sun.COM bufsize = P2ROUNDUP(nvsize, SPA_CONFIG_BLOCKSIZE); 41127497STim.Haley@Sun.COM packed = kmem_alloc(bufsize, KM_SLEEP); 41132082Seschrock 41142082Seschrock VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR, 41152082Seschrock KM_SLEEP) == 0); 41167497STim.Haley@Sun.COM bzero(packed + nvsize, bufsize - nvsize); 41177497STim.Haley@Sun.COM 41187497STim.Haley@Sun.COM dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx); 41197497STim.Haley@Sun.COM 41207497STim.Haley@Sun.COM kmem_free(packed, bufsize); 41212082Seschrock 41222082Seschrock VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); 41232082Seschrock dmu_buf_will_dirty(db, tx); 41242082Seschrock *(uint64_t *)db->db_data = nvsize; 41252082Seschrock dmu_buf_rele(db, FTAG); 41262082Seschrock } 41272082Seschrock 41282082Seschrock static void 41295450Sbrendan spa_sync_aux_dev(spa_t *spa, spa_aux_vdev_t *sav, dmu_tx_t *tx, 41305450Sbrendan const char *config, const char *entry) 41312082Seschrock { 41322082Seschrock nvlist_t *nvroot; 41335450Sbrendan nvlist_t **list; 41342082Seschrock int i; 
41352082Seschrock 41365450Sbrendan if (!sav->sav_sync) 41372082Seschrock return; 41382082Seschrock 41392082Seschrock /* 41405450Sbrendan * Update the MOS nvlist describing the list of available devices. 41415450Sbrendan * spa_validate_aux() will have already made sure this nvlist is 41424451Seschrock * valid and the vdevs are labeled appropriately. 41432082Seschrock */ 41445450Sbrendan if (sav->sav_object == 0) { 41455450Sbrendan sav->sav_object = dmu_object_alloc(spa->spa_meta_objset, 41465450Sbrendan DMU_OT_PACKED_NVLIST, 1 << 14, DMU_OT_PACKED_NVLIST_SIZE, 41475450Sbrendan sizeof (uint64_t), tx); 41482082Seschrock VERIFY(zap_update(spa->spa_meta_objset, 41495450Sbrendan DMU_POOL_DIRECTORY_OBJECT, entry, sizeof (uint64_t), 1, 41505450Sbrendan &sav->sav_object, tx) == 0); 41512082Seschrock } 41522082Seschrock 41532082Seschrock VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); 41545450Sbrendan if (sav->sav_count == 0) { 41555450Sbrendan VERIFY(nvlist_add_nvlist_array(nvroot, config, NULL, 0) == 0); 41562082Seschrock } else { 41575450Sbrendan list = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); 41585450Sbrendan for (i = 0; i < sav->sav_count; i++) 41595450Sbrendan list[i] = vdev_config_generate(spa, sav->sav_vdevs[i], 41605450Sbrendan B_FALSE, B_FALSE, B_TRUE); 41615450Sbrendan VERIFY(nvlist_add_nvlist_array(nvroot, config, list, 41625450Sbrendan sav->sav_count) == 0); 41635450Sbrendan for (i = 0; i < sav->sav_count; i++) 41645450Sbrendan nvlist_free(list[i]); 41655450Sbrendan kmem_free(list, sav->sav_count * sizeof (void *)); 41662082Seschrock } 41672082Seschrock 41685450Sbrendan spa_sync_nvlist(spa, sav->sav_object, nvroot, tx); 41692926Sek110237 nvlist_free(nvroot); 41702082Seschrock 41715450Sbrendan sav->sav_sync = B_FALSE; 41722082Seschrock } 41732082Seschrock 41742082Seschrock static void 4175789Sahrens spa_sync_config_object(spa_t *spa, dmu_tx_t *tx) 4176789Sahrens { 4177789Sahrens nvlist_t *config; 4178789Sahrens 
41797754SJeff.Bonwick@Sun.COM if (list_is_empty(&spa->spa_config_dirty_list)) 4180789Sahrens return; 4181789Sahrens 41827754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 41837754SJeff.Bonwick@Sun.COM 41847754SJeff.Bonwick@Sun.COM config = spa_config_generate(spa, spa->spa_root_vdev, 41857754SJeff.Bonwick@Sun.COM dmu_tx_get_txg(tx), B_FALSE); 41867754SJeff.Bonwick@Sun.COM 41877754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_STATE, FTAG); 4188789Sahrens 41891635Sbonwick if (spa->spa_config_syncing) 41901635Sbonwick nvlist_free(spa->spa_config_syncing); 41911635Sbonwick spa->spa_config_syncing = config; 4192789Sahrens 41932082Seschrock spa_sync_nvlist(spa, spa->spa_config_object, config, tx); 4194789Sahrens } 4195789Sahrens 41965094Slling /* 41975094Slling * Set zpool properties. 41985094Slling */ 41993912Slling static void 42004543Smarks spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 42013912Slling { 42023912Slling spa_t *spa = arg1; 42035094Slling objset_t *mos = spa->spa_meta_objset; 42043912Slling nvlist_t *nvp = arg2; 42055094Slling nvpair_t *elem; 42064451Seschrock uint64_t intval; 42076643Seschrock char *strval; 42085094Slling zpool_prop_t prop; 42095094Slling const char *propname; 42105094Slling zprop_type_t proptype; 42115094Slling 42127754SJeff.Bonwick@Sun.COM mutex_enter(&spa->spa_props_lock); 42137754SJeff.Bonwick@Sun.COM 42145094Slling elem = NULL; 42155094Slling while ((elem = nvlist_next_nvpair(nvp, elem))) { 42165094Slling switch (prop = zpool_name_to_prop(nvpair_name(elem))) { 42175094Slling case ZPOOL_PROP_VERSION: 42185094Slling /* 42195094Slling * Only set version for non-zpool-creation cases 42205094Slling * (set/import). spa_create() needs special care 42215094Slling * for version setting. 
42225094Slling */ 42235094Slling if (tx->tx_txg != TXG_INITIAL) { 42245094Slling VERIFY(nvpair_value_uint64(elem, 42255094Slling &intval) == 0); 42265094Slling ASSERT(intval <= SPA_VERSION); 42275094Slling ASSERT(intval >= spa_version(spa)); 42285094Slling spa->spa_uberblock.ub_version = intval; 42295094Slling vdev_config_dirty(spa->spa_root_vdev); 42305094Slling } 42315094Slling break; 42325094Slling 42335094Slling case ZPOOL_PROP_ALTROOT: 42345094Slling /* 42355094Slling * 'altroot' is a non-persistent property. It should 42365094Slling * have been set temporarily at creation or import time. 42375094Slling */ 42385094Slling ASSERT(spa->spa_root != NULL); 42395094Slling break; 42405094Slling 42415363Seschrock case ZPOOL_PROP_CACHEFILE: 42425094Slling /* 42438525SEric.Schrock@Sun.COM * 'cachefile' is also a non-persisitent property. 42445094Slling */ 42454543Smarks break; 42465094Slling default: 42475094Slling /* 42485094Slling * Set pool property values in the poolprops mos object. 42495094Slling */ 42505094Slling if (spa->spa_pool_props_object == 0) { 42515094Slling objset_t *mos = spa->spa_meta_objset; 42525094Slling 42535094Slling VERIFY((spa->spa_pool_props_object = 42545094Slling zap_create(mos, DMU_OT_POOL_PROPS, 42555094Slling DMU_OT_NONE, 0, tx)) > 0); 42565094Slling 42575094Slling VERIFY(zap_update(mos, 42585094Slling DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS, 42595094Slling 8, 1, &spa->spa_pool_props_object, tx) 42605094Slling == 0); 42615094Slling } 42625094Slling 42635094Slling /* normalize the property name */ 42645094Slling propname = zpool_prop_to_name(prop); 42655094Slling proptype = zpool_prop_get_type(prop); 42665094Slling 42675094Slling if (nvpair_type(elem) == DATA_TYPE_STRING) { 42685094Slling ASSERT(proptype == PROP_TYPE_STRING); 42695094Slling VERIFY(nvpair_value_string(elem, &strval) == 0); 42705094Slling VERIFY(zap_update(mos, 42715094Slling spa->spa_pool_props_object, propname, 42725094Slling 1, strlen(strval) + 1, strval, tx) == 0); 
42735094Slling 42745094Slling } else if (nvpair_type(elem) == DATA_TYPE_UINT64) { 42755094Slling VERIFY(nvpair_value_uint64(elem, &intval) == 0); 42765094Slling 42775094Slling if (proptype == PROP_TYPE_INDEX) { 42785094Slling const char *unused; 42795094Slling VERIFY(zpool_prop_index_to_string( 42805094Slling prop, intval, &unused) == 0); 42815094Slling } 42825094Slling VERIFY(zap_update(mos, 42835094Slling spa->spa_pool_props_object, propname, 42845094Slling 8, 1, &intval, tx) == 0); 42855094Slling } else { 42865094Slling ASSERT(0); /* not allowed */ 42875094Slling } 42885094Slling 42895329Sgw25295 switch (prop) { 42905329Sgw25295 case ZPOOL_PROP_DELEGATION: 42915094Slling spa->spa_delegation = intval; 42925329Sgw25295 break; 42935329Sgw25295 case ZPOOL_PROP_BOOTFS: 42945094Slling spa->spa_bootfs = intval; 42955329Sgw25295 break; 42965329Sgw25295 case ZPOOL_PROP_FAILUREMODE: 42975329Sgw25295 spa->spa_failmode = intval; 42985329Sgw25295 break; 42999816SGeorge.Wilson@Sun.COM case ZPOOL_PROP_AUTOEXPAND: 43009816SGeorge.Wilson@Sun.COM spa->spa_autoexpand = intval; 43019816SGeorge.Wilson@Sun.COM spa_async_request(spa, SPA_ASYNC_AUTOEXPAND); 43029816SGeorge.Wilson@Sun.COM break; 43035329Sgw25295 default: 43045329Sgw25295 break; 43055329Sgw25295 } 43063912Slling } 43075094Slling 43085094Slling /* log internal history if this is not a zpool create */ 43095094Slling if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY && 43105094Slling tx->tx_txg != TXG_INITIAL) { 43115094Slling spa_history_internal_log(LOG_POOL_PROPSET, 43125094Slling spa, tx, cr, "%s %lld %s", 43137754SJeff.Bonwick@Sun.COM nvpair_name(elem), intval, spa_name(spa)); 43145094Slling } 43153912Slling } 43167754SJeff.Bonwick@Sun.COM 43177754SJeff.Bonwick@Sun.COM mutex_exit(&spa->spa_props_lock); 43183912Slling } 43193912Slling 4320789Sahrens /* 4321789Sahrens * Sync the specified transaction group. New blocks may be dirtied as 4322789Sahrens * part of the process, so we iterate until it converges. 
4323789Sahrens */ 4324789Sahrens void 4325789Sahrens spa_sync(spa_t *spa, uint64_t txg) 4326789Sahrens { 4327789Sahrens dsl_pool_t *dp = spa->spa_dsl_pool; 4328789Sahrens objset_t *mos = spa->spa_meta_objset; 4329789Sahrens bplist_t *bpl = &spa->spa_sync_bplist; 43301635Sbonwick vdev_t *rvd = spa->spa_root_vdev; 4331789Sahrens vdev_t *vd; 4332789Sahrens dmu_tx_t *tx; 4333789Sahrens int dirty_vdevs; 43347754SJeff.Bonwick@Sun.COM int error; 4335789Sahrens 4336789Sahrens /* 4337789Sahrens * Lock out configuration changes. 4338789Sahrens */ 43397754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 4340789Sahrens 4341789Sahrens spa->spa_syncing_txg = txg; 4342789Sahrens spa->spa_sync_pass = 0; 4343789Sahrens 43447754SJeff.Bonwick@Sun.COM /* 43457754SJeff.Bonwick@Sun.COM * If there are any pending vdev state changes, convert them 43467754SJeff.Bonwick@Sun.COM * into config changes that go out with this transaction group. 43477754SJeff.Bonwick@Sun.COM */ 43487754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 43498241SJeff.Bonwick@Sun.COM while (list_head(&spa->spa_state_dirty_list) != NULL) { 43508241SJeff.Bonwick@Sun.COM /* 43518241SJeff.Bonwick@Sun.COM * We need the write lock here because, for aux vdevs, 43528241SJeff.Bonwick@Sun.COM * calling vdev_config_dirty() modifies sav_config. 43538241SJeff.Bonwick@Sun.COM * This is ugly and will become unnecessary when we 43548241SJeff.Bonwick@Sun.COM * eliminate the aux vdev wart by integrating all vdevs 43558241SJeff.Bonwick@Sun.COM * into the root vdev tree. 
43568241SJeff.Bonwick@Sun.COM */ 43578241SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); 43588241SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_WRITER); 43598241SJeff.Bonwick@Sun.COM while ((vd = list_head(&spa->spa_state_dirty_list)) != NULL) { 43608241SJeff.Bonwick@Sun.COM vdev_state_clean(vd); 43618241SJeff.Bonwick@Sun.COM vdev_config_dirty(vd); 43628241SJeff.Bonwick@Sun.COM } 43638241SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); 43648241SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER); 43657754SJeff.Bonwick@Sun.COM } 43667754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_STATE, FTAG); 43677754SJeff.Bonwick@Sun.COM 43681544Seschrock VERIFY(0 == bplist_open(bpl, mos, spa->spa_sync_bplist_obj)); 4369789Sahrens 43702082Seschrock tx = dmu_tx_create_assigned(dp, txg); 43712082Seschrock 43722082Seschrock /* 43734577Sahrens * If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg, 43742082Seschrock * set spa_deflate if we have no raid-z vdevs. 
43752082Seschrock */ 43764577Sahrens if (spa->spa_ubsync.ub_version < SPA_VERSION_RAIDZ_DEFLATE && 43774577Sahrens spa->spa_uberblock.ub_version >= SPA_VERSION_RAIDZ_DEFLATE) { 43782082Seschrock int i; 43792082Seschrock 43802082Seschrock for (i = 0; i < rvd->vdev_children; i++) { 43812082Seschrock vd = rvd->vdev_child[i]; 43822082Seschrock if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE) 43832082Seschrock break; 43842082Seschrock } 43852082Seschrock if (i == rvd->vdev_children) { 43862082Seschrock spa->spa_deflate = TRUE; 43872082Seschrock VERIFY(0 == zap_add(spa->spa_meta_objset, 43882082Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 43892082Seschrock sizeof (uint64_t), 1, &spa->spa_deflate, tx)); 43902082Seschrock } 43912082Seschrock } 43922082Seschrock 43937046Sahrens if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN && 43947046Sahrens spa->spa_uberblock.ub_version >= SPA_VERSION_ORIGIN) { 43957046Sahrens dsl_pool_create_origin(dp, tx); 43967046Sahrens 43977046Sahrens /* Keeping the origin open increases spa_minref */ 43987046Sahrens spa->spa_minref += 3; 43997046Sahrens } 44007046Sahrens 44017046Sahrens if (spa->spa_ubsync.ub_version < SPA_VERSION_NEXT_CLONES && 44027046Sahrens spa->spa_uberblock.ub_version >= SPA_VERSION_NEXT_CLONES) { 44037046Sahrens dsl_pool_upgrade_clones(dp, tx); 44047046Sahrens } 44057046Sahrens 4406789Sahrens /* 4407789Sahrens * If anything has changed in this txg, push the deferred frees 4408789Sahrens * from the previous txg. If not, leave them alone so that we 4409789Sahrens * don't generate work on an otherwise idle system. 4410789Sahrens */ 4411789Sahrens if (!txg_list_empty(&dp->dp_dirty_datasets, txg) || 44122329Sek110237 !txg_list_empty(&dp->dp_dirty_dirs, txg) || 44132329Sek110237 !txg_list_empty(&dp->dp_sync_tasks, txg)) 4414789Sahrens spa_sync_deferred_frees(spa, txg); 4415789Sahrens 4416789Sahrens /* 4417789Sahrens * Iterate to convergence. 
4418789Sahrens */ 4419789Sahrens do { 4420789Sahrens spa->spa_sync_pass++; 4421789Sahrens 4422789Sahrens spa_sync_config_object(spa, tx); 44235450Sbrendan spa_sync_aux_dev(spa, &spa->spa_spares, tx, 44245450Sbrendan ZPOOL_CONFIG_SPARES, DMU_POOL_SPARES); 44255450Sbrendan spa_sync_aux_dev(spa, &spa->spa_l2cache, tx, 44265450Sbrendan ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE); 44271544Seschrock spa_errlog_sync(spa, txg); 4428789Sahrens dsl_pool_sync(dp, txg); 4429789Sahrens 4430789Sahrens dirty_vdevs = 0; 4431789Sahrens while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg)) { 4432789Sahrens vdev_sync(vd, txg); 4433789Sahrens dirty_vdevs++; 4434789Sahrens } 4435789Sahrens 4436789Sahrens bplist_sync(bpl, tx); 4437789Sahrens } while (dirty_vdevs); 4438789Sahrens 4439789Sahrens bplist_close(bpl); 4440789Sahrens 4441789Sahrens dprintf("txg %llu passes %d\n", txg, spa->spa_sync_pass); 4442789Sahrens 4443789Sahrens /* 4444789Sahrens * Rewrite the vdev configuration (which includes the uberblock) 4445789Sahrens * to commit the transaction group. 44461635Sbonwick * 44475688Sbonwick * If there are no dirty vdevs, we sync the uberblock to a few 44485688Sbonwick * random top-level vdevs that are known to be visible in the 44497754SJeff.Bonwick@Sun.COM * config cache (see spa_vdev_add() for a complete description). 44507754SJeff.Bonwick@Sun.COM * If there *are* dirty vdevs, sync the uberblock to all vdevs. 4451789Sahrens */ 44527754SJeff.Bonwick@Sun.COM for (;;) { 44537754SJeff.Bonwick@Sun.COM /* 44547754SJeff.Bonwick@Sun.COM * We hold SCL_STATE to prevent vdev open/close/etc. 44557754SJeff.Bonwick@Sun.COM * while we're attempting to write the vdev labels. 
44567754SJeff.Bonwick@Sun.COM */ 44577754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 44587754SJeff.Bonwick@Sun.COM 44597754SJeff.Bonwick@Sun.COM if (list_is_empty(&spa->spa_config_dirty_list)) { 44607754SJeff.Bonwick@Sun.COM vdev_t *svd[SPA_DVAS_PER_BP]; 44617754SJeff.Bonwick@Sun.COM int svdcount = 0; 44627754SJeff.Bonwick@Sun.COM int children = rvd->vdev_children; 44637754SJeff.Bonwick@Sun.COM int c0 = spa_get_random(children); 44649816SGeorge.Wilson@Sun.COM 44659816SGeorge.Wilson@Sun.COM for (int c = 0; c < children; c++) { 44667754SJeff.Bonwick@Sun.COM vd = rvd->vdev_child[(c0 + c) % children]; 44677754SJeff.Bonwick@Sun.COM if (vd->vdev_ms_array == 0 || vd->vdev_islog) 44687754SJeff.Bonwick@Sun.COM continue; 44697754SJeff.Bonwick@Sun.COM svd[svdcount++] = vd; 44707754SJeff.Bonwick@Sun.COM if (svdcount == SPA_DVAS_PER_BP) 44717754SJeff.Bonwick@Sun.COM break; 44727754SJeff.Bonwick@Sun.COM } 44739725SEric.Schrock@Sun.COM error = vdev_config_sync(svd, svdcount, txg, B_FALSE); 44749725SEric.Schrock@Sun.COM if (error != 0) 44759725SEric.Schrock@Sun.COM error = vdev_config_sync(svd, svdcount, txg, 44769725SEric.Schrock@Sun.COM B_TRUE); 44777754SJeff.Bonwick@Sun.COM } else { 44787754SJeff.Bonwick@Sun.COM error = vdev_config_sync(rvd->vdev_child, 44799725SEric.Schrock@Sun.COM rvd->vdev_children, txg, B_FALSE); 44809725SEric.Schrock@Sun.COM if (error != 0) 44819725SEric.Schrock@Sun.COM error = vdev_config_sync(rvd->vdev_child, 44829725SEric.Schrock@Sun.COM rvd->vdev_children, txg, B_TRUE); 44831635Sbonwick } 44847754SJeff.Bonwick@Sun.COM 44857754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_STATE, FTAG); 44867754SJeff.Bonwick@Sun.COM 44877754SJeff.Bonwick@Sun.COM if (error == 0) 44887754SJeff.Bonwick@Sun.COM break; 44897754SJeff.Bonwick@Sun.COM zio_suspend(spa, NULL); 44907754SJeff.Bonwick@Sun.COM zio_resume_wait(spa); 44911635Sbonwick } 44922082Seschrock dmu_tx_commit(tx); 44932082Seschrock 44941635Sbonwick /* 44951635Sbonwick * Clear the 
dirty config list. 44961635Sbonwick */ 44977754SJeff.Bonwick@Sun.COM while ((vd = list_head(&spa->spa_config_dirty_list)) != NULL) 44981635Sbonwick vdev_config_clean(vd); 44991635Sbonwick 45001635Sbonwick /* 45011635Sbonwick * Now that the new config has synced transactionally, 45021635Sbonwick * let it become visible to the config cache. 45031635Sbonwick */ 45041635Sbonwick if (spa->spa_config_syncing != NULL) { 45051635Sbonwick spa_config_set(spa, spa->spa_config_syncing); 45061635Sbonwick spa->spa_config_txg = txg; 45071635Sbonwick spa->spa_config_syncing = NULL; 45081635Sbonwick } 4509789Sahrens 4510789Sahrens spa->spa_ubsync = spa->spa_uberblock; 4511789Sahrens 4512789Sahrens /* 4513789Sahrens * Clean up the ZIL records for the synced txg. 4514789Sahrens */ 4515789Sahrens dsl_pool_zil_clean(dp); 4516789Sahrens 4517789Sahrens /* 4518789Sahrens * Update usable space statistics. 4519789Sahrens */ 4520789Sahrens while (vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg))) 4521789Sahrens vdev_sync_done(vd, txg); 4522789Sahrens 4523789Sahrens /* 4524789Sahrens * It had better be the case that we didn't dirty anything 45252082Seschrock * since vdev_config_sync(). 4526789Sahrens */ 4527789Sahrens ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg)); 4528789Sahrens ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg)); 4529789Sahrens ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg)); 4530789Sahrens ASSERT(bpl->bpl_queue == NULL); 4531789Sahrens 45327754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_CONFIG, FTAG); 45331544Seschrock 45341544Seschrock /* 45351544Seschrock * If any async tasks have been requested, kick them off. 45361544Seschrock */ 45371544Seschrock spa_async_dispatch(spa); 4538789Sahrens } 4539789Sahrens 4540789Sahrens /* 4541789Sahrens * Sync all pools. We don't want to hold the namespace lock across these 4542789Sahrens * operations, so we take a reference on the spa_t and drop the lock during the 4543789Sahrens * sync. 
 */
void
spa_sync_allpools(void)
{
	spa_t *spa = NULL;
	mutex_enter(&spa_namespace_lock);
	while ((spa = spa_next(spa)) != NULL) {
		/* Skip pools that aren't active or whose I/O is suspended. */
		if (spa_state(spa) != POOL_STATE_ACTIVE || spa_suspended(spa))
			continue;
		/*
		 * Hold a reference so the spa_t can't disappear while we
		 * drop the namespace lock for the (potentially long) sync.
		 */
		spa_open_ref(spa, FTAG);
		mutex_exit(&spa_namespace_lock);
		txg_wait_synced(spa_get_dsl(spa), 0);
		mutex_enter(&spa_namespace_lock);
		spa_close(spa, FTAG);
	}
	mutex_exit(&spa_namespace_lock);
}

/*
 * ==========================================================================
 * Miscellaneous routines
 * ==========================================================================
 */

/*
 * Remove all pools in the system.
 */
void
spa_evict_all(void)
{
	spa_t *spa;

	/*
	 * Remove all cached state.  All pools should be closed now,
	 * so every spa in the AVL tree should be unreferenced.
	 */
	mutex_enter(&spa_namespace_lock);
	while ((spa = spa_next(NULL)) != NULL) {
		/*
		 * Stop async tasks.  The async thread may need to detach
		 * a device that's been replaced, which requires grabbing
		 * spa_namespace_lock, so we must drop it here.
		 */
		spa_open_ref(spa, FTAG);
		mutex_exit(&spa_namespace_lock);
		spa_async_suspend(spa);
		mutex_enter(&spa_namespace_lock);
		spa_close(spa, FTAG);

		if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
			spa_unload(spa);
			spa_deactivate(spa);
		}
		spa_remove(spa);
	}
	mutex_exit(&spa_namespace_lock);
}

/*
 * Look up a vdev in the given pool by guid.  The search starts at the root
 * vdev tree; if 'aux' is set, the L2ARC devices and then the spares are
 * also searched.  Returns NULL if no matching vdev is found.
 */
vdev_t *
spa_lookup_by_guid(spa_t *spa, uint64_t guid, boolean_t aux)
{
	vdev_t *vd;
	int i;

	if ((vd = vdev_lookup_by_guid(spa->spa_root_vdev, guid)) != NULL)
		return (vd);

	if (aux) {
		for (i = 0; i < spa->spa_l2cache.sav_count; i++) {
			vd = spa->spa_l2cache.sav_vdevs[i];
			if (vd->vdev_guid == guid)
				return (vd);
		}

		for (i = 0; i < spa->spa_spares.sav_count; i++) {
			vd = spa->spa_spares.sav_vdevs[i];
			if (vd->vdev_guid == guid)
				return (vd);
		}
	}

	return (NULL);
}

/*
 * Upgrade the on-disk SPA version to the one given, dirty the root vdev
 * config, and wait for the change to reach stable storage.
 */
void
spa_upgrade(spa_t *spa, uint64_t version)
{
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	/*
	 * This should only be called for a non-faulted pool, and since a
	 * future version would result in an unopenable pool, this shouldn't be
	 * possible.
	 */
	ASSERT(spa->spa_uberblock.ub_version <= SPA_VERSION);
	ASSERT(version >= spa->spa_uberblock.ub_version);

	spa->spa_uberblock.ub_version = version;
	vdev_config_dirty(spa->spa_root_vdev);

	spa_config_exit(spa, SCL_ALL, FTAG);

	/* Push the new version out to disk before returning. */
	txg_wait_synced(spa_get_dsl(spa), 0);
}

/*
 * Return B_TRUE if the pool has a spare (configured or pending) with the
 * given guid.
 */
boolean_t
spa_has_spare(spa_t *spa, uint64_t guid)
{
	int i;
	uint64_t spareguid;
	spa_aux_vdev_t *sav = &spa->spa_spares;

	for (i = 0; i < sav->sav_count; i++)
		if (sav->sav_vdevs[i]->vdev_guid == guid)
			return (B_TRUE);

	/* Also check spares that are pending (not yet committed to disk). */
	for (i = 0; i < sav->sav_npending; i++) {
		if (nvlist_lookup_uint64(sav->sav_pending[i], ZPOOL_CONFIG_GUID,
		    &spareguid) == 0 && spareguid == guid)
			return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * Check if a pool has an active shared spare device.
46717214Slling * Note: reference count of an active spare is 2, as a spare and as a replace 46727214Slling */ 46737214Slling static boolean_t 46747214Slling spa_has_active_shared_spare(spa_t *spa) 46757214Slling { 46767214Slling int i, refcnt; 46777214Slling uint64_t pool; 46787214Slling spa_aux_vdev_t *sav = &spa->spa_spares; 46797214Slling 46807214Slling for (i = 0; i < sav->sav_count; i++) { 46817214Slling if (spa_spare_exists(sav->sav_vdevs[i]->vdev_guid, &pool, 46827214Slling &refcnt) && pool != 0ULL && pool == spa_guid(spa) && 46837214Slling refcnt > 2) 46847214Slling return (B_TRUE); 46857214Slling } 46867214Slling 46877214Slling return (B_FALSE); 46887214Slling } 46897214Slling 46907214Slling /* 46914451Seschrock * Post a sysevent corresponding to the given event. The 'name' must be one of 46924451Seschrock * the event definitions in sys/sysevent/eventdefs.h. The payload will be 46934451Seschrock * filled in from the spa and (optionally) the vdev. This doesn't do anything 46944451Seschrock * in the userland libzpool, as we don't want consumers to misinterpret ztest 46954451Seschrock * or zdb as real changes. 
 */
void
spa_event_notify(spa_t *spa, vdev_t *vd, const char *name)
{
#ifdef _KERNEL
	sysevent_t *ev;
	sysevent_attr_list_t *attr = NULL;
	sysevent_value_t value;
	sysevent_id_t eid;

	/* SE_SLEEP: allocation may block; callers must be in sleepable context. */
	ev = sysevent_alloc(EC_ZFS, (char *)name, SUNW_KERN_PUB "zfs",
	    SE_SLEEP);

	/* Always attach the pool name ... */
	value.value_type = SE_DATA_TYPE_STRING;
	value.value.sv_string = spa_name(spa);
	if (sysevent_add_attr(&attr, ZFS_EV_POOL_NAME, &value, SE_SLEEP) != 0)
		goto done;

	/* ... and the pool guid. */
	value.value_type = SE_DATA_TYPE_UINT64;
	value.value.sv_uint64 = spa_guid(spa);
	if (sysevent_add_attr(&attr, ZFS_EV_POOL_GUID, &value, SE_SLEEP) != 0)
		goto done;

	/* If a vdev was supplied, attach its guid and (if set) its path. */
	if (vd) {
		value.value_type = SE_DATA_TYPE_UINT64;
		value.value.sv_uint64 = vd->vdev_guid;
		if (sysevent_add_attr(&attr, ZFS_EV_VDEV_GUID, &value,
		    SE_SLEEP) != 0)
			goto done;

		if (vd->vdev_path) {
			value.value_type = SE_DATA_TYPE_STRING;
			value.value.sv_string = vd->vdev_path;
			if (sysevent_add_attr(&attr, ZFS_EV_VDEV_PATH,
			    &value, SE_SLEEP) != 0)
				goto done;
		}
	}

	if (sysevent_attach_attributes(ev, attr) != 0)
		goto done;
	/*
	 * Once attached, the attribute list is owned by (and freed with)
	 * the event; clear 'attr' so the cleanup path doesn't double-free.
	 */
	attr = NULL;

	(void) log_sysevent(ev, SE_SLEEP, &eid);

done:
	/* Free the attribute list only if it was never attached to 'ev'. */
	if (attr)
		sysevent_free_attr(attr);
	sysevent_free(ev);
#endif
}