/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * This file contains all the routines used when modifying on-disk SPA state.
 * This includes opening, importing, destroying, exporting a pool, and syncing a
 * pool.
 */

#include <sys/zfs_context.h>
#include <sys/fm/fs/zfs.h>
#include <sys/spa_impl.h>
#include <sys/zio.h>
#include <sys/zio_checksum.h>
#include <sys/zio_compress.h>
#include <sys/dmu.h>
#include <sys/dmu_tx.h>
#include <sys/zap.h>
#include <sys/zil.h>
#include <sys/vdev_impl.h>
#include <sys/metaslab.h>
#include <sys/metaslab_impl.h>
#include <sys/uberblock_impl.h>
#include <sys/txg.h>
#include <sys/avl.h>
#include <sys/dmu_traverse.h>
#include <sys/dmu_objset.h>
#include <sys/unique.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_synctask.h>
#include <sys/fs/zfs.h>
#include <sys/arc.h>
#include <sys/callb.h>
#include <sys/systeminfo.h>
#include <sys/sunddi.h>
#include <sys/spa_boot.h>
#include <sys/zfs_ioctl.h>

#ifdef _KERNEL
#include <sys/zone.h>
#endif	/* _KERNEL */

#include "zfs_prop.h"
#include "zfs_comutil.h"

/*
 * How the thread count of a zio taskq is determined: either a fixed
 * number of threads, a percentage of online CPUs, or filled in from the
 * zio_taskq_tune_* tunables below.
 */
enum zti_modes {
	zti_mode_fixed,			/* value is # of threads (min 1) */
	zti_mode_online_percent,	/* value is % of online CPUs */
	zti_mode_tune,			/* fill from zio_taskq_tune_* */
	zti_nmodes
};

#define	ZTI_THREAD_FIX(n)	{ zti_mode_fixed, (n) }
#define	ZTI_THREAD_PCT(n)	{ zti_mode_online_percent, (n) }
#define	ZTI_THREAD_TUNE		{ zti_mode_tune, 0 }

#define	ZTI_THREAD_ONE		ZTI_THREAD_FIX(1)

/*
 * Per-zio-type taskq description: base name plus one (mode, value) pair
 * for each taskq type (issue/intr).
 */
typedef struct zio_taskq_info {
	const char *zti_name;
	struct {
		enum zti_modes zti_mode;
		uint_t zti_value;
	} zti_nthreads[ZIO_TASKQ_TYPES];
} zio_taskq_info_t;

/* Suffixes appended to zti_name when the taskqs are created. */
static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = {
	"issue", "intr"
};

/* Thread-count policy for each zio type's issue/intr taskqs. */
const zio_taskq_info_t zio_taskqs[ZIO_TYPES] = {
	/*			ISSUE			INTR		*/
	{ "spa_zio_null",	{ ZTI_THREAD_ONE,	ZTI_THREAD_ONE } },
	{ "spa_zio_read",	{ ZTI_THREAD_FIX(8),	ZTI_THREAD_TUNE } },
	{ "spa_zio_write",	{ ZTI_THREAD_TUNE,	ZTI_THREAD_FIX(8) } },
	{ "spa_zio_free",	{ ZTI_THREAD_ONE,	ZTI_THREAD_ONE } },
	{ "spa_zio_claim",	{ ZTI_THREAD_ONE,	ZTI_THREAD_ONE } },
	{ "spa_zio_ioctl",	{ ZTI_THREAD_ONE,	ZTI_THREAD_ONE } },
};

/* Tunables consulted for taskq entries marked ZTI_THREAD_TUNE. */
enum zti_modes zio_taskq_tune_mode = zti_mode_online_percent;
uint_t zio_taskq_tune_value = 80;	/* #threads = 80% of # online CPUs */

/* Forward declarations for routines defined later in this file. */
static void spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx);
static boolean_t spa_has_active_shared_spare(spa_t *spa);

/*
 * ==========================================================================
 * SPA properties routines
 * ==========================================================================
 */

/*
 * Add a (source=src, propname=propval) list to an nvlist.
 * The nested "propval" nvlist carries the value (string or uint64)
 * plus the source of the value, keyed under the property's name.
 */
static void
spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval,
    uint64_t intval, zprop_source_t src)
{
	const char *propname = zpool_prop_to_name(prop);
	nvlist_t *propval;

	VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0);

	/* strval takes precedence; intval is used only when strval is NULL */
	if (strval != NULL)
		VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0);
	else
		VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0);

	/* nvlist_add_nvlist copies propval, so we can free our local copy */
	VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0);
	nvlist_free(propval);
}

/*
 * Get property values from the spa configuration.
 */
static void
spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
{
	uint64_t size;
	uint64_t used;
	uint64_t cap, version;
	zprop_source_t src = ZPROP_SRC_NONE;
	spa_config_dirent_t *dp;

	/* Caller must hold spa_props_lock for the duration. */
	ASSERT(MUTEX_HELD(&spa->spa_props_lock));

	/*
	 * Vdev-derived properties (size, used, capacity, health, version)
	 * are only available once the root vdev exists.
	 */
	if (spa->spa_root_vdev != NULL) {
		size = spa_get_space(spa);
		used = spa_get_alloc(spa);
		spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src);
		spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src);
		spa_prop_add_list(*nvp, ZPOOL_PROP_USED, NULL, used, src);
		spa_prop_add_list(*nvp, ZPOOL_PROP_AVAILABLE, NULL,
		    size - used, src);

		/* capacity as a whole percentage; guard against size == 0 */
		cap = (size == 0) ? 0 : (used * 100 / size);
		spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src);

		spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL,
		    spa->spa_root_vdev->vdev_state, src);

		/*
		 * Report the version source as DEFAULT only when it matches
		 * the zpool default; anything else was set locally.
		 */
		version = spa_version(spa);
		if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION))
			src = ZPROP_SRC_DEFAULT;
		else
			src = ZPROP_SRC_LOCAL;
		spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src);
	}

	spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src);

	if (spa->spa_root != NULL)
		spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root,
		    0, ZPROP_SRC_LOCAL);

	/*
	 * Report the cachefile property only when it differs from the
	 * default cache path: NULL scd_path means "none".
	 */
	if ((dp = list_head(&spa->spa_config_list)) != NULL) {
		if (dp->scd_path == NULL) {
			spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE,
			    "none", 0, ZPROP_SRC_LOCAL);
		} else if (strcmp(dp->scd_path, spa_config_path) != 0) {
			spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE,
			    dp->scd_path, 0, ZPROP_SRC_LOCAL);
		}
	}
}

/*
 * Get zpool property values.
1975094Slling */ 1985094Slling int 1995094Slling spa_prop_get(spa_t *spa, nvlist_t **nvp) 2005094Slling { 2015094Slling zap_cursor_t zc; 2025094Slling zap_attribute_t za; 2035094Slling objset_t *mos = spa->spa_meta_objset; 2045094Slling int err; 2055094Slling 2065949Slling VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); 2075094Slling 2087754SJeff.Bonwick@Sun.COM mutex_enter(&spa->spa_props_lock); 2097754SJeff.Bonwick@Sun.COM 2105094Slling /* 2115094Slling * Get properties from the spa config. 2125094Slling */ 2135949Slling spa_prop_get_config(spa, nvp); 2145094Slling 2155094Slling /* If no pool property object, no more prop to get. */ 2165094Slling if (spa->spa_pool_props_object == 0) { 2175094Slling mutex_exit(&spa->spa_props_lock); 2185094Slling return (0); 2195094Slling } 2205094Slling 2215094Slling /* 2225094Slling * Get properties from the MOS pool property object. 2235094Slling */ 2245094Slling for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object); 2255094Slling (err = zap_cursor_retrieve(&zc, &za)) == 0; 2265094Slling zap_cursor_advance(&zc)) { 2275094Slling uint64_t intval = 0; 2285094Slling char *strval = NULL; 2295094Slling zprop_source_t src = ZPROP_SRC_DEFAULT; 2305094Slling zpool_prop_t prop; 2315094Slling 2325094Slling if ((prop = zpool_name_to_prop(za.za_name)) == ZPROP_INVAL) 2335094Slling continue; 2345094Slling 2355094Slling switch (za.za_integer_length) { 2365094Slling case 8: 2375094Slling /* integer property */ 2385094Slling if (za.za_first_integer != 2395094Slling zpool_prop_default_numeric(prop)) 2405094Slling src = ZPROP_SRC_LOCAL; 2415094Slling 2425094Slling if (prop == ZPOOL_PROP_BOOTFS) { 2435094Slling dsl_pool_t *dp; 2445094Slling dsl_dataset_t *ds = NULL; 2455094Slling 2465094Slling dp = spa_get_dsl(spa); 2475094Slling rw_enter(&dp->dp_config_rwlock, RW_READER); 2486689Smaybee if (err = dsl_dataset_hold_obj(dp, 2496689Smaybee za.za_first_integer, FTAG, &ds)) { 2505094Slling rw_exit(&dp->dp_config_rwlock); 2515094Slling 
break; 2525094Slling } 2535094Slling 2545094Slling strval = kmem_alloc( 2555094Slling MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, 2565094Slling KM_SLEEP); 2575094Slling dsl_dataset_name(ds, strval); 2586689Smaybee dsl_dataset_rele(ds, FTAG); 2595094Slling rw_exit(&dp->dp_config_rwlock); 2605094Slling } else { 2615094Slling strval = NULL; 2625094Slling intval = za.za_first_integer; 2635094Slling } 2645094Slling 2655949Slling spa_prop_add_list(*nvp, prop, strval, intval, src); 2665094Slling 2675094Slling if (strval != NULL) 2685094Slling kmem_free(strval, 2695094Slling MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); 2705094Slling 2715094Slling break; 2725094Slling 2735094Slling case 1: 2745094Slling /* string property */ 2755094Slling strval = kmem_alloc(za.za_num_integers, KM_SLEEP); 2765094Slling err = zap_lookup(mos, spa->spa_pool_props_object, 2775094Slling za.za_name, 1, za.za_num_integers, strval); 2785094Slling if (err) { 2795094Slling kmem_free(strval, za.za_num_integers); 2805094Slling break; 2815094Slling } 2825949Slling spa_prop_add_list(*nvp, prop, strval, 0, src); 2835094Slling kmem_free(strval, za.za_num_integers); 2845094Slling break; 2855094Slling 2865094Slling default: 2875094Slling break; 2885094Slling } 2895094Slling } 2905094Slling zap_cursor_fini(&zc); 2915094Slling mutex_exit(&spa->spa_props_lock); 2925094Slling out: 2935094Slling if (err && err != ENOENT) { 2945094Slling nvlist_free(*nvp); 2955949Slling *nvp = NULL; 2965094Slling return (err); 2975094Slling } 2985094Slling 2995094Slling return (0); 3005094Slling } 3015094Slling 3025094Slling /* 3035094Slling * Validate the given pool properties nvlist and modify the list 3045094Slling * for the property values to be set. 
 */
static int
spa_prop_validate(spa_t *spa, nvlist_t *props)
{
	nvpair_t *elem;
	int error = 0, reset_bootfs = 0;
	uint64_t objnum;

	/* Walk every pair in the caller's proposed property list. */
	elem = NULL;
	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
		zpool_prop_t prop;
		char *propname, *strval;
		uint64_t intval;
		objset_t *os;
		char *slash;

		propname = nvpair_name(elem);

		if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL)
			return (EINVAL);

		switch (prop) {
		case ZPOOL_PROP_VERSION:
			/* Version may only move forward, up to SPA_VERSION. */
			error = nvpair_value_uint64(elem, &intval);
			if (!error &&
			    (intval < spa_version(spa) || intval > SPA_VERSION))
				error = EINVAL;
			break;

		case ZPOOL_PROP_DELEGATION:
		case ZPOOL_PROP_AUTOREPLACE:
		case ZPOOL_PROP_LISTSNAPS:
		case ZPOOL_PROP_AUTOEXPAND:
			/* Boolean properties: only 0 or 1 are accepted. */
			error = nvpair_value_uint64(elem, &intval);
			if (!error && intval > 1)
				error = EINVAL;
			break;

		case ZPOOL_PROP_BOOTFS:
			/*
			 * If the pool version is less than SPA_VERSION_BOOTFS,
			 * or the pool is still being created (version == 0),
			 * the bootfs property cannot be set.
			 */
			if (spa_version(spa) < SPA_VERSION_BOOTFS) {
				error = ENOTSUP;
				break;
			}

			/*
			 * Make sure the vdev config is bootable
			 */
			if (!vdev_is_bootable(spa->spa_root_vdev)) {
				error = ENOTSUP;
				break;
			}

			/*
			 * The string value will be replaced below by the
			 * dataset's object number (see end of function).
			 */
			reset_bootfs = 1;

			error = nvpair_value_string(elem, &strval);

			if (!error) {
				uint64_t compress;

				/* Empty string clears bootfs to its default. */
				if (strval == NULL || strval[0] == '\0') {
					objnum = zpool_prop_default_numeric(
					    ZPOOL_PROP_BOOTFS);
					break;
				}

				if (error = dmu_objset_hold(strval, FTAG, &os))
					break;

				/* Must be ZPL and not gzip compressed. */

				if (dmu_objset_type(os) != DMU_OST_ZFS) {
					error = ENOTSUP;
				} else if ((error = dsl_prop_get_integer(strval,
				    zfs_prop_to_name(ZFS_PROP_COMPRESSION),
				    &compress, NULL)) == 0 &&
				    !BOOTFS_COMPRESS_VALID(compress)) {
					error = ENOTSUP;
				} else {
					objnum = dmu_objset_id(os);
				}
				dmu_objset_rele(os, FTAG);
			}
			break;

		case ZPOOL_PROP_FAILUREMODE:
			error = nvpair_value_uint64(elem, &intval);
			if (!error && (intval < ZIO_FAILURE_MODE_WAIT ||
			    intval > ZIO_FAILURE_MODE_PANIC))
				error = EINVAL;

			/*
			 * This is a special case which only occurs when
			 * the pool has completely failed. This allows
			 * the user to change the in-core failmode property
			 * without syncing it out to disk (I/Os might
			 * currently be blocked). We do this by returning
			 * EIO to the caller (spa_prop_set) to trick it
			 * into thinking we encountered a property validation
			 * error.
			 */
			if (!error && spa_suspended(spa)) {
				spa->spa_failmode = intval;
				error = EIO;
			}
			break;

		case ZPOOL_PROP_CACHEFILE:
			if ((error = nvpair_value_string(elem, &strval)) != 0)
				break;

			/* Empty string and "none" are always acceptable. */
			if (strval[0] == '\0')
				break;

			if (strcmp(strval, "none") == 0)
				break;

			/* Otherwise the cachefile must be an absolute path. */
			if (strval[0] != '/') {
				error = EINVAL;
				break;
			}

			slash = strrchr(strval, '/');
			ASSERT(slash != NULL);

			/* Reject paths ending in "/", "/." or "/.." */
			if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
			    strcmp(slash, "/..") == 0)
				error = EINVAL;
			break;
		}

		if (error)
			break;
	}

	/*
	 * Replace the bootfs string in the nvlist with the dataset object
	 * number computed above, which is what actually gets persisted.
	 */
	if (!error && reset_bootfs) {
		error = nvlist_remove(props,
		    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING);

		if (!error) {
			error = nvlist_add_uint64(props,
			    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum);
		}
	}

	return (error);
}

/*
 * If 'nvp' carries a cachefile setting, record it at the head of the
 * pool's config dirent list and (optionally) request an async config
 * update so it is written out.
 */
void
spa_configfile_set(spa_t *spa, nvlist_t *nvp, boolean_t need_sync)
{
	char *cachefile;
	spa_config_dirent_t *dp;

	/* Nothing to do unless the list actually sets the cachefile prop. */
	if (nvlist_lookup_string(nvp, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE),
	    &cachefile) != 0)
		return;

	dp = kmem_alloc(sizeof (spa_config_dirent_t),
	    KM_SLEEP);

	/* "" means the default cache path; "none" disables caching. */
	if (cachefile[0] == '\0')
		dp->scd_path = spa_strdup(spa_config_path);
	else if (strcmp(cachefile, "none") == 0)
		dp->scd_path = NULL;
	else
		dp->scd_path = spa_strdup(cachefile);

	list_insert_head(&spa->spa_config_list, dp);
	if (need_sync)
		spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
}

/*
 * Set the requested pool properties.  Validates first; properties other
 * than cachefile/altroot are applied through a synced DSL task
 * (spa_sync_props).
 */
int
spa_prop_set(spa_t *spa, nvlist_t *nvp)
{
	int error;
	nvpair_t *elem;
	boolean_t need_sync = B_FALSE;
	zpool_prop_t prop;

	if ((error = spa_prop_validate(spa, nvp)) != 0)
		return (error);

	/*
	 * Decide whether anything in the list actually needs to be synced
	 * to disk: cachefile and altroot are handled entirely in-core.
	 */
	elem = NULL;
	while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) {
		if ((prop = zpool_name_to_prop(
		    nvpair_name(elem))) == ZPROP_INVAL)
			return (EINVAL);

		if (prop == ZPOOL_PROP_CACHEFILE || prop == ZPOOL_PROP_ALTROOT)
			continue;

		need_sync = B_TRUE;
		break;
	}

	if (need_sync)
		return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props,
		    spa, nvp, 3));
	else
		return (0);
}

/*
 * If the bootfs property value is dsobj, clear it.
 */
void
spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx)
{
	if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) {
		VERIFY(zap_remove(spa->spa_meta_objset,
		    spa->spa_pool_props_object,
		    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0);
		spa->spa_bootfs = 0;
	}
}

/*
 * ==========================================================================
 * SPA state manipulation (open/create/destroy/import/export)
 * ==========================================================================
 */

/*
 * AVL comparison function for the error lists: orders entries by a raw
 * byte comparison of their bookmarks, collapsing bcmp()'s result to the
 * canonical -1/0/1.
 */
static int
spa_error_entry_compare(const void *a, const void *b)
{
	spa_error_entry_t *sa = (spa_error_entry_t *)a;
	spa_error_entry_t *sb = (spa_error_entry_t *)b;
	int ret;

	ret = bcmp(&sa->se_bookmark, &sb->se_bookmark,
	    sizeof (zbookmark_t));

	if (ret < 0)
		return (-1);
	else if (ret > 0)
		return (1);
	else
		return (0);
}

/*
 * Utility function which retrieves
 * copies of the current logs and
 * re-initializes them in the process.
 */
void
spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub)
{
	/* Caller must hold spa_errlist_lock while the trees are swapped. */
	ASSERT(MUTEX_HELD(&spa->spa_errlist_lock));

	/* Hand the current trees to the caller by structure copy... */
	bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t));
	bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t));

	/* ...and start the spa over with fresh, empty trees. */
	avl_create(&spa->spa_errlist_scrub,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
	avl_create(&spa->spa_errlist_last,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
}

/*
 * Activate an uninitialized pool.
 */
static void
spa_activate(spa_t *spa, int mode)
{
	ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);

	spa->spa_state = POOL_STATE_ACTIVE;
	spa->spa_mode = mode;

	spa->spa_normal_class = metaslab_class_create(spa, zfs_metaslab_ops);
	spa->spa_log_class = metaslab_class_create(spa, zfs_metaslab_ops);

	/*
	 * Create the issue/intr taskq pair for each zio type, sizing each
	 * according to the policy table zio_taskqs[] above.
	 */
	for (int t = 0; t < ZIO_TYPES; t++) {
		const zio_taskq_info_t *ztip = &zio_taskqs[t];
		for (int q = 0; q < ZIO_TASKQ_TYPES; q++) {
			enum zti_modes mode = ztip->zti_nthreads[q].zti_mode;
			uint_t value = ztip->zti_nthreads[q].zti_value;
			char name[32];

			(void) snprintf(name, sizeof (name),
			    "%s_%s", ztip->zti_name, zio_taskq_types[q]);

			/*
			 * "tune" entries take their mode/value from the
			 * global tunables; guard against the tunable itself
			 * being (nonsensically) set to zti_mode_tune.
			 */
			if (mode == zti_mode_tune) {
				mode = zio_taskq_tune_mode;
				value = zio_taskq_tune_value;
				if (mode == zti_mode_tune)
					mode = zti_mode_online_percent;
			}

			switch (mode) {
			case zti_mode_fixed:
				ASSERT3U(value, >=, 1);
				/* clamp defensively on non-DEBUG kernels */
				value = MAX(value, 1);

				spa->spa_zio_taskq[t][q] = taskq_create(name,
				    value, maxclsyspri, 50, INT_MAX,
				    TASKQ_PREPOPULATE);
				break;

			case zti_mode_online_percent:
				spa->spa_zio_taskq[t][q] = taskq_create(name,
				    value, maxclsyspri, 50, INT_MAX,
				    TASKQ_PREPOPULATE | TASKQ_THREADS_CPU_PCT);
				break;

			case zti_mode_tune:
			default:
				panic("unrecognized mode for "
				    "zio_taskqs[%u]->zti_nthreads[%u] (%u:%u) "
				    "in spa_activate()",
				    t, q, mode, value);
				break;
			}
		}
	}

	list_create(&spa->spa_config_dirty_list, sizeof (vdev_t),
	    offsetof(vdev_t, vdev_config_dirty_node));
	list_create(&spa->spa_state_dirty_list, sizeof (vdev_t),
	    offsetof(vdev_t, vdev_state_dirty_node));

	txg_list_create(&spa->spa_vdev_txg_list,
	    offsetof(struct vdev, vdev_txg_node));

	avl_create(&spa->spa_errlist_scrub,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
	avl_create(&spa->spa_errlist_last,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
}

/*
 * Opposite of spa_activate().
 */
static void
spa_deactivate(spa_t *spa)
{
	ASSERT(spa->spa_sync_on == B_FALSE);
	ASSERT(spa->spa_dsl_pool == NULL);
	ASSERT(spa->spa_root_vdev == NULL);
	ASSERT(spa->spa_async_zio_root == NULL);
	ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED);

	txg_list_destroy(&spa->spa_vdev_txg_list);

	list_destroy(&spa->spa_config_dirty_list);
	list_destroy(&spa->spa_state_dirty_list);

	/* Tear down the taskqs created in spa_activate(). */
	for (int t = 0; t < ZIO_TYPES; t++) {
		for (int q = 0; q < ZIO_TASKQ_TYPES; q++) {
			taskq_destroy(spa->spa_zio_taskq[t][q]);
			spa->spa_zio_taskq[t][q] = NULL;
		}
	}

	metaslab_class_destroy(spa->spa_normal_class);
	spa->spa_normal_class = NULL;

	metaslab_class_destroy(spa->spa_log_class);
	spa->spa_log_class = NULL;

	/*
	 * If this was part of an import or the
	 * open otherwise failed, we may
	 * still have errors left in the queues.  Empty them just in case.
	 */
	spa_errlog_drain(spa);

	avl_destroy(&spa->spa_errlist_scrub);
	avl_destroy(&spa->spa_errlist_last);

	spa->spa_state = POOL_STATE_UNINITIALIZED;
}

/*
 * Verify a pool configuration, and construct the vdev tree appropriately.  This
 * will create all the necessary vdevs in the appropriate layout, with each vdev
 * in the CLOSED state.  This will prep the pool before open/creation/import.
 * All vdev validation is done by the vdev_alloc() routine.
 */
static int
spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent,
    uint_t id, int atype)
{
	nvlist_t **child;
	uint_t children;
	int error;

	if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0)
		return (error);

	/* Leaf vdevs have no children; we are done. */
	if ((*vdp)->vdev_ops->vdev_op_leaf)
		return (0);

	error = nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &child, &children);

	/* An interior vdev with no children array is acceptable. */
	if (error == ENOENT)
		return (0);

	if (error) {
		vdev_free(*vdp);
		*vdp = NULL;
		return (EINVAL);
	}

	/*
	 * Recurse into each child; on failure, free the partially built
	 * tree (vdev_free of *vdp frees the children allocated so far).
	 */
	for (int c = 0; c < children; c++) {
		vdev_t *vd;
		if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c,
		    atype)) != 0) {
			vdev_free(*vdp);
			*vdp = NULL;
			return (error);
		}
	}

	ASSERT(*vdp != NULL);

	return (0);
}

/*
 * Opposite of spa_load().
 */
static void
spa_unload(spa_t *spa)
{
	int i;

	ASSERT(MUTEX_HELD(&spa_namespace_lock));

	/*
	 * Stop async tasks.
	 */
	spa_async_suspend(spa);

	/*
	 * Stop syncing.
	 */
	if (spa->spa_sync_on) {
		txg_sync_stop(spa->spa_dsl_pool);
		spa->spa_sync_on = B_FALSE;
	}

	/*
	 * Wait for any outstanding async I/O to complete.
	 */
	if (spa->spa_async_zio_root != NULL) {
		(void) zio_wait(spa->spa_async_zio_root);
		spa->spa_async_zio_root = NULL;
	}

	/*
	 * Close the dsl pool.
	 */
	if (spa->spa_dsl_pool) {
		dsl_pool_close(spa->spa_dsl_pool);
		spa->spa_dsl_pool = NULL;
	}

	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	/*
	 * Drop and purge level 2 cache
	 */
	spa_l2cache_drop(spa);

	/*
	 * Close all vdevs.
782789Sahrens */ 7831585Sbonwick if (spa->spa_root_vdev) 784789Sahrens vdev_free(spa->spa_root_vdev); 7851585Sbonwick ASSERT(spa->spa_root_vdev == NULL); 7861544Seschrock 7875450Sbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) 7885450Sbrendan vdev_free(spa->spa_spares.sav_vdevs[i]); 7895450Sbrendan if (spa->spa_spares.sav_vdevs) { 7905450Sbrendan kmem_free(spa->spa_spares.sav_vdevs, 7915450Sbrendan spa->spa_spares.sav_count * sizeof (void *)); 7925450Sbrendan spa->spa_spares.sav_vdevs = NULL; 7935450Sbrendan } 7945450Sbrendan if (spa->spa_spares.sav_config) { 7955450Sbrendan nvlist_free(spa->spa_spares.sav_config); 7965450Sbrendan spa->spa_spares.sav_config = NULL; 7972082Seschrock } 7987377SEric.Schrock@Sun.COM spa->spa_spares.sav_count = 0; 7995450Sbrendan 8005450Sbrendan for (i = 0; i < spa->spa_l2cache.sav_count; i++) 8015450Sbrendan vdev_free(spa->spa_l2cache.sav_vdevs[i]); 8025450Sbrendan if (spa->spa_l2cache.sav_vdevs) { 8035450Sbrendan kmem_free(spa->spa_l2cache.sav_vdevs, 8045450Sbrendan spa->spa_l2cache.sav_count * sizeof (void *)); 8055450Sbrendan spa->spa_l2cache.sav_vdevs = NULL; 8065450Sbrendan } 8075450Sbrendan if (spa->spa_l2cache.sav_config) { 8085450Sbrendan nvlist_free(spa->spa_l2cache.sav_config); 8095450Sbrendan spa->spa_l2cache.sav_config = NULL; 8102082Seschrock } 8117377SEric.Schrock@Sun.COM spa->spa_l2cache.sav_count = 0; 8122082Seschrock 8131544Seschrock spa->spa_async_suspended = 0; 8148241SJeff.Bonwick@Sun.COM 8158241SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 816789Sahrens } 817789Sahrens 818789Sahrens /* 8192082Seschrock * Load (or re-load) the current list of vdevs describing the active spares for 8202082Seschrock * this pool. When this is called, we have some form of basic information in 8215450Sbrendan * 'spa_spares.sav_config'. We parse this into vdevs, try to open them, and 8225450Sbrendan * then re-generate a more complete list including status information. 
8232082Seschrock */ 8242082Seschrock static void 8252082Seschrock spa_load_spares(spa_t *spa) 8262082Seschrock { 8272082Seschrock nvlist_t **spares; 8282082Seschrock uint_t nspares; 8292082Seschrock int i; 8303377Seschrock vdev_t *vd, *tvd; 8312082Seschrock 8327754SJeff.Bonwick@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 8337754SJeff.Bonwick@Sun.COM 8342082Seschrock /* 8352082Seschrock * First, close and free any existing spare vdevs. 8362082Seschrock */ 8375450Sbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) { 8385450Sbrendan vd = spa->spa_spares.sav_vdevs[i]; 8393377Seschrock 8403377Seschrock /* Undo the call to spa_activate() below */ 8416643Seschrock if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, 8426643Seschrock B_FALSE)) != NULL && tvd->vdev_isspare) 8433377Seschrock spa_spare_remove(tvd); 8443377Seschrock vdev_close(vd); 8453377Seschrock vdev_free(vd); 8462082Seschrock } 8473377Seschrock 8485450Sbrendan if (spa->spa_spares.sav_vdevs) 8495450Sbrendan kmem_free(spa->spa_spares.sav_vdevs, 8505450Sbrendan spa->spa_spares.sav_count * sizeof (void *)); 8515450Sbrendan 8525450Sbrendan if (spa->spa_spares.sav_config == NULL) 8532082Seschrock nspares = 0; 8542082Seschrock else 8555450Sbrendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 8562082Seschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 8572082Seschrock 8585450Sbrendan spa->spa_spares.sav_count = (int)nspares; 8595450Sbrendan spa->spa_spares.sav_vdevs = NULL; 8602082Seschrock 8612082Seschrock if (nspares == 0) 8622082Seschrock return; 8632082Seschrock 8642082Seschrock /* 8652082Seschrock * Construct the array of vdevs, opening them to get status in the 8663377Seschrock * process. For each spare, there is potentially two different vdev_t 8673377Seschrock * structures associated with it: one in the list of spares (used only 8683377Seschrock * for basic validation purposes) and one in the active vdev 8693377Seschrock * configuration (if it's spared in). 
During this phase we open and 8703377Seschrock * validate each vdev on the spare list. If the vdev also exists in the 8713377Seschrock * active configuration, then we also mark this vdev as an active spare. 8722082Seschrock */ 8735450Sbrendan spa->spa_spares.sav_vdevs = kmem_alloc(nspares * sizeof (void *), 8745450Sbrendan KM_SLEEP); 8755450Sbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) { 8762082Seschrock VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0, 8772082Seschrock VDEV_ALLOC_SPARE) == 0); 8782082Seschrock ASSERT(vd != NULL); 8792082Seschrock 8805450Sbrendan spa->spa_spares.sav_vdevs[i] = vd; 8812082Seschrock 8826643Seschrock if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, 8836643Seschrock B_FALSE)) != NULL) { 8843377Seschrock if (!tvd->vdev_isspare) 8853377Seschrock spa_spare_add(tvd); 8863377Seschrock 8873377Seschrock /* 8883377Seschrock * We only mark the spare active if we were successfully 8893377Seschrock * able to load the vdev. Otherwise, importing a pool 8903377Seschrock * with a bad active spare would result in strange 8913377Seschrock * behavior, because multiple pool would think the spare 8923377Seschrock * is actively in use. 8933377Seschrock * 8943377Seschrock * There is a vulnerability here to an equally bizarre 8953377Seschrock * circumstance, where a dead active spare is later 8963377Seschrock * brought back to life (onlined or otherwise). Given 8973377Seschrock * the rarity of this scenario, and the extra complexity 8983377Seschrock * it adds, we ignore the possibility. 
8993377Seschrock */ 9003377Seschrock if (!vdev_is_dead(tvd)) 9013377Seschrock spa_spare_activate(tvd); 9023377Seschrock } 9033377Seschrock 9047754SJeff.Bonwick@Sun.COM vd->vdev_top = vd; 9059425SEric.Schrock@Sun.COM vd->vdev_aux = &spa->spa_spares; 9067754SJeff.Bonwick@Sun.COM 9072082Seschrock if (vdev_open(vd) != 0) 9082082Seschrock continue; 9092082Seschrock 9105450Sbrendan if (vdev_validate_aux(vd) == 0) 9115450Sbrendan spa_spare_add(vd); 9122082Seschrock } 9132082Seschrock 9142082Seschrock /* 9152082Seschrock * Recompute the stashed list of spares, with status information 9162082Seschrock * this time. 9172082Seschrock */ 9185450Sbrendan VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, 9192082Seschrock DATA_TYPE_NVLIST_ARRAY) == 0); 9202082Seschrock 9215450Sbrendan spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *), 9225450Sbrendan KM_SLEEP); 9235450Sbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) 9245450Sbrendan spares[i] = vdev_config_generate(spa, 9255450Sbrendan spa->spa_spares.sav_vdevs[i], B_TRUE, B_TRUE, B_FALSE); 9265450Sbrendan VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, 9275450Sbrendan ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0); 9285450Sbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) 9292082Seschrock nvlist_free(spares[i]); 9305450Sbrendan kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *)); 9315450Sbrendan } 9325450Sbrendan 9335450Sbrendan /* 9345450Sbrendan * Load (or re-load) the current list of vdevs describing the active l2cache for 9355450Sbrendan * this pool. When this is called, we have some form of basic information in 9365450Sbrendan * 'spa_l2cache.sav_config'. We parse this into vdevs, try to open them, and 9375450Sbrendan * then re-generate a more complete list including status information. 9385450Sbrendan * Devices which are already active have their details maintained, and are 9395450Sbrendan * not re-opened. 
9405450Sbrendan */ 9415450Sbrendan static void 9425450Sbrendan spa_load_l2cache(spa_t *spa) 9435450Sbrendan { 9445450Sbrendan nvlist_t **l2cache; 9455450Sbrendan uint_t nl2cache; 9465450Sbrendan int i, j, oldnvdevs; 9479816SGeorge.Wilson@Sun.COM uint64_t guid; 9485450Sbrendan vdev_t *vd, **oldvdevs, **newvdevs; 9495450Sbrendan spa_aux_vdev_t *sav = &spa->spa_l2cache; 9505450Sbrendan 9517754SJeff.Bonwick@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 9527754SJeff.Bonwick@Sun.COM 9535450Sbrendan if (sav->sav_config != NULL) { 9545450Sbrendan VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, 9555450Sbrendan ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 9565450Sbrendan newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP); 9575450Sbrendan } else { 9585450Sbrendan nl2cache = 0; 9595450Sbrendan } 9605450Sbrendan 9615450Sbrendan oldvdevs = sav->sav_vdevs; 9625450Sbrendan oldnvdevs = sav->sav_count; 9635450Sbrendan sav->sav_vdevs = NULL; 9645450Sbrendan sav->sav_count = 0; 9655450Sbrendan 9665450Sbrendan /* 9675450Sbrendan * Process new nvlist of vdevs. 9685450Sbrendan */ 9695450Sbrendan for (i = 0; i < nl2cache; i++) { 9705450Sbrendan VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID, 9715450Sbrendan &guid) == 0); 9725450Sbrendan 9735450Sbrendan newvdevs[i] = NULL; 9745450Sbrendan for (j = 0; j < oldnvdevs; j++) { 9755450Sbrendan vd = oldvdevs[j]; 9765450Sbrendan if (vd != NULL && guid == vd->vdev_guid) { 9775450Sbrendan /* 9785450Sbrendan * Retain previous vdev for add/remove ops. 
9795450Sbrendan */ 9805450Sbrendan newvdevs[i] = vd; 9815450Sbrendan oldvdevs[j] = NULL; 9825450Sbrendan break; 9835450Sbrendan } 9845450Sbrendan } 9855450Sbrendan 9865450Sbrendan if (newvdevs[i] == NULL) { 9875450Sbrendan /* 9885450Sbrendan * Create new vdev 9895450Sbrendan */ 9905450Sbrendan VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0, 9915450Sbrendan VDEV_ALLOC_L2CACHE) == 0); 9925450Sbrendan ASSERT(vd != NULL); 9935450Sbrendan newvdevs[i] = vd; 9945450Sbrendan 9955450Sbrendan /* 9965450Sbrendan * Commit this vdev as an l2cache device, 9975450Sbrendan * even if it fails to open. 9985450Sbrendan */ 9995450Sbrendan spa_l2cache_add(vd); 10005450Sbrendan 10016643Seschrock vd->vdev_top = vd; 10026643Seschrock vd->vdev_aux = sav; 10036643Seschrock 10046643Seschrock spa_l2cache_activate(vd); 10056643Seschrock 10065450Sbrendan if (vdev_open(vd) != 0) 10075450Sbrendan continue; 10085450Sbrendan 10095450Sbrendan (void) vdev_validate_aux(vd); 10105450Sbrendan 10119816SGeorge.Wilson@Sun.COM if (!vdev_is_dead(vd)) 10129816SGeorge.Wilson@Sun.COM l2arc_add_vdev(spa, vd); 10135450Sbrendan } 10145450Sbrendan } 10155450Sbrendan 10165450Sbrendan /* 10175450Sbrendan * Purge vdevs that were dropped 10185450Sbrendan */ 10195450Sbrendan for (i = 0; i < oldnvdevs; i++) { 10205450Sbrendan uint64_t pool; 10215450Sbrendan 10225450Sbrendan vd = oldvdevs[i]; 10235450Sbrendan if (vd != NULL) { 10248241SJeff.Bonwick@Sun.COM if (spa_l2cache_exists(vd->vdev_guid, &pool) && 10258241SJeff.Bonwick@Sun.COM pool != 0ULL && l2arc_vdev_present(vd)) 10265450Sbrendan l2arc_remove_vdev(vd); 10275450Sbrendan (void) vdev_close(vd); 10285450Sbrendan spa_l2cache_remove(vd); 10295450Sbrendan } 10305450Sbrendan } 10315450Sbrendan 10325450Sbrendan if (oldvdevs) 10335450Sbrendan kmem_free(oldvdevs, oldnvdevs * sizeof (void *)); 10345450Sbrendan 10355450Sbrendan if (sav->sav_config == NULL) 10365450Sbrendan goto out; 10375450Sbrendan 10385450Sbrendan sav->sav_vdevs = newvdevs; 10395450Sbrendan 
sav->sav_count = (int)nl2cache; 10405450Sbrendan 10415450Sbrendan /* 10425450Sbrendan * Recompute the stashed list of l2cache devices, with status 10435450Sbrendan * information this time. 10445450Sbrendan */ 10455450Sbrendan VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE, 10465450Sbrendan DATA_TYPE_NVLIST_ARRAY) == 0); 10475450Sbrendan 10485450Sbrendan l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); 10495450Sbrendan for (i = 0; i < sav->sav_count; i++) 10505450Sbrendan l2cache[i] = vdev_config_generate(spa, 10515450Sbrendan sav->sav_vdevs[i], B_TRUE, B_FALSE, B_TRUE); 10525450Sbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, 10535450Sbrendan ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0); 10545450Sbrendan out: 10555450Sbrendan for (i = 0; i < sav->sav_count; i++) 10565450Sbrendan nvlist_free(l2cache[i]); 10575450Sbrendan if (sav->sav_count) 10585450Sbrendan kmem_free(l2cache, sav->sav_count * sizeof (void *)); 10592082Seschrock } 10602082Seschrock 10612082Seschrock static int 10622082Seschrock load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value) 10632082Seschrock { 10642082Seschrock dmu_buf_t *db; 10652082Seschrock char *packed = NULL; 10662082Seschrock size_t nvsize = 0; 10672082Seschrock int error; 10682082Seschrock *value = NULL; 10692082Seschrock 10702082Seschrock VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); 10712082Seschrock nvsize = *(uint64_t *)db->db_data; 10722082Seschrock dmu_buf_rele(db, FTAG); 10732082Seschrock 10742082Seschrock packed = kmem_alloc(nvsize, KM_SLEEP); 10759512SNeil.Perrin@Sun.COM error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed, 10769512SNeil.Perrin@Sun.COM DMU_READ_PREFETCH); 10772082Seschrock if (error == 0) 10782082Seschrock error = nvlist_unpack(packed, nvsize, value, 0); 10792082Seschrock kmem_free(packed, nvsize); 10802082Seschrock 10812082Seschrock return (error); 10822082Seschrock } 10832082Seschrock 10842082Seschrock /* 10854451Seschrock * Checks 
to see if the given vdev could not be opened, in which case we post a 10864451Seschrock * sysevent to notify the autoreplace code that the device has been removed. 10874451Seschrock */ 10884451Seschrock static void 10894451Seschrock spa_check_removed(vdev_t *vd) 10904451Seschrock { 10919816SGeorge.Wilson@Sun.COM for (int c = 0; c < vd->vdev_children; c++) 10924451Seschrock spa_check_removed(vd->vdev_child[c]); 10934451Seschrock 10944451Seschrock if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) { 10954451Seschrock zfs_post_autoreplace(vd->vdev_spa, vd); 10964451Seschrock spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK); 10974451Seschrock } 10984451Seschrock } 10994451Seschrock 11004451Seschrock /* 11019701SGeorge.Wilson@Sun.COM * Load the slog device state from the config object since it's possible 11029701SGeorge.Wilson@Sun.COM * that the label does not contain the most up-to-date information. 11039701SGeorge.Wilson@Sun.COM */ 11049701SGeorge.Wilson@Sun.COM void 1105*10594SGeorge.Wilson@Sun.COM spa_load_log_state(spa_t *spa, nvlist_t *nv) 11069701SGeorge.Wilson@Sun.COM { 1107*10594SGeorge.Wilson@Sun.COM vdev_t *ovd, *rvd = spa->spa_root_vdev; 1108*10594SGeorge.Wilson@Sun.COM 1109*10594SGeorge.Wilson@Sun.COM /* 1110*10594SGeorge.Wilson@Sun.COM * Load the original root vdev tree from the passed config. 
1111*10594SGeorge.Wilson@Sun.COM */ 1112*10594SGeorge.Wilson@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1113*10594SGeorge.Wilson@Sun.COM VERIFY(spa_config_parse(spa, &ovd, nv, NULL, 0, VDEV_ALLOC_LOAD) == 0); 1114*10594SGeorge.Wilson@Sun.COM 1115*10594SGeorge.Wilson@Sun.COM for (int c = 0; c < rvd->vdev_children; c++) { 1116*10594SGeorge.Wilson@Sun.COM vdev_t *cvd = rvd->vdev_child[c]; 1117*10594SGeorge.Wilson@Sun.COM if (cvd->vdev_islog) 1118*10594SGeorge.Wilson@Sun.COM vdev_load_log_state(cvd, ovd->vdev_child[c]); 11199701SGeorge.Wilson@Sun.COM } 1120*10594SGeorge.Wilson@Sun.COM vdev_free(ovd); 1121*10594SGeorge.Wilson@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 11229701SGeorge.Wilson@Sun.COM } 11239701SGeorge.Wilson@Sun.COM 11249701SGeorge.Wilson@Sun.COM /* 11257294Sperrin * Check for missing log devices 11267294Sperrin */ 11277294Sperrin int 11287294Sperrin spa_check_logs(spa_t *spa) 11297294Sperrin { 11307294Sperrin switch (spa->spa_log_state) { 11317294Sperrin case SPA_LOG_MISSING: 11327294Sperrin /* need to recheck in case slog has been restored */ 11337294Sperrin case SPA_LOG_UNKNOWN: 11347294Sperrin if (dmu_objset_find(spa->spa_name, zil_check_log_chain, NULL, 11357294Sperrin DS_FIND_CHILDREN)) { 11367294Sperrin spa->spa_log_state = SPA_LOG_MISSING; 11377294Sperrin return (1); 11387294Sperrin } 11397294Sperrin break; 11407294Sperrin } 11417294Sperrin return (0); 11427294Sperrin } 11437294Sperrin 11447294Sperrin /* 1145789Sahrens * Load an existing storage pool, using the pool's builtin spa_config as a 11461544Seschrock * source of configuration information. 
1147789Sahrens */ 1148789Sahrens static int 11491544Seschrock spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig) 1150789Sahrens { 1151789Sahrens int error = 0; 1152*10594SGeorge.Wilson@Sun.COM nvlist_t *nvconfig, *nvroot = NULL; 1153789Sahrens vdev_t *rvd; 1154789Sahrens uberblock_t *ub = &spa->spa_uberblock; 11551635Sbonwick uint64_t config_cache_txg = spa->spa_config_txg; 1156789Sahrens uint64_t pool_guid; 11572082Seschrock uint64_t version; 11584451Seschrock uint64_t autoreplace = 0; 11598241SJeff.Bonwick@Sun.COM int orig_mode = spa->spa_mode; 11607294Sperrin char *ereport = FM_EREPORT_ZFS_POOL; 1161789Sahrens 11628241SJeff.Bonwick@Sun.COM /* 11638241SJeff.Bonwick@Sun.COM * If this is an untrusted config, access the pool in read-only mode. 11648241SJeff.Bonwick@Sun.COM * This prevents things like resilvering recently removed devices. 11658241SJeff.Bonwick@Sun.COM */ 11668241SJeff.Bonwick@Sun.COM if (!mosconfig) 11678241SJeff.Bonwick@Sun.COM spa->spa_mode = FREAD; 11688241SJeff.Bonwick@Sun.COM 11697754SJeff.Bonwick@Sun.COM ASSERT(MUTEX_HELD(&spa_namespace_lock)); 11707754SJeff.Bonwick@Sun.COM 11711544Seschrock spa->spa_load_state = state; 11721635Sbonwick 1173789Sahrens if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) || 11741733Sbonwick nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) { 11751544Seschrock error = EINVAL; 11761544Seschrock goto out; 11771544Seschrock } 1178789Sahrens 11792082Seschrock /* 11802082Seschrock * Versioning wasn't explicitly added to the label until later, so if 11812082Seschrock * it's not present treat it as the initial version. 
11822082Seschrock */ 11832082Seschrock if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) != 0) 11844577Sahrens version = SPA_VERSION_INITIAL; 11852082Seschrock 11861733Sbonwick (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, 11871733Sbonwick &spa->spa_config_txg); 11881733Sbonwick 11891635Sbonwick if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) && 11901544Seschrock spa_guid_exists(pool_guid, 0)) { 11911544Seschrock error = EEXIST; 11921544Seschrock goto out; 11931544Seschrock } 1194789Sahrens 11952174Seschrock spa->spa_load_guid = pool_guid; 11962174Seschrock 1197789Sahrens /* 11989234SGeorge.Wilson@Sun.COM * Create "The Godfather" zio to hold all async IOs 11999234SGeorge.Wilson@Sun.COM */ 12009630SJeff.Bonwick@Sun.COM spa->spa_async_zio_root = zio_root(spa, NULL, NULL, 12019630SJeff.Bonwick@Sun.COM ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); 12029234SGeorge.Wilson@Sun.COM 12039234SGeorge.Wilson@Sun.COM /* 12042082Seschrock * Parse the configuration into a vdev tree. We explicitly set the 12052082Seschrock * value that will be returned by spa_version() since parsing the 12062082Seschrock * configuration requires knowing the version number. 1207789Sahrens */ 12087754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 12092082Seschrock spa->spa_ubsync.ub_version = version; 12102082Seschrock error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_LOAD); 12117754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 1212789Sahrens 12132082Seschrock if (error != 0) 12141544Seschrock goto out; 1215789Sahrens 12161585Sbonwick ASSERT(spa->spa_root_vdev == rvd); 1217789Sahrens ASSERT(spa_guid(spa) == pool_guid); 1218789Sahrens 1219789Sahrens /* 1220789Sahrens * Try to open all vdevs, loading each label in the process. 
1221789Sahrens */ 12227754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 12234070Smc142369 error = vdev_open(rvd); 12247754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 12254070Smc142369 if (error != 0) 12261544Seschrock goto out; 1227789Sahrens 1228789Sahrens /* 12299276SMark.Musante@Sun.COM * We need to validate the vdev labels against the configuration that 12309276SMark.Musante@Sun.COM * we have in hand, which is dependent on the setting of mosconfig. If 12319276SMark.Musante@Sun.COM * mosconfig is true then we're validating the vdev labels based on 12329276SMark.Musante@Sun.COM * that config. Otherwise, we're validating against the cached config 12339276SMark.Musante@Sun.COM * (zpool.cache) that was read when we loaded the zfs module, and then 12349276SMark.Musante@Sun.COM * later we will recursively call spa_load() and validate against 12359276SMark.Musante@Sun.COM * the vdev config. 12361986Seschrock */ 12379276SMark.Musante@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 12389276SMark.Musante@Sun.COM error = vdev_validate(rvd); 12399276SMark.Musante@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 12409276SMark.Musante@Sun.COM if (error != 0) 12419276SMark.Musante@Sun.COM goto out; 12421986Seschrock 12431986Seschrock if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { 12441986Seschrock error = ENXIO; 12451986Seschrock goto out; 12461986Seschrock } 12471986Seschrock 12481986Seschrock /* 1249789Sahrens * Find the best uberblock. 1250789Sahrens */ 12517754SJeff.Bonwick@Sun.COM vdev_uberblock_load(NULL, rvd, ub); 1252789Sahrens 1253789Sahrens /* 1254789Sahrens * If we weren't able to find a single valid uberblock, return failure. 
1255789Sahrens */ 1256789Sahrens if (ub->ub_txg == 0) { 12571760Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 12581760Seschrock VDEV_AUX_CORRUPT_DATA); 12591544Seschrock error = ENXIO; 12601544Seschrock goto out; 12611544Seschrock } 12621544Seschrock 12631544Seschrock /* 12641544Seschrock * If the pool is newer than the code, we can't open it. 12651544Seschrock */ 12664577Sahrens if (ub->ub_version > SPA_VERSION) { 12671760Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 12681760Seschrock VDEV_AUX_VERSION_NEWER); 12691544Seschrock error = ENOTSUP; 12701544Seschrock goto out; 1271789Sahrens } 1272789Sahrens 1273789Sahrens /* 1274789Sahrens * If the vdev guid sum doesn't match the uberblock, we have an 1275789Sahrens * incomplete configuration. 1276789Sahrens */ 12771732Sbonwick if (rvd->vdev_guid_sum != ub->ub_guid_sum && mosconfig) { 12781544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 12791544Seschrock VDEV_AUX_BAD_GUID_SUM); 12801544Seschrock error = ENXIO; 12811544Seschrock goto out; 1282789Sahrens } 1283789Sahrens 1284789Sahrens /* 1285789Sahrens * Initialize internal SPA structures. 
1286789Sahrens */ 1287789Sahrens spa->spa_state = POOL_STATE_ACTIVE; 1288789Sahrens spa->spa_ubsync = spa->spa_uberblock; 1289789Sahrens spa->spa_first_txg = spa_last_synced_txg(spa) + 1; 12901544Seschrock error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool); 12911544Seschrock if (error) { 12921544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 12931544Seschrock VDEV_AUX_CORRUPT_DATA); 12941544Seschrock goto out; 12951544Seschrock } 1296789Sahrens spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset; 1297789Sahrens 12981544Seschrock if (zap_lookup(spa->spa_meta_objset, 1299789Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, 13001544Seschrock sizeof (uint64_t), 1, &spa->spa_config_object) != 0) { 13011544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 13021544Seschrock VDEV_AUX_CORRUPT_DATA); 13031544Seschrock error = EIO; 13041544Seschrock goto out; 13051544Seschrock } 1306789Sahrens 1307*10594SGeorge.Wilson@Sun.COM if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0) { 1308*10594SGeorge.Wilson@Sun.COM vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1309*10594SGeorge.Wilson@Sun.COM VDEV_AUX_CORRUPT_DATA); 1310*10594SGeorge.Wilson@Sun.COM error = EIO; 1311*10594SGeorge.Wilson@Sun.COM goto out; 1312*10594SGeorge.Wilson@Sun.COM } 1313*10594SGeorge.Wilson@Sun.COM 1314789Sahrens if (!mosconfig) { 13153975Sek110237 uint64_t hostid; 13162082Seschrock 1317*10594SGeorge.Wilson@Sun.COM if (!spa_is_root(spa) && nvlist_lookup_uint64(nvconfig, 13187706SLin.Ling@Sun.COM ZPOOL_CONFIG_HOSTID, &hostid) == 0) { 13193975Sek110237 char *hostname; 13203975Sek110237 unsigned long myhostid = 0; 13213975Sek110237 1322*10594SGeorge.Wilson@Sun.COM VERIFY(nvlist_lookup_string(nvconfig, 13233975Sek110237 ZPOOL_CONFIG_HOSTNAME, &hostname) == 0); 13243975Sek110237 13258662SJordan.Vaughan@Sun.com #ifdef _KERNEL 13268662SJordan.Vaughan@Sun.com myhostid = zone_get_hostid(NULL); 13278662SJordan.Vaughan@Sun.com #else /* _KERNEL */ 
13288662SJordan.Vaughan@Sun.com /* 13298662SJordan.Vaughan@Sun.com * We're emulating the system's hostid in userland, so 13308662SJordan.Vaughan@Sun.com * we can't use zone_get_hostid(). 13318662SJordan.Vaughan@Sun.com */ 13323975Sek110237 (void) ddi_strtoul(hw_serial, NULL, 10, &myhostid); 13338662SJordan.Vaughan@Sun.com #endif /* _KERNEL */ 13344178Slling if (hostid != 0 && myhostid != 0 && 13358662SJordan.Vaughan@Sun.com hostid != myhostid) { 13363975Sek110237 cmn_err(CE_WARN, "pool '%s' could not be " 13373975Sek110237 "loaded as it was last accessed by " 13387706SLin.Ling@Sun.COM "another system (host: %s hostid: 0x%lx). " 13393975Sek110237 "See: http://www.sun.com/msg/ZFS-8000-EY", 13407754SJeff.Bonwick@Sun.COM spa_name(spa), hostname, 13413975Sek110237 (unsigned long)hostid); 13423975Sek110237 error = EBADF; 13433975Sek110237 goto out; 13443975Sek110237 } 13453975Sek110237 } 13463975Sek110237 1347*10594SGeorge.Wilson@Sun.COM spa_config_set(spa, nvconfig); 1348789Sahrens spa_unload(spa); 1349789Sahrens spa_deactivate(spa); 13508241SJeff.Bonwick@Sun.COM spa_activate(spa, orig_mode); 1351789Sahrens 1352*10594SGeorge.Wilson@Sun.COM return (spa_load(spa, nvconfig, state, B_TRUE)); 13531544Seschrock } 13541544Seschrock 13551544Seschrock if (zap_lookup(spa->spa_meta_objset, 13561544Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, 13571544Seschrock sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj) != 0) { 13581544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 13591544Seschrock VDEV_AUX_CORRUPT_DATA); 13601544Seschrock error = EIO; 13611544Seschrock goto out; 1362789Sahrens } 1363789Sahrens 13641544Seschrock /* 13652082Seschrock * Load the bit that tells us to use the new accounting function 13662082Seschrock * (raid-z deflation). If we have an older pool, this will not 13672082Seschrock * be present. 
13682082Seschrock */ 13692082Seschrock error = zap_lookup(spa->spa_meta_objset, 13702082Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 13712082Seschrock sizeof (uint64_t), 1, &spa->spa_deflate); 13722082Seschrock if (error != 0 && error != ENOENT) { 13732082Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 13742082Seschrock VDEV_AUX_CORRUPT_DATA); 13752082Seschrock error = EIO; 13762082Seschrock goto out; 13772082Seschrock } 13782082Seschrock 13792082Seschrock /* 13801544Seschrock * Load the persistent error log. If we have an older pool, this will 13811544Seschrock * not be present. 13821544Seschrock */ 13831544Seschrock error = zap_lookup(spa->spa_meta_objset, 13841544Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_LAST, 13851544Seschrock sizeof (uint64_t), 1, &spa->spa_errlog_last); 13861807Sbonwick if (error != 0 && error != ENOENT) { 13871544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 13881544Seschrock VDEV_AUX_CORRUPT_DATA); 13891544Seschrock error = EIO; 13901544Seschrock goto out; 13911544Seschrock } 13921544Seschrock 13931544Seschrock error = zap_lookup(spa->spa_meta_objset, 13941544Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_SCRUB, 13951544Seschrock sizeof (uint64_t), 1, &spa->spa_errlog_scrub); 13961544Seschrock if (error != 0 && error != ENOENT) { 13971544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 13981544Seschrock VDEV_AUX_CORRUPT_DATA); 13991544Seschrock error = EIO; 14001544Seschrock goto out; 14011544Seschrock } 1402789Sahrens 1403789Sahrens /* 14042926Sek110237 * Load the history object. If we have an older pool, this 14052926Sek110237 * will not be present. 
14062926Sek110237 */ 14072926Sek110237 error = zap_lookup(spa->spa_meta_objset, 14082926Sek110237 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_HISTORY, 14092926Sek110237 sizeof (uint64_t), 1, &spa->spa_history); 14102926Sek110237 if (error != 0 && error != ENOENT) { 14112926Sek110237 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 14122926Sek110237 VDEV_AUX_CORRUPT_DATA); 14132926Sek110237 error = EIO; 14142926Sek110237 goto out; 14152926Sek110237 } 14162926Sek110237 14172926Sek110237 /* 14182082Seschrock * Load any hot spares for this pool. 14192082Seschrock */ 14202082Seschrock error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 14215450Sbrendan DMU_POOL_SPARES, sizeof (uint64_t), 1, &spa->spa_spares.sav_object); 14222082Seschrock if (error != 0 && error != ENOENT) { 14232082Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 14242082Seschrock VDEV_AUX_CORRUPT_DATA); 14252082Seschrock error = EIO; 14262082Seschrock goto out; 14272082Seschrock } 14282082Seschrock if (error == 0) { 14294577Sahrens ASSERT(spa_version(spa) >= SPA_VERSION_SPARES); 14305450Sbrendan if (load_nvlist(spa, spa->spa_spares.sav_object, 14315450Sbrendan &spa->spa_spares.sav_config) != 0) { 14322082Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 14332082Seschrock VDEV_AUX_CORRUPT_DATA); 14342082Seschrock error = EIO; 14352082Seschrock goto out; 14362082Seschrock } 14372082Seschrock 14387754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 14392082Seschrock spa_load_spares(spa); 14407754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 14412082Seschrock } 14422082Seschrock 14435450Sbrendan /* 14445450Sbrendan * Load any level 2 ARC devices for this pool. 
	 */
	error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
	    DMU_POOL_L2CACHE, sizeof (uint64_t), 1,
	    &spa->spa_l2cache.sav_object);
	if (error != 0 && error != ENOENT) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = EIO;
		goto out;
	}
	if (error == 0) {
		/* An L2 cache object can only exist on new-enough pools. */
		ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE);
		if (load_nvlist(spa, spa->spa_l2cache.sav_object,
		    &spa->spa_l2cache.sav_config) != 0) {
			vdev_set_state(rvd, B_TRUE,
			    VDEV_STATE_CANT_OPEN,
			    VDEV_AUX_CORRUPT_DATA);
			error = EIO;
			goto out;
		}

		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_l2cache(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
	}

	/*
	 * Record the intent-log state from the on-disk vdev tree before
	 * freeing the config nvlist we loaded.
	 */
	VERIFY(nvlist_lookup_nvlist(nvconfig, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) == 0);
	spa_load_log_state(spa, nvroot);
	nvlist_free(nvconfig);

	if (spa_check_logs(spa)) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_BAD_LOG);
		error = ENXIO;
		ereport = FM_EREPORT_ZFS_LOG_REPLAY;
		goto out;
	}


	/*
	 * Load pool properties.  Start from the compiled-in delegation
	 * default in case the props object is missing (older pools).
	 */
	spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION);

	error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
	    DMU_POOL_PROPS, sizeof (uint64_t), 1, &spa->spa_pool_props_object);

	/* ENOENT just means this pool predates pool properties. */
	if (error && error != ENOENT) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = EIO;
		goto out;
	}

	if (error == 0) {
		/*
		 * Failed individual lookups leave the in-core defaults
		 * in place, hence the (void) casts.
		 */
		(void) zap_lookup(spa->spa_meta_objset,
		    spa->spa_pool_props_object,
		    zpool_prop_to_name(ZPOOL_PROP_BOOTFS),
		    sizeof (uint64_t), 1, &spa->spa_bootfs);
		(void) zap_lookup(spa->spa_meta_objset,
		    spa->spa_pool_props_object,
		    zpool_prop_to_name(ZPOOL_PROP_AUTOREPLACE),
		    sizeof (uint64_t), 1, &autoreplace);
		(void) zap_lookup(spa->spa_meta_objset,
		    spa->spa_pool_props_object,
		    zpool_prop_to_name(ZPOOL_PROP_DELEGATION),
		    sizeof (uint64_t), 1, &spa->spa_delegation);
		(void) zap_lookup(spa->spa_meta_objset,
		    spa->spa_pool_props_object,
		    zpool_prop_to_name(ZPOOL_PROP_FAILUREMODE),
		    sizeof (uint64_t), 1, &spa->spa_failmode);
		(void) zap_lookup(spa->spa_meta_objset,
		    spa->spa_pool_props_object,
		    zpool_prop_to_name(ZPOOL_PROP_AUTOEXPAND),
		    sizeof (uint64_t), 1, &spa->spa_autoexpand);
	}

	/*
	 * If the 'autoreplace' property is set, then post a resource notifying
	 * the ZFS DE that it should not issue any faults for unopenable
	 * devices.  We also iterate over the vdevs, and post a sysevent for any
	 * unopenable vdevs so that the normal autoreplace handler can take
	 * over.
	 */
	if (autoreplace && state != SPA_LOAD_TRYIMPORT)
		spa_check_removed(spa->spa_root_vdev);

	/*
	 * Load the vdev state for all toplevel vdevs.
	 */
	vdev_load(rvd);

	/*
	 * Propagate the leaf DTLs we just loaded all the way up the tree.
	 */
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	vdev_dtl_reassess(rvd, 0, 0, B_FALSE);
	spa_config_exit(spa, SCL_ALL, FTAG);

	/*
	 * Check the state of the root vdev.  If it can't be opened, it
	 * indicates one or more toplevel vdevs are faulted.
	 */
	if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) {
		error = ENXIO;
		goto out;
	}

	if (spa_writeable(spa)) {
		dmu_tx_t *tx;
		int need_update = B_FALSE;

		ASSERT(state != SPA_LOAD_TRYIMPORT);

		/*
		 * Claim log blocks that haven't been committed yet.
		 * This must all happen in a single txg.
		 */
		tx = dmu_tx_create_assigned(spa_get_dsl(spa),
		    spa_first_txg(spa));
		(void) dmu_objset_find(spa_name(spa),
		    zil_claim, tx, DS_FIND_CHILDREN);
		dmu_tx_commit(tx);

		spa->spa_log_state = SPA_LOG_GOOD;
		spa->spa_sync_on = B_TRUE;
		txg_sync_start(spa->spa_dsl_pool);

		/*
		 * Wait for all claims to sync.
		 */
		txg_wait_synced(spa->spa_dsl_pool, 0);

		/*
		 * If the config cache is stale, or we have uninitialized
		 * metaslabs (see spa_vdev_add()), then update the config.
		 *
		 * If spa_load_verbatim is true, trust the current
		 * in-core spa_config and update the disk labels.
		 */
		if (config_cache_txg != spa->spa_config_txg ||
		    state == SPA_LOAD_IMPORT || spa->spa_load_verbatim)
			need_update = B_TRUE;

		for (int c = 0; c < rvd->vdev_children; c++)
			if (rvd->vdev_child[c]->vdev_ms_array == 0)
				need_update = B_TRUE;

		/*
		 * Update the config cache asynchronously in case we're the
		 * root pool, in which case the config cache isn't writable yet.
		 */
		if (need_update)
			spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);

		/*
		 * Check all DTLs to see if anything needs resilvering.
		 */
		if (vdev_resilver_needed(rvd, NULL, NULL))
			spa_async_request(spa, SPA_ASYNC_RESILVER);

		/*
		 * Delete any inconsistent datasets.
		 */
		(void) dmu_objset_find(spa_name(spa),
		    dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN);

		/*
		 * Clean up any stale temporary dataset userrefs.
		 */
		dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool);
	}

	error = 0;
out:
	spa->spa_minref = refcount_count(&spa->spa_refcount);
	if (error && error != EBADF)
		zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0);
	spa->spa_load_state = SPA_LOAD_NONE;
	spa->spa_ena = 0;

	return (error);
}

/*
 * Pool Open/Import
 *
 * The import case is identical to an open except that the configuration is
 * sent down from userland, instead of grabbed from the configuration cache.
 * For the case of an open, the pool configuration will exist in the
 * POOL_STATE_UNINITIALIZED state.
 *
 * The stats information (gen/count/ustats) is used to gather vdev statistics
 * at the same time open the pool, without having to keep around the spa_t in
 * some ambiguous state.
 */
static int
spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t **config)
{
	spa_t *spa;
	int error;
	int locked = B_FALSE;

	*spapp = NULL;

	/*
	 * As disgusting as this is, we need to support recursive calls to this
	 * function because dsl_dir_open() is called during spa_load(), and ends
	 * up calling spa_open() again.  The real fix is to figure out how to
	 * avoid dsl_dir_open() calling this in the first place.
	 */
	if (mutex_owner(&spa_namespace_lock) != curthread) {
		mutex_enter(&spa_namespace_lock);
		locked = B_TRUE;
	}

	if ((spa = spa_lookup(pool)) == NULL) {
		if (locked)
			mutex_exit(&spa_namespace_lock);
		return (ENOENT);
	}
	if (spa->spa_state == POOL_STATE_UNINITIALIZED) {

		/* First open: activate the spa and load on-disk state. */
		spa_activate(spa, spa_mode_global);

		error = spa_load(spa, spa->spa_config, SPA_LOAD_OPEN, B_FALSE);

		if (error == EBADF) {
			/*
			 * If vdev_validate() returns failure (indicated by
			 * EBADF), it indicates that one of the vdevs indicates
			 * that the pool has been exported or destroyed.  If
			 * this is the case, the config cache is out of sync and
			 * we should remove the pool from the namespace.
			 */
			spa_unload(spa);
			spa_deactivate(spa);
			spa_config_sync(spa, B_TRUE, B_TRUE);
			spa_remove(spa);
			if (locked)
				mutex_exit(&spa_namespace_lock);
			return (ENOENT);
		}

		if (error) {
			/*
			 * We can't open the pool, but we still have useful
			 * information: the state of each vdev after the
			 * attempted vdev_open().  Return this to the user.
			 */
			if (config != NULL && spa->spa_root_vdev != NULL)
				*config = spa_config_generate(spa, NULL, -1ULL,
				    B_TRUE);
			spa_unload(spa);
			spa_deactivate(spa);
			spa->spa_last_open_failed = B_TRUE;
			if (locked)
				mutex_exit(&spa_namespace_lock);
			*spapp = NULL;
			return (error);
		} else {
			spa->spa_last_open_failed = B_FALSE;
		}
	}

	spa_open_ref(spa, tag);

	if (locked)
		mutex_exit(&spa_namespace_lock);

	*spapp = spa;

	if (config != NULL)
		*config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);

	return (0);
}

/*
 * Open a pool by name; thin wrapper around spa_open_common() that does
 * not request the pool config.
 */
int
spa_open(const char *name, spa_t **spapp, void *tag)
{
	return (spa_open_common(name, spapp, tag, NULL));
}

/*
 * Lookup the given spa_t, incrementing the inject count in the process,
 * preventing it from being exported or destroyed.
17301544Seschrock */ 17311544Seschrock spa_t * 17321544Seschrock spa_inject_addref(char *name) 17331544Seschrock { 17341544Seschrock spa_t *spa; 17351544Seschrock 17361544Seschrock mutex_enter(&spa_namespace_lock); 17371544Seschrock if ((spa = spa_lookup(name)) == NULL) { 17381544Seschrock mutex_exit(&spa_namespace_lock); 17391544Seschrock return (NULL); 17401544Seschrock } 17411544Seschrock spa->spa_inject_ref++; 17421544Seschrock mutex_exit(&spa_namespace_lock); 17431544Seschrock 17441544Seschrock return (spa); 17451544Seschrock } 17461544Seschrock 17471544Seschrock void 17481544Seschrock spa_inject_delref(spa_t *spa) 17491544Seschrock { 17501544Seschrock mutex_enter(&spa_namespace_lock); 17511544Seschrock spa->spa_inject_ref--; 17521544Seschrock mutex_exit(&spa_namespace_lock); 17531544Seschrock } 17541544Seschrock 17555450Sbrendan /* 17565450Sbrendan * Add spares device information to the nvlist. 17575450Sbrendan */ 17582082Seschrock static void 17592082Seschrock spa_add_spares(spa_t *spa, nvlist_t *config) 17602082Seschrock { 17612082Seschrock nvlist_t **spares; 17622082Seschrock uint_t i, nspares; 17632082Seschrock nvlist_t *nvroot; 17642082Seschrock uint64_t guid; 17652082Seschrock vdev_stat_t *vs; 17662082Seschrock uint_t vsc; 17673377Seschrock uint64_t pool; 17682082Seschrock 17699425SEric.Schrock@Sun.COM ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); 17709425SEric.Schrock@Sun.COM 17715450Sbrendan if (spa->spa_spares.sav_count == 0) 17722082Seschrock return; 17732082Seschrock 17742082Seschrock VERIFY(nvlist_lookup_nvlist(config, 17752082Seschrock ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 17765450Sbrendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 17772082Seschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 17782082Seschrock if (nspares != 0) { 17792082Seschrock VERIFY(nvlist_add_nvlist_array(nvroot, 17802082Seschrock ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 17812082Seschrock VERIFY(nvlist_lookup_nvlist_array(nvroot, 
17822082Seschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 17832082Seschrock 17842082Seschrock /* 17852082Seschrock * Go through and find any spares which have since been 17862082Seschrock * repurposed as an active spare. If this is the case, update 17872082Seschrock * their status appropriately. 17882082Seschrock */ 17892082Seschrock for (i = 0; i < nspares; i++) { 17902082Seschrock VERIFY(nvlist_lookup_uint64(spares[i], 17912082Seschrock ZPOOL_CONFIG_GUID, &guid) == 0); 17927214Slling if (spa_spare_exists(guid, &pool, NULL) && 17937214Slling pool != 0ULL) { 17942082Seschrock VERIFY(nvlist_lookup_uint64_array( 17952082Seschrock spares[i], ZPOOL_CONFIG_STATS, 17962082Seschrock (uint64_t **)&vs, &vsc) == 0); 17972082Seschrock vs->vs_state = VDEV_STATE_CANT_OPEN; 17982082Seschrock vs->vs_aux = VDEV_AUX_SPARED; 17992082Seschrock } 18002082Seschrock } 18012082Seschrock } 18022082Seschrock } 18032082Seschrock 18045450Sbrendan /* 18055450Sbrendan * Add l2cache device information to the nvlist, including vdev stats. 
 */
static void
spa_add_l2cache(spa_t *spa, nvlist_t *config)
{
	nvlist_t **l2cache;
	uint_t i, j, nl2cache;
	nvlist_t *nvroot;
	uint64_t guid;
	vdev_t *vd;
	vdev_stat_t *vs;
	uint_t vsc;

	/* Caller must hold the config lock as reader. */
	ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));

	if (spa->spa_l2cache.sav_count == 0)
		return;

	VERIFY(nvlist_lookup_nvlist(config,
	    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
	VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config,
	    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
	if (nl2cache != 0) {
		VERIFY(nvlist_add_nvlist_array(nvroot,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
		VERIFY(nvlist_lookup_nvlist_array(nvroot,
		    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);

		/*
		 * Update level 2 cache device stats.
		 */

		for (i = 0; i < nl2cache; i++) {
			VERIFY(nvlist_lookup_uint64(l2cache[i],
			    ZPOOL_CONFIG_GUID, &guid) == 0);

			/* Match the config entry to its in-core vdev. */
			vd = NULL;
			for (j = 0; j < spa->spa_l2cache.sav_count; j++) {
				if (guid ==
				    spa->spa_l2cache.sav_vdevs[j]->vdev_guid) {
					vd = spa->spa_l2cache.sav_vdevs[j];
					break;
				}
			}
			ASSERT(vd != NULL);

			VERIFY(nvlist_lookup_uint64_array(l2cache[i],
			    ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0);
			vdev_get_stats(vd, vs);
		}
	}
}

/*
 * Open the named pool and generate its config for userland, augmented
 * with error counts, suspension state, and spare/l2cache information.
 * If altroot is non-NULL it receives the pool's alternate root (or ""
 * when the pool cannot be found), even for faulted pools.
 */
int
spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen)
{
	int error;
	spa_t *spa;

	*config = NULL;
	error = spa_open_common(name, &spa, FTAG, config);

	if (spa != NULL) {
		/*
		 * This still leaves a window of inconsistency where the spares
		 * or l2cache devices could change and the config would be
		 * self-inconsistent.
		 */
		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);

		if (*config != NULL) {
			VERIFY(nvlist_add_uint64(*config,
			    ZPOOL_CONFIG_ERRCOUNT,
			    spa_get_errlog_size(spa)) == 0);

			if (spa_suspended(spa))
				VERIFY(nvlist_add_uint64(*config,
				    ZPOOL_CONFIG_SUSPENDED,
				    spa->spa_failmode) == 0);

			spa_add_spares(spa, *config);
			spa_add_l2cache(spa, *config);
		}
	}

	/*
	 * We want to get the alternate root even for faulted pools, so we cheat
	 * and call spa_lookup() directly.
	 */
	if (altroot) {
		if (spa == NULL) {
			mutex_enter(&spa_namespace_lock);
			spa = spa_lookup(name);
			if (spa)
				spa_altroot(spa, altroot, buflen);
			else
				altroot[0] = '\0';
			spa = NULL;
			mutex_exit(&spa_namespace_lock);
		} else {
			spa_altroot(spa, altroot, buflen);
		}
	}

	if (spa != NULL) {
		spa_config_exit(spa, SCL_CONFIG, FTAG);
		spa_close(spa, FTAG);
	}

	return (error);
}

/*
 * Validate that the auxiliary device array is well formed.
We must have an 19195450Sbrendan * array of nvlists, each which describes a valid leaf vdev. If this is an 19205450Sbrendan * import (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be 19215450Sbrendan * specified, as long as they are well-formed. 19222082Seschrock */ 19232082Seschrock static int 19245450Sbrendan spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode, 19255450Sbrendan spa_aux_vdev_t *sav, const char *config, uint64_t version, 19265450Sbrendan vdev_labeltype_t label) 19272082Seschrock { 19285450Sbrendan nvlist_t **dev; 19295450Sbrendan uint_t i, ndev; 19302082Seschrock vdev_t *vd; 19312082Seschrock int error; 19322082Seschrock 19337754SJeff.Bonwick@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 19347754SJeff.Bonwick@Sun.COM 19352082Seschrock /* 19365450Sbrendan * It's acceptable to have no devs specified. 19372082Seschrock */ 19385450Sbrendan if (nvlist_lookup_nvlist_array(nvroot, config, &dev, &ndev) != 0) 19392082Seschrock return (0); 19402082Seschrock 19415450Sbrendan if (ndev == 0) 19422082Seschrock return (EINVAL); 19432082Seschrock 19442082Seschrock /* 19455450Sbrendan * Make sure the pool is formatted with a version that supports this 19465450Sbrendan * device type. 19472082Seschrock */ 19485450Sbrendan if (spa_version(spa) < version) 19492082Seschrock return (ENOTSUP); 19502082Seschrock 19513377Seschrock /* 19525450Sbrendan * Set the pending device list so we correctly handle device in-use 19533377Seschrock * checking. 
19543377Seschrock */ 19555450Sbrendan sav->sav_pending = dev; 19565450Sbrendan sav->sav_npending = ndev; 19575450Sbrendan 19585450Sbrendan for (i = 0; i < ndev; i++) { 19595450Sbrendan if ((error = spa_config_parse(spa, &vd, dev[i], NULL, 0, 19602082Seschrock mode)) != 0) 19613377Seschrock goto out; 19622082Seschrock 19632082Seschrock if (!vd->vdev_ops->vdev_op_leaf) { 19642082Seschrock vdev_free(vd); 19653377Seschrock error = EINVAL; 19663377Seschrock goto out; 19672082Seschrock } 19682082Seschrock 19695450Sbrendan /* 19707754SJeff.Bonwick@Sun.COM * The L2ARC currently only supports disk devices in 19717754SJeff.Bonwick@Sun.COM * kernel context. For user-level testing, we allow it. 19725450Sbrendan */ 19737754SJeff.Bonwick@Sun.COM #ifdef _KERNEL 19745450Sbrendan if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) && 19755450Sbrendan strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) { 19765450Sbrendan error = ENOTBLK; 19775450Sbrendan goto out; 19785450Sbrendan } 19797754SJeff.Bonwick@Sun.COM #endif 19802082Seschrock vd->vdev_top = vd; 19813377Seschrock 19823377Seschrock if ((error = vdev_open(vd)) == 0 && 19835450Sbrendan (error = vdev_label_init(vd, crtxg, label)) == 0) { 19845450Sbrendan VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID, 19853377Seschrock vd->vdev_guid) == 0); 19862082Seschrock } 19872082Seschrock 19882082Seschrock vdev_free(vd); 19893377Seschrock 19905450Sbrendan if (error && 19915450Sbrendan (mode != VDEV_ALLOC_SPARE && mode != VDEV_ALLOC_L2CACHE)) 19923377Seschrock goto out; 19933377Seschrock else 19943377Seschrock error = 0; 19952082Seschrock } 19962082Seschrock 19973377Seschrock out: 19985450Sbrendan sav->sav_pending = NULL; 19995450Sbrendan sav->sav_npending = 0; 20003377Seschrock return (error); 20012082Seschrock } 20022082Seschrock 20035450Sbrendan static int 20045450Sbrendan spa_validate_aux(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode) 20055450Sbrendan { 20065450Sbrendan int error; 20075450Sbrendan 
	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);

	if ((error = spa_validate_aux_devs(spa, nvroot, crtxg, mode,
	    &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES,
	    VDEV_LABEL_SPARE)) != 0) {
		return (error);
	}

	return (spa_validate_aux_devs(spa, nvroot, crtxg, mode,
	    &spa->spa_l2cache, ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE,
	    VDEV_LABEL_L2CACHE));
}

/*
 * Install the 'devs' array into the aux vdev list 'sav', appending to
 * any existing list; 'config' names the nvlist array key (spares or
 * l2cache).
 */
static void
spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs,
    const char *config)
{
	int i;

	if (sav->sav_config != NULL) {
		nvlist_t **olddevs;
		uint_t oldndevs;
		nvlist_t **newdevs;

		/*
		 * Generate new dev list by concatenating with the
		 * current dev list.
		 */
		VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config,
		    &olddevs, &oldndevs) == 0);

		newdevs = kmem_alloc(sizeof (void *) *
		    (ndevs + oldndevs), KM_SLEEP);
		for (i = 0; i < oldndevs; i++)
			VERIFY(nvlist_dup(olddevs[i], &newdevs[i],
			    KM_SLEEP) == 0);
		for (i = 0; i < ndevs; i++)
			VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs],
			    KM_SLEEP) == 0);

		VERIFY(nvlist_remove(sav->sav_config, config,
		    DATA_TYPE_NVLIST_ARRAY) == 0);

		VERIFY(nvlist_add_nvlist_array(sav->sav_config,
		    config, newdevs, ndevs + oldndevs) == 0);
		/* nvlist_add_nvlist_array copies, so release our dups. */
		for (i = 0; i < oldndevs + ndevs; i++)
			nvlist_free(newdevs[i]);
		kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *));
	} else {
		/*
		 * Generate a new dev list.
		 */
		VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME,
		    KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(sav->sav_config, config,
		    devs, ndevs) == 0);
	}
}

/*
 * Stop and drop level 2 ARC devices
 */
void
spa_l2cache_drop(spa_t *spa)
{
	vdev_t *vd;
	int i;
	spa_aux_vdev_t *sav = &spa->spa_l2cache;

	for (i = 0; i < sav->sav_count; i++) {
		uint64_t pool;

		vd = sav->sav_vdevs[i];
		ASSERT(vd != NULL);

		/* Detach from the ARC only if the device is attached. */
		if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
		    pool != 0ULL && l2arc_vdev_present(vd))
			l2arc_remove_vdev(vd);
		if (vd->vdev_isl2cache)
			spa_l2cache_remove(vd);
		vdev_clear_stats(vd);
		(void) vdev_close(vd);
	}
}

/*
 * Pool Creation
 */
int
spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
    const char *history_str, nvlist_t *zplprops)
{
	spa_t *spa;
	char *altroot = NULL;
	vdev_t *rvd;
	dsl_pool_t *dp;
	dmu_tx_t *tx;
	int error = 0;
	uint64_t txg = TXG_INITIAL;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;
	uint64_t version;

	/*
	 * If this pool already exists, return failure.
	 */
	mutex_enter(&spa_namespace_lock);
	if (spa_lookup(pool) != NULL) {
		mutex_exit(&spa_namespace_lock);
		return (EEXIST);
	}

	/*
	 * Allocate a new spa_t structure.
	 */
	(void) nvlist_lookup_string(props,
	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
	spa = spa_add(pool, altroot);
	spa_activate(spa, spa_mode_global);

	spa->spa_uberblock.ub_txg = txg - 1;

	if (props && (error = spa_prop_validate(spa, props))) {
		spa_deactivate(spa);
		spa_remove(spa);
		mutex_exit(&spa_namespace_lock);
		return (error);
	}

	/* Default to the current version unless the caller requested one. */
	if (nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION),
	    &version) != 0)
		version = SPA_VERSION;
	ASSERT(version <= SPA_VERSION);
	spa->spa_uberblock.ub_version = version;
	spa->spa_ubsync = spa->spa_uberblock;

	/*
	 * Create "The Godfather" zio to hold all async IOs
	 */
	spa->spa_async_zio_root = zio_root(spa, NULL, NULL,
	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER);

	/*
	 * Create the root vdev.
	 */
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD);

	ASSERT(error != 0 || rvd != NULL);
	ASSERT(error != 0 || spa->spa_root_vdev == rvd);

	if (error == 0 && !zfs_allocatable_devs(nvroot))
		error = EINVAL;

	if (error == 0 &&
	    (error = vdev_create(rvd, txg, B_FALSE)) == 0 &&
	    (error = spa_validate_aux(spa, nvroot, txg,
	    VDEV_ALLOC_ADD)) == 0) {
		for (int c = 0; c < rvd->vdev_children; c++) {
			vdev_metaslab_set_size(rvd->vdev_child[c]);
			vdev_expand(rvd->vdev_child[c], txg);
		}
	}

	spa_config_exit(spa, SCL_ALL, FTAG);

	if (error != 0) {
		spa_unload(spa);
		spa_deactivate(spa);
		spa_remove(spa);
		mutex_exit(&spa_namespace_lock);
		return (error);
	}

	/*
	 * Get the list of spares, if specified.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) == 0) {
		VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME,
		    KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_spares(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_spares.sav_sync = B_TRUE;
	}

	/*
	 * Get the list of level 2 cache devices, if specified.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
	    &l2cache, &nl2cache) == 0) {
		VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
		    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_l2cache(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg);
	spa->spa_meta_objset = dp->dp_meta_objset;

	tx = dmu_tx_create_assigned(dp, txg);

	/*
	 * Create the pool config object.
	 */
	spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset,
	    DMU_OT_PACKED_NVLIST, SPA_CONFIG_BLOCKSIZE,
	    DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx);

	if (zap_add(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG,
	    sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) {
		cmn_err(CE_PANIC, "failed to add pool config");
	}

	/* Newly created pools with the right version are always deflated. */
	if (version >= SPA_VERSION_RAIDZ_DEFLATE) {
		spa->spa_deflate = TRUE;
		if (zap_add(spa->spa_meta_objset,
		    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
		    sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) {
			cmn_err(CE_PANIC, "failed to add deflate");
		}
	}

	/*
	 * Create the deferred-free bplist object.  Turn off compression
	 * because sync-to-convergence takes longer if the blocksize
	 * keeps changing.
	 */
	spa->spa_sync_bplist_obj = bplist_create(spa->spa_meta_objset,
	    1 << 14, tx);
	dmu_object_set_compress(spa->spa_meta_objset, spa->spa_sync_bplist_obj,
	    ZIO_COMPRESS_OFF, tx);

	if (zap_add(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST,
	    sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj, tx) != 0) {
		cmn_err(CE_PANIC, "failed to add bplist");
	}

	/*
	 * Create the pool's history object.
	 */
	if (version >= SPA_VERSION_ZPOOL_HISTORY)
		spa_history_create_obj(spa, tx);

	/*
	 * Set pool properties.
	 */
	spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS);
	spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION);
	spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE);
	spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND);
	if (props != NULL) {
		spa_configfile_set(spa, props, B_FALSE);
		spa_sync_props(spa, props, CRED(), tx);
	}

	dmu_tx_commit(tx);

	spa->spa_sync_on = B_TRUE;
	txg_sync_start(spa->spa_dsl_pool);

	/*
	 * We explicitly wait for the first transaction to complete so that our
	 * bean counters are appropriately updated.
	 */
	txg_wait_synced(spa->spa_dsl_pool, txg);

	spa_config_sync(spa, B_FALSE, B_TRUE);

	if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL)
		(void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE);
	spa_history_log_version(spa, LOG_POOL_CREATE);

	spa->spa_minref = refcount_count(&spa->spa_refcount);

	mutex_exit(&spa_namespace_lock);

	return (0);
}

#ifdef _KERNEL
/*
 * Get the root pool information from the root disk, then import the root pool
 * during the system boot up time.
 */
extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **);

/*
 * Read the on-disk label of the boot device and build a pool config
 * from it: the label's own vdev entry becomes the single child of a
 * newly-allocated root vdev, replacing whatever vdev tree the label
 * recorded.  On success returns the config nvlist (caller frees) and
 * stores the boot device's vdev guid in *guid; returns NULL if the
 * label cannot be read.
 */
static nvlist_t *
spa_generate_rootconf(char *devpath, char *devid, uint64_t *guid)
{
	nvlist_t *config;
	nvlist_t *nvtop, *nvroot;
	uint64_t pgid;

	if (vdev_disk_read_rootlabel(devpath, devid, &config) != 0)
		return (NULL);

	/*
	 * Add this top-level vdev to the child array.
	 */
	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvtop) == 0);
	VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
	    &pgid) == 0);
	VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, guid) == 0);

	/*
	 * Put this pool's top-level vdevs into a root vdev.
	 */
	VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
	    VDEV_TYPE_ROOT) == 0);
	VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0);
	VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0);
	VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
	    &nvtop, 1) == 0);

	/*
	 * Replace the existing vdev_tree with the new root vdev in
	 * this pool's configuration (remove the old, add the new).
	 */
	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
	nvlist_free(nvroot);
	return (config);
}

/*
 * Walk the vdev tree and see if we can find a device with "better"
 * configuration. A configuration is "better" if the label on that
 * device has a more recent txg.
 *
 * Recursive depth-first walk; for every leaf whose label is readable,
 * compare the label's pool txg against *txg and record the newer
 * device in *avd.  Callers seed *avd/*txg with the current candidate.
 */
static void
spa_alt_rootvdev(vdev_t *vd, vdev_t **avd, uint64_t *txg)
{
	for (int c = 0; c < vd->vdev_children; c++)
		spa_alt_rootvdev(vd->vdev_child[c], avd, txg);

	if (vd->vdev_ops->vdev_op_leaf) {
		nvlist_t *label;
		uint64_t label_txg;

		/* Leaves with unreadable labels are simply skipped. */
		if (vdev_disk_read_rootlabel(vd->vdev_physpath, vd->vdev_devid,
		    &label) != 0)
			return;

		VERIFY(nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_TXG,
		    &label_txg) == 0);

		/*
		 * Do we have a better boot device?
		 */
		if (label_txg > *txg) {
			*txg = label_txg;
			*avd = vd;
		}
		nvlist_free(label);
	}
}

/*
 * Import a root pool.
 *
 * For x86. devpath_list will consist of devid and/or physpath name of
 * the vdev (e.g. "id1,sd@SSEAGATE..." or "/pci@1f,0/ide@d/disk@0,0:a").
 * The GRUB "findroot" command will return the vdev we should boot.
 *
 * For Sparc, devpath_list consists the physpath name of the booting device
 * no matter the rootpool is a single device pool or a mirrored pool.
 * e.g.
 *	"/pci@1f,0/ide@d/disk@0,0:a"
 *
 * Returns 0 on success or an errno: EIO if the label cannot be read,
 * ENOENT if the boot vdev is not in the parsed tree, EINVAL if a
 * "better" (newer-txg) device exists or the boot device is an inactive
 * spare.  Holds spa_namespace_lock for the duration; any pre-existing
 * pool of the same name is removed and replaced verbatim.
 */
int
spa_import_rootpool(char *devpath, char *devid)
{
	spa_t *spa;
	vdev_t *rvd, *bvd, *avd = NULL;
	nvlist_t *config, *nvtop;
	uint64_t guid, txg;
	char *pname;
	int error;

	/*
	 * Read the label from the boot device and generate a configuration.
	 */
	if ((config = spa_generate_rootconf(devpath, devid, &guid)) == NULL) {
		cmn_err(CE_NOTE, "Can not read the pool label from '%s'",
		    devpath);
		return (EIO);
	}

	VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
	    &pname) == 0);
	VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);

	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(pname)) != NULL) {
		/*
		 * Remove the existing root pool from the namespace so that we
		 * can replace it with the correct config we just read in.
		 */
		spa_remove(spa);
	}

	spa = spa_add(pname, NULL);
	spa->spa_is_root = B_TRUE;
	/* Trust this label-derived config as-is rather than re-deriving it. */
	spa->spa_load_verbatim = B_TRUE;

	/*
	 * Build up a vdev tree based on the boot device's label config.
	 */
	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvtop) == 0);
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	error = spa_config_parse(spa, &rvd, nvtop, NULL, 0,
	    VDEV_ALLOC_ROOTPOOL);
	spa_config_exit(spa, SCL_ALL, FTAG);
	if (error) {
		mutex_exit(&spa_namespace_lock);
		nvlist_free(config);
		cmn_err(CE_NOTE, "Can not parse the config for pool '%s'",
		    pname);
		return (error);
	}

	/*
	 * Get the boot vdev.
	 */
	if ((bvd = vdev_lookup_by_guid(rvd, guid)) == NULL) {
		cmn_err(CE_NOTE, "Can not find the boot vdev for guid %llu",
		    (u_longlong_t)guid);
		error = ENOENT;
		goto out;
	}

	/*
	 * Determine if there is a better boot device.
	 */
	avd = bvd;
	spa_alt_rootvdev(rvd, &avd, &txg);
	if (avd != bvd) {
		cmn_err(CE_NOTE, "The boot device is 'degraded'. Please "
		    "try booting from '%s'", avd->vdev_path);
		error = EINVAL;
		goto out;
	}

	/*
	 * If the boot device is part of a spare vdev then ensure that
	 * we're booting off the active spare.
	 */
	if (bvd->vdev_parent->vdev_ops == &vdev_spare_ops &&
	    !bvd->vdev_isspare) {
		/*
		 * NOTE(review): child[1] assumes the active spare is always
		 * the second child of the spare vdev — confirm against
		 * vdev_spare layout.
		 */
		cmn_err(CE_NOTE, "The boot device is currently spared. Please "
		    "try booting from '%s'",
		    bvd->vdev_parent->vdev_child[1]->vdev_path);
		error = EINVAL;
		goto out;
	}

	VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0);
	error = 0;
	spa_history_log_version(spa, LOG_POOL_IMPORT);
out:
	/* The parsed tree was only needed for validation; free it. */
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	vdev_free(rvd);
	spa_config_exit(spa, SCL_ALL, FTAG);
	mutex_exit(&spa_namespace_lock);

	nvlist_free(config);
	return (error);
}

#endif

/*
 * Take a pool and insert it into the namespace as if it had been loaded at
 * boot.
24969425SEric.Schrock@Sun.COM */ 24979425SEric.Schrock@Sun.COM int 24989425SEric.Schrock@Sun.COM spa_import_verbatim(const char *pool, nvlist_t *config, nvlist_t *props) 24999425SEric.Schrock@Sun.COM { 25009425SEric.Schrock@Sun.COM spa_t *spa; 25019425SEric.Schrock@Sun.COM char *altroot = NULL; 25029425SEric.Schrock@Sun.COM 25039425SEric.Schrock@Sun.COM mutex_enter(&spa_namespace_lock); 25049425SEric.Schrock@Sun.COM if (spa_lookup(pool) != NULL) { 25059425SEric.Schrock@Sun.COM mutex_exit(&spa_namespace_lock); 25069425SEric.Schrock@Sun.COM return (EEXIST); 25079425SEric.Schrock@Sun.COM } 25089425SEric.Schrock@Sun.COM 25099425SEric.Schrock@Sun.COM (void) nvlist_lookup_string(props, 25109425SEric.Schrock@Sun.COM zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); 25119425SEric.Schrock@Sun.COM spa = spa_add(pool, altroot); 25129425SEric.Schrock@Sun.COM 251310100SLin.Ling@Sun.COM spa->spa_load_verbatim = B_TRUE; 251410000SVictor.Latushkin@Sun.COM 25159425SEric.Schrock@Sun.COM VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0); 25169425SEric.Schrock@Sun.COM 25179425SEric.Schrock@Sun.COM if (props != NULL) 25189425SEric.Schrock@Sun.COM spa_configfile_set(spa, props, B_FALSE); 25199425SEric.Schrock@Sun.COM 25209425SEric.Schrock@Sun.COM spa_config_sync(spa, B_FALSE, B_TRUE); 25219425SEric.Schrock@Sun.COM 25229425SEric.Schrock@Sun.COM mutex_exit(&spa_namespace_lock); 25239946SMark.Musante@Sun.COM spa_history_log_version(spa, LOG_POOL_IMPORT); 25249425SEric.Schrock@Sun.COM 25259425SEric.Schrock@Sun.COM return (0); 25269425SEric.Schrock@Sun.COM } 25279425SEric.Schrock@Sun.COM 25289425SEric.Schrock@Sun.COM /* 25296423Sgw25295 * Import a non-root pool into the system. 
 *
 * Unlike spa_import_verbatim(), the config is validated via spa_load()
 * (with mosconfig = TRUE, i.e. the caller's config is authoritative),
 * spares/l2cache devices from the user's config override what was
 * loaded, and the config cache is updated for writable pools.  Returns
 * 0 or an errno (EEXIST if the name is taken, or whatever
 * spa_load()/spa_prop_set() failed with).  Async tasks are suspended
 * for the duration of the load and resumed only on success.
 */
int
spa_import(const char *pool, nvlist_t *config, nvlist_t *props)
{
	spa_t *spa;
	char *altroot = NULL;
	int error;
	nvlist_t *nvroot;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;

	/*
	 * If a pool with this name exists, return failure.
	 */
	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(pool)) != NULL) {
		mutex_exit(&spa_namespace_lock);
		return (EEXIST);
	}

	/*
	 * Create and initialize the spa structure.
	 */
	(void) nvlist_lookup_string(props,
	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
	spa = spa_add(pool, altroot);
	spa_activate(spa, spa_mode_global);

	/*
	 * Don't start async tasks until we know everything is healthy.
	 */
	spa_async_suspend(spa);

	/*
	 * Pass off the heavy lifting to spa_load().  Pass TRUE for mosconfig
	 * because the user-supplied config is actually the one to trust when
	 * doing an import.
	 */
	error = spa_load(spa, config, SPA_LOAD_IMPORT, B_TRUE);

	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	/*
	 * Toss any existing sparelist, as it doesn't have any validity
	 * anymore, and conflicts with spa_has_spare().
	 */
	if (spa->spa_spares.sav_config) {
		nvlist_free(spa->spa_spares.sav_config);
		spa->spa_spares.sav_config = NULL;
		spa_load_spares(spa);
	}
	if (spa->spa_l2cache.sav_config) {
		nvlist_free(spa->spa_l2cache.sav_config);
		spa->spa_l2cache.sav_config = NULL;
		spa_load_l2cache(spa);
	}

	/*
	 * Validate the aux devices even if spa_load() failed, but only
	 * fold the result into 'error' when the load itself succeeded.
	 */
	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) == 0);
	if (error == 0)
		error = spa_validate_aux(spa, nvroot, -1ULL,
		    VDEV_ALLOC_SPARE);
	if (error == 0)
		error = spa_validate_aux(spa, nvroot, -1ULL,
		    VDEV_ALLOC_L2CACHE);
	spa_config_exit(spa, SCL_ALL, FTAG);

	if (props != NULL)
		spa_configfile_set(spa, props, B_FALSE);

	if (error != 0 || (props && spa_writeable(spa) &&
	    (error = spa_prop_set(spa, props)))) {
		/* Failed import: tear the half-built spa back down. */
		spa_unload(spa);
		spa_deactivate(spa);
		spa_remove(spa);
		mutex_exit(&spa_namespace_lock);
		return (error);
	}

	spa_async_resume(spa);

	/*
	 * Override any spares and level 2 cache devices as specified by
	 * the user, as these may have correct device names/devids, etc.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) == 0) {
		if (spa->spa_spares.sav_config)
			VERIFY(nvlist_remove(spa->spa_spares.sav_config,
			    ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0);
		else
			VERIFY(nvlist_alloc(&spa->spa_spares.sav_config,
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_spares(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_spares.sav_sync = B_TRUE;
	}
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
	    &l2cache, &nl2cache) == 0) {
		if (spa->spa_l2cache.sav_config)
			VERIFY(nvlist_remove(spa->spa_l2cache.sav_config,
			    ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0);
		else
			VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_l2cache(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	if (spa_writeable(spa)) {
		/*
		 * Update the config cache to include the newly-imported pool.
		 */
		spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
	}

	/*
	 * It's possible that the pool was expanded while it was exported.
	 * We kick off an async task to handle this for us.
	 */
	spa_async_request(spa, SPA_ASYNC_AUTOEXPAND);

	mutex_exit(&spa_namespace_lock);
	spa_history_log_version(spa, LOG_POOL_IMPORT);

	return (0);
}


/*
 * This (illegal) pool name is used when temporarily importing a spa_t in order
 * to get the vdev stats associated with the imported devices.
 */
#define	TRYIMPORT_NAME	"$import"

/*
 * Probe an importable pool: temporarily load it read-only under the
 * reserved name TRYIMPORT_NAME, generate its current config (with the
 * real pool name/state restored, plus timestamp, bootfs, spares and
 * l2cache information), then unload and remove it again.  Returns the
 * config nvlist (caller frees) or NULL if 'tryconfig' lacks the pool
 * name or state.
 */
nvlist_t *
spa_tryimport(nvlist_t *tryconfig)
{
	nvlist_t *config = NULL;
	char *poolname;
	spa_t *spa;
	uint64_t state;
	int error;

	if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname))
		return (NULL);

	if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state))
		return (NULL);

	/*
	 * Create and initialize the spa structure.
	 */
	mutex_enter(&spa_namespace_lock);
	spa = spa_add(TRYIMPORT_NAME, NULL);
	spa_activate(spa, FREAD);

	/*
	 * Pass off the heavy lifting to spa_load().
	 * Pass TRUE for mosconfig because the user-supplied config
	 * is actually the one to trust when doing an import.
	 */
	error = spa_load(spa, tryconfig, SPA_LOAD_TRYIMPORT, B_TRUE);

	/*
	 * If 'tryconfig' was at least parsable, return the current config.
	 */
	if (spa->spa_root_vdev != NULL) {
		config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
		VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
		    poolname) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
		    state) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP,
		    spa->spa_uberblock.ub_timestamp) == 0);

		/*
		 * If the bootfs property exists on this pool then we
		 * copy it out so that external consumers can tell which
		 * pools are bootable.
		 */
		if ((!error || error == EEXIST) && spa->spa_bootfs) {
			char *tmpname = kmem_alloc(MAXPATHLEN, KM_SLEEP);

			/*
			 * We have to play games with the name since the
			 * pool was opened as TRYIMPORT_NAME.
			 */
			if (dsl_dsobj_to_dsname(spa_name(spa),
			    spa->spa_bootfs, tmpname) == 0) {
				char *cp;
				char *dsname = kmem_alloc(MAXPATHLEN, KM_SLEEP);

				/*
				 * Swap the leading TRYIMPORT_NAME component
				 * for the real pool name.
				 */
				cp = strchr(tmpname, '/');
				if (cp == NULL) {
					(void) strlcpy(dsname, tmpname,
					    MAXPATHLEN);
				} else {
					(void) snprintf(dsname, MAXPATHLEN,
					    "%s/%s", poolname, ++cp);
				}
				VERIFY(nvlist_add_string(config,
				    ZPOOL_CONFIG_BOOTFS, dsname) == 0);
				kmem_free(dsname, MAXPATHLEN);
			}
			kmem_free(tmpname, MAXPATHLEN);
		}

		/*
		 * Add the list of hot spares and level 2 cache devices.
		 */
		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
		spa_add_spares(spa, config);
		spa_add_l2cache(spa, config);
		spa_config_exit(spa, SCL_CONFIG, FTAG);
	}

	/* The temporary spa is always torn down before returning. */
	spa_unload(spa);
	spa_deactivate(spa);
	spa_remove(spa);
	mutex_exit(&spa_namespace_lock);

	return (config);
}

/*
 * Pool export/destroy
 *
 * The act of destroying or exporting a pool is very simple.  We make sure there
 * is no more pending I/O and any references to the pool are gone.  Then, we
 * update the pool state and sync all the labels to disk, removing the
 * configuration from the cache afterwards. If the 'hardforce' flag is set, then
 * we don't sync the labels or remove the configuration cache.
 *
 * Common worker for spa_export(), spa_destroy() and spa_reset();
 * 'new_state' selects the behavior.  Returns 0 or an errno: EROFS if
 * ZFS is globally read-only, ENOENT if the pool is unknown, EBUSY if
 * it still has references, EXDEV if a shared spare blocks the export.
 */
static int
spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
    boolean_t force, boolean_t hardforce)
{
	spa_t *spa;

	if (oldconfig)
		*oldconfig = NULL;

	if (!(spa_mode_global & FWRITE))
		return (EROFS);

	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(pool)) == NULL) {
		mutex_exit(&spa_namespace_lock);
		return (ENOENT);
	}

	/*
	 * Put a hold on the pool, drop the namespace lock, stop async tasks,
	 * reacquire the namespace lock, and see if we can export.
	 */
	spa_open_ref(spa, FTAG);
	mutex_exit(&spa_namespace_lock);
	spa_async_suspend(spa);
	mutex_enter(&spa_namespace_lock);
	spa_close(spa, FTAG);

	/*
	 * The pool will be in core if it's openable,
	 * in which case we can modify its state.
	 */
	if (spa->spa_state != POOL_STATE_UNINITIALIZED && spa->spa_sync_on) {
		/*
		 * Objsets may be open only because they're dirty, so we
		 * have to force it to sync before checking spa_refcnt.
		 */
		txg_wait_synced(spa->spa_dsl_pool, 0);

		/*
		 * A pool cannot be exported or destroyed if there are active
		 * references.  If we are resetting a pool, allow references by
		 * fault injection handlers.
		 */
		if (!spa_refcount_zero(spa) ||
		    (spa->spa_inject_ref != 0 &&
		    new_state != POOL_STATE_UNINITIALIZED)) {
			spa_async_resume(spa);
			mutex_exit(&spa_namespace_lock);
			return (EBUSY);
		}

		/*
		 * A pool cannot be exported if it has an active shared spare.
		 * This is to prevent other pools stealing the active spare
		 * from an exported pool. At user's own will, such pool can
		 * be forcedly exported.
		 */
		if (!force && new_state == POOL_STATE_EXPORTED &&
		    spa_has_active_shared_spare(spa)) {
			spa_async_resume(spa);
			mutex_exit(&spa_namespace_lock);
			return (EXDEV);
		}

		/*
		 * We want this to be reflected on every label,
		 * so mark them all dirty.  spa_unload() will do the
		 * final sync that pushes these changes out.
		 */
		if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) {
			spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
			spa->spa_state = new_state;
			spa->spa_final_txg = spa_last_synced_txg(spa) + 1;
			vdev_config_dirty(spa->spa_root_vdev);
			spa_config_exit(spa, SCL_ALL, FTAG);
		}
	}

	spa_event_notify(spa, NULL, ESC_ZFS_POOL_DESTROY);

	if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
		spa_unload(spa);
		spa_deactivate(spa);
	}

	/* Hand the caller a copy of the final config, if requested. */
	if (oldconfig && spa->spa_config)
		VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0);

	if (new_state != POOL_STATE_UNINITIALIZED) {
		if (!hardforce)
			spa_config_sync(spa, B_TRUE, B_TRUE);
		spa_remove(spa);
	}
	mutex_exit(&spa_namespace_lock);

	return (0);
}

/*
 * Destroy a storage pool.
 */
int
spa_destroy(char *pool)
{
	return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL,
	    B_FALSE, B_FALSE));
}

/*
 * Export a storage pool.
 * Thin wrapper around spa_export_common() with POOL_STATE_EXPORTED;
 * see that function for the errno contract.
 */
int
spa_export(char *pool, nvlist_t **oldconfig, boolean_t force,
    boolean_t hardforce)
{
	return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig,
	    force, hardforce));
}

/*
 * Similar to spa_export(), this unloads the spa_t without actually removing it
 * from the namespace in any way.
 */
int
spa_reset(char *pool)
{
	return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL,
	    B_FALSE, B_FALSE));
}

/*
 * ==========================================================================
 * Device manipulation
 * ==========================================================================
 */

/*
 * Add a device to a storage pool.
 *
 * 'nvroot' describes the new top-level vdevs and/or spares and l2cache
 * devices to add.  The whole operation runs inside a
 * spa_vdev_enter()/spa_vdev_exit() transaction; all error paths return
 * through spa_vdev_exit(), which also frees the partially-built 'vd'
 * tree.  Returns 0 or an errno (EINVAL if nothing to add, or a
 * parse/create/validate failure).
 */
int
spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
{
	uint64_t txg, id;
	int error;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *vd, *tvd;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;

	txg = spa_vdev_enter(spa);

	if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0,
	    VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, NULL, txg, error));

	spa->spa_pending_vdev = vd;	/* spa_vdev_exit() will clear this */

	/* Missing arrays simply mean "no spares / no l2cache to add". */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
	    &nspares) != 0)
		nspares = 0;

	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache,
	    &nl2cache) != 0)
		nl2cache = 0;

	if (vd->vdev_children == 0 && nspares == 0 && nl2cache == 0)
		return (spa_vdev_exit(spa, vd, txg, EINVAL));

	if (vd->vdev_children != 0 &&
	    (error = vdev_create(vd, txg, B_FALSE)) != 0)
		return (spa_vdev_exit(spa, vd, txg, error));

	/*
	 * We must validate the spares and l2cache devices after checking the
	 * children.  Otherwise, vdev_inuse() will blindly overwrite the spare.
	 */
	if ((error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, vd, txg, error));

	/*
	 * Transfer each new top-level vdev from vd to rvd.
	 */
	for (int c = 0; c < vd->vdev_children; c++) {

		/*
		 * Set the vdev id to the first hole, if one exists.
		 * (If no hole is found, 'id' ends up == rvd->vdev_children,
		 * i.e. the new vdev is appended.)
		 */
		for (id = 0; id < rvd->vdev_children; id++) {
			if (rvd->vdev_child[id]->vdev_ishole) {
				vdev_free(rvd->vdev_child[id]);
				break;
			}
		}
		tvd = vd->vdev_child[c];
		vdev_remove_child(vd, tvd);
		tvd->vdev_id = id;
		vdev_add_child(rvd, tvd);
		vdev_config_dirty(tvd);
	}

	if (nspares != 0) {
		spa_set_aux_vdevs(&spa->spa_spares, spares, nspares,
		    ZPOOL_CONFIG_SPARES);
		spa_load_spares(spa);
		spa->spa_spares.sav_sync = B_TRUE;
	}

	if (nl2cache != 0) {
		spa_set_aux_vdevs(&spa->spa_l2cache, l2cache, nl2cache,
		    ZPOOL_CONFIG_L2CACHE);
		spa_load_l2cache(spa);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	/*
	 * We have to be careful when adding new vdevs to an existing pool.
	 * If other threads start allocating from these vdevs before we
	 * sync the config cache, and we lose power, then upon reboot we may
	 * fail to open the pool because there are DVAs that the config cache
	 * can't translate.  Therefore, we first add the vdevs without
	 * initializing metaslabs; sync the config cache (via spa_vdev_exit());
	 * and then let spa_config_update() initialize the new metaslabs.
	 *
	 * spa_load() checks for added-but-not-initialized vdevs, so that
	 * if we lose power at any point in this sequence, the remaining
	 * steps will be completed the next time we load the pool.
	 */
	(void) spa_vdev_exit(spa, vd, txg, 0);

	mutex_enter(&spa_namespace_lock);
	spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
	mutex_exit(&spa_namespace_lock);

	return (0);
}

/*
 * Attach a device to a mirror.  The arguments are the path to any device
 * in the mirror, and the nvroot for the new device.  If the path specifies
 * a device that is not mirrored, we automatically insert the mirror vdev.
 *
 * If 'replacing' is specified, the new device is intended to replace the
 * existing device; in this case the two devices are made into their own
 * mirror using the 'replacing' vdev, which is functionally identical to
 * the mirror vdev (it actually reuses all the same ops) but has a few
 * extra rules: you can't attach to it after it's been created, and upon
 * completion of resilvering, the first disk (the one being replaced)
 * is automatically detached.
3019789Sahrens */ 3020789Sahrens int 30211544Seschrock spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) 3022789Sahrens { 3023789Sahrens uint64_t txg, open_txg; 3024789Sahrens vdev_t *rvd = spa->spa_root_vdev; 3025789Sahrens vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd; 30262082Seschrock vdev_ops_t *pvops; 30277313SEric.Kustarz@Sun.COM char *oldvdpath, *newvdpath; 30287313SEric.Kustarz@Sun.COM int newvd_isspare; 30297313SEric.Kustarz@Sun.COM int error; 3030789Sahrens 3031789Sahrens txg = spa_vdev_enter(spa); 3032789Sahrens 30336643Seschrock oldvd = spa_lookup_by_guid(spa, guid, B_FALSE); 3034789Sahrens 3035789Sahrens if (oldvd == NULL) 3036789Sahrens return (spa_vdev_exit(spa, NULL, txg, ENODEV)); 3037789Sahrens 30381585Sbonwick if (!oldvd->vdev_ops->vdev_op_leaf) 30391585Sbonwick return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 30401585Sbonwick 3041789Sahrens pvd = oldvd->vdev_parent; 3042789Sahrens 30432082Seschrock if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0, 30444451Seschrock VDEV_ALLOC_ADD)) != 0) 30454451Seschrock return (spa_vdev_exit(spa, NULL, txg, EINVAL)); 30464451Seschrock 30474451Seschrock if (newrootvd->vdev_children != 1) 3048789Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); 3049789Sahrens 3050789Sahrens newvd = newrootvd->vdev_child[0]; 3051789Sahrens 3052789Sahrens if (!newvd->vdev_ops->vdev_op_leaf) 3053789Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); 3054789Sahrens 30552082Seschrock if ((error = vdev_create(newrootvd, txg, replacing)) != 0) 3056789Sahrens return (spa_vdev_exit(spa, newrootvd, txg, error)); 3057789Sahrens 30584527Sperrin /* 30594527Sperrin * Spares can't replace logs 30604527Sperrin */ 30617326SEric.Schrock@Sun.COM if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare) 30624527Sperrin return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 30634527Sperrin 30642082Seschrock if (!replacing) { 30652082Seschrock /* 30662082Seschrock * For attach, the only 
allowable parent is a mirror or the root 30672082Seschrock * vdev. 30682082Seschrock */ 30692082Seschrock if (pvd->vdev_ops != &vdev_mirror_ops && 30702082Seschrock pvd->vdev_ops != &vdev_root_ops) 30712082Seschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 30722082Seschrock 30732082Seschrock pvops = &vdev_mirror_ops; 30742082Seschrock } else { 30752082Seschrock /* 30762082Seschrock * Active hot spares can only be replaced by inactive hot 30772082Seschrock * spares. 30782082Seschrock */ 30792082Seschrock if (pvd->vdev_ops == &vdev_spare_ops && 30802082Seschrock pvd->vdev_child[1] == oldvd && 30812082Seschrock !spa_has_spare(spa, newvd->vdev_guid)) 30822082Seschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 30832082Seschrock 30842082Seschrock /* 30852082Seschrock * If the source is a hot spare, and the parent isn't already a 30862082Seschrock * spare, then we want to create a new hot spare. Otherwise, we 30873377Seschrock * want to create a replacing vdev. The user is not allowed to 30883377Seschrock * attach to a spared vdev child unless the 'isspare' state is 30893377Seschrock * the same (spare replaces spare, non-spare replaces 30903377Seschrock * non-spare). 30912082Seschrock */ 30922082Seschrock if (pvd->vdev_ops == &vdev_replacing_ops) 30932082Seschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 30943377Seschrock else if (pvd->vdev_ops == &vdev_spare_ops && 30953377Seschrock newvd->vdev_isspare != oldvd->vdev_isspare) 30963377Seschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 30972082Seschrock else if (pvd->vdev_ops != &vdev_spare_ops && 30982082Seschrock newvd->vdev_isspare) 30992082Seschrock pvops = &vdev_spare_ops; 31002082Seschrock else 31012082Seschrock pvops = &vdev_replacing_ops; 31022082Seschrock } 31032082Seschrock 31041175Slling /* 31059816SGeorge.Wilson@Sun.COM * Make sure the new device is big enough. 
31061175Slling */ 31079816SGeorge.Wilson@Sun.COM if (newvd->vdev_asize < vdev_get_min_asize(oldvd)) 3108789Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW)); 3109789Sahrens 31101732Sbonwick /* 31111732Sbonwick * The new device cannot have a higher alignment requirement 31121732Sbonwick * than the top-level vdev. 31131732Sbonwick */ 31141732Sbonwick if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift) 3115789Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EDOM)); 3116789Sahrens 3117789Sahrens /* 3118789Sahrens * If this is an in-place replacement, update oldvd's path and devid 3119789Sahrens * to make it distinguishable from newvd, and unopenable from now on. 3120789Sahrens */ 3121789Sahrens if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) { 3122789Sahrens spa_strfree(oldvd->vdev_path); 3123789Sahrens oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5, 3124789Sahrens KM_SLEEP); 3125789Sahrens (void) sprintf(oldvd->vdev_path, "%s/%s", 3126789Sahrens newvd->vdev_path, "old"); 3127789Sahrens if (oldvd->vdev_devid != NULL) { 3128789Sahrens spa_strfree(oldvd->vdev_devid); 3129789Sahrens oldvd->vdev_devid = NULL; 3130789Sahrens } 3131789Sahrens } 3132789Sahrens 3133789Sahrens /* 31342082Seschrock * If the parent is not a mirror, or if we're replacing, insert the new 31352082Seschrock * mirror/replacing/spare vdev above oldvd. 3136789Sahrens */ 3137789Sahrens if (pvd->vdev_ops != pvops) 3138789Sahrens pvd = vdev_add_parent(oldvd, pvops); 3139789Sahrens 3140789Sahrens ASSERT(pvd->vdev_top->vdev_parent == rvd); 3141789Sahrens ASSERT(pvd->vdev_ops == pvops); 3142789Sahrens ASSERT(oldvd->vdev_parent == pvd); 3143789Sahrens 3144789Sahrens /* 3145789Sahrens * Extract the new device from its root and add it to pvd. 
3146789Sahrens */ 3147789Sahrens vdev_remove_child(newrootvd, newvd); 3148789Sahrens newvd->vdev_id = pvd->vdev_children; 3149*10594SGeorge.Wilson@Sun.COM newvd->vdev_crtxg = oldvd->vdev_crtxg; 3150789Sahrens vdev_add_child(pvd, newvd); 3151789Sahrens 3152789Sahrens tvd = newvd->vdev_top; 3153789Sahrens ASSERT(pvd->vdev_top == tvd); 3154789Sahrens ASSERT(tvd->vdev_parent == rvd); 3155789Sahrens 3156789Sahrens vdev_config_dirty(tvd); 3157789Sahrens 3158789Sahrens /* 3159789Sahrens * Set newvd's DTL to [TXG_INITIAL, open_txg]. It will propagate 3160789Sahrens * upward when spa_vdev_exit() calls vdev_dtl_reassess(). 3161789Sahrens */ 3162789Sahrens open_txg = txg + TXG_CONCURRENT_STATES - 1; 3163789Sahrens 31648241SJeff.Bonwick@Sun.COM vdev_dtl_dirty(newvd, DTL_MISSING, 31658241SJeff.Bonwick@Sun.COM TXG_INITIAL, open_txg - TXG_INITIAL + 1); 3166789Sahrens 31679425SEric.Schrock@Sun.COM if (newvd->vdev_isspare) { 31683377Seschrock spa_spare_activate(newvd); 31699425SEric.Schrock@Sun.COM spa_event_notify(spa, newvd, ESC_ZFS_VDEV_SPARE); 31709425SEric.Schrock@Sun.COM } 31719425SEric.Schrock@Sun.COM 31727754SJeff.Bonwick@Sun.COM oldvdpath = spa_strdup(oldvd->vdev_path); 31737754SJeff.Bonwick@Sun.COM newvdpath = spa_strdup(newvd->vdev_path); 31747313SEric.Kustarz@Sun.COM newvd_isspare = newvd->vdev_isspare; 31751544Seschrock 3176789Sahrens /* 3177789Sahrens * Mark newvd's DTL dirty in this txg. 3178789Sahrens */ 31791732Sbonwick vdev_dirty(tvd, VDD_DTL, newvd, txg); 3180789Sahrens 3181789Sahrens (void) spa_vdev_exit(spa, newrootvd, open_txg, 0); 3182789Sahrens 31839946SMark.Musante@Sun.COM spa_history_internal_log(LOG_POOL_VDEV_ATTACH, spa, NULL, 31849946SMark.Musante@Sun.COM CRED(), "%s vdev=%s %s vdev=%s", 31859946SMark.Musante@Sun.COM replacing && newvd_isspare ? "spare in" : 31869946SMark.Musante@Sun.COM replacing ? "replace" : "attach", newvdpath, 31879946SMark.Musante@Sun.COM replacing ? 
"for" : "to", oldvdpath); 31887313SEric.Kustarz@Sun.COM 31897313SEric.Kustarz@Sun.COM spa_strfree(oldvdpath); 31907313SEric.Kustarz@Sun.COM spa_strfree(newvdpath); 31917313SEric.Kustarz@Sun.COM 3192789Sahrens /* 31937046Sahrens * Kick off a resilver to update newvd. 3194789Sahrens */ 31957046Sahrens VERIFY3U(spa_scrub(spa, POOL_SCRUB_RESILVER), ==, 0); 3196789Sahrens 3197789Sahrens return (0); 3198789Sahrens } 3199789Sahrens 3200789Sahrens /* 3201789Sahrens * Detach a device from a mirror or replacing vdev. 3202789Sahrens * If 'replace_done' is specified, only detach if the parent 3203789Sahrens * is a replacing vdev. 3204789Sahrens */ 3205789Sahrens int 32068241SJeff.Bonwick@Sun.COM spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) 3207789Sahrens { 3208789Sahrens uint64_t txg; 32098241SJeff.Bonwick@Sun.COM int error; 3210789Sahrens vdev_t *rvd = spa->spa_root_vdev; 3211789Sahrens vdev_t *vd, *pvd, *cvd, *tvd; 32122082Seschrock boolean_t unspare = B_FALSE; 32132082Seschrock uint64_t unspare_guid; 32146673Seschrock size_t len; 3215789Sahrens 3216789Sahrens txg = spa_vdev_enter(spa); 3217789Sahrens 32186643Seschrock vd = spa_lookup_by_guid(spa, guid, B_FALSE); 3219789Sahrens 3220789Sahrens if (vd == NULL) 3221789Sahrens return (spa_vdev_exit(spa, NULL, txg, ENODEV)); 3222789Sahrens 32231585Sbonwick if (!vd->vdev_ops->vdev_op_leaf) 32241585Sbonwick return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 32251585Sbonwick 3226789Sahrens pvd = vd->vdev_parent; 3227789Sahrens 3228789Sahrens /* 32298241SJeff.Bonwick@Sun.COM * If the parent/child relationship is not as expected, don't do it. 32308241SJeff.Bonwick@Sun.COM * Consider M(A,R(B,C)) -- that is, a mirror of A with a replacing 32318241SJeff.Bonwick@Sun.COM * vdev that's replacing B with C. The user's intent in replacing 32328241SJeff.Bonwick@Sun.COM * is to go from M(A,B) to M(A,C). 
If the user decides to cancel 32338241SJeff.Bonwick@Sun.COM * the replace by detaching C, the expected behavior is to end up 32348241SJeff.Bonwick@Sun.COM * M(A,B). But suppose that right after deciding to detach C, 32358241SJeff.Bonwick@Sun.COM * the replacement of B completes. We would have M(A,C), and then 32368241SJeff.Bonwick@Sun.COM * ask to detach C, which would leave us with just A -- not what 32378241SJeff.Bonwick@Sun.COM * the user wanted. To prevent this, we make sure that the 32388241SJeff.Bonwick@Sun.COM * parent/child relationship hasn't changed -- in this example, 32398241SJeff.Bonwick@Sun.COM * that C's parent is still the replacing vdev R. 32408241SJeff.Bonwick@Sun.COM */ 32418241SJeff.Bonwick@Sun.COM if (pvd->vdev_guid != pguid && pguid != 0) 32428241SJeff.Bonwick@Sun.COM return (spa_vdev_exit(spa, NULL, txg, EBUSY)); 32438241SJeff.Bonwick@Sun.COM 32448241SJeff.Bonwick@Sun.COM /* 3245789Sahrens * If replace_done is specified, only remove this device if it's 32462082Seschrock * the first child of a replacing vdev. For the 'spare' vdev, either 32472082Seschrock * disk can be removed. 3248789Sahrens */ 32492082Seschrock if (replace_done) { 32502082Seschrock if (pvd->vdev_ops == &vdev_replacing_ops) { 32512082Seschrock if (vd->vdev_id != 0) 32522082Seschrock return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 32532082Seschrock } else if (pvd->vdev_ops != &vdev_spare_ops) { 32542082Seschrock return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 32552082Seschrock } 32562082Seschrock } 32572082Seschrock 32582082Seschrock ASSERT(pvd->vdev_ops != &vdev_spare_ops || 32594577Sahrens spa_version(spa) >= SPA_VERSION_SPARES); 3260789Sahrens 3261789Sahrens /* 32622082Seschrock * Only mirror, replacing, and spare vdevs support detach. 
3263789Sahrens */ 3264789Sahrens if (pvd->vdev_ops != &vdev_replacing_ops && 32652082Seschrock pvd->vdev_ops != &vdev_mirror_ops && 32662082Seschrock pvd->vdev_ops != &vdev_spare_ops) 3267789Sahrens return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 3268789Sahrens 3269789Sahrens /* 32708241SJeff.Bonwick@Sun.COM * If this device has the only valid copy of some data, 32718241SJeff.Bonwick@Sun.COM * we cannot safely detach it. 3272789Sahrens */ 32738241SJeff.Bonwick@Sun.COM if (vdev_dtl_required(vd)) 3274789Sahrens return (spa_vdev_exit(spa, NULL, txg, EBUSY)); 3275789Sahrens 32768241SJeff.Bonwick@Sun.COM ASSERT(pvd->vdev_children >= 2); 32778241SJeff.Bonwick@Sun.COM 3278789Sahrens /* 32796673Seschrock * If we are detaching the second disk from a replacing vdev, then 32806673Seschrock * check to see if we changed the original vdev's path to have "/old" 32816673Seschrock * at the end in spa_vdev_attach(). If so, undo that change now. 32826673Seschrock */ 32836673Seschrock if (pvd->vdev_ops == &vdev_replacing_ops && vd->vdev_id == 1 && 32846673Seschrock pvd->vdev_child[0]->vdev_path != NULL && 32856673Seschrock pvd->vdev_child[1]->vdev_path != NULL) { 32866673Seschrock ASSERT(pvd->vdev_child[1] == vd); 32876673Seschrock cvd = pvd->vdev_child[0]; 32886673Seschrock len = strlen(vd->vdev_path); 32896673Seschrock if (strncmp(cvd->vdev_path, vd->vdev_path, len) == 0 && 32906673Seschrock strcmp(cvd->vdev_path + len, "/old") == 0) { 32916673Seschrock spa_strfree(cvd->vdev_path); 32926673Seschrock cvd->vdev_path = spa_strdup(vd->vdev_path); 32936673Seschrock } 32946673Seschrock } 32956673Seschrock 32966673Seschrock /* 32972082Seschrock * If we are detaching the original disk from a spare, then it implies 32982082Seschrock * that the spare should become a real disk, and be removed from the 32992082Seschrock * active spare list for the pool. 
33002082Seschrock */ 33012082Seschrock if (pvd->vdev_ops == &vdev_spare_ops && 33028241SJeff.Bonwick@Sun.COM vd->vdev_id == 0 && pvd->vdev_child[1]->vdev_isspare) 33032082Seschrock unspare = B_TRUE; 33042082Seschrock 33052082Seschrock /* 3306789Sahrens * Erase the disk labels so the disk can be used for other things. 3307789Sahrens * This must be done after all other error cases are handled, 3308789Sahrens * but before we disembowel vd (so we can still do I/O to it). 3309789Sahrens * But if we can't do it, don't treat the error as fatal -- 3310789Sahrens * it may be that the unwritability of the disk is the reason 3311789Sahrens * it's being detached! 3312789Sahrens */ 33133377Seschrock error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); 3314789Sahrens 3315789Sahrens /* 3316789Sahrens * Remove vd from its parent and compact the parent's children. 3317789Sahrens */ 3318789Sahrens vdev_remove_child(pvd, vd); 3319789Sahrens vdev_compact_children(pvd); 3320789Sahrens 3321789Sahrens /* 3322789Sahrens * Remember one of the remaining children so we can get tvd below. 3323789Sahrens */ 3324789Sahrens cvd = pvd->vdev_child[0]; 3325789Sahrens 3326789Sahrens /* 33272082Seschrock * If we need to remove the remaining child from the list of hot spares, 33288241SJeff.Bonwick@Sun.COM * do it now, marking the vdev as no longer a spare in the process. 33298241SJeff.Bonwick@Sun.COM * We must do this before vdev_remove_parent(), because that can 33308241SJeff.Bonwick@Sun.COM * change the GUID if it creates a new toplevel GUID. For a similar 33318241SJeff.Bonwick@Sun.COM * reason, we must remove the spare now, in the same txg as the detach; 33328241SJeff.Bonwick@Sun.COM * otherwise someone could attach a new sibling, change the GUID, and 33338241SJeff.Bonwick@Sun.COM * the subsequent attempt to spa_vdev_remove(unspare_guid) would fail. 
33342082Seschrock */ 33352082Seschrock if (unspare) { 33362082Seschrock ASSERT(cvd->vdev_isspare); 33373377Seschrock spa_spare_remove(cvd); 33382082Seschrock unspare_guid = cvd->vdev_guid; 33398241SJeff.Bonwick@Sun.COM (void) spa_vdev_remove(spa, unspare_guid, B_TRUE); 33402082Seschrock } 33412082Seschrock 33422082Seschrock /* 3343789Sahrens * If the parent mirror/replacing vdev only has one child, 3344789Sahrens * the parent is no longer needed. Remove it from the tree. 3345789Sahrens */ 3346789Sahrens if (pvd->vdev_children == 1) 3347789Sahrens vdev_remove_parent(cvd); 3348789Sahrens 3349789Sahrens /* 3350789Sahrens * We don't set tvd until now because the parent we just removed 3351789Sahrens * may have been the previous top-level vdev. 3352789Sahrens */ 3353789Sahrens tvd = cvd->vdev_top; 3354789Sahrens ASSERT(tvd->vdev_parent == rvd); 3355789Sahrens 3356789Sahrens /* 33573377Seschrock * Reevaluate the parent vdev state. 3358789Sahrens */ 33594451Seschrock vdev_propagate_state(cvd); 3360789Sahrens 3361789Sahrens /* 33629816SGeorge.Wilson@Sun.COM * If the 'autoexpand' property is set on the pool then automatically 33639816SGeorge.Wilson@Sun.COM * try to expand the size of the pool. For example if the device we 33649816SGeorge.Wilson@Sun.COM * just detached was smaller than the others, it may be possible to 33659816SGeorge.Wilson@Sun.COM * add metaslabs (i.e. grow the pool). We need to reopen the vdev 33669816SGeorge.Wilson@Sun.COM * first so that we can obtain the updated sizes of the leaf vdevs. 3367789Sahrens */ 33689816SGeorge.Wilson@Sun.COM if (spa->spa_autoexpand) { 33699816SGeorge.Wilson@Sun.COM vdev_reopen(tvd); 33709816SGeorge.Wilson@Sun.COM vdev_expand(tvd, txg); 33719816SGeorge.Wilson@Sun.COM } 3372789Sahrens 3373789Sahrens vdev_config_dirty(tvd); 3374789Sahrens 3375789Sahrens /* 33763377Seschrock * Mark vd's DTL as dirty in this txg. vdev_dtl_sync() will see that 33773377Seschrock * vd->vdev_detached is set and free vd's DTL object in syncing context. 
33783377Seschrock * But first make sure we're not on any *other* txg's DTL list, to 33793377Seschrock * prevent vd from being accessed after it's freed. 3380789Sahrens */ 33818241SJeff.Bonwick@Sun.COM for (int t = 0; t < TXG_SIZE; t++) 3382789Sahrens (void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t); 33831732Sbonwick vd->vdev_detached = B_TRUE; 33841732Sbonwick vdev_dirty(tvd, VDD_DTL, vd, txg); 3385789Sahrens 33864451Seschrock spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE); 33874451Seschrock 33882082Seschrock error = spa_vdev_exit(spa, vd, txg, 0); 33892082Seschrock 33902082Seschrock /* 33913377Seschrock * If this was the removal of the original device in a hot spare vdev, 33923377Seschrock * then we want to go through and remove the device from the hot spare 33933377Seschrock * list of every other pool. 33942082Seschrock */ 33952082Seschrock if (unspare) { 33968241SJeff.Bonwick@Sun.COM spa_t *myspa = spa; 33972082Seschrock spa = NULL; 33982082Seschrock mutex_enter(&spa_namespace_lock); 33992082Seschrock while ((spa = spa_next(spa)) != NULL) { 34002082Seschrock if (spa->spa_state != POOL_STATE_ACTIVE) 34012082Seschrock continue; 34028241SJeff.Bonwick@Sun.COM if (spa == myspa) 34038241SJeff.Bonwick@Sun.COM continue; 34047793SJeff.Bonwick@Sun.COM spa_open_ref(spa, FTAG); 34057793SJeff.Bonwick@Sun.COM mutex_exit(&spa_namespace_lock); 34062082Seschrock (void) spa_vdev_remove(spa, unspare_guid, B_TRUE); 34077793SJeff.Bonwick@Sun.COM mutex_enter(&spa_namespace_lock); 34087793SJeff.Bonwick@Sun.COM spa_close(spa, FTAG); 34092082Seschrock } 34102082Seschrock mutex_exit(&spa_namespace_lock); 34112082Seschrock } 34122082Seschrock 34132082Seschrock return (error); 34142082Seschrock } 34152082Seschrock 34167754SJeff.Bonwick@Sun.COM static nvlist_t * 34177754SJeff.Bonwick@Sun.COM spa_nvlist_lookup_by_guid(nvlist_t **nvpp, int count, uint64_t target_guid) 34182082Seschrock { 34197754SJeff.Bonwick@Sun.COM for (int i = 0; i < count; i++) { 34207754SJeff.Bonwick@Sun.COM 
uint64_t guid; 34217754SJeff.Bonwick@Sun.COM 34227754SJeff.Bonwick@Sun.COM VERIFY(nvlist_lookup_uint64(nvpp[i], ZPOOL_CONFIG_GUID, 34237754SJeff.Bonwick@Sun.COM &guid) == 0); 34247754SJeff.Bonwick@Sun.COM 34257754SJeff.Bonwick@Sun.COM if (guid == target_guid) 34267754SJeff.Bonwick@Sun.COM return (nvpp[i]); 34272082Seschrock } 34282082Seschrock 34297754SJeff.Bonwick@Sun.COM return (NULL); 34305450Sbrendan } 34315450Sbrendan 34327754SJeff.Bonwick@Sun.COM static void 34337754SJeff.Bonwick@Sun.COM spa_vdev_remove_aux(nvlist_t *config, char *name, nvlist_t **dev, int count, 34347754SJeff.Bonwick@Sun.COM nvlist_t *dev_to_remove) 34355450Sbrendan { 34367754SJeff.Bonwick@Sun.COM nvlist_t **newdev = NULL; 34377754SJeff.Bonwick@Sun.COM 34387754SJeff.Bonwick@Sun.COM if (count > 1) 34397754SJeff.Bonwick@Sun.COM newdev = kmem_alloc((count - 1) * sizeof (void *), KM_SLEEP); 34407754SJeff.Bonwick@Sun.COM 34417754SJeff.Bonwick@Sun.COM for (int i = 0, j = 0; i < count; i++) { 34427754SJeff.Bonwick@Sun.COM if (dev[i] == dev_to_remove) 34437754SJeff.Bonwick@Sun.COM continue; 34447754SJeff.Bonwick@Sun.COM VERIFY(nvlist_dup(dev[i], &newdev[j++], KM_SLEEP) == 0); 34455450Sbrendan } 34465450Sbrendan 34477754SJeff.Bonwick@Sun.COM VERIFY(nvlist_remove(config, name, DATA_TYPE_NVLIST_ARRAY) == 0); 34487754SJeff.Bonwick@Sun.COM VERIFY(nvlist_add_nvlist_array(config, name, newdev, count - 1) == 0); 34497754SJeff.Bonwick@Sun.COM 34507754SJeff.Bonwick@Sun.COM for (int i = 0; i < count - 1; i++) 34517754SJeff.Bonwick@Sun.COM nvlist_free(newdev[i]); 34527754SJeff.Bonwick@Sun.COM 34537754SJeff.Bonwick@Sun.COM if (count > 1) 34547754SJeff.Bonwick@Sun.COM kmem_free(newdev, (count - 1) * sizeof (void *)); 34555450Sbrendan } 34565450Sbrendan 34575450Sbrendan /* 3458*10594SGeorge.Wilson@Sun.COM * Removing a device from the vdev namespace requires several steps 3459*10594SGeorge.Wilson@Sun.COM * and can take a significant amount of time. 
As a result we use 3460*10594SGeorge.Wilson@Sun.COM * the spa_vdev_config_[enter/exit] functions which allow us to 3461*10594SGeorge.Wilson@Sun.COM * grab and release the spa_config_lock while still holding the namespace 3462*10594SGeorge.Wilson@Sun.COM * lock. During each step the configuration is synced out. 3463*10594SGeorge.Wilson@Sun.COM */ 3464*10594SGeorge.Wilson@Sun.COM 3465*10594SGeorge.Wilson@Sun.COM /* 3466*10594SGeorge.Wilson@Sun.COM * Initial phase of device removal - stop future allocations from this device. 3467*10594SGeorge.Wilson@Sun.COM */ 3468*10594SGeorge.Wilson@Sun.COM void 3469*10594SGeorge.Wilson@Sun.COM spa_vdev_remove_start(spa_t *spa, vdev_t *vd) 3470*10594SGeorge.Wilson@Sun.COM { 3471*10594SGeorge.Wilson@Sun.COM metaslab_group_t *mg = vd->vdev_mg; 3472*10594SGeorge.Wilson@Sun.COM 3473*10594SGeorge.Wilson@Sun.COM ASSERT(MUTEX_HELD(&spa_namespace_lock)); 3474*10594SGeorge.Wilson@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 3475*10594SGeorge.Wilson@Sun.COM 3476*10594SGeorge.Wilson@Sun.COM /* 3477*10594SGeorge.Wilson@Sun.COM * Remove our vdev from the allocatable vdevs 3478*10594SGeorge.Wilson@Sun.COM */ 3479*10594SGeorge.Wilson@Sun.COM if (mg) 3480*10594SGeorge.Wilson@Sun.COM metaslab_class_remove(mg->mg_class, mg); 3481*10594SGeorge.Wilson@Sun.COM } 3482*10594SGeorge.Wilson@Sun.COM 3483*10594SGeorge.Wilson@Sun.COM /* 3484*10594SGeorge.Wilson@Sun.COM * Evacuate the device. 
3485*10594SGeorge.Wilson@Sun.COM */ 3486*10594SGeorge.Wilson@Sun.COM int 3487*10594SGeorge.Wilson@Sun.COM spa_vdev_remove_evacuate(spa_t *spa, vdev_t *vd) 3488*10594SGeorge.Wilson@Sun.COM { 3489*10594SGeorge.Wilson@Sun.COM uint64_t txg; 3490*10594SGeorge.Wilson@Sun.COM int error; 3491*10594SGeorge.Wilson@Sun.COM 3492*10594SGeorge.Wilson@Sun.COM ASSERT(MUTEX_HELD(&spa_namespace_lock)); 3493*10594SGeorge.Wilson@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); 3494*10594SGeorge.Wilson@Sun.COM 3495*10594SGeorge.Wilson@Sun.COM /* 3496*10594SGeorge.Wilson@Sun.COM * Evacuate the device. We don't hold the config lock as writer 3497*10594SGeorge.Wilson@Sun.COM * since we need to do I/O but we do keep the 3498*10594SGeorge.Wilson@Sun.COM * spa_namespace_lock held. Once this completes the device 3499*10594SGeorge.Wilson@Sun.COM * should no longer have any blocks allocated on it. 3500*10594SGeorge.Wilson@Sun.COM */ 3501*10594SGeorge.Wilson@Sun.COM if (vd->vdev_islog) { 3502*10594SGeorge.Wilson@Sun.COM /* 3503*10594SGeorge.Wilson@Sun.COM * Evacuate the device. 3504*10594SGeorge.Wilson@Sun.COM */ 3505*10594SGeorge.Wilson@Sun.COM if (error = dmu_objset_find(spa_name(spa), 3506*10594SGeorge.Wilson@Sun.COM zil_vdev_offline, NULL, DS_FIND_CHILDREN)) { 3507*10594SGeorge.Wilson@Sun.COM uint64_t txg; 3508*10594SGeorge.Wilson@Sun.COM 3509*10594SGeorge.Wilson@Sun.COM txg = spa_vdev_config_enter(spa); 3510*10594SGeorge.Wilson@Sun.COM metaslab_class_add(spa->spa_log_class, 3511*10594SGeorge.Wilson@Sun.COM vd->vdev_mg); 3512*10594SGeorge.Wilson@Sun.COM return (spa_vdev_exit(spa, NULL, txg, error)); 3513*10594SGeorge.Wilson@Sun.COM } 3514*10594SGeorge.Wilson@Sun.COM txg_wait_synced(spa_get_dsl(spa), 0); 3515*10594SGeorge.Wilson@Sun.COM } 3516*10594SGeorge.Wilson@Sun.COM 3517*10594SGeorge.Wilson@Sun.COM /* 3518*10594SGeorge.Wilson@Sun.COM * Remove any remaining MOS metadata associated with the device. 
3519*10594SGeorge.Wilson@Sun.COM */ 3520*10594SGeorge.Wilson@Sun.COM txg = spa_vdev_config_enter(spa); 3521*10594SGeorge.Wilson@Sun.COM vd->vdev_removing = B_TRUE; 3522*10594SGeorge.Wilson@Sun.COM vdev_dirty(vd, 0, NULL, txg); 3523*10594SGeorge.Wilson@Sun.COM vdev_config_dirty(vd); 3524*10594SGeorge.Wilson@Sun.COM spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); 3525*10594SGeorge.Wilson@Sun.COM 3526*10594SGeorge.Wilson@Sun.COM return (0); 3527*10594SGeorge.Wilson@Sun.COM } 3528*10594SGeorge.Wilson@Sun.COM 3529*10594SGeorge.Wilson@Sun.COM /* 3530*10594SGeorge.Wilson@Sun.COM * Complete the removal by cleaning up the namespace. 3531*10594SGeorge.Wilson@Sun.COM */ 3532*10594SGeorge.Wilson@Sun.COM void 3533*10594SGeorge.Wilson@Sun.COM spa_vdev_remove_done(spa_t *spa, vdev_t *vd) 3534*10594SGeorge.Wilson@Sun.COM { 3535*10594SGeorge.Wilson@Sun.COM vdev_t *rvd = spa->spa_root_vdev; 3536*10594SGeorge.Wilson@Sun.COM metaslab_group_t *mg = vd->vdev_mg; 3537*10594SGeorge.Wilson@Sun.COM uint64_t id = vd->vdev_id; 3538*10594SGeorge.Wilson@Sun.COM boolean_t last_vdev = (id == (rvd->vdev_children - 1)); 3539*10594SGeorge.Wilson@Sun.COM 3540*10594SGeorge.Wilson@Sun.COM ASSERT(MUTEX_HELD(&spa_namespace_lock)); 3541*10594SGeorge.Wilson@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 3542*10594SGeorge.Wilson@Sun.COM 3543*10594SGeorge.Wilson@Sun.COM (void) vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); 3544*10594SGeorge.Wilson@Sun.COM vdev_free(vd); 3545*10594SGeorge.Wilson@Sun.COM 3546*10594SGeorge.Wilson@Sun.COM /* 3547*10594SGeorge.Wilson@Sun.COM * It's possible that another thread is trying todo a spa_vdev_add() 3548*10594SGeorge.Wilson@Sun.COM * at the same time we're trying remove it. As a result the 3549*10594SGeorge.Wilson@Sun.COM * added vdev may not have initialized its metaslabs yet. 
3550*10594SGeorge.Wilson@Sun.COM */ 3551*10594SGeorge.Wilson@Sun.COM if (mg != NULL) 3552*10594SGeorge.Wilson@Sun.COM metaslab_group_destroy(mg); 3553*10594SGeorge.Wilson@Sun.COM 3554*10594SGeorge.Wilson@Sun.COM if (last_vdev) { 3555*10594SGeorge.Wilson@Sun.COM vdev_compact_children(rvd); 3556*10594SGeorge.Wilson@Sun.COM } else { 3557*10594SGeorge.Wilson@Sun.COM vd = vdev_alloc_common(spa, id, 0, &vdev_hole_ops); 3558*10594SGeorge.Wilson@Sun.COM vdev_add_child(rvd, vd); 3559*10594SGeorge.Wilson@Sun.COM } 3560*10594SGeorge.Wilson@Sun.COM vdev_config_dirty(rvd); 3561*10594SGeorge.Wilson@Sun.COM 3562*10594SGeorge.Wilson@Sun.COM /* 3563*10594SGeorge.Wilson@Sun.COM * Reassess the health of our root vdev. 3564*10594SGeorge.Wilson@Sun.COM */ 3565*10594SGeorge.Wilson@Sun.COM vdev_reopen(rvd); 3566*10594SGeorge.Wilson@Sun.COM } 3567*10594SGeorge.Wilson@Sun.COM 3568*10594SGeorge.Wilson@Sun.COM /* 35695450Sbrendan * Remove a device from the pool. Currently, this supports removing only hot 3570*10594SGeorge.Wilson@Sun.COM * spares, slogs, and level 2 ARC devices. 
35715450Sbrendan */ 35725450Sbrendan int 35735450Sbrendan spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) 35745450Sbrendan { 35755450Sbrendan vdev_t *vd; 35767754SJeff.Bonwick@Sun.COM nvlist_t **spares, **l2cache, *nv; 3577*10594SGeorge.Wilson@Sun.COM uint64_t txg = 0; 35785450Sbrendan uint_t nspares, nl2cache; 35795450Sbrendan int error = 0; 35808241SJeff.Bonwick@Sun.COM boolean_t locked = MUTEX_HELD(&spa_namespace_lock); 35818241SJeff.Bonwick@Sun.COM 35828241SJeff.Bonwick@Sun.COM if (!locked) 35838241SJeff.Bonwick@Sun.COM txg = spa_vdev_enter(spa); 35845450Sbrendan 35856643Seschrock vd = spa_lookup_by_guid(spa, guid, B_FALSE); 35865450Sbrendan 35875450Sbrendan if (spa->spa_spares.sav_vdevs != NULL && 35885450Sbrendan nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 35897754SJeff.Bonwick@Sun.COM ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0 && 35907754SJeff.Bonwick@Sun.COM (nv = spa_nvlist_lookup_by_guid(spares, nspares, guid)) != NULL) { 35917754SJeff.Bonwick@Sun.COM /* 35927754SJeff.Bonwick@Sun.COM * Only remove the hot spare if it's not currently in use 35937754SJeff.Bonwick@Sun.COM * in this pool. 
35947754SJeff.Bonwick@Sun.COM */ 35957754SJeff.Bonwick@Sun.COM if (vd == NULL || unspare) { 35967754SJeff.Bonwick@Sun.COM spa_vdev_remove_aux(spa->spa_spares.sav_config, 35977754SJeff.Bonwick@Sun.COM ZPOOL_CONFIG_SPARES, spares, nspares, nv); 35987754SJeff.Bonwick@Sun.COM spa_load_spares(spa); 35997754SJeff.Bonwick@Sun.COM spa->spa_spares.sav_sync = B_TRUE; 36007754SJeff.Bonwick@Sun.COM } else { 36017754SJeff.Bonwick@Sun.COM error = EBUSY; 36027754SJeff.Bonwick@Sun.COM } 36037754SJeff.Bonwick@Sun.COM } else if (spa->spa_l2cache.sav_vdevs != NULL && 36045450Sbrendan nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, 36057754SJeff.Bonwick@Sun.COM ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0 && 36067754SJeff.Bonwick@Sun.COM (nv = spa_nvlist_lookup_by_guid(l2cache, nl2cache, guid)) != NULL) { 36077754SJeff.Bonwick@Sun.COM /* 36087754SJeff.Bonwick@Sun.COM * Cache devices can always be removed. 36097754SJeff.Bonwick@Sun.COM */ 36107754SJeff.Bonwick@Sun.COM spa_vdev_remove_aux(spa->spa_l2cache.sav_config, 36117754SJeff.Bonwick@Sun.COM ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache, nv); 36125450Sbrendan spa_load_l2cache(spa); 36135450Sbrendan spa->spa_l2cache.sav_sync = B_TRUE; 3614*10594SGeorge.Wilson@Sun.COM } else if (vd != NULL && vd->vdev_islog) { 3615*10594SGeorge.Wilson@Sun.COM ASSERT(!locked); 3616*10594SGeorge.Wilson@Sun.COM 3617*10594SGeorge.Wilson@Sun.COM /* 3618*10594SGeorge.Wilson@Sun.COM * XXX - Once we have bp-rewrite this should 3619*10594SGeorge.Wilson@Sun.COM * become the common case. 3620*10594SGeorge.Wilson@Sun.COM */ 3621*10594SGeorge.Wilson@Sun.COM 3622*10594SGeorge.Wilson@Sun.COM /* 3623*10594SGeorge.Wilson@Sun.COM * 1. Stop allocations 3624*10594SGeorge.Wilson@Sun.COM * 2. Evacuate the device (i.e. kill off stubby and 3625*10594SGeorge.Wilson@Sun.COM * metadata) and wait for it to complete (i.e. sync). 3626*10594SGeorge.Wilson@Sun.COM * 3. Cleanup the vdev namespace. 
		 */
		spa_vdev_remove_start(spa, vd);

		/*
		 * Drop the config transaction while the evacuation syncs,
		 * then re-enter before tearing down the vdev namespace.
		 */
		spa_vdev_config_exit(spa, NULL, txg, 0, FTAG);
		if ((error = spa_vdev_remove_evacuate(spa, vd)) != 0)
			return (error);
		txg = spa_vdev_config_enter(spa);

		spa_vdev_remove_done(spa, vd);

	} else if (vd != NULL) {
		/*
		 * Normal vdevs cannot be removed (yet).
		 */
		error = ENOTSUP;
	} else {
		/*
		 * There is no vdev of any kind with the specified guid.
		 */
		error = ENOENT;
	}

	if (!locked)
		return (spa_vdev_exit(spa, NULL, txg, error));

	return (error);
}

/*
 * Find any device that's done replacing, or a vdev marked 'unspare' that's
 * currently spared, so we can detach it.  Recurses depth-first over the
 * vdev tree and returns the old (to-be-detached) vdev, or NULL if none.
 */
static vdev_t *
spa_vdev_resilver_done_hunt(vdev_t *vd)
{
	vdev_t *newvd, *oldvd;

	for (int c = 0; c < vd->vdev_children; c++) {
		oldvd = spa_vdev_resilver_done_hunt(vd->vdev_child[c]);
		if (oldvd != NULL)
			return (oldvd);
	}

	/*
	 * Check for a completed replacement: the new half of a 'replacing'
	 * vdev has no missing data and the old half is no longer required.
	 */
	if (vd->vdev_ops == &vdev_replacing_ops && vd->vdev_children == 2) {
		oldvd = vd->vdev_child[0];
		newvd = vd->vdev_child[1];

		if (vdev_dtl_empty(newvd, DTL_MISSING) &&
		    !vdev_dtl_required(oldvd))
			return (oldvd);
	}

	/*
	 * Check for a completed resilver with the 'unspare' flag set.
	 */
	if (vd->vdev_ops == &vdev_spare_ops && vd->vdev_children == 2) {
		newvd = vd->vdev_child[0];
		oldvd = vd->vdev_child[1];

		if (newvd->vdev_unspare &&
		    vdev_dtl_empty(newvd, DTL_MISSING) &&
		    !vdev_dtl_required(oldvd)) {
			newvd->vdev_unspare = 0;
			return (oldvd);
		}
	}

	return (NULL);
}

/*
 * Detach every device that has finished replacing or resilvering.
 * Capture the guids we need, drop SCL_ALL across each spa_vdev_detach()
 * (which takes its own locks), then re-enter and rescan until the tree
 * is clean.
 */
static void
spa_vdev_resilver_done(spa_t *spa)
{
	vdev_t *vd, *pvd, *ppvd;
	uint64_t guid, sguid, pguid, ppguid;

	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	while ((vd = spa_vdev_resilver_done_hunt(spa->spa_root_vdev)) != NULL) {
		pvd = vd->vdev_parent;
		ppvd = pvd->vdev_parent;
		guid = vd->vdev_guid;
		pguid = pvd->vdev_guid;
		ppguid = ppvd->vdev_guid;
		sguid = 0;
		/*
		 * If we have just finished replacing a hot spared device, then
		 * we need to detach the parent's first child (the original hot
		 * spare) as well.
		 */
		if (ppvd->vdev_ops == &vdev_spare_ops && pvd->vdev_id == 0) {
			ASSERT(pvd->vdev_ops == &vdev_replacing_ops);
			ASSERT(ppvd->vdev_children == 2);
			sguid = ppvd->vdev_child[1]->vdev_guid;
		}
		spa_config_exit(spa, SCL_ALL, FTAG);
		if (spa_vdev_detach(spa, guid, pguid, B_TRUE) != 0)
			return;
		if (sguid && spa_vdev_detach(spa, sguid, ppguid, B_TRUE) != 0)
			return;
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	}

	spa_config_exit(spa, SCL_ALL, FTAG);
}

/*
 * Update the stored path or FRU for this vdev.  Dirty the vdev configuration,
 * relying on spa_vdev_enter/exit() to synchronize the labels and cache.
37391354Seschrock */ 37401354Seschrock int 37419425SEric.Schrock@Sun.COM spa_vdev_set_common(spa_t *spa, uint64_t guid, const char *value, 37429425SEric.Schrock@Sun.COM boolean_t ispath) 37431354Seschrock { 37446643Seschrock vdev_t *vd; 37451354Seschrock uint64_t txg; 37461354Seschrock 37471354Seschrock txg = spa_vdev_enter(spa); 37481354Seschrock 37499425SEric.Schrock@Sun.COM if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) 37505450Sbrendan return (spa_vdev_exit(spa, NULL, txg, ENOENT)); 37511354Seschrock 37521585Sbonwick if (!vd->vdev_ops->vdev_op_leaf) 37531585Sbonwick return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 37541585Sbonwick 37559425SEric.Schrock@Sun.COM if (ispath) { 37569425SEric.Schrock@Sun.COM spa_strfree(vd->vdev_path); 37579425SEric.Schrock@Sun.COM vd->vdev_path = spa_strdup(value); 37589425SEric.Schrock@Sun.COM } else { 37599425SEric.Schrock@Sun.COM if (vd->vdev_fru != NULL) 37609425SEric.Schrock@Sun.COM spa_strfree(vd->vdev_fru); 37619425SEric.Schrock@Sun.COM vd->vdev_fru = spa_strdup(value); 37629425SEric.Schrock@Sun.COM } 37631354Seschrock 37641354Seschrock vdev_config_dirty(vd->vdev_top); 37651354Seschrock 37661354Seschrock return (spa_vdev_exit(spa, NULL, txg, 0)); 37671354Seschrock } 37681354Seschrock 37699425SEric.Schrock@Sun.COM int 37709425SEric.Schrock@Sun.COM spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath) 37719425SEric.Schrock@Sun.COM { 37729425SEric.Schrock@Sun.COM return (spa_vdev_set_common(spa, guid, newpath, B_TRUE)); 37739425SEric.Schrock@Sun.COM } 37749425SEric.Schrock@Sun.COM 37759425SEric.Schrock@Sun.COM int 37769425SEric.Schrock@Sun.COM spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru) 37779425SEric.Schrock@Sun.COM { 37789425SEric.Schrock@Sun.COM return (spa_vdev_set_common(spa, guid, newfru, B_FALSE)); 37799425SEric.Schrock@Sun.COM } 37809425SEric.Schrock@Sun.COM 37811354Seschrock /* 3782789Sahrens * ========================================================================== 
 * SPA Scrubbing
 * ==========================================================================
 */

/*
 * Start, complete, or cancel a scrub/resilver of the pool.
 * Returns 0 on success, or ENOTSUP/EBUSY/EINVAL as appropriate.
 */
int
spa_scrub(spa_t *spa, pool_scrub_type_t type)
{
	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);

	if ((uint_t)type >= POOL_SCRUB_TYPES)
		return (ENOTSUP);

	/*
	 * If a resilver was requested, but there is no DTL on a
	 * writeable leaf device, we have nothing to do.
	 */
	if (type == POOL_SCRUB_RESILVER &&
	    !vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) {
		spa_async_request(spa, SPA_ASYNC_RESILVER_DONE);
		return (0);
	}

	/* A full scrub cannot preempt an in-progress resilver. */
	if (type == POOL_SCRUB_EVERYTHING &&
	    spa->spa_dsl_pool->dp_scrub_func != SCRUB_FUNC_NONE &&
	    spa->spa_dsl_pool->dp_scrub_isresilver)
		return (EBUSY);

	if (type == POOL_SCRUB_EVERYTHING || type == POOL_SCRUB_RESILVER) {
		return (dsl_pool_scrub_clean(spa->spa_dsl_pool));
	} else if (type == POOL_SCRUB_NONE) {
		return (dsl_pool_scrub_cancel(spa->spa_dsl_pool));
	} else {
		return (EINVAL);
	}
}

/*
 * ==========================================================================
 * SPA async task processing
 * ==========================================================================
 */

/*
 * Recursively transition any vdev flagged 'remove_wanted' to the REMOVED
 * state and reset its error counters.
 */
static void
spa_async_remove(spa_t *spa, vdev_t *vd)
{
	if (vd->vdev_remove_wanted) {
		vd->vdev_remove_wanted = 0;
		vdev_set_state(vd, B_FALSE, VDEV_STATE_REMOVED, VDEV_AUX_NONE);

		/*
		 * We want to clear the stats, but we don't want to do a full
		 * vdev_clear() as that will cause us to throw away
		 * degraded/faulted state as well as attempt to reopen the
		 * device, all of which is a waste.
		 */
		vd->vdev_stat.vs_read_errors = 0;
		vd->vdev_stat.vs_write_errors = 0;
		vd->vdev_stat.vs_checksum_errors = 0;

		vdev_state_dirty(vd->vdev_top);
	}

	for (int c = 0; c < vd->vdev_children; c++)
		spa_async_remove(spa, vd->vdev_child[c]);
}

/*
 * Recursively reopen (and thereby probe) any vdev flagged 'probe_wanted'.
 */
static void
spa_async_probe(spa_t *spa, vdev_t *vd)
{
	if (vd->vdev_probe_wanted) {
		vd->vdev_probe_wanted = 0;
		vdev_reopen(vd);	/* vdev_open() does the actual probe */
	}

	for (int c = 0; c < vd->vdev_children; c++)
		spa_async_probe(spa, vd->vdev_child[c]);
}

/*
 * Post a DLE (device-link-event) sysevent for every leaf vdev with a known
 * physical path, presumably so the device's new size can be picked up --
 * only when pool autoexpand is enabled.
 */
static void
spa_async_autoexpand(spa_t *spa, vdev_t *vd)
{
	sysevent_id_t eid;
	nvlist_t *attr;
	char *physpath;

	if (!spa->spa_autoexpand)
		return;

	for (int c = 0; c < vd->vdev_children; c++) {
		vdev_t *cvd = vd->vdev_child[c];
		spa_async_autoexpand(spa, cvd);
	}

	if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_physpath == NULL)
		return;

	physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	(void) snprintf(physpath, MAXPATHLEN, "/devices%s", vd->vdev_physpath);

	VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0);

	(void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS,
	    ESC_DEV_DLE, attr, &eid, DDI_SLEEP);

	nvlist_free(attr);
	kmem_free(physpath, MAXPATHLEN);
}

/*
 * Body of the async worker thread: atomically snapshot-and-clear the
 * pending task mask, service each task, then announce completion and exit.
 */
static void
spa_async_thread(spa_t *spa)
{
	int tasks;

	ASSERT(spa->spa_sync_on);

	mutex_enter(&spa->spa_async_lock);
	tasks = spa->spa_async_tasks;
	spa->spa_async_tasks = 0;
	mutex_exit(&spa->spa_async_lock);

	/*
	 * See if the config needs to be updated.
	 */
	if (tasks & SPA_ASYNC_CONFIG_UPDATE) {
		uint64_t oldsz, space_update;

		mutex_enter(&spa_namespace_lock);
		oldsz = spa_get_space(spa);
		spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
		space_update = spa_get_space(spa) - oldsz;
		mutex_exit(&spa_namespace_lock);

		/*
		 * If the pool grew as a result of the config update,
		 * then log an internal history event.
		 */
		if (space_update) {
			spa_history_internal_log(LOG_POOL_VDEV_ONLINE,
			    spa, NULL, CRED(),
			    "pool '%s' size: %llu(+%llu)",
			    spa_name(spa), spa_get_space(spa),
			    space_update);
		}
	}

	/*
	 * See if any devices need to be marked REMOVED.
	 */
	if (tasks & SPA_ASYNC_REMOVE) {
		spa_vdev_state_enter(spa);
		spa_async_remove(spa, spa->spa_root_vdev);
		for (int i = 0; i < spa->spa_l2cache.sav_count; i++)
			spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i]);
		for (int i = 0; i < spa->spa_spares.sav_count; i++)
			spa_async_remove(spa, spa->spa_spares.sav_vdevs[i]);
		(void) spa_vdev_state_exit(spa, NULL, 0);
	}

	if ((tasks & SPA_ASYNC_AUTOEXPAND) && !spa_suspended(spa)) {
		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
		spa_async_autoexpand(spa, spa->spa_root_vdev);
		spa_config_exit(spa, SCL_CONFIG, FTAG);
	}

	/*
	 * See if any devices need to be probed.
	 */
	if (tasks & SPA_ASYNC_PROBE) {
		spa_vdev_state_enter(spa);
		spa_async_probe(spa, spa->spa_root_vdev);
		(void) spa_vdev_state_exit(spa, NULL, 0);
	}

	/*
	 * If any devices are done replacing, detach them.
	 */
	if (tasks & SPA_ASYNC_RESILVER_DONE)
		spa_vdev_resilver_done(spa);

	/*
	 * Kick off a resilver.
	 */
	if (tasks & SPA_ASYNC_RESILVER)
		VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER) == 0);

	/*
	 * Let the world know that we're done.
	 */
	mutex_enter(&spa->spa_async_lock);
	spa->spa_async_thread = NULL;
	cv_broadcast(&spa->spa_async_cv);
	mutex_exit(&spa->spa_async_lock);
	thread_exit();
}

/*
 * Block new async dispatch and wait for any in-flight async thread to exit.
 */
void
spa_async_suspend(spa_t *spa)
{
	mutex_enter(&spa->spa_async_lock);
	spa->spa_async_suspended++;
	while (spa->spa_async_thread != NULL)
		cv_wait(&spa->spa_async_cv, &spa->spa_async_lock);
	mutex_exit(&spa->spa_async_lock);
}

/*
 * Re-enable async dispatch (pairs with spa_async_suspend()).
 */
void
spa_async_resume(spa_t *spa)
{
	mutex_enter(&spa->spa_async_lock);
	ASSERT(spa->spa_async_suspended != 0);
	spa->spa_async_suspended--;
	mutex_exit(&spa->spa_async_lock);
}

/*
 * Spawn the async worker if there is pending work, dispatch is not
 * suspended, no worker is already running, and the root fs is writeable.
 */
static void
spa_async_dispatch(spa_t *spa)
{
	mutex_enter(&spa->spa_async_lock);
	if (spa->spa_async_tasks && !spa->spa_async_suspended &&
	    spa->spa_async_thread == NULL &&
	    rootdir != NULL && !vn_is_readonly(rootdir))
		spa->spa_async_thread = thread_create(NULL, 0,
		    spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri);
	mutex_exit(&spa->spa_async_lock);
}

/*
 * Queue an async task (SPA_ASYNC_* bit) for the worker thread.
 */
void
spa_async_request(spa_t *spa, int task)
{
	mutex_enter(&spa->spa_async_lock);
	spa->spa_async_tasks |= task;
	mutex_exit(&spa->spa_async_lock);
}

/*
 * ==========================================================================
 * SPA syncing routines
 * ==========================================================================
 */

/*
 * Free every block on the deferred-free bplist, wait for the frees to
 * complete, then vacate the list and pre-dirty its first block.
 */
static void
spa_sync_deferred_frees(spa_t *spa, uint64_t txg)
{
	bplist_t *bpl = &spa->spa_sync_bplist;
	dmu_tx_t *tx;
	blkptr_t blk;
	uint64_t itor = 0;
	zio_t *zio;
	int error;
	uint8_t c = 1;

	zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL);

	while (bplist_iterate(bpl, &itor, &blk) == 0) {
		ASSERT(blk.blk_birth < txg);
		zio_nowait(zio_free(zio, spa, txg, &blk, NULL, NULL,
		    ZIO_FLAG_MUSTSUCCEED));
	}

	error = zio_wait(zio);
	ASSERT3U(error, ==, 0);

	tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg);
	bplist_vacate(bpl, tx);

	/*
	 * Pre-dirty the first block so we sync to convergence faster.
	 * (Usually only the first block is needed.)
	 */
	dmu_write(spa->spa_meta_objset, spa->spa_sync_bplist_obj, 0, 1, &c, tx);
	dmu_tx_commit(tx);
}

/*
 * Pack nvlist 'nv' into MOS object 'obj' (XDR encoding), padding to a
 * full SPA_CONFIG_BLOCKSIZE block, and record the packed size in the
 * object's bonus buffer.
 */
static void
spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx)
{
	char *packed = NULL;
	size_t bufsize;
	size_t nvsize = 0;
	dmu_buf_t *db;

	VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0);

	/*
	 * Write full (SPA_CONFIG_BLOCKSIZE) blocks of configuration
	 * information.  This avoids the dbuf_will_dirty() path and
	 * saves us a pre-read to get data we don't actually care about.
	 */
	bufsize = P2ROUNDUP(nvsize, SPA_CONFIG_BLOCKSIZE);
	packed = kmem_alloc(bufsize, KM_SLEEP);

	VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR,
	    KM_SLEEP) == 0);
	bzero(packed + nvsize, bufsize - nvsize);

	dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx);

	kmem_free(packed, bufsize);

	VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
	dmu_buf_will_dirty(db, tx);
	*(uint64_t *)db->db_data = nvsize;
	dmu_buf_rele(db, FTAG);
}

/*
 * Sync an auxiliary vdev list (spares or l2cache) to the MOS, allocating
 * its packed-nvlist object on first use.  No-op unless sav_sync is set.
 */
static void
spa_sync_aux_dev(spa_t *spa, spa_aux_vdev_t *sav, dmu_tx_t *tx,
    const char *config, const char *entry)
{
	nvlist_t *nvroot;
	nvlist_t **list;
	int i;

	if (!sav->sav_sync)
		return;

	/*
	 * Update the MOS nvlist describing the list of available devices.
	 * spa_validate_aux() will have already made sure this nvlist is
	 * valid and the vdevs are labeled appropriately.
	 */
	if (sav->sav_object == 0) {
		sav->sav_object = dmu_object_alloc(spa->spa_meta_objset,
		    DMU_OT_PACKED_NVLIST, 1 << 14, DMU_OT_PACKED_NVLIST_SIZE,
		    sizeof (uint64_t), tx);
		VERIFY(zap_update(spa->spa_meta_objset,
		    DMU_POOL_DIRECTORY_OBJECT, entry, sizeof (uint64_t), 1,
		    &sav->sav_object, tx) == 0);
	}

	VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	if (sav->sav_count == 0) {
		VERIFY(nvlist_add_nvlist_array(nvroot, config, NULL, 0) == 0);
	} else {
		list = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP);
		for (i = 0; i < sav->sav_count; i++)
			list[i] = vdev_config_generate(spa, sav->sav_vdevs[i],
			    B_FALSE, B_FALSE, B_TRUE);
		VERIFY(nvlist_add_nvlist_array(nvroot, config, list,
		    sav->sav_count) == 0);
		for (i = 0; i < sav->sav_count; i++)
			nvlist_free(list[i]);
		kmem_free(list, sav->sav_count * sizeof (void *));
	}

	spa_sync_nvlist(spa, sav->sav_object, nvroot, tx);
	nvlist_free(nvroot);

	sav->sav_sync = B_FALSE;
}

/*
 * If the config is dirty, generate a fresh pool config, stash it in
 * spa_config_syncing, and write it to the MOS config object.
 */
static void
spa_sync_config_object(spa_t *spa, dmu_tx_t *tx)
{
	nvlist_t *config;

	if (list_is_empty(&spa->spa_config_dirty_list))
		return;

	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);

	config = spa_config_generate(spa, spa->spa_root_vdev,
	    dmu_tx_get_txg(tx), B_FALSE);

	spa_config_exit(spa, SCL_STATE, FTAG);

	if (spa->spa_config_syncing)
		nvlist_free(spa->spa_config_syncing);
	spa->spa_config_syncing = config;

	spa_sync_nvlist(spa, spa->spa_config_object, config, tx);
}

/*
 * Set zpool properties.
 */
static void
spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	spa_t *spa = arg1;
	objset_t *mos = spa->spa_meta_objset;
	nvlist_t *nvp = arg2;
	nvpair_t *elem;
	uint64_t intval;
	char *strval;
	zpool_prop_t prop;
	const char *propname;
	zprop_type_t proptype;

	mutex_enter(&spa->spa_props_lock);

	elem = NULL;
	while ((elem = nvlist_next_nvpair(nvp, elem))) {
		switch (prop = zpool_name_to_prop(nvpair_name(elem))) {
		case ZPOOL_PROP_VERSION:
			/*
			 * Only set version for non-zpool-creation cases
			 * (set/import). spa_create() needs special care
			 * for version setting.
			 */
			if (tx->tx_txg != TXG_INITIAL) {
				VERIFY(nvpair_value_uint64(elem,
				    &intval) == 0);
				ASSERT(intval <= SPA_VERSION);
				ASSERT(intval >= spa_version(spa));
				spa->spa_uberblock.ub_version = intval;
				vdev_config_dirty(spa->spa_root_vdev);
			}
			break;

		case ZPOOL_PROP_ALTROOT:
			/*
			 * 'altroot' is a non-persistent property. It should
			 * have been set temporarily at creation or import time.
			 */
			ASSERT(spa->spa_root != NULL);
			break;

		case ZPOOL_PROP_CACHEFILE:
			/*
			 * 'cachefile' is also a non-persistent property.
			 */
			break;
		default:
			/*
			 * Set pool property values in the poolprops mos object.
			 */
			if (spa->spa_pool_props_object == 0) {
				/*
				 * NOTE(review): this inner 'mos' shadows the
				 * outer declaration above; both refer to
				 * spa->spa_meta_objset, so it is harmless,
				 * but the inner one could be removed.
				 */
				objset_t *mos = spa->spa_meta_objset;

				VERIFY((spa->spa_pool_props_object =
				    zap_create(mos, DMU_OT_POOL_PROPS,
				    DMU_OT_NONE, 0, tx)) > 0);

				VERIFY(zap_update(mos,
				    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS,
				    8, 1, &spa->spa_pool_props_object, tx)
				    == 0);
			}

			/* normalize the property name */
			propname = zpool_prop_to_name(prop);
			proptype = zpool_prop_get_type(prop);

			if (nvpair_type(elem) == DATA_TYPE_STRING) {
				ASSERT(proptype == PROP_TYPE_STRING);
				VERIFY(nvpair_value_string(elem, &strval) == 0);
				VERIFY(zap_update(mos,
				    spa->spa_pool_props_object, propname,
				    1, strlen(strval) + 1, strval, tx) == 0);

			} else if (nvpair_type(elem) == DATA_TYPE_UINT64) {
				VERIFY(nvpair_value_uint64(elem, &intval) == 0);

				if (proptype == PROP_TYPE_INDEX) {
					const char *unused;
					VERIFY(zpool_prop_index_to_string(
					    prop, intval, &unused) == 0);
				}
				VERIFY(zap_update(mos,
				    spa->spa_pool_props_object, propname,
				    8, 1, &intval, tx) == 0);
			} else {
				ASSERT(0); /* not allowed */
			}

			/* cache the new value in the in-core spa_t */
			switch (prop) {
			case ZPOOL_PROP_DELEGATION:
				spa->spa_delegation = intval;
				break;
			case ZPOOL_PROP_BOOTFS:
				spa->spa_bootfs = intval;
				break;
			case ZPOOL_PROP_FAILUREMODE:
				spa->spa_failmode = intval;
				break;
			case ZPOOL_PROP_AUTOEXPAND:
				spa->spa_autoexpand = intval;
				spa_async_request(spa, SPA_ASYNC_AUTOEXPAND);
				break;
			default:
				break;
			}
		}

		/*
		 * log internal history if this is not a zpool create
		 *
		 * NOTE(review): 'intval' is logged even for string-valued
		 * properties, where it holds a stale value -- confirm
		 * whether this is intentional.
		 */
		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY &&
		    tx->tx_txg != TXG_INITIAL) {
			spa_history_internal_log(LOG_POOL_PROPSET,
			    spa, tx, cr, "%s %lld %s",
			    nvpair_name(elem), intval, spa_name(spa));
		}
	}

	mutex_exit(&spa->spa_props_lock);
}

/*
 * Sync the specified transaction group.  New blocks may be dirtied as
 * part of the process, so we iterate until it converges.
 */
void
spa_sync(spa_t *spa, uint64_t txg)
{
	dsl_pool_t *dp = spa->spa_dsl_pool;
	objset_t *mos = spa->spa_meta_objset;
	bplist_t *bpl = &spa->spa_sync_bplist;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *vd;
	dmu_tx_t *tx;
	int dirty_vdevs;
	int error;

	/*
	 * Lock out configuration changes.
	 */
	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);

	spa->spa_syncing_txg = txg;
	spa->spa_sync_pass = 0;

	/*
	 * If there are any pending vdev state changes, convert them
	 * into config changes that go out with this transaction group.
	 */
	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
	while (list_head(&spa->spa_state_dirty_list) != NULL) {
		/*
		 * We need the write lock here because, for aux vdevs,
		 * calling vdev_config_dirty() modifies sav_config.
		 * This is ugly and will become unnecessary when we
		 * eliminate the aux vdev wart by integrating all vdevs
		 * into the root vdev tree.
		 */
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_WRITER);
		while ((vd = list_head(&spa->spa_state_dirty_list)) != NULL) {
			vdev_state_clean(vd);
			vdev_config_dirty(vd);
		}
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);
	}
	spa_config_exit(spa, SCL_STATE, FTAG);

	VERIFY(0 == bplist_open(bpl, mos, spa->spa_sync_bplist_obj));

	tx = dmu_tx_create_assigned(dp, txg);

	/*
	 * If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg,
	 * set spa_deflate if we have no raid-z vdevs.
	 */
	if (spa->spa_ubsync.ub_version < SPA_VERSION_RAIDZ_DEFLATE &&
	    spa->spa_uberblock.ub_version >= SPA_VERSION_RAIDZ_DEFLATE) {
		int i;

		for (i = 0; i < rvd->vdev_children; i++) {
			vd = rvd->vdev_child[i];
			if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE)
				break;
		}
		if (i == rvd->vdev_children) {
			spa->spa_deflate = TRUE;
			VERIFY(0 == zap_add(spa->spa_meta_objset,
			    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
			    sizeof (uint64_t), 1, &spa->spa_deflate, tx));
		}
	}

	if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN &&
	    spa->spa_uberblock.ub_version >= SPA_VERSION_ORIGIN) {
		dsl_pool_create_origin(dp, tx);

		/* Keeping the origin open increases spa_minref */
		spa->spa_minref += 3;
	}

	if (spa->spa_ubsync.ub_version < SPA_VERSION_NEXT_CLONES &&
	    spa->spa_uberblock.ub_version >= SPA_VERSION_NEXT_CLONES) {
		dsl_pool_upgrade_clones(dp, tx);
	}

	/*
	 * If anything has changed in this txg, push the deferred frees
	 * from the previous txg.  If not, leave them alone so that we
	 * don't generate work on an otherwise idle system.
	 */
	if (!txg_list_empty(&dp->dp_dirty_datasets, txg) ||
	    !txg_list_empty(&dp->dp_dirty_dirs, txg) ||
	    !txg_list_empty(&dp->dp_sync_tasks, txg))
		spa_sync_deferred_frees(spa, txg);

	/*
	 * Iterate to convergence.
	 */
	do {
		spa->spa_sync_pass++;

		spa_sync_config_object(spa, tx);
		spa_sync_aux_dev(spa, &spa->spa_spares, tx,
		    ZPOOL_CONFIG_SPARES, DMU_POOL_SPARES);
		spa_sync_aux_dev(spa, &spa->spa_l2cache, tx,
		    ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE);
		spa_errlog_sync(spa, txg);
		dsl_pool_sync(dp, txg);

		dirty_vdevs = 0;
		while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg)) {
			vdev_sync(vd, txg);
			dirty_vdevs++;
		}

		bplist_sync(bpl, tx);
	} while (dirty_vdevs);

	bplist_close(bpl);

	dprintf("txg %llu passes %d\n", txg, spa->spa_sync_pass);

	/*
	 * Rewrite the vdev configuration (which includes the uberblock)
	 * to commit the transaction group.
	 *
	 * If there are no dirty vdevs, we sync the uberblock to a few
	 * random top-level vdevs that are known to be visible in the
	 * config cache (see spa_vdev_add() for a complete description).
	 * If there *are* dirty vdevs, sync the uberblock to all vdevs.
	 */
	for (;;) {
		/*
		 * We hold SCL_STATE to prevent vdev open/close/etc.
		 * while we're attempting to write the vdev labels.
		 */
		spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);

		if (list_is_empty(&spa->spa_config_dirty_list)) {
			vdev_t *svd[SPA_DVAS_PER_BP];
			int svdcount = 0;
			int children = rvd->vdev_children;
			int c0 = spa_get_random(children);

			/*
			 * Pick up to SPA_DVAS_PER_BP healthy non-log
			 * top-level vdevs, starting at a random child.
			 */
			for (int c = 0; c < children; c++) {
				vd = rvd->vdev_child[(c0 + c) % children];
				if (vd->vdev_ms_array == 0 || vd->vdev_islog)
					continue;
				svd[svdcount++] = vd;
				if (svdcount == SPA_DVAS_PER_BP)
					break;
			}
			error = vdev_config_sync(svd, svdcount, txg, B_FALSE);
			if (error != 0)
				error = vdev_config_sync(svd, svdcount, txg,
				    B_TRUE);
		} else {
			error = vdev_config_sync(rvd->vdev_child,
			    rvd->vdev_children, txg, B_FALSE);
			if (error != 0)
				error = vdev_config_sync(rvd->vdev_child,
				    rvd->vdev_children, txg, B_TRUE);
		}

		spa_config_exit(spa, SCL_STATE, FTAG);

		if (error == 0)
			break;
		/* Label writes failed: suspend I/O and retry once resumed. */
		zio_suspend(spa, NULL);
		zio_resume_wait(spa);
	}
	dmu_tx_commit(tx);

	/*
	 * Clear the dirty config list.
	 */
	while ((vd = list_head(&spa->spa_config_dirty_list)) != NULL)
		vdev_config_clean(vd);

	/*
	 * Now that the new config has synced transactionally,
	 * let it become visible to the config cache.
	 */
	if (spa->spa_config_syncing != NULL) {
		spa_config_set(spa, spa->spa_config_syncing);
		spa->spa_config_txg = txg;
		spa->spa_config_syncing = NULL;
	}

	spa->spa_ubsync = spa->spa_uberblock;

	/*
	 * Clean up the ZIL records for the synced txg.
	 */
	dsl_pool_zil_clean(dp);

	/*
	 * Update usable space statistics.
	 */
	while (vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)))
		vdev_sync_done(vd, txg);

	/*
	 * It had better be the case that we didn't dirty anything
	 * since vdev_config_sync().
	 */
	ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg));
	ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg));
	ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg));
	ASSERT(bpl->bpl_queue == NULL);

	spa_config_exit(spa, SCL_CONFIG, FTAG);

	/*
	 * If any async tasks have been requested, kick them off.
	 */
	spa_async_dispatch(spa);
}

/*
 * Sync all pools.  We don't want to hold the namespace lock across these
 * operations, so we take a reference on the spa_t and drop the lock during the
 * sync.
 */
void
spa_sync_allpools(void)
{
	spa_t *spa = NULL;
	mutex_enter(&spa_namespace_lock);
	while ((spa = spa_next(spa)) != NULL) {
		/* Skip pools that aren't active or have suspended I/O */
		if (spa_state(spa) != POOL_STATE_ACTIVE || spa_suspended(spa))
			continue;
		/*
		 * Hold a reference so the spa can't go away, then drop
		 * the namespace lock for the duration of the sync.
		 */
		spa_open_ref(spa, FTAG);
		mutex_exit(&spa_namespace_lock);
		txg_wait_synced(spa_get_dsl(spa), 0);
		mutex_enter(&spa_namespace_lock);
		spa_close(spa, FTAG);
	}
	mutex_exit(&spa_namespace_lock);
}

/*
 * ==========================================================================
 * Miscellaneous routines
 * ==========================================================================
 */

/*
 * Remove all pools in the system.
 */
void
spa_evict_all(void)
{
	spa_t *spa;

	/*
	 * Remove all cached state.  All pools should be closed now,
	 * so every spa in the AVL tree should be unreferenced.
	 */
	mutex_enter(&spa_namespace_lock);
	while ((spa = spa_next(NULL)) != NULL) {
		/*
		 * Stop async tasks.  The async thread may need to detach
		 * a device that's been replaced, which requires grabbing
		 * spa_namespace_lock, so we must drop it here.
		 */
		spa_open_ref(spa, FTAG);
		mutex_exit(&spa_namespace_lock);
		spa_async_suspend(spa);
		mutex_enter(&spa_namespace_lock);
		spa_close(spa, FTAG);

		if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
			spa_unload(spa);
			spa_deactivate(spa);
		}
		spa_remove(spa);
	}
	mutex_exit(&spa_namespace_lock);
}

/*
 * Look up a vdev in the given pool by guid.  The top-level vdev tree is
 * searched first; if 'aux' is set, the L2ARC cache and spare auxiliary
 * vdev lists are searched as well.  Returns NULL if no match is found.
 */
vdev_t *
spa_lookup_by_guid(spa_t *spa, uint64_t guid, boolean_t aux)
{
	vdev_t *vd;
	int i;

	if ((vd = vdev_lookup_by_guid(spa->spa_root_vdev, guid)) != NULL)
		return (vd);

	if (aux) {
		for (i = 0; i < spa->spa_l2cache.sav_count; i++) {
			vd = spa->spa_l2cache.sav_vdevs[i];
			if (vd->vdev_guid == guid)
				return (vd);
		}

		for (i = 0; i < spa->spa_spares.sav_count; i++) {
			vd = spa->spa_spares.sav_vdevs[i];
			if (vd->vdev_guid == guid)
				return (vd);
		}
	}

	return (NULL);
}

/*
 * Upgrade the pool's on-disk version: bump the uberblock version, dirty
 * the vdev config so the new version is written out, and wait for the
 * change to be synced to disk.
 */
void
spa_upgrade(spa_t *spa, uint64_t version)
{
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	/*
	 * This should only be called for a non-faulted pool, and since a
	 * future version would result in an unopenable pool, this shouldn't be
	 * possible.
	 */
	ASSERT(spa->spa_uberblock.ub_version <= SPA_VERSION);
	ASSERT(version >= spa->spa_uberblock.ub_version);

	spa->spa_uberblock.ub_version = version;
	vdev_config_dirty(spa->spa_root_vdev);

	spa_config_exit(spa, SCL_ALL, FTAG);

	txg_wait_synced(spa_get_dsl(spa), 0);
}

/*
 * Returns B_TRUE if the pool's spare list -- including spares whose
 * addition is still pending -- contains a vdev with the given guid.
 */
boolean_t
spa_has_spare(spa_t *spa, uint64_t guid)
{
	int i;
	uint64_t spareguid;
	spa_aux_vdev_t *sav = &spa->spa_spares;

	for (i = 0; i < sav->sav_count; i++)
		if (sav->sav_vdevs[i]->vdev_guid == guid)
			return (B_TRUE);

	/* Also check spares that have been added but not yet synced */
	for (i = 0; i < sav->sav_npending; i++) {
		if (nvlist_lookup_uint64(sav->sav_pending[i], ZPOOL_CONFIG_GUID,
		    &spareguid) == 0 && spareguid == guid)
			return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * Check if a pool has an active shared spare device.
 * Note: reference count of an active spare is 2, as a spare and as a replace
 */
static boolean_t
spa_has_active_shared_spare(spa_t *spa)
{
	int i, refcnt;
	uint64_t pool;
	spa_aux_vdev_t *sav = &spa->spa_spares;

	for (i = 0; i < sav->sav_count; i++) {
		/*
		 * refcnt > 2 means the spare has references beyond the
		 * baseline of 2 (see note above), i.e. it is actively
		 * shared; 'pool' must match this pool's guid.
		 */
		if (spa_spare_exists(sav->sav_vdevs[i]->vdev_guid, &pool,
		    &refcnt) && pool != 0ULL && pool == spa_guid(spa) &&
		    refcnt > 2)
			return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * Post a sysevent corresponding to the given event.  The 'name' must be one of
 * the event definitions in sys/sysevent/eventdefs.h.  The payload will be
 * filled in from the spa and (optionally) the vdev.  This doesn't do anything
 * in the userland libzpool, as we don't want consumers to misinterpret ztest
 * or zdb as real changes.
 */
void
spa_event_notify(spa_t *spa, vdev_t *vd, const char *name)
{
#ifdef _KERNEL
	sysevent_t *ev;
	sysevent_attr_list_t *attr = NULL;
	sysevent_value_t value;
	sysevent_id_t eid;

	ev = sysevent_alloc(EC_ZFS, (char *)name, SUNW_KERN_PUB "zfs",
	    SE_SLEEP);

	/* Always attach the pool name ... */
	value.value_type = SE_DATA_TYPE_STRING;
	value.value.sv_string = spa_name(spa);
	if (sysevent_add_attr(&attr, ZFS_EV_POOL_NAME, &value, SE_SLEEP) != 0)
		goto done;

	/* ... and the pool guid. */
	value.value_type = SE_DATA_TYPE_UINT64;
	value.value.sv_uint64 = spa_guid(spa);
	if (sysevent_add_attr(&attr, ZFS_EV_POOL_GUID, &value, SE_SLEEP) != 0)
		goto done;

	/* The vdev guid and path are optional, keyed on vd being non-NULL */
	if (vd) {
		value.value_type = SE_DATA_TYPE_UINT64;
		value.value.sv_uint64 = vd->vdev_guid;
		if (sysevent_add_attr(&attr, ZFS_EV_VDEV_GUID, &value,
		    SE_SLEEP) != 0)
			goto done;

		if (vd->vdev_path) {
			value.value_type = SE_DATA_TYPE_STRING;
			value.value.sv_string = vd->vdev_path;
			if (sysevent_add_attr(&attr, ZFS_EV_VDEV_PATH,
			    &value, SE_SLEEP) != 0)
				goto done;
		}
	}

	if (sysevent_attach_attributes(ev, attr) != 0)
		goto done;
	/*
	 * On success the attribute list belongs to the event (presumably
	 * released by sysevent_free()); clear 'attr' so the done path
	 * doesn't free it a second time.
	 */
	attr = NULL;

	/* Best effort: the return value of log_sysevent() is ignored */
	(void) log_sysevent(ev, SE_SLEEP, &eid);

done:
	if (attr)
		sysevent_free_attr(attr);
	sysevent_free(ev);
#endif
}