1789Sahrens /* 2789Sahrens * CDDL HEADER START 3789Sahrens * 4789Sahrens * The contents of this file are subject to the terms of the 51544Seschrock * Common Development and Distribution License (the "License"). 61544Seschrock * You may not use this file except in compliance with the License. 7789Sahrens * 8789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9789Sahrens * or http://www.opensolaris.org/os/licensing. 10789Sahrens * See the License for the specific language governing permissions 11789Sahrens * and limitations under the License. 12789Sahrens * 13789Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15789Sahrens * If applicable, add the following below this CDDL HEADER, with the 16789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18789Sahrens * 19789Sahrens * CDDL HEADER END 20789Sahrens */ 212082Seschrock 22789Sahrens /* 238525SEric.Schrock@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24789Sahrens * Use is subject to license terms. 25789Sahrens */ 26789Sahrens 27789Sahrens /* 28789Sahrens * This file contains all the routines used when modifying on-disk SPA state. 29789Sahrens * This includes opening, importing, destroying, exporting a pool, and syncing a 30789Sahrens * pool. 
31789Sahrens */ 32789Sahrens 33789Sahrens #include <sys/zfs_context.h> 341544Seschrock #include <sys/fm/fs/zfs.h> 35789Sahrens #include <sys/spa_impl.h> 36789Sahrens #include <sys/zio.h> 37789Sahrens #include <sys/zio_checksum.h> 38789Sahrens #include <sys/dmu.h> 39789Sahrens #include <sys/dmu_tx.h> 40789Sahrens #include <sys/zap.h> 41789Sahrens #include <sys/zil.h> 4210922SJeff.Bonwick@Sun.COM #include <sys/ddt.h> 43789Sahrens #include <sys/vdev_impl.h> 44789Sahrens #include <sys/metaslab.h> 4510594SGeorge.Wilson@Sun.COM #include <sys/metaslab_impl.h> 46789Sahrens #include <sys/uberblock_impl.h> 47789Sahrens #include <sys/txg.h> 48789Sahrens #include <sys/avl.h> 49789Sahrens #include <sys/dmu_traverse.h> 503912Slling #include <sys/dmu_objset.h> 51789Sahrens #include <sys/unique.h> 52789Sahrens #include <sys/dsl_pool.h> 533912Slling #include <sys/dsl_dataset.h> 54789Sahrens #include <sys/dsl_dir.h> 55789Sahrens #include <sys/dsl_prop.h> 563912Slling #include <sys/dsl_synctask.h> 57789Sahrens #include <sys/fs/zfs.h> 585450Sbrendan #include <sys/arc.h> 59789Sahrens #include <sys/callb.h> 603975Sek110237 #include <sys/systeminfo.h> 616423Sgw25295 #include <sys/spa_boot.h> 629816SGeorge.Wilson@Sun.COM #include <sys/zfs_ioctl.h> 63789Sahrens 648662SJordan.Vaughan@Sun.com #ifdef _KERNEL 658662SJordan.Vaughan@Sun.com #include <sys/zone.h> 6610822SJack.Meng@Sun.COM #include <sys/bootprops.h> 678662SJordan.Vaughan@Sun.com #endif /* _KERNEL */ 688662SJordan.Vaughan@Sun.com 695094Slling #include "zfs_prop.h" 705913Sperrin #include "zfs_comutil.h" 715094Slling 729515SJonathan.Adams@Sun.COM enum zti_modes { 739515SJonathan.Adams@Sun.COM zti_mode_fixed, /* value is # of threads (min 1) */ 749515SJonathan.Adams@Sun.COM zti_mode_online_percent, /* value is % of online CPUs */ 759515SJonathan.Adams@Sun.COM zti_mode_tune, /* fill from zio_taskq_tune_* */ 769515SJonathan.Adams@Sun.COM zti_nmodes 777754SJeff.Bonwick@Sun.COM }; 782986Sek110237 799515SJonathan.Adams@Sun.COM #define 
ZTI_THREAD_FIX(n) { zti_mode_fixed, (n) } 809515SJonathan.Adams@Sun.COM #define ZTI_THREAD_PCT(n) { zti_mode_online_percent, (n) } 819515SJonathan.Adams@Sun.COM #define ZTI_THREAD_TUNE { zti_mode_tune, 0 } 829515SJonathan.Adams@Sun.COM 839515SJonathan.Adams@Sun.COM #define ZTI_THREAD_ONE ZTI_THREAD_FIX(1) 849515SJonathan.Adams@Sun.COM 859515SJonathan.Adams@Sun.COM typedef struct zio_taskq_info { 869515SJonathan.Adams@Sun.COM const char *zti_name; 879515SJonathan.Adams@Sun.COM struct { 889515SJonathan.Adams@Sun.COM enum zti_modes zti_mode; 899515SJonathan.Adams@Sun.COM uint_t zti_value; 909515SJonathan.Adams@Sun.COM } zti_nthreads[ZIO_TASKQ_TYPES]; 919515SJonathan.Adams@Sun.COM } zio_taskq_info_t; 929515SJonathan.Adams@Sun.COM 939515SJonathan.Adams@Sun.COM static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = { 949515SJonathan.Adams@Sun.COM "issue", "intr" 959515SJonathan.Adams@Sun.COM }; 969515SJonathan.Adams@Sun.COM 979515SJonathan.Adams@Sun.COM const zio_taskq_info_t zio_taskqs[ZIO_TYPES] = { 989515SJonathan.Adams@Sun.COM /* ISSUE INTR */ 999515SJonathan.Adams@Sun.COM { "spa_zio_null", { ZTI_THREAD_ONE, ZTI_THREAD_ONE } }, 1009515SJonathan.Adams@Sun.COM { "spa_zio_read", { ZTI_THREAD_FIX(8), ZTI_THREAD_TUNE } }, 1019515SJonathan.Adams@Sun.COM { "spa_zio_write", { ZTI_THREAD_TUNE, ZTI_THREAD_FIX(8) } }, 1029515SJonathan.Adams@Sun.COM { "spa_zio_free", { ZTI_THREAD_ONE, ZTI_THREAD_ONE } }, 1039515SJonathan.Adams@Sun.COM { "spa_zio_claim", { ZTI_THREAD_ONE, ZTI_THREAD_ONE } }, 1049515SJonathan.Adams@Sun.COM { "spa_zio_ioctl", { ZTI_THREAD_ONE, ZTI_THREAD_ONE } }, 1059515SJonathan.Adams@Sun.COM }; 1069515SJonathan.Adams@Sun.COM 1079515SJonathan.Adams@Sun.COM enum zti_modes zio_taskq_tune_mode = zti_mode_online_percent; 1089515SJonathan.Adams@Sun.COM uint_t zio_taskq_tune_value = 80; /* #threads = 80% of # online CPUs */ 1099515SJonathan.Adams@Sun.COM 1105094Slling static void spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx); 1117214Slling static 
boolean_t spa_has_active_shared_spare(spa_t *spa); 1125094Slling 1135094Slling /* 1145094Slling * ========================================================================== 1155094Slling * SPA properties routines 1165094Slling * ========================================================================== 1175094Slling */ 1185094Slling 1195094Slling /* 1205094Slling * Add a (source=src, propname=propval) list to an nvlist. 1215094Slling */ 1225949Slling static void 1235094Slling spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval, 1245094Slling uint64_t intval, zprop_source_t src) 1255094Slling { 1265094Slling const char *propname = zpool_prop_to_name(prop); 1275094Slling nvlist_t *propval; 1285949Slling 1295949Slling VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); 1305949Slling VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0); 1315949Slling 1325949Slling if (strval != NULL) 1335949Slling VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0); 1345949Slling else 1355949Slling VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0); 1365949Slling 1375949Slling VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0); 1385094Slling nvlist_free(propval); 1395094Slling } 1405094Slling 1415094Slling /* 1425094Slling * Get property values from the spa configuration. 
1435094Slling */ 1445949Slling static void 1455094Slling spa_prop_get_config(spa_t *spa, nvlist_t **nvp) 1465094Slling { 1478525SEric.Schrock@Sun.COM uint64_t size; 14810956SGeorge.Wilson@Sun.COM uint64_t alloc; 1495094Slling uint64_t cap, version; 1505094Slling zprop_source_t src = ZPROP_SRC_NONE; 1516643Seschrock spa_config_dirent_t *dp; 1525094Slling 1537754SJeff.Bonwick@Sun.COM ASSERT(MUTEX_HELD(&spa->spa_props_lock)); 1547754SJeff.Bonwick@Sun.COM 1558525SEric.Schrock@Sun.COM if (spa->spa_root_vdev != NULL) { 15610956SGeorge.Wilson@Sun.COM alloc = metaslab_class_get_alloc(spa_normal_class(spa)); 15710922SJeff.Bonwick@Sun.COM size = metaslab_class_get_space(spa_normal_class(spa)); 1588525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src); 1598525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src); 16010956SGeorge.Wilson@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_ALLOCATED, NULL, alloc, src); 16110956SGeorge.Wilson@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_FREE, NULL, 16210956SGeorge.Wilson@Sun.COM size - alloc, src); 16310956SGeorge.Wilson@Sun.COM 16410956SGeorge.Wilson@Sun.COM cap = (size == 0) ? 
0 : (alloc * 100 / size); 1658525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src); 1668525SEric.Schrock@Sun.COM 16710922SJeff.Bonwick@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_DEDUPRATIO, NULL, 16810922SJeff.Bonwick@Sun.COM ddt_get_pool_dedup_ratio(spa), src); 16910922SJeff.Bonwick@Sun.COM 1708525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL, 1718525SEric.Schrock@Sun.COM spa->spa_root_vdev->vdev_state, src); 1728525SEric.Schrock@Sun.COM 1738525SEric.Schrock@Sun.COM version = spa_version(spa); 1748525SEric.Schrock@Sun.COM if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) 1758525SEric.Schrock@Sun.COM src = ZPROP_SRC_DEFAULT; 1768525SEric.Schrock@Sun.COM else 1778525SEric.Schrock@Sun.COM src = ZPROP_SRC_LOCAL; 1788525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src); 1798525SEric.Schrock@Sun.COM } 1805949Slling 1815949Slling spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src); 1825949Slling 1835949Slling if (spa->spa_root != NULL) 1845949Slling spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root, 1855949Slling 0, ZPROP_SRC_LOCAL); 1865094Slling 1876643Seschrock if ((dp = list_head(&spa->spa_config_list)) != NULL) { 1886643Seschrock if (dp->scd_path == NULL) { 1895949Slling spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 1906643Seschrock "none", 0, ZPROP_SRC_LOCAL); 1916643Seschrock } else if (strcmp(dp->scd_path, spa_config_path) != 0) { 1925949Slling spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 1936643Seschrock dp->scd_path, 0, ZPROP_SRC_LOCAL); 1945363Seschrock } 1955363Seschrock } 1965094Slling } 1975094Slling 1985094Slling /* 1995094Slling * Get zpool property values. 
2005094Slling */ 2015094Slling int 2025094Slling spa_prop_get(spa_t *spa, nvlist_t **nvp) 2035094Slling { 20410922SJeff.Bonwick@Sun.COM objset_t *mos = spa->spa_meta_objset; 2055094Slling zap_cursor_t zc; 2065094Slling zap_attribute_t za; 2075094Slling int err; 2085094Slling 2095949Slling VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); 2105094Slling 2117754SJeff.Bonwick@Sun.COM mutex_enter(&spa->spa_props_lock); 2127754SJeff.Bonwick@Sun.COM 2135094Slling /* 2145094Slling * Get properties from the spa config. 2155094Slling */ 2165949Slling spa_prop_get_config(spa, nvp); 2175094Slling 2185094Slling /* If no pool property object, no more prop to get. */ 2195094Slling if (spa->spa_pool_props_object == 0) { 2205094Slling mutex_exit(&spa->spa_props_lock); 2215094Slling return (0); 2225094Slling } 2235094Slling 2245094Slling /* 2255094Slling * Get properties from the MOS pool property object. 2265094Slling */ 2275094Slling for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object); 2285094Slling (err = zap_cursor_retrieve(&zc, &za)) == 0; 2295094Slling zap_cursor_advance(&zc)) { 2305094Slling uint64_t intval = 0; 2315094Slling char *strval = NULL; 2325094Slling zprop_source_t src = ZPROP_SRC_DEFAULT; 2335094Slling zpool_prop_t prop; 2345094Slling 2355094Slling if ((prop = zpool_name_to_prop(za.za_name)) == ZPROP_INVAL) 2365094Slling continue; 2375094Slling 2385094Slling switch (za.za_integer_length) { 2395094Slling case 8: 2405094Slling /* integer property */ 2415094Slling if (za.za_first_integer != 2425094Slling zpool_prop_default_numeric(prop)) 2435094Slling src = ZPROP_SRC_LOCAL; 2445094Slling 2455094Slling if (prop == ZPOOL_PROP_BOOTFS) { 2465094Slling dsl_pool_t *dp; 2475094Slling dsl_dataset_t *ds = NULL; 2485094Slling 2495094Slling dp = spa_get_dsl(spa); 2505094Slling rw_enter(&dp->dp_config_rwlock, RW_READER); 2516689Smaybee if (err = dsl_dataset_hold_obj(dp, 2526689Smaybee za.za_first_integer, FTAG, &ds)) { 2535094Slling rw_exit(&dp->dp_config_rwlock); 
2545094Slling break; 2555094Slling } 2565094Slling 2575094Slling strval = kmem_alloc( 2585094Slling MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, 2595094Slling KM_SLEEP); 2605094Slling dsl_dataset_name(ds, strval); 2616689Smaybee dsl_dataset_rele(ds, FTAG); 2625094Slling rw_exit(&dp->dp_config_rwlock); 2635094Slling } else { 2645094Slling strval = NULL; 2655094Slling intval = za.za_first_integer; 2665094Slling } 2675094Slling 2685949Slling spa_prop_add_list(*nvp, prop, strval, intval, src); 2695094Slling 2705094Slling if (strval != NULL) 2715094Slling kmem_free(strval, 2725094Slling MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); 2735094Slling 2745094Slling break; 2755094Slling 2765094Slling case 1: 2775094Slling /* string property */ 2785094Slling strval = kmem_alloc(za.za_num_integers, KM_SLEEP); 2795094Slling err = zap_lookup(mos, spa->spa_pool_props_object, 2805094Slling za.za_name, 1, za.za_num_integers, strval); 2815094Slling if (err) { 2825094Slling kmem_free(strval, za.za_num_integers); 2835094Slling break; 2845094Slling } 2855949Slling spa_prop_add_list(*nvp, prop, strval, 0, src); 2865094Slling kmem_free(strval, za.za_num_integers); 2875094Slling break; 2885094Slling 2895094Slling default: 2905094Slling break; 2915094Slling } 2925094Slling } 2935094Slling zap_cursor_fini(&zc); 2945094Slling mutex_exit(&spa->spa_props_lock); 2955094Slling out: 2965094Slling if (err && err != ENOENT) { 2975094Slling nvlist_free(*nvp); 2985949Slling *nvp = NULL; 2995094Slling return (err); 3005094Slling } 3015094Slling 3025094Slling return (0); 3035094Slling } 3045094Slling 3055094Slling /* 3065094Slling * Validate the given pool properties nvlist and modify the list 3075094Slling * for the property values to be set. 
3085094Slling */ 3095094Slling static int 3105094Slling spa_prop_validate(spa_t *spa, nvlist_t *props) 3115094Slling { 3125094Slling nvpair_t *elem; 3135094Slling int error = 0, reset_bootfs = 0; 3145094Slling uint64_t objnum; 3155094Slling 3165094Slling elem = NULL; 3175094Slling while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { 3185094Slling zpool_prop_t prop; 3195094Slling char *propname, *strval; 3205094Slling uint64_t intval; 3215094Slling objset_t *os; 3225363Seschrock char *slash; 3235094Slling 3245094Slling propname = nvpair_name(elem); 3255094Slling 3265094Slling if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) 3275094Slling return (EINVAL); 3285094Slling 3295094Slling switch (prop) { 3305094Slling case ZPOOL_PROP_VERSION: 3315094Slling error = nvpair_value_uint64(elem, &intval); 3325094Slling if (!error && 3335094Slling (intval < spa_version(spa) || intval > SPA_VERSION)) 3345094Slling error = EINVAL; 3355094Slling break; 3365094Slling 3375094Slling case ZPOOL_PROP_DELEGATION: 3385094Slling case ZPOOL_PROP_AUTOREPLACE: 3397538SRichard.Morris@Sun.COM case ZPOOL_PROP_LISTSNAPS: 3409816SGeorge.Wilson@Sun.COM case ZPOOL_PROP_AUTOEXPAND: 3415094Slling error = nvpair_value_uint64(elem, &intval); 3425094Slling if (!error && intval > 1) 3435094Slling error = EINVAL; 3445094Slling break; 3455094Slling 3465094Slling case ZPOOL_PROP_BOOTFS: 3479630SJeff.Bonwick@Sun.COM /* 3489630SJeff.Bonwick@Sun.COM * If the pool version is less than SPA_VERSION_BOOTFS, 3499630SJeff.Bonwick@Sun.COM * or the pool is still being created (version == 0), 3509630SJeff.Bonwick@Sun.COM * the bootfs property cannot be set. 
3519630SJeff.Bonwick@Sun.COM */ 3525094Slling if (spa_version(spa) < SPA_VERSION_BOOTFS) { 3535094Slling error = ENOTSUP; 3545094Slling break; 3555094Slling } 3565094Slling 3575094Slling /* 3587042Sgw25295 * Make sure the vdev config is bootable 3595094Slling */ 3607042Sgw25295 if (!vdev_is_bootable(spa->spa_root_vdev)) { 3615094Slling error = ENOTSUP; 3625094Slling break; 3635094Slling } 3645094Slling 3655094Slling reset_bootfs = 1; 3665094Slling 3675094Slling error = nvpair_value_string(elem, &strval); 3685094Slling 3695094Slling if (!error) { 3707042Sgw25295 uint64_t compress; 3717042Sgw25295 3725094Slling if (strval == NULL || strval[0] == '\0') { 3735094Slling objnum = zpool_prop_default_numeric( 3745094Slling ZPOOL_PROP_BOOTFS); 3755094Slling break; 3765094Slling } 3775094Slling 37810298SMatthew.Ahrens@Sun.COM if (error = dmu_objset_hold(strval, FTAG, &os)) 3795094Slling break; 3807042Sgw25295 38110298SMatthew.Ahrens@Sun.COM /* Must be ZPL and not gzip compressed. */ 38210298SMatthew.Ahrens@Sun.COM 38310298SMatthew.Ahrens@Sun.COM if (dmu_objset_type(os) != DMU_OST_ZFS) { 38410298SMatthew.Ahrens@Sun.COM error = ENOTSUP; 38510298SMatthew.Ahrens@Sun.COM } else if ((error = dsl_prop_get_integer(strval, 3867042Sgw25295 zfs_prop_to_name(ZFS_PROP_COMPRESSION), 3877042Sgw25295 &compress, NULL)) == 0 && 3887042Sgw25295 !BOOTFS_COMPRESS_VALID(compress)) { 3897042Sgw25295 error = ENOTSUP; 3907042Sgw25295 } else { 3917042Sgw25295 objnum = dmu_objset_id(os); 3927042Sgw25295 } 39310298SMatthew.Ahrens@Sun.COM dmu_objset_rele(os, FTAG); 3945094Slling } 3955094Slling break; 3967754SJeff.Bonwick@Sun.COM 3975329Sgw25295 case ZPOOL_PROP_FAILUREMODE: 3985329Sgw25295 error = nvpair_value_uint64(elem, &intval); 3995329Sgw25295 if (!error && (intval < ZIO_FAILURE_MODE_WAIT || 4005329Sgw25295 intval > ZIO_FAILURE_MODE_PANIC)) 4015329Sgw25295 error = EINVAL; 4025329Sgw25295 4035329Sgw25295 /* 4045329Sgw25295 * This is a special case which only occurs when 4055329Sgw25295 * the pool 
has completely failed. This allows 4065329Sgw25295 * the user to change the in-core failmode property 4075329Sgw25295 * without syncing it out to disk (I/Os might 4085329Sgw25295 * currently be blocked). We do this by returning 4095329Sgw25295 * EIO to the caller (spa_prop_set) to trick it 4105329Sgw25295 * into thinking we encountered a property validation 4115329Sgw25295 * error. 4125329Sgw25295 */ 4137754SJeff.Bonwick@Sun.COM if (!error && spa_suspended(spa)) { 4145329Sgw25295 spa->spa_failmode = intval; 4155329Sgw25295 error = EIO; 4165329Sgw25295 } 4175329Sgw25295 break; 4185363Seschrock 4195363Seschrock case ZPOOL_PROP_CACHEFILE: 4205363Seschrock if ((error = nvpair_value_string(elem, &strval)) != 0) 4215363Seschrock break; 4225363Seschrock 4235363Seschrock if (strval[0] == '\0') 4245363Seschrock break; 4255363Seschrock 4265363Seschrock if (strcmp(strval, "none") == 0) 4275363Seschrock break; 4285363Seschrock 4295363Seschrock if (strval[0] != '/') { 4305363Seschrock error = EINVAL; 4315363Seschrock break; 4325363Seschrock } 4335363Seschrock 4345363Seschrock slash = strrchr(strval, '/'); 4355363Seschrock ASSERT(slash != NULL); 4365363Seschrock 4375363Seschrock if (slash[1] == '\0' || strcmp(slash, "/.") == 0 || 4385363Seschrock strcmp(slash, "/..") == 0) 4395363Seschrock error = EINVAL; 4405363Seschrock break; 44110922SJeff.Bonwick@Sun.COM 44210922SJeff.Bonwick@Sun.COM case ZPOOL_PROP_DEDUPDITTO: 44310922SJeff.Bonwick@Sun.COM if (spa_version(spa) < SPA_VERSION_DEDUP) 44410922SJeff.Bonwick@Sun.COM error = ENOTSUP; 44510922SJeff.Bonwick@Sun.COM else 44610922SJeff.Bonwick@Sun.COM error = nvpair_value_uint64(elem, &intval); 44710922SJeff.Bonwick@Sun.COM if (error == 0 && 44810922SJeff.Bonwick@Sun.COM intval != 0 && intval < ZIO_DEDUPDITTO_MIN) 44910922SJeff.Bonwick@Sun.COM error = EINVAL; 45010922SJeff.Bonwick@Sun.COM break; 4515094Slling } 4525094Slling 4535094Slling if (error) 4545094Slling break; 4555094Slling } 4565094Slling 4575094Slling if (!error && 
reset_bootfs) { 4585094Slling error = nvlist_remove(props, 4595094Slling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING); 4605094Slling 4615094Slling if (!error) { 4625094Slling error = nvlist_add_uint64(props, 4635094Slling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum); 4645094Slling } 4655094Slling } 4665094Slling 4675094Slling return (error); 4685094Slling } 4695094Slling 4708525SEric.Schrock@Sun.COM void 4718525SEric.Schrock@Sun.COM spa_configfile_set(spa_t *spa, nvlist_t *nvp, boolean_t need_sync) 4728525SEric.Schrock@Sun.COM { 4738525SEric.Schrock@Sun.COM char *cachefile; 4748525SEric.Schrock@Sun.COM spa_config_dirent_t *dp; 4758525SEric.Schrock@Sun.COM 4768525SEric.Schrock@Sun.COM if (nvlist_lookup_string(nvp, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), 4778525SEric.Schrock@Sun.COM &cachefile) != 0) 4788525SEric.Schrock@Sun.COM return; 4798525SEric.Schrock@Sun.COM 4808525SEric.Schrock@Sun.COM dp = kmem_alloc(sizeof (spa_config_dirent_t), 4818525SEric.Schrock@Sun.COM KM_SLEEP); 4828525SEric.Schrock@Sun.COM 4838525SEric.Schrock@Sun.COM if (cachefile[0] == '\0') 4848525SEric.Schrock@Sun.COM dp->scd_path = spa_strdup(spa_config_path); 4858525SEric.Schrock@Sun.COM else if (strcmp(cachefile, "none") == 0) 4868525SEric.Schrock@Sun.COM dp->scd_path = NULL; 4878525SEric.Schrock@Sun.COM else 4888525SEric.Schrock@Sun.COM dp->scd_path = spa_strdup(cachefile); 4898525SEric.Schrock@Sun.COM 4908525SEric.Schrock@Sun.COM list_insert_head(&spa->spa_config_list, dp); 4918525SEric.Schrock@Sun.COM if (need_sync) 4928525SEric.Schrock@Sun.COM spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); 4938525SEric.Schrock@Sun.COM } 4948525SEric.Schrock@Sun.COM 4955094Slling int 4965094Slling spa_prop_set(spa_t *spa, nvlist_t *nvp) 4975094Slling { 4985094Slling int error; 4998525SEric.Schrock@Sun.COM nvpair_t *elem; 5008525SEric.Schrock@Sun.COM boolean_t need_sync = B_FALSE; 5018525SEric.Schrock@Sun.COM zpool_prop_t prop; 5025094Slling 5035094Slling if ((error = spa_prop_validate(spa, 
nvp)) != 0) 5045094Slling return (error); 5055094Slling 5068525SEric.Schrock@Sun.COM elem = NULL; 5078525SEric.Schrock@Sun.COM while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) { 5088525SEric.Schrock@Sun.COM if ((prop = zpool_name_to_prop( 5098525SEric.Schrock@Sun.COM nvpair_name(elem))) == ZPROP_INVAL) 5108525SEric.Schrock@Sun.COM return (EINVAL); 5118525SEric.Schrock@Sun.COM 5128525SEric.Schrock@Sun.COM if (prop == ZPOOL_PROP_CACHEFILE || prop == ZPOOL_PROP_ALTROOT) 5138525SEric.Schrock@Sun.COM continue; 5148525SEric.Schrock@Sun.COM 5158525SEric.Schrock@Sun.COM need_sync = B_TRUE; 5168525SEric.Schrock@Sun.COM break; 5178525SEric.Schrock@Sun.COM } 5188525SEric.Schrock@Sun.COM 5198525SEric.Schrock@Sun.COM if (need_sync) 5208525SEric.Schrock@Sun.COM return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props, 5218525SEric.Schrock@Sun.COM spa, nvp, 3)); 5228525SEric.Schrock@Sun.COM else 5238525SEric.Schrock@Sun.COM return (0); 5245094Slling } 5255094Slling 5265094Slling /* 5275094Slling * If the bootfs property value is dsobj, clear it. 
5285094Slling */ 5295094Slling void 5305094Slling spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx) 5315094Slling { 5325094Slling if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) { 5335094Slling VERIFY(zap_remove(spa->spa_meta_objset, 5345094Slling spa->spa_pool_props_object, 5355094Slling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0); 5365094Slling spa->spa_bootfs = 0; 5375094Slling } 5385094Slling } 5395094Slling 540789Sahrens /* 541789Sahrens * ========================================================================== 542789Sahrens * SPA state manipulation (open/create/destroy/import/export) 543789Sahrens * ========================================================================== 544789Sahrens */ 545789Sahrens 5461544Seschrock static int 5471544Seschrock spa_error_entry_compare(const void *a, const void *b) 5481544Seschrock { 5491544Seschrock spa_error_entry_t *sa = (spa_error_entry_t *)a; 5501544Seschrock spa_error_entry_t *sb = (spa_error_entry_t *)b; 5511544Seschrock int ret; 5521544Seschrock 5531544Seschrock ret = bcmp(&sa->se_bookmark, &sb->se_bookmark, 5541544Seschrock sizeof (zbookmark_t)); 5551544Seschrock 5561544Seschrock if (ret < 0) 5571544Seschrock return (-1); 5581544Seschrock else if (ret > 0) 5591544Seschrock return (1); 5601544Seschrock else 5611544Seschrock return (0); 5621544Seschrock } 5631544Seschrock 5641544Seschrock /* 5651544Seschrock * Utility function which retrieves copies of the current logs and 5661544Seschrock * re-initializes them in the process. 
5671544Seschrock */ 5681544Seschrock void 5691544Seschrock spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub) 5701544Seschrock { 5711544Seschrock ASSERT(MUTEX_HELD(&spa->spa_errlist_lock)); 5721544Seschrock 5731544Seschrock bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t)); 5741544Seschrock bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t)); 5751544Seschrock 5761544Seschrock avl_create(&spa->spa_errlist_scrub, 5771544Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 5781544Seschrock offsetof(spa_error_entry_t, se_avl)); 5791544Seschrock avl_create(&spa->spa_errlist_last, 5801544Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 5811544Seschrock offsetof(spa_error_entry_t, se_avl)); 5821544Seschrock } 5831544Seschrock 584789Sahrens /* 585789Sahrens * Activate an uninitialized pool. 586789Sahrens */ 587789Sahrens static void 5888241SJeff.Bonwick@Sun.COM spa_activate(spa_t *spa, int mode) 589789Sahrens { 590789Sahrens ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); 591789Sahrens 592789Sahrens spa->spa_state = POOL_STATE_ACTIVE; 5938241SJeff.Bonwick@Sun.COM spa->spa_mode = mode; 594789Sahrens 59510594SGeorge.Wilson@Sun.COM spa->spa_normal_class = metaslab_class_create(spa, zfs_metaslab_ops); 59610594SGeorge.Wilson@Sun.COM spa->spa_log_class = metaslab_class_create(spa, zfs_metaslab_ops); 597789Sahrens 5987754SJeff.Bonwick@Sun.COM for (int t = 0; t < ZIO_TYPES; t++) { 5999515SJonathan.Adams@Sun.COM const zio_taskq_info_t *ztip = &zio_taskqs[t]; 6007754SJeff.Bonwick@Sun.COM for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { 6019515SJonathan.Adams@Sun.COM enum zti_modes mode = ztip->zti_nthreads[q].zti_mode; 6029515SJonathan.Adams@Sun.COM uint_t value = ztip->zti_nthreads[q].zti_value; 6039515SJonathan.Adams@Sun.COM char name[32]; 6049515SJonathan.Adams@Sun.COM 6059515SJonathan.Adams@Sun.COM (void) snprintf(name, sizeof (name), 6069515SJonathan.Adams@Sun.COM "%s_%s", ztip->zti_name, zio_taskq_types[q]); 
6079515SJonathan.Adams@Sun.COM 6089515SJonathan.Adams@Sun.COM if (mode == zti_mode_tune) { 6099515SJonathan.Adams@Sun.COM mode = zio_taskq_tune_mode; 6109515SJonathan.Adams@Sun.COM value = zio_taskq_tune_value; 6119515SJonathan.Adams@Sun.COM if (mode == zti_mode_tune) 6129515SJonathan.Adams@Sun.COM mode = zti_mode_online_percent; 6139515SJonathan.Adams@Sun.COM } 6149515SJonathan.Adams@Sun.COM 6159515SJonathan.Adams@Sun.COM switch (mode) { 6169515SJonathan.Adams@Sun.COM case zti_mode_fixed: 6179515SJonathan.Adams@Sun.COM ASSERT3U(value, >=, 1); 6189515SJonathan.Adams@Sun.COM value = MAX(value, 1); 6199515SJonathan.Adams@Sun.COM 6209515SJonathan.Adams@Sun.COM spa->spa_zio_taskq[t][q] = taskq_create(name, 6219515SJonathan.Adams@Sun.COM value, maxclsyspri, 50, INT_MAX, 6229515SJonathan.Adams@Sun.COM TASKQ_PREPOPULATE); 6239515SJonathan.Adams@Sun.COM break; 6249515SJonathan.Adams@Sun.COM 6259515SJonathan.Adams@Sun.COM case zti_mode_online_percent: 6269515SJonathan.Adams@Sun.COM spa->spa_zio_taskq[t][q] = taskq_create(name, 6279515SJonathan.Adams@Sun.COM value, maxclsyspri, 50, INT_MAX, 6289515SJonathan.Adams@Sun.COM TASKQ_PREPOPULATE | TASKQ_THREADS_CPU_PCT); 6299515SJonathan.Adams@Sun.COM break; 6309515SJonathan.Adams@Sun.COM 6319515SJonathan.Adams@Sun.COM case zti_mode_tune: 6329515SJonathan.Adams@Sun.COM default: 6339515SJonathan.Adams@Sun.COM panic("unrecognized mode for " 6349515SJonathan.Adams@Sun.COM "zio_taskqs[%u]->zti_nthreads[%u] (%u:%u) " 6359515SJonathan.Adams@Sun.COM "in spa_activate()", 6369515SJonathan.Adams@Sun.COM t, q, mode, value); 6379515SJonathan.Adams@Sun.COM break; 6389515SJonathan.Adams@Sun.COM } 6397754SJeff.Bonwick@Sun.COM } 640789Sahrens } 641789Sahrens 6427754SJeff.Bonwick@Sun.COM list_create(&spa->spa_config_dirty_list, sizeof (vdev_t), 6437754SJeff.Bonwick@Sun.COM offsetof(vdev_t, vdev_config_dirty_node)); 6447754SJeff.Bonwick@Sun.COM list_create(&spa->spa_state_dirty_list, sizeof (vdev_t), 6457754SJeff.Bonwick@Sun.COM offsetof(vdev_t, 
vdev_state_dirty_node)); 646789Sahrens 647789Sahrens txg_list_create(&spa->spa_vdev_txg_list, 648789Sahrens offsetof(struct vdev, vdev_txg_node)); 6491544Seschrock 6501544Seschrock avl_create(&spa->spa_errlist_scrub, 6511544Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 6521544Seschrock offsetof(spa_error_entry_t, se_avl)); 6531544Seschrock avl_create(&spa->spa_errlist_last, 6541544Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 6551544Seschrock offsetof(spa_error_entry_t, se_avl)); 656789Sahrens } 657789Sahrens 658789Sahrens /* 659789Sahrens * Opposite of spa_activate(). 660789Sahrens */ 661789Sahrens static void 662789Sahrens spa_deactivate(spa_t *spa) 663789Sahrens { 664789Sahrens ASSERT(spa->spa_sync_on == B_FALSE); 665789Sahrens ASSERT(spa->spa_dsl_pool == NULL); 666789Sahrens ASSERT(spa->spa_root_vdev == NULL); 6679630SJeff.Bonwick@Sun.COM ASSERT(spa->spa_async_zio_root == NULL); 668789Sahrens ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED); 669789Sahrens 670789Sahrens txg_list_destroy(&spa->spa_vdev_txg_list); 671789Sahrens 6727754SJeff.Bonwick@Sun.COM list_destroy(&spa->spa_config_dirty_list); 6737754SJeff.Bonwick@Sun.COM list_destroy(&spa->spa_state_dirty_list); 6747754SJeff.Bonwick@Sun.COM 6757754SJeff.Bonwick@Sun.COM for (int t = 0; t < ZIO_TYPES; t++) { 6767754SJeff.Bonwick@Sun.COM for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { 6777754SJeff.Bonwick@Sun.COM taskq_destroy(spa->spa_zio_taskq[t][q]); 6787754SJeff.Bonwick@Sun.COM spa->spa_zio_taskq[t][q] = NULL; 6797754SJeff.Bonwick@Sun.COM } 680789Sahrens } 681789Sahrens 682789Sahrens metaslab_class_destroy(spa->spa_normal_class); 683789Sahrens spa->spa_normal_class = NULL; 684789Sahrens 6854527Sperrin metaslab_class_destroy(spa->spa_log_class); 6864527Sperrin spa->spa_log_class = NULL; 6874527Sperrin 6881544Seschrock /* 6891544Seschrock * If this was part of an import or the open otherwise failed, we may 6901544Seschrock * still have errors left in the queues. 
Empty them just in case. 6911544Seschrock */ 6921544Seschrock spa_errlog_drain(spa); 6931544Seschrock 6941544Seschrock avl_destroy(&spa->spa_errlist_scrub); 6951544Seschrock avl_destroy(&spa->spa_errlist_last); 6961544Seschrock 697789Sahrens spa->spa_state = POOL_STATE_UNINITIALIZED; 698789Sahrens } 699789Sahrens 700789Sahrens /* 701789Sahrens * Verify a pool configuration, and construct the vdev tree appropriately. This 702789Sahrens * will create all the necessary vdevs in the appropriate layout, with each vdev 703789Sahrens * in the CLOSED state. This will prep the pool before open/creation/import. 704789Sahrens * All vdev validation is done by the vdev_alloc() routine. 705789Sahrens */ 7062082Seschrock static int 7072082Seschrock spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, 7082082Seschrock uint_t id, int atype) 709789Sahrens { 710789Sahrens nvlist_t **child; 7119816SGeorge.Wilson@Sun.COM uint_t children; 7122082Seschrock int error; 7132082Seschrock 7142082Seschrock if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0) 7152082Seschrock return (error); 7162082Seschrock 7172082Seschrock if ((*vdp)->vdev_ops->vdev_op_leaf) 7182082Seschrock return (0); 719789Sahrens 7207754SJeff.Bonwick@Sun.COM error = nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 7217754SJeff.Bonwick@Sun.COM &child, &children); 7227754SJeff.Bonwick@Sun.COM 7237754SJeff.Bonwick@Sun.COM if (error == ENOENT) 7247754SJeff.Bonwick@Sun.COM return (0); 7257754SJeff.Bonwick@Sun.COM 7267754SJeff.Bonwick@Sun.COM if (error) { 7272082Seschrock vdev_free(*vdp); 7282082Seschrock *vdp = NULL; 7292082Seschrock return (EINVAL); 730789Sahrens } 731789Sahrens 7329816SGeorge.Wilson@Sun.COM for (int c = 0; c < children; c++) { 7332082Seschrock vdev_t *vd; 7342082Seschrock if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c, 7352082Seschrock atype)) != 0) { 7362082Seschrock vdev_free(*vdp); 7372082Seschrock *vdp = NULL; 7382082Seschrock return (error); 739789Sahrens } 
740789Sahrens } 741789Sahrens 7422082Seschrock ASSERT(*vdp != NULL); 7432082Seschrock 7442082Seschrock return (0); 745789Sahrens } 746789Sahrens 747789Sahrens /* 748789Sahrens * Opposite of spa_load(). 749789Sahrens */ 750789Sahrens static void 751789Sahrens spa_unload(spa_t *spa) 752789Sahrens { 7532082Seschrock int i; 7542082Seschrock 7557754SJeff.Bonwick@Sun.COM ASSERT(MUTEX_HELD(&spa_namespace_lock)); 7567754SJeff.Bonwick@Sun.COM 757789Sahrens /* 7581544Seschrock * Stop async tasks. 7591544Seschrock */ 7601544Seschrock spa_async_suspend(spa); 7611544Seschrock 7621544Seschrock /* 763789Sahrens * Stop syncing. 764789Sahrens */ 765789Sahrens if (spa->spa_sync_on) { 766789Sahrens txg_sync_stop(spa->spa_dsl_pool); 767789Sahrens spa->spa_sync_on = B_FALSE; 768789Sahrens } 769789Sahrens 770789Sahrens /* 7717754SJeff.Bonwick@Sun.COM * Wait for any outstanding async I/O to complete. 772789Sahrens */ 7739234SGeorge.Wilson@Sun.COM if (spa->spa_async_zio_root != NULL) { 7749234SGeorge.Wilson@Sun.COM (void) zio_wait(spa->spa_async_zio_root); 7759234SGeorge.Wilson@Sun.COM spa->spa_async_zio_root = NULL; 7769234SGeorge.Wilson@Sun.COM } 777789Sahrens 778789Sahrens /* 779789Sahrens * Close the dsl pool. 780789Sahrens */ 781789Sahrens if (spa->spa_dsl_pool) { 782789Sahrens dsl_pool_close(spa->spa_dsl_pool); 783789Sahrens spa->spa_dsl_pool = NULL; 784789Sahrens } 785789Sahrens 78610922SJeff.Bonwick@Sun.COM ddt_unload(spa); 78710922SJeff.Bonwick@Sun.COM 7888241SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 7898241SJeff.Bonwick@Sun.COM 7908241SJeff.Bonwick@Sun.COM /* 7918241SJeff.Bonwick@Sun.COM * Drop and purge level 2 cache 7928241SJeff.Bonwick@Sun.COM */ 7938241SJeff.Bonwick@Sun.COM spa_l2cache_drop(spa); 7948241SJeff.Bonwick@Sun.COM 795789Sahrens /* 796789Sahrens * Close all vdevs. 
797789Sahrens */ 7981585Sbonwick if (spa->spa_root_vdev) 799789Sahrens vdev_free(spa->spa_root_vdev); 8001585Sbonwick ASSERT(spa->spa_root_vdev == NULL); 8011544Seschrock 8025450Sbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) 8035450Sbrendan vdev_free(spa->spa_spares.sav_vdevs[i]); 8045450Sbrendan if (spa->spa_spares.sav_vdevs) { 8055450Sbrendan kmem_free(spa->spa_spares.sav_vdevs, 8065450Sbrendan spa->spa_spares.sav_count * sizeof (void *)); 8075450Sbrendan spa->spa_spares.sav_vdevs = NULL; 8085450Sbrendan } 8095450Sbrendan if (spa->spa_spares.sav_config) { 8105450Sbrendan nvlist_free(spa->spa_spares.sav_config); 8115450Sbrendan spa->spa_spares.sav_config = NULL; 8122082Seschrock } 8137377SEric.Schrock@Sun.COM spa->spa_spares.sav_count = 0; 8145450Sbrendan 8155450Sbrendan for (i = 0; i < spa->spa_l2cache.sav_count; i++) 8165450Sbrendan vdev_free(spa->spa_l2cache.sav_vdevs[i]); 8175450Sbrendan if (spa->spa_l2cache.sav_vdevs) { 8185450Sbrendan kmem_free(spa->spa_l2cache.sav_vdevs, 8195450Sbrendan spa->spa_l2cache.sav_count * sizeof (void *)); 8205450Sbrendan spa->spa_l2cache.sav_vdevs = NULL; 8215450Sbrendan } 8225450Sbrendan if (spa->spa_l2cache.sav_config) { 8235450Sbrendan nvlist_free(spa->spa_l2cache.sav_config); 8245450Sbrendan spa->spa_l2cache.sav_config = NULL; 8252082Seschrock } 8267377SEric.Schrock@Sun.COM spa->spa_l2cache.sav_count = 0; 8272082Seschrock 8281544Seschrock spa->spa_async_suspended = 0; 8298241SJeff.Bonwick@Sun.COM 8308241SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 831789Sahrens } 832789Sahrens 833789Sahrens /* 8342082Seschrock * Load (or re-load) the current list of vdevs describing the active spares for 8352082Seschrock * this pool. When this is called, we have some form of basic information in 8365450Sbrendan * 'spa_spares.sav_config'. We parse this into vdevs, try to open them, and 8375450Sbrendan * then re-generate a more complete list including status information. 
8382082Seschrock */ 8392082Seschrock static void 8402082Seschrock spa_load_spares(spa_t *spa) 8412082Seschrock { 8422082Seschrock nvlist_t **spares; 8432082Seschrock uint_t nspares; 8442082Seschrock int i; 8453377Seschrock vdev_t *vd, *tvd; 8462082Seschrock 8477754SJeff.Bonwick@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 8487754SJeff.Bonwick@Sun.COM 8492082Seschrock /* 8502082Seschrock * First, close and free any existing spare vdevs. 8512082Seschrock */ 8525450Sbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) { 8535450Sbrendan vd = spa->spa_spares.sav_vdevs[i]; 8543377Seschrock 8553377Seschrock /* Undo the call to spa_activate() below */ 8566643Seschrock if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, 8576643Seschrock B_FALSE)) != NULL && tvd->vdev_isspare) 8583377Seschrock spa_spare_remove(tvd); 8593377Seschrock vdev_close(vd); 8603377Seschrock vdev_free(vd); 8612082Seschrock } 8623377Seschrock 8635450Sbrendan if (spa->spa_spares.sav_vdevs) 8645450Sbrendan kmem_free(spa->spa_spares.sav_vdevs, 8655450Sbrendan spa->spa_spares.sav_count * sizeof (void *)); 8665450Sbrendan 8675450Sbrendan if (spa->spa_spares.sav_config == NULL) 8682082Seschrock nspares = 0; 8692082Seschrock else 8705450Sbrendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 8712082Seschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 8722082Seschrock 8735450Sbrendan spa->spa_spares.sav_count = (int)nspares; 8745450Sbrendan spa->spa_spares.sav_vdevs = NULL; 8752082Seschrock 8762082Seschrock if (nspares == 0) 8772082Seschrock return; 8782082Seschrock 8792082Seschrock /* 8802082Seschrock * Construct the array of vdevs, opening them to get status in the 8813377Seschrock * process. For each spare, there is potentially two different vdev_t 8823377Seschrock * structures associated with it: one in the list of spares (used only 8833377Seschrock * for basic validation purposes) and one in the active vdev 8843377Seschrock * configuration (if it's spared in). 
During this phase we open and 8853377Seschrock * validate each vdev on the spare list. If the vdev also exists in the 8863377Seschrock * active configuration, then we also mark this vdev as an active spare. 8872082Seschrock */ 8885450Sbrendan spa->spa_spares.sav_vdevs = kmem_alloc(nspares * sizeof (void *), 8895450Sbrendan KM_SLEEP); 8905450Sbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) { 8912082Seschrock VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0, 8922082Seschrock VDEV_ALLOC_SPARE) == 0); 8932082Seschrock ASSERT(vd != NULL); 8942082Seschrock 8955450Sbrendan spa->spa_spares.sav_vdevs[i] = vd; 8962082Seschrock 8976643Seschrock if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, 8986643Seschrock B_FALSE)) != NULL) { 8993377Seschrock if (!tvd->vdev_isspare) 9003377Seschrock spa_spare_add(tvd); 9013377Seschrock 9023377Seschrock /* 9033377Seschrock * We only mark the spare active if we were successfully 9043377Seschrock * able to load the vdev. Otherwise, importing a pool 9053377Seschrock * with a bad active spare would result in strange 9063377Seschrock * behavior, because multiple pool would think the spare 9073377Seschrock * is actively in use. 9083377Seschrock * 9093377Seschrock * There is a vulnerability here to an equally bizarre 9103377Seschrock * circumstance, where a dead active spare is later 9113377Seschrock * brought back to life (onlined or otherwise). Given 9123377Seschrock * the rarity of this scenario, and the extra complexity 9133377Seschrock * it adds, we ignore the possibility. 
9143377Seschrock */ 9153377Seschrock if (!vdev_is_dead(tvd)) 9163377Seschrock spa_spare_activate(tvd); 9173377Seschrock } 9183377Seschrock 9197754SJeff.Bonwick@Sun.COM vd->vdev_top = vd; 9209425SEric.Schrock@Sun.COM vd->vdev_aux = &spa->spa_spares; 9217754SJeff.Bonwick@Sun.COM 9222082Seschrock if (vdev_open(vd) != 0) 9232082Seschrock continue; 9242082Seschrock 9255450Sbrendan if (vdev_validate_aux(vd) == 0) 9265450Sbrendan spa_spare_add(vd); 9272082Seschrock } 9282082Seschrock 9292082Seschrock /* 9302082Seschrock * Recompute the stashed list of spares, with status information 9312082Seschrock * this time. 9322082Seschrock */ 9335450Sbrendan VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, 9342082Seschrock DATA_TYPE_NVLIST_ARRAY) == 0); 9352082Seschrock 9365450Sbrendan spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *), 9375450Sbrendan KM_SLEEP); 9385450Sbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) 9395450Sbrendan spares[i] = vdev_config_generate(spa, 9405450Sbrendan spa->spa_spares.sav_vdevs[i], B_TRUE, B_TRUE, B_FALSE); 9415450Sbrendan VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, 9425450Sbrendan ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0); 9435450Sbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) 9442082Seschrock nvlist_free(spares[i]); 9455450Sbrendan kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *)); 9465450Sbrendan } 9475450Sbrendan 9485450Sbrendan /* 9495450Sbrendan * Load (or re-load) the current list of vdevs describing the active l2cache for 9505450Sbrendan * this pool. When this is called, we have some form of basic information in 9515450Sbrendan * 'spa_l2cache.sav_config'. We parse this into vdevs, try to open them, and 9525450Sbrendan * then re-generate a more complete list including status information. 9535450Sbrendan * Devices which are already active have their details maintained, and are 9545450Sbrendan * not re-opened. 
9555450Sbrendan */ 9565450Sbrendan static void 9575450Sbrendan spa_load_l2cache(spa_t *spa) 9585450Sbrendan { 9595450Sbrendan nvlist_t **l2cache; 9605450Sbrendan uint_t nl2cache; 9615450Sbrendan int i, j, oldnvdevs; 9629816SGeorge.Wilson@Sun.COM uint64_t guid; 9635450Sbrendan vdev_t *vd, **oldvdevs, **newvdevs; 9645450Sbrendan spa_aux_vdev_t *sav = &spa->spa_l2cache; 9655450Sbrendan 9667754SJeff.Bonwick@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 9677754SJeff.Bonwick@Sun.COM 9685450Sbrendan if (sav->sav_config != NULL) { 9695450Sbrendan VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, 9705450Sbrendan ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 9715450Sbrendan newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP); 9725450Sbrendan } else { 9735450Sbrendan nl2cache = 0; 9745450Sbrendan } 9755450Sbrendan 9765450Sbrendan oldvdevs = sav->sav_vdevs; 9775450Sbrendan oldnvdevs = sav->sav_count; 9785450Sbrendan sav->sav_vdevs = NULL; 9795450Sbrendan sav->sav_count = 0; 9805450Sbrendan 9815450Sbrendan /* 9825450Sbrendan * Process new nvlist of vdevs. 9835450Sbrendan */ 9845450Sbrendan for (i = 0; i < nl2cache; i++) { 9855450Sbrendan VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID, 9865450Sbrendan &guid) == 0); 9875450Sbrendan 9885450Sbrendan newvdevs[i] = NULL; 9895450Sbrendan for (j = 0; j < oldnvdevs; j++) { 9905450Sbrendan vd = oldvdevs[j]; 9915450Sbrendan if (vd != NULL && guid == vd->vdev_guid) { 9925450Sbrendan /* 9935450Sbrendan * Retain previous vdev for add/remove ops. 
9945450Sbrendan */ 9955450Sbrendan newvdevs[i] = vd; 9965450Sbrendan oldvdevs[j] = NULL; 9975450Sbrendan break; 9985450Sbrendan } 9995450Sbrendan } 10005450Sbrendan 10015450Sbrendan if (newvdevs[i] == NULL) { 10025450Sbrendan /* 10035450Sbrendan * Create new vdev 10045450Sbrendan */ 10055450Sbrendan VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0, 10065450Sbrendan VDEV_ALLOC_L2CACHE) == 0); 10075450Sbrendan ASSERT(vd != NULL); 10085450Sbrendan newvdevs[i] = vd; 10095450Sbrendan 10105450Sbrendan /* 10115450Sbrendan * Commit this vdev as an l2cache device, 10125450Sbrendan * even if it fails to open. 10135450Sbrendan */ 10145450Sbrendan spa_l2cache_add(vd); 10155450Sbrendan 10166643Seschrock vd->vdev_top = vd; 10176643Seschrock vd->vdev_aux = sav; 10186643Seschrock 10196643Seschrock spa_l2cache_activate(vd); 10206643Seschrock 10215450Sbrendan if (vdev_open(vd) != 0) 10225450Sbrendan continue; 10235450Sbrendan 10245450Sbrendan (void) vdev_validate_aux(vd); 10255450Sbrendan 10269816SGeorge.Wilson@Sun.COM if (!vdev_is_dead(vd)) 10279816SGeorge.Wilson@Sun.COM l2arc_add_vdev(spa, vd); 10285450Sbrendan } 10295450Sbrendan } 10305450Sbrendan 10315450Sbrendan /* 10325450Sbrendan * Purge vdevs that were dropped 10335450Sbrendan */ 10345450Sbrendan for (i = 0; i < oldnvdevs; i++) { 10355450Sbrendan uint64_t pool; 10365450Sbrendan 10375450Sbrendan vd = oldvdevs[i]; 10385450Sbrendan if (vd != NULL) { 10398241SJeff.Bonwick@Sun.COM if (spa_l2cache_exists(vd->vdev_guid, &pool) && 10408241SJeff.Bonwick@Sun.COM pool != 0ULL && l2arc_vdev_present(vd)) 10415450Sbrendan l2arc_remove_vdev(vd); 10425450Sbrendan (void) vdev_close(vd); 10435450Sbrendan spa_l2cache_remove(vd); 10445450Sbrendan } 10455450Sbrendan } 10465450Sbrendan 10475450Sbrendan if (oldvdevs) 10485450Sbrendan kmem_free(oldvdevs, oldnvdevs * sizeof (void *)); 10495450Sbrendan 10505450Sbrendan if (sav->sav_config == NULL) 10515450Sbrendan goto out; 10525450Sbrendan 10535450Sbrendan sav->sav_vdevs = newvdevs; 
10545450Sbrendan sav->sav_count = (int)nl2cache; 10555450Sbrendan 10565450Sbrendan /* 10575450Sbrendan * Recompute the stashed list of l2cache devices, with status 10585450Sbrendan * information this time. 10595450Sbrendan */ 10605450Sbrendan VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE, 10615450Sbrendan DATA_TYPE_NVLIST_ARRAY) == 0); 10625450Sbrendan 10635450Sbrendan l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); 10645450Sbrendan for (i = 0; i < sav->sav_count; i++) 10655450Sbrendan l2cache[i] = vdev_config_generate(spa, 10665450Sbrendan sav->sav_vdevs[i], B_TRUE, B_FALSE, B_TRUE); 10675450Sbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, 10685450Sbrendan ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0); 10695450Sbrendan out: 10705450Sbrendan for (i = 0; i < sav->sav_count; i++) 10715450Sbrendan nvlist_free(l2cache[i]); 10725450Sbrendan if (sav->sav_count) 10735450Sbrendan kmem_free(l2cache, sav->sav_count * sizeof (void *)); 10742082Seschrock } 10752082Seschrock 10762082Seschrock static int 10772082Seschrock load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value) 10782082Seschrock { 10792082Seschrock dmu_buf_t *db; 10802082Seschrock char *packed = NULL; 10812082Seschrock size_t nvsize = 0; 10822082Seschrock int error; 10832082Seschrock *value = NULL; 10842082Seschrock 10852082Seschrock VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); 10862082Seschrock nvsize = *(uint64_t *)db->db_data; 10872082Seschrock dmu_buf_rele(db, FTAG); 10882082Seschrock 10892082Seschrock packed = kmem_alloc(nvsize, KM_SLEEP); 10909512SNeil.Perrin@Sun.COM error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed, 10919512SNeil.Perrin@Sun.COM DMU_READ_PREFETCH); 10922082Seschrock if (error == 0) 10932082Seschrock error = nvlist_unpack(packed, nvsize, value, 0); 10942082Seschrock kmem_free(packed, nvsize); 10952082Seschrock 10962082Seschrock return (error); 10972082Seschrock } 10982082Seschrock 10992082Seschrock /* 
11004451Seschrock * Checks to see if the given vdev could not be opened, in which case we post a 11014451Seschrock * sysevent to notify the autoreplace code that the device has been removed. 11024451Seschrock */ 11034451Seschrock static void 11044451Seschrock spa_check_removed(vdev_t *vd) 11054451Seschrock { 11069816SGeorge.Wilson@Sun.COM for (int c = 0; c < vd->vdev_children; c++) 11074451Seschrock spa_check_removed(vd->vdev_child[c]); 11084451Seschrock 11094451Seschrock if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) { 11104451Seschrock zfs_post_autoreplace(vd->vdev_spa, vd); 11114451Seschrock spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK); 11124451Seschrock } 11134451Seschrock } 11144451Seschrock 11154451Seschrock /* 11169701SGeorge.Wilson@Sun.COM * Load the slog device state from the config object since it's possible 11179701SGeorge.Wilson@Sun.COM * that the label does not contain the most up-to-date information. 11189701SGeorge.Wilson@Sun.COM */ 11199701SGeorge.Wilson@Sun.COM void 112010594SGeorge.Wilson@Sun.COM spa_load_log_state(spa_t *spa, nvlist_t *nv) 11219701SGeorge.Wilson@Sun.COM { 112210594SGeorge.Wilson@Sun.COM vdev_t *ovd, *rvd = spa->spa_root_vdev; 112310594SGeorge.Wilson@Sun.COM 112410594SGeorge.Wilson@Sun.COM /* 112510594SGeorge.Wilson@Sun.COM * Load the original root vdev tree from the passed config. 
112610594SGeorge.Wilson@Sun.COM */ 112710594SGeorge.Wilson@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 112810594SGeorge.Wilson@Sun.COM VERIFY(spa_config_parse(spa, &ovd, nv, NULL, 0, VDEV_ALLOC_LOAD) == 0); 112910594SGeorge.Wilson@Sun.COM 113010594SGeorge.Wilson@Sun.COM for (int c = 0; c < rvd->vdev_children; c++) { 113110594SGeorge.Wilson@Sun.COM vdev_t *cvd = rvd->vdev_child[c]; 113210594SGeorge.Wilson@Sun.COM if (cvd->vdev_islog) 113310594SGeorge.Wilson@Sun.COM vdev_load_log_state(cvd, ovd->vdev_child[c]); 11349701SGeorge.Wilson@Sun.COM } 113510594SGeorge.Wilson@Sun.COM vdev_free(ovd); 113610594SGeorge.Wilson@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 11379701SGeorge.Wilson@Sun.COM } 11389701SGeorge.Wilson@Sun.COM 11399701SGeorge.Wilson@Sun.COM /* 11407294Sperrin * Check for missing log devices 11417294Sperrin */ 11427294Sperrin int 11437294Sperrin spa_check_logs(spa_t *spa) 11447294Sperrin { 11457294Sperrin switch (spa->spa_log_state) { 11467294Sperrin case SPA_LOG_MISSING: 11477294Sperrin /* need to recheck in case slog has been restored */ 11487294Sperrin case SPA_LOG_UNKNOWN: 11497294Sperrin if (dmu_objset_find(spa->spa_name, zil_check_log_chain, NULL, 11507294Sperrin DS_FIND_CHILDREN)) { 11517294Sperrin spa->spa_log_state = SPA_LOG_MISSING; 11527294Sperrin return (1); 11537294Sperrin } 11547294Sperrin break; 11557294Sperrin } 11567294Sperrin return (0); 11577294Sperrin } 11587294Sperrin 115910672SEric.Schrock@Sun.COM static void 116010672SEric.Schrock@Sun.COM spa_aux_check_removed(spa_aux_vdev_t *sav) 116110672SEric.Schrock@Sun.COM { 116210922SJeff.Bonwick@Sun.COM for (int i = 0; i < sav->sav_count; i++) 116310672SEric.Schrock@Sun.COM spa_check_removed(sav->sav_vdevs[i]); 116410672SEric.Schrock@Sun.COM } 116510672SEric.Schrock@Sun.COM 116610922SJeff.Bonwick@Sun.COM void 116710922SJeff.Bonwick@Sun.COM spa_claim_notify(zio_t *zio) 116810922SJeff.Bonwick@Sun.COM { 116910922SJeff.Bonwick@Sun.COM spa_t *spa = zio->io_spa; 
117010922SJeff.Bonwick@Sun.COM 117110922SJeff.Bonwick@Sun.COM if (zio->io_error) 117210922SJeff.Bonwick@Sun.COM return; 117310922SJeff.Bonwick@Sun.COM 117410922SJeff.Bonwick@Sun.COM mutex_enter(&spa->spa_props_lock); /* any mutex will do */ 117510922SJeff.Bonwick@Sun.COM if (spa->spa_claim_max_txg < zio->io_bp->blk_birth) 117610922SJeff.Bonwick@Sun.COM spa->spa_claim_max_txg = zio->io_bp->blk_birth; 117710922SJeff.Bonwick@Sun.COM mutex_exit(&spa->spa_props_lock); 117810922SJeff.Bonwick@Sun.COM } 117910922SJeff.Bonwick@Sun.COM 118010921STim.Haley@Sun.COM typedef struct spa_load_error { 118110921STim.Haley@Sun.COM uint64_t sle_metadata_count; 118210921STim.Haley@Sun.COM uint64_t sle_data_count; 118310921STim.Haley@Sun.COM } spa_load_error_t; 118410921STim.Haley@Sun.COM 118510921STim.Haley@Sun.COM static void 118610921STim.Haley@Sun.COM spa_load_verify_done(zio_t *zio) 118710921STim.Haley@Sun.COM { 118810921STim.Haley@Sun.COM blkptr_t *bp = zio->io_bp; 118910921STim.Haley@Sun.COM spa_load_error_t *sle = zio->io_private; 119010921STim.Haley@Sun.COM dmu_object_type_t type = BP_GET_TYPE(bp); 119110921STim.Haley@Sun.COM int error = zio->io_error; 119210921STim.Haley@Sun.COM 119310921STim.Haley@Sun.COM if (error) { 119410921STim.Haley@Sun.COM if ((BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata) && 119510921STim.Haley@Sun.COM type != DMU_OT_INTENT_LOG) 119610921STim.Haley@Sun.COM atomic_add_64(&sle->sle_metadata_count, 1); 119710921STim.Haley@Sun.COM else 119810921STim.Haley@Sun.COM atomic_add_64(&sle->sle_data_count, 1); 119910921STim.Haley@Sun.COM } 120010921STim.Haley@Sun.COM zio_data_buf_free(zio->io_data, zio->io_size); 120110921STim.Haley@Sun.COM } 120210921STim.Haley@Sun.COM 120310921STim.Haley@Sun.COM /*ARGSUSED*/ 120410921STim.Haley@Sun.COM static int 120510922SJeff.Bonwick@Sun.COM spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, 120610922SJeff.Bonwick@Sun.COM const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) 
120710921STim.Haley@Sun.COM { 120810921STim.Haley@Sun.COM if (bp != NULL) { 120910921STim.Haley@Sun.COM zio_t *rio = arg; 121010921STim.Haley@Sun.COM size_t size = BP_GET_PSIZE(bp); 121110921STim.Haley@Sun.COM void *data = zio_data_buf_alloc(size); 121210921STim.Haley@Sun.COM 121310921STim.Haley@Sun.COM zio_nowait(zio_read(rio, spa, bp, data, size, 121410921STim.Haley@Sun.COM spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB, 121510921STim.Haley@Sun.COM ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL | 121610921STim.Haley@Sun.COM ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb)); 121710921STim.Haley@Sun.COM } 121810921STim.Haley@Sun.COM return (0); 121910921STim.Haley@Sun.COM } 122010921STim.Haley@Sun.COM 122110921STim.Haley@Sun.COM static int 122210921STim.Haley@Sun.COM spa_load_verify(spa_t *spa) 122310921STim.Haley@Sun.COM { 122410921STim.Haley@Sun.COM zio_t *rio; 122510921STim.Haley@Sun.COM spa_load_error_t sle = { 0 }; 122610921STim.Haley@Sun.COM zpool_rewind_policy_t policy; 122710921STim.Haley@Sun.COM boolean_t verify_ok = B_FALSE; 122810921STim.Haley@Sun.COM int error; 122910921STim.Haley@Sun.COM 123010921STim.Haley@Sun.COM rio = zio_root(spa, NULL, &sle, 123110921STim.Haley@Sun.COM ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE); 123210921STim.Haley@Sun.COM 123310921STim.Haley@Sun.COM error = traverse_pool(spa, spa_load_verify_cb, rio, 123410921STim.Haley@Sun.COM spa->spa_verify_min_txg); 123510921STim.Haley@Sun.COM 123610921STim.Haley@Sun.COM (void) zio_wait(rio); 123710921STim.Haley@Sun.COM 123810921STim.Haley@Sun.COM zpool_get_rewind_policy(spa->spa_config, &policy); 123910921STim.Haley@Sun.COM 124010921STim.Haley@Sun.COM spa->spa_load_meta_errors = sle.sle_metadata_count; 124110921STim.Haley@Sun.COM spa->spa_load_data_errors = sle.sle_data_count; 124210921STim.Haley@Sun.COM 124310921STim.Haley@Sun.COM if (!error && sle.sle_metadata_count <= policy.zrp_maxmeta && 124410921STim.Haley@Sun.COM sle.sle_data_count <= policy.zrp_maxdata) { 124510921STim.Haley@Sun.COM verify_ok = 
B_TRUE; 124610921STim.Haley@Sun.COM spa->spa_load_txg = spa->spa_uberblock.ub_txg; 124710921STim.Haley@Sun.COM spa->spa_load_txg_ts = spa->spa_uberblock.ub_timestamp; 1248*11026STim.Haley@Sun.COM } else { 1249*11026STim.Haley@Sun.COM spa->spa_load_max_txg = spa->spa_uberblock.ub_txg; 125010921STim.Haley@Sun.COM } 125110921STim.Haley@Sun.COM 125210921STim.Haley@Sun.COM if (error) { 125310921STim.Haley@Sun.COM if (error != ENXIO && error != EIO) 125410921STim.Haley@Sun.COM error = EIO; 125510921STim.Haley@Sun.COM return (error); 125610921STim.Haley@Sun.COM } 125710921STim.Haley@Sun.COM 125810921STim.Haley@Sun.COM return (verify_ok ? 0 : EIO); 125910921STim.Haley@Sun.COM } 126010921STim.Haley@Sun.COM 12617294Sperrin /* 1262789Sahrens * Load an existing storage pool, using the pool's builtin spa_config as a 12631544Seschrock * source of configuration information. 1264789Sahrens */ 1265789Sahrens static int 126610921STim.Haley@Sun.COM spa_load(spa_t *spa, spa_load_state_t state, int mosconfig) 1267789Sahrens { 1268789Sahrens int error = 0; 126910594SGeorge.Wilson@Sun.COM nvlist_t *nvconfig, *nvroot = NULL; 1270789Sahrens vdev_t *rvd; 1271789Sahrens uberblock_t *ub = &spa->spa_uberblock; 12721635Sbonwick uint64_t config_cache_txg = spa->spa_config_txg; 1273789Sahrens uint64_t pool_guid; 12742082Seschrock uint64_t version; 12754451Seschrock uint64_t autoreplace = 0; 12768241SJeff.Bonwick@Sun.COM int orig_mode = spa->spa_mode; 12777294Sperrin char *ereport = FM_EREPORT_ZFS_POOL; 127810921STim.Haley@Sun.COM nvlist_t *config = spa->spa_config; 1279789Sahrens 12808241SJeff.Bonwick@Sun.COM /* 12818241SJeff.Bonwick@Sun.COM * If this is an untrusted config, access the pool in read-only mode. 12828241SJeff.Bonwick@Sun.COM * This prevents things like resilvering recently removed devices. 
12838241SJeff.Bonwick@Sun.COM */ 12848241SJeff.Bonwick@Sun.COM if (!mosconfig) 12858241SJeff.Bonwick@Sun.COM spa->spa_mode = FREAD; 12868241SJeff.Bonwick@Sun.COM 12877754SJeff.Bonwick@Sun.COM ASSERT(MUTEX_HELD(&spa_namespace_lock)); 12887754SJeff.Bonwick@Sun.COM 12891544Seschrock spa->spa_load_state = state; 12901635Sbonwick 1291789Sahrens if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) || 12921733Sbonwick nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) { 12931544Seschrock error = EINVAL; 12941544Seschrock goto out; 12951544Seschrock } 1296789Sahrens 12972082Seschrock /* 12982082Seschrock * Versioning wasn't explicitly added to the label until later, so if 12992082Seschrock * it's not present treat it as the initial version. 13002082Seschrock */ 13012082Seschrock if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) != 0) 13024577Sahrens version = SPA_VERSION_INITIAL; 13032082Seschrock 13041733Sbonwick (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, 13051733Sbonwick &spa->spa_config_txg); 13061733Sbonwick 13071635Sbonwick if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) && 13081544Seschrock spa_guid_exists(pool_guid, 0)) { 13091544Seschrock error = EEXIST; 13101544Seschrock goto out; 13111544Seschrock } 1312789Sahrens 13132174Seschrock spa->spa_load_guid = pool_guid; 13142174Seschrock 1315789Sahrens /* 13169234SGeorge.Wilson@Sun.COM * Create "The Godfather" zio to hold all async IOs 13179234SGeorge.Wilson@Sun.COM */ 13189630SJeff.Bonwick@Sun.COM spa->spa_async_zio_root = zio_root(spa, NULL, NULL, 13199630SJeff.Bonwick@Sun.COM ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); 13209234SGeorge.Wilson@Sun.COM 13219234SGeorge.Wilson@Sun.COM /* 13222082Seschrock * Parse the configuration into a vdev tree. We explicitly set the 13232082Seschrock * value that will be returned by spa_version() since parsing the 13242082Seschrock * configuration requires knowing the version number. 
1325789Sahrens */ 13267754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 13272082Seschrock spa->spa_ubsync.ub_version = version; 13282082Seschrock error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_LOAD); 13297754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 1330789Sahrens 13312082Seschrock if (error != 0) 13321544Seschrock goto out; 1333789Sahrens 13341585Sbonwick ASSERT(spa->spa_root_vdev == rvd); 1335789Sahrens ASSERT(spa_guid(spa) == pool_guid); 1336789Sahrens 1337789Sahrens /* 1338789Sahrens * Try to open all vdevs, loading each label in the process. 1339789Sahrens */ 13407754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 13414070Smc142369 error = vdev_open(rvd); 13427754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 13434070Smc142369 if (error != 0) 13441544Seschrock goto out; 1345789Sahrens 1346789Sahrens /* 13479276SMark.Musante@Sun.COM * We need to validate the vdev labels against the configuration that 13489276SMark.Musante@Sun.COM * we have in hand, which is dependent on the setting of mosconfig. If 13499276SMark.Musante@Sun.COM * mosconfig is true then we're validating the vdev labels based on 13509276SMark.Musante@Sun.COM * that config. Otherwise, we're validating against the cached config 13519276SMark.Musante@Sun.COM * (zpool.cache) that was read when we loaded the zfs module, and then 13529276SMark.Musante@Sun.COM * later we will recursively call spa_load() and validate against 13539276SMark.Musante@Sun.COM * the vdev config. 
13541986Seschrock */ 13559276SMark.Musante@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 13569276SMark.Musante@Sun.COM error = vdev_validate(rvd); 13579276SMark.Musante@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 13589276SMark.Musante@Sun.COM if (error != 0) 13599276SMark.Musante@Sun.COM goto out; 13601986Seschrock 13611986Seschrock if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { 13621986Seschrock error = ENXIO; 13631986Seschrock goto out; 13641986Seschrock } 13651986Seschrock 13661986Seschrock /* 1367789Sahrens * Find the best uberblock. 1368789Sahrens */ 13697754SJeff.Bonwick@Sun.COM vdev_uberblock_load(NULL, rvd, ub); 1370789Sahrens 1371789Sahrens /* 1372789Sahrens * If we weren't able to find a single valid uberblock, return failure. 1373789Sahrens */ 1374789Sahrens if (ub->ub_txg == 0) { 13751760Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 13761760Seschrock VDEV_AUX_CORRUPT_DATA); 13771544Seschrock error = ENXIO; 13781544Seschrock goto out; 13791544Seschrock } 13801544Seschrock 13811544Seschrock /* 13821544Seschrock * If the pool is newer than the code, we can't open it. 13831544Seschrock */ 13844577Sahrens if (ub->ub_version > SPA_VERSION) { 13851760Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 13861760Seschrock VDEV_AUX_VERSION_NEWER); 13871544Seschrock error = ENOTSUP; 13881544Seschrock goto out; 1389789Sahrens } 1390789Sahrens 1391789Sahrens /* 1392789Sahrens * If the vdev guid sum doesn't match the uberblock, we have an 1393789Sahrens * incomplete configuration. 1394789Sahrens */ 13951732Sbonwick if (rvd->vdev_guid_sum != ub->ub_guid_sum && mosconfig) { 13961544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 13971544Seschrock VDEV_AUX_BAD_GUID_SUM); 13981544Seschrock error = ENXIO; 13991544Seschrock goto out; 1400789Sahrens } 1401789Sahrens 1402789Sahrens /* 1403789Sahrens * Initialize internal SPA structures. 
1404789Sahrens */ 1405789Sahrens spa->spa_state = POOL_STATE_ACTIVE; 1406789Sahrens spa->spa_ubsync = spa->spa_uberblock; 140710921STim.Haley@Sun.COM spa->spa_verify_min_txg = spa->spa_extreme_rewind ? 140810921STim.Haley@Sun.COM TXG_INITIAL : spa_last_synced_txg(spa) - TXG_DEFER_SIZE; 140910921STim.Haley@Sun.COM spa->spa_first_txg = spa->spa_last_ubsync_txg ? 141010921STim.Haley@Sun.COM spa->spa_last_ubsync_txg : spa_last_synced_txg(spa) + 1; 141110922SJeff.Bonwick@Sun.COM spa->spa_claim_max_txg = spa->spa_first_txg; 141210922SJeff.Bonwick@Sun.COM 14131544Seschrock error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool); 14141544Seschrock if (error) { 14151544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 14161544Seschrock VDEV_AUX_CORRUPT_DATA); 141710921STim.Haley@Sun.COM error = EIO; 14181544Seschrock goto out; 14191544Seschrock } 1420789Sahrens spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset; 1421789Sahrens 14221544Seschrock if (zap_lookup(spa->spa_meta_objset, 1423789Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, 14241544Seschrock sizeof (uint64_t), 1, &spa->spa_config_object) != 0) { 14251544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 14261544Seschrock VDEV_AUX_CORRUPT_DATA); 14271544Seschrock error = EIO; 14281544Seschrock goto out; 14291544Seschrock } 1430789Sahrens 143110594SGeorge.Wilson@Sun.COM if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0) { 143210594SGeorge.Wilson@Sun.COM vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 143310594SGeorge.Wilson@Sun.COM VDEV_AUX_CORRUPT_DATA); 143410594SGeorge.Wilson@Sun.COM error = EIO; 143510594SGeorge.Wilson@Sun.COM goto out; 143610594SGeorge.Wilson@Sun.COM } 143710594SGeorge.Wilson@Sun.COM 1438789Sahrens if (!mosconfig) { 14393975Sek110237 uint64_t hostid; 14402082Seschrock 144110594SGeorge.Wilson@Sun.COM if (!spa_is_root(spa) && nvlist_lookup_uint64(nvconfig, 14427706SLin.Ling@Sun.COM ZPOOL_CONFIG_HOSTID, &hostid) == 0) { 14433975Sek110237 char 
*hostname; 14443975Sek110237 unsigned long myhostid = 0; 14453975Sek110237 144610594SGeorge.Wilson@Sun.COM VERIFY(nvlist_lookup_string(nvconfig, 14473975Sek110237 ZPOOL_CONFIG_HOSTNAME, &hostname) == 0); 14483975Sek110237 14498662SJordan.Vaughan@Sun.com #ifdef _KERNEL 14508662SJordan.Vaughan@Sun.com myhostid = zone_get_hostid(NULL); 14518662SJordan.Vaughan@Sun.com #else /* _KERNEL */ 14528662SJordan.Vaughan@Sun.com /* 14538662SJordan.Vaughan@Sun.com * We're emulating the system's hostid in userland, so 14548662SJordan.Vaughan@Sun.com * we can't use zone_get_hostid(). 14558662SJordan.Vaughan@Sun.com */ 14563975Sek110237 (void) ddi_strtoul(hw_serial, NULL, 10, &myhostid); 14578662SJordan.Vaughan@Sun.com #endif /* _KERNEL */ 14584178Slling if (hostid != 0 && myhostid != 0 && 14598662SJordan.Vaughan@Sun.com hostid != myhostid) { 14603975Sek110237 cmn_err(CE_WARN, "pool '%s' could not be " 14613975Sek110237 "loaded as it was last accessed by " 14627706SLin.Ling@Sun.COM "another system (host: %s hostid: 0x%lx). 
" 14633975Sek110237 "See: http://www.sun.com/msg/ZFS-8000-EY", 14647754SJeff.Bonwick@Sun.COM spa_name(spa), hostname, 14653975Sek110237 (unsigned long)hostid); 14663975Sek110237 error = EBADF; 14673975Sek110237 goto out; 14683975Sek110237 } 14693975Sek110237 } 14703975Sek110237 147110594SGeorge.Wilson@Sun.COM spa_config_set(spa, nvconfig); 1472789Sahrens spa_unload(spa); 1473789Sahrens spa_deactivate(spa); 14748241SJeff.Bonwick@Sun.COM spa_activate(spa, orig_mode); 1475789Sahrens 147610921STim.Haley@Sun.COM return (spa_load(spa, state, B_TRUE)); 14771544Seschrock } 14781544Seschrock 14791544Seschrock if (zap_lookup(spa->spa_meta_objset, 14801544Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, 148110922SJeff.Bonwick@Sun.COM sizeof (uint64_t), 1, &spa->spa_deferred_bplist_obj) != 0) { 14821544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 14831544Seschrock VDEV_AUX_CORRUPT_DATA); 14841544Seschrock error = EIO; 14851544Seschrock goto out; 1486789Sahrens } 1487789Sahrens 14881544Seschrock /* 14892082Seschrock * Load the bit that tells us to use the new accounting function 14902082Seschrock * (raid-z deflation). If we have an older pool, this will not 14912082Seschrock * be present. 14922082Seschrock */ 14932082Seschrock error = zap_lookup(spa->spa_meta_objset, 14942082Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 14952082Seschrock sizeof (uint64_t), 1, &spa->spa_deflate); 14962082Seschrock if (error != 0 && error != ENOENT) { 14972082Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 14982082Seschrock VDEV_AUX_CORRUPT_DATA); 14992082Seschrock error = EIO; 15002082Seschrock goto out; 15012082Seschrock } 15022082Seschrock 15032082Seschrock /* 15041544Seschrock * Load the persistent error log. If we have an older pool, this will 15051544Seschrock * not be present. 
15061544Seschrock */ 15071544Seschrock error = zap_lookup(spa->spa_meta_objset, 15081544Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_LAST, 15091544Seschrock sizeof (uint64_t), 1, &spa->spa_errlog_last); 15101807Sbonwick if (error != 0 && error != ENOENT) { 15111544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 15121544Seschrock VDEV_AUX_CORRUPT_DATA); 15131544Seschrock error = EIO; 15141544Seschrock goto out; 15151544Seschrock } 15161544Seschrock 15171544Seschrock error = zap_lookup(spa->spa_meta_objset, 15181544Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_SCRUB, 15191544Seschrock sizeof (uint64_t), 1, &spa->spa_errlog_scrub); 15201544Seschrock if (error != 0 && error != ENOENT) { 15211544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 15221544Seschrock VDEV_AUX_CORRUPT_DATA); 15231544Seschrock error = EIO; 15241544Seschrock goto out; 15251544Seschrock } 1526789Sahrens 1527789Sahrens /* 15282926Sek110237 * Load the history object. If we have an older pool, this 15292926Sek110237 * will not be present. 15302926Sek110237 */ 15312926Sek110237 error = zap_lookup(spa->spa_meta_objset, 15322926Sek110237 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_HISTORY, 15332926Sek110237 sizeof (uint64_t), 1, &spa->spa_history); 15342926Sek110237 if (error != 0 && error != ENOENT) { 15352926Sek110237 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 15362926Sek110237 VDEV_AUX_CORRUPT_DATA); 15372926Sek110237 error = EIO; 15382926Sek110237 goto out; 15392926Sek110237 } 15402926Sek110237 15412926Sek110237 /* 15422082Seschrock * Load any hot spares for this pool. 
15432082Seschrock */ 15442082Seschrock error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 15455450Sbrendan DMU_POOL_SPARES, sizeof (uint64_t), 1, &spa->spa_spares.sav_object); 15462082Seschrock if (error != 0 && error != ENOENT) { 15472082Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 15482082Seschrock VDEV_AUX_CORRUPT_DATA); 15492082Seschrock error = EIO; 15502082Seschrock goto out; 15512082Seschrock } 15522082Seschrock if (error == 0) { 15534577Sahrens ASSERT(spa_version(spa) >= SPA_VERSION_SPARES); 15545450Sbrendan if (load_nvlist(spa, spa->spa_spares.sav_object, 15555450Sbrendan &spa->spa_spares.sav_config) != 0) { 15562082Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 15572082Seschrock VDEV_AUX_CORRUPT_DATA); 15582082Seschrock error = EIO; 15592082Seschrock goto out; 15602082Seschrock } 15612082Seschrock 15627754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 15632082Seschrock spa_load_spares(spa); 15647754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 15652082Seschrock } 15662082Seschrock 15675450Sbrendan /* 15685450Sbrendan * Load any level 2 ARC devices for this pool. 
15695450Sbrendan */ 15705450Sbrendan error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 15715450Sbrendan DMU_POOL_L2CACHE, sizeof (uint64_t), 1, 15725450Sbrendan &spa->spa_l2cache.sav_object); 15735450Sbrendan if (error != 0 && error != ENOENT) { 15745450Sbrendan vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 15755450Sbrendan VDEV_AUX_CORRUPT_DATA); 15765450Sbrendan error = EIO; 15775450Sbrendan goto out; 15785450Sbrendan } 15795450Sbrendan if (error == 0) { 15805450Sbrendan ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE); 15815450Sbrendan if (load_nvlist(spa, spa->spa_l2cache.sav_object, 15825450Sbrendan &spa->spa_l2cache.sav_config) != 0) { 15835450Sbrendan vdev_set_state(rvd, B_TRUE, 15845450Sbrendan VDEV_STATE_CANT_OPEN, 15855450Sbrendan VDEV_AUX_CORRUPT_DATA); 15865450Sbrendan error = EIO; 15875450Sbrendan goto out; 15885450Sbrendan } 15895450Sbrendan 15907754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 15915450Sbrendan spa_load_l2cache(spa); 15927754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 15935450Sbrendan } 15945450Sbrendan 15955094Slling spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); 15964543Smarks 15973912Slling error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 15983912Slling DMU_POOL_PROPS, sizeof (uint64_t), 1, &spa->spa_pool_props_object); 15993912Slling 16003912Slling if (error && error != ENOENT) { 16013912Slling vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 16023912Slling VDEV_AUX_CORRUPT_DATA); 16033912Slling error = EIO; 16043912Slling goto out; 16053912Slling } 16063912Slling 16073912Slling if (error == 0) { 16083912Slling (void) zap_lookup(spa->spa_meta_objset, 16093912Slling spa->spa_pool_props_object, 16104451Seschrock zpool_prop_to_name(ZPOOL_PROP_BOOTFS), 16113912Slling sizeof (uint64_t), 1, &spa->spa_bootfs); 16124451Seschrock (void) zap_lookup(spa->spa_meta_objset, 16134451Seschrock spa->spa_pool_props_object, 16144451Seschrock 
zpool_prop_to_name(ZPOOL_PROP_AUTOREPLACE), 16154451Seschrock sizeof (uint64_t), 1, &autoreplace); 161610672SEric.Schrock@Sun.COM spa->spa_autoreplace = (autoreplace != 0); 16174543Smarks (void) zap_lookup(spa->spa_meta_objset, 16184543Smarks spa->spa_pool_props_object, 16194543Smarks zpool_prop_to_name(ZPOOL_PROP_DELEGATION), 16204543Smarks sizeof (uint64_t), 1, &spa->spa_delegation); 16215329Sgw25295 (void) zap_lookup(spa->spa_meta_objset, 16225329Sgw25295 spa->spa_pool_props_object, 16235329Sgw25295 zpool_prop_to_name(ZPOOL_PROP_FAILUREMODE), 16245329Sgw25295 sizeof (uint64_t), 1, &spa->spa_failmode); 16259816SGeorge.Wilson@Sun.COM (void) zap_lookup(spa->spa_meta_objset, 16269816SGeorge.Wilson@Sun.COM spa->spa_pool_props_object, 16279816SGeorge.Wilson@Sun.COM zpool_prop_to_name(ZPOOL_PROP_AUTOEXPAND), 16289816SGeorge.Wilson@Sun.COM sizeof (uint64_t), 1, &spa->spa_autoexpand); 162910922SJeff.Bonwick@Sun.COM (void) zap_lookup(spa->spa_meta_objset, 163010922SJeff.Bonwick@Sun.COM spa->spa_pool_props_object, 163110922SJeff.Bonwick@Sun.COM zpool_prop_to_name(ZPOOL_PROP_DEDUPDITTO), 163210922SJeff.Bonwick@Sun.COM sizeof (uint64_t), 1, &spa->spa_dedup_ditto); 16333912Slling } 16343912Slling 16352082Seschrock /* 16364451Seschrock * If the 'autoreplace' property is set, then post a resource notifying 16374451Seschrock * the ZFS DE that it should not issue any faults for unopenable 16384451Seschrock * devices. We also iterate over the vdevs, and post a sysevent for any 16394451Seschrock * unopenable vdevs so that the normal autoreplace handler can take 16404451Seschrock * over. 
16414451Seschrock */ 164210672SEric.Schrock@Sun.COM if (spa->spa_autoreplace && state != SPA_LOAD_TRYIMPORT) { 16434451Seschrock spa_check_removed(spa->spa_root_vdev); 164410672SEric.Schrock@Sun.COM /* 164510672SEric.Schrock@Sun.COM * For the import case, this is done in spa_import(), because 164610672SEric.Schrock@Sun.COM * at this point we're using the spare definitions from 164710672SEric.Schrock@Sun.COM * the MOS config, not necessarily from the userland config. 164810672SEric.Schrock@Sun.COM */ 164910672SEric.Schrock@Sun.COM if (state != SPA_LOAD_IMPORT) { 165010672SEric.Schrock@Sun.COM spa_aux_check_removed(&spa->spa_spares); 165110672SEric.Schrock@Sun.COM spa_aux_check_removed(&spa->spa_l2cache); 165210672SEric.Schrock@Sun.COM } 165310672SEric.Schrock@Sun.COM } 16544451Seschrock 16554451Seschrock /* 16561986Seschrock * Load the vdev state for all toplevel vdevs. 1657789Sahrens */ 16581986Seschrock vdev_load(rvd); 1659789Sahrens 1660789Sahrens /* 1661789Sahrens * Propagate the leaf DTLs we just loaded all the way up the tree. 1662789Sahrens */ 16637754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1664789Sahrens vdev_dtl_reassess(rvd, 0, 0, B_FALSE); 16657754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 1666789Sahrens 1667789Sahrens /* 1668789Sahrens * Check the state of the root vdev. If it can't be opened, it 1669789Sahrens * indicates one or more toplevel vdevs are faulted. 1670789Sahrens */ 16711544Seschrock if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { 16721544Seschrock error = ENXIO; 16731544Seschrock goto out; 16741544Seschrock } 1675789Sahrens 167610922SJeff.Bonwick@Sun.COM /* 167710922SJeff.Bonwick@Sun.COM * Load the DDTs (dedup tables). 
167810922SJeff.Bonwick@Sun.COM */ 167910922SJeff.Bonwick@Sun.COM error = ddt_load(spa); 168010922SJeff.Bonwick@Sun.COM if (error != 0) { 168110922SJeff.Bonwick@Sun.COM vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 168210922SJeff.Bonwick@Sun.COM VDEV_AUX_CORRUPT_DATA); 168310922SJeff.Bonwick@Sun.COM error = EIO; 168410922SJeff.Bonwick@Sun.COM goto out; 168510922SJeff.Bonwick@Sun.COM } 168610922SJeff.Bonwick@Sun.COM 168710956SGeorge.Wilson@Sun.COM spa_update_dspace(spa); 168810956SGeorge.Wilson@Sun.COM 168910921STim.Haley@Sun.COM if (state != SPA_LOAD_TRYIMPORT) { 169010921STim.Haley@Sun.COM error = spa_load_verify(spa); 169110921STim.Haley@Sun.COM if (error) { 169210921STim.Haley@Sun.COM vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 169310921STim.Haley@Sun.COM VDEV_AUX_CORRUPT_DATA); 169410921STim.Haley@Sun.COM goto out; 169510921STim.Haley@Sun.COM } 169610921STim.Haley@Sun.COM } 169710921STim.Haley@Sun.COM 169810922SJeff.Bonwick@Sun.COM /* 169910922SJeff.Bonwick@Sun.COM * Load the intent log state and check log integrity. 
170010922SJeff.Bonwick@Sun.COM */ 170110922SJeff.Bonwick@Sun.COM VERIFY(nvlist_lookup_nvlist(nvconfig, ZPOOL_CONFIG_VDEV_TREE, 170210922SJeff.Bonwick@Sun.COM &nvroot) == 0); 170310922SJeff.Bonwick@Sun.COM spa_load_log_state(spa, nvroot); 170410922SJeff.Bonwick@Sun.COM nvlist_free(nvconfig); 170510922SJeff.Bonwick@Sun.COM 170610922SJeff.Bonwick@Sun.COM if (spa_check_logs(spa)) { 170710922SJeff.Bonwick@Sun.COM vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 170810922SJeff.Bonwick@Sun.COM VDEV_AUX_BAD_LOG); 170910922SJeff.Bonwick@Sun.COM error = ENXIO; 171010922SJeff.Bonwick@Sun.COM ereport = FM_EREPORT_ZFS_LOG_REPLAY; 171110922SJeff.Bonwick@Sun.COM goto out; 171210922SJeff.Bonwick@Sun.COM } 171310922SJeff.Bonwick@Sun.COM 171410921STim.Haley@Sun.COM if (spa_writeable(spa) && (state == SPA_LOAD_RECOVER || 171510921STim.Haley@Sun.COM spa->spa_load_max_txg == UINT64_MAX)) { 17161635Sbonwick dmu_tx_t *tx; 17171635Sbonwick int need_update = B_FALSE; 17188241SJeff.Bonwick@Sun.COM 17198241SJeff.Bonwick@Sun.COM ASSERT(state != SPA_LOAD_TRYIMPORT); 17201601Sbonwick 17211635Sbonwick /* 17221635Sbonwick * Claim log blocks that haven't been committed yet. 17231635Sbonwick * This must all happen in a single txg. 172410922SJeff.Bonwick@Sun.COM * Note: spa_claim_max_txg is updated by spa_claim_notify(), 172510922SJeff.Bonwick@Sun.COM * invoked from zil_claim_log_block()'s i/o done callback. 172610921STim.Haley@Sun.COM * Price of rollback is that we abandon the log. 
17271635Sbonwick */ 172810922SJeff.Bonwick@Sun.COM spa->spa_claiming = B_TRUE; 172910922SJeff.Bonwick@Sun.COM 17301601Sbonwick tx = dmu_tx_create_assigned(spa_get_dsl(spa), 1731789Sahrens spa_first_txg(spa)); 17327754SJeff.Bonwick@Sun.COM (void) dmu_objset_find(spa_name(spa), 17332417Sahrens zil_claim, tx, DS_FIND_CHILDREN); 1734789Sahrens dmu_tx_commit(tx); 1735789Sahrens 173610922SJeff.Bonwick@Sun.COM spa->spa_claiming = B_FALSE; 173710922SJeff.Bonwick@Sun.COM 17389701SGeorge.Wilson@Sun.COM spa->spa_log_state = SPA_LOG_GOOD; 1739789Sahrens spa->spa_sync_on = B_TRUE; 1740789Sahrens txg_sync_start(spa->spa_dsl_pool); 1741789Sahrens 1742789Sahrens /* 174310922SJeff.Bonwick@Sun.COM * Wait for all claims to sync. We sync up to the highest 174410922SJeff.Bonwick@Sun.COM * claimed log block birth time so that claimed log blocks 174510922SJeff.Bonwick@Sun.COM * don't appear to be from the future. spa_claim_max_txg 174610922SJeff.Bonwick@Sun.COM * will have been set for us by either zil_check_log_chain() 174710922SJeff.Bonwick@Sun.COM * (invoked from spa_check_logs()) or zil_claim() above. 1748789Sahrens */ 174910922SJeff.Bonwick@Sun.COM txg_wait_synced(spa->spa_dsl_pool, spa->spa_claim_max_txg); 17501585Sbonwick 17511585Sbonwick /* 17521635Sbonwick * If the config cache is stale, or we have uninitialized 17531635Sbonwick * metaslabs (see spa_vdev_add()), then update the config. 175410100SLin.Ling@Sun.COM * 175510100SLin.Ling@Sun.COM * If spa_load_verbatim is true, trust the current 175610100SLin.Ling@Sun.COM * in-core spa_config and update the disk labels. 
17571585Sbonwick */ 17581635Sbonwick if (config_cache_txg != spa->spa_config_txg || 175910921STim.Haley@Sun.COM state == SPA_LOAD_IMPORT || spa->spa_load_verbatim || 176010921STim.Haley@Sun.COM state == SPA_LOAD_RECOVER) 17611635Sbonwick need_update = B_TRUE; 17621635Sbonwick 17638241SJeff.Bonwick@Sun.COM for (int c = 0; c < rvd->vdev_children; c++) 17641635Sbonwick if (rvd->vdev_child[c]->vdev_ms_array == 0) 17651635Sbonwick need_update = B_TRUE; 17661585Sbonwick 17671585Sbonwick /* 17681635Sbonwick * Update the config cache asychronously in case we're the 17691635Sbonwick * root pool, in which case the config cache isn't writable yet. 17701585Sbonwick */ 17711635Sbonwick if (need_update) 17721635Sbonwick spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); 17738241SJeff.Bonwick@Sun.COM 17748241SJeff.Bonwick@Sun.COM /* 17758241SJeff.Bonwick@Sun.COM * Check all DTLs to see if anything needs resilvering. 17768241SJeff.Bonwick@Sun.COM */ 17778241SJeff.Bonwick@Sun.COM if (vdev_resilver_needed(rvd, NULL, NULL)) 17788241SJeff.Bonwick@Sun.COM spa_async_request(spa, SPA_ASYNC_RESILVER); 177910298SMatthew.Ahrens@Sun.COM 178010298SMatthew.Ahrens@Sun.COM /* 178110298SMatthew.Ahrens@Sun.COM * Delete any inconsistent datasets. 178210298SMatthew.Ahrens@Sun.COM */ 178310298SMatthew.Ahrens@Sun.COM (void) dmu_objset_find(spa_name(spa), 178410298SMatthew.Ahrens@Sun.COM dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN); 178510342Schris.kirby@sun.com 178610342Schris.kirby@sun.com /* 178710342Schris.kirby@sun.com * Clean up any stale temporary dataset userrefs. 
178810342Schris.kirby@sun.com */ 178910342Schris.kirby@sun.com dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool); 1790789Sahrens } 1791789Sahrens 17921544Seschrock error = 0; 17931544Seschrock out: 179410921STim.Haley@Sun.COM 17957046Sahrens spa->spa_minref = refcount_count(&spa->spa_refcount); 17962082Seschrock if (error && error != EBADF) 17977294Sperrin zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0); 17981544Seschrock spa->spa_load_state = SPA_LOAD_NONE; 17991544Seschrock spa->spa_ena = 0; 18001544Seschrock 18011544Seschrock return (error); 1802789Sahrens } 1803789Sahrens 180410921STim.Haley@Sun.COM static int 180510921STim.Haley@Sun.COM spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig) 180610921STim.Haley@Sun.COM { 180710921STim.Haley@Sun.COM spa_unload(spa); 180810921STim.Haley@Sun.COM spa_deactivate(spa); 180910921STim.Haley@Sun.COM 181010921STim.Haley@Sun.COM spa->spa_load_max_txg--; 181110921STim.Haley@Sun.COM 181210921STim.Haley@Sun.COM spa_activate(spa, spa_mode_global); 181310921STim.Haley@Sun.COM spa_async_suspend(spa); 181410921STim.Haley@Sun.COM 181510921STim.Haley@Sun.COM return (spa_load(spa, state, mosconfig)); 181610921STim.Haley@Sun.COM } 181710921STim.Haley@Sun.COM 181810921STim.Haley@Sun.COM static int 181910921STim.Haley@Sun.COM spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig, 182010921STim.Haley@Sun.COM uint64_t max_request, boolean_t extreme) 182110921STim.Haley@Sun.COM { 182210921STim.Haley@Sun.COM nvlist_t *config = NULL; 182310921STim.Haley@Sun.COM int load_error, rewind_error; 182410921STim.Haley@Sun.COM uint64_t safe_rollback_txg; 182510921STim.Haley@Sun.COM uint64_t min_txg; 182610921STim.Haley@Sun.COM 1827*11026STim.Haley@Sun.COM if (spa->spa_load_txg && state == SPA_LOAD_RECOVER) { 182810921STim.Haley@Sun.COM spa->spa_load_max_txg = spa->spa_load_txg; 1829*11026STim.Haley@Sun.COM spa->spa_log_state = SPA_LOG_CLEAR; 1830*11026STim.Haley@Sun.COM } else { 183110921STim.Haley@Sun.COM spa->spa_load_max_txg = 
/*
 * Load the pool, rewinding to progressively older txgs if the initial
 * load fails and the rewind policy permits it.
 *
 * max_request is the caller-supplied rewind ceiling (UINT64_MAX means
 * "no specific txg requested"); extreme allows rewinding past the
 * TXG_DEFER_SIZE safety window, all the way back to TXG_INITIAL.
 *
 * Returns 0 on success.  On failure, returns the rewind error for
 * SPA_LOAD_RECOVER callers and the original load error otherwise.
 */
static int
spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig,
    uint64_t max_request, boolean_t extreme)
{
	nvlist_t *config = NULL;
	int load_error, rewind_error;
	uint64_t safe_rollback_txg;
	uint64_t min_txg;

	/*
	 * An explicit recovery txg (spa_load_txg) overrides the caller's
	 * ceiling; rolling back to it implies abandoning the log.
	 */
	if (spa->spa_load_txg && state == SPA_LOAD_RECOVER) {
		spa->spa_load_max_txg = spa->spa_load_txg;
		spa->spa_log_state = SPA_LOG_CLEAR;
	} else {
		spa->spa_load_max_txg = max_request;
	}

	load_error = rewind_error = spa_load(spa, state, mosconfig);
	if (load_error == 0)
		return (0);

	/* Snapshot the config before rewinding destroys the vdev tree. */
	if (spa->spa_root_vdev != NULL)
		config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);

	/* Remember which uberblock the failed load was based on. */
	spa->spa_last_ubsync_txg = spa->spa_uberblock.ub_txg;
	spa->spa_last_ubsync_txg_ts = spa->spa_uberblock.ub_timestamp;

	/* specific txg requested */
	if (spa->spa_load_max_txg != UINT64_MAX && !extreme) {
		nvlist_free(config);
		return (load_error);
	}

	/* Price of rolling back is discarding txgs, including log */
	if (state == SPA_LOAD_RECOVER)
		spa->spa_log_state = SPA_LOG_CLEAR;

	spa->spa_load_max_txg = spa->spa_uberblock.ub_txg;
	/*
	 * Rewinding within TXG_DEFER_SIZE txgs of the last-synced txg is
	 * considered safe; going further requires extreme rewind.
	 */
	safe_rollback_txg = spa->spa_uberblock.ub_txg - TXG_DEFER_SIZE;

	min_txg = extreme ? TXG_INITIAL : safe_rollback_txg;
	while (rewind_error && (spa->spa_uberblock.ub_txg >= min_txg)) {
		if (spa->spa_load_max_txg < safe_rollback_txg)
			spa->spa_extreme_rewind = B_TRUE;
		rewind_error = spa_load_retry(spa, state, mosconfig);
	}

	/* Attach rewind bookkeeping to the saved config for userland. */
	if (config)
		spa_rewind_data_to_nvlist(spa, config);

	spa->spa_extreme_rewind = B_FALSE;
	spa->spa_load_max_txg = UINT64_MAX;

	/*
	 * Re-install the pre-rewind config unless a recovery rewind
	 * actually succeeded (in which case the rewound config stands).
	 */
	if (config && (rewind_error || state != SPA_LOAD_RECOVER))
		spa_config_set(spa, config);

	return (state == SPA_LOAD_RECOVER ? rewind_error : load_error);
}
/*
 * Common implementation for spa_open() and spa_open_rewind().
 *
 * Looks up the named pool, loading it (honoring the rewind policy in
 * nvpolicy, if any) when it is still POOL_STATE_UNINITIALIZED, and adds
 * a reference for 'tag'.  If config is non-NULL, a generated (or, on
 * failure, cached) config nvlist is returned through it; the caller
 * owns and must free that nvlist.
 *
 * Returns 0 with *spapp set on success; ENOENT if the pool does not
 * exist (or its vdevs say it was exported/destroyed); otherwise the
 * load error, with *spapp left NULL.
 */
static int
spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy,
    nvlist_t **config)
{
	spa_t *spa;
	boolean_t norewind;
	boolean_t extreme;
	zpool_rewind_policy_t policy;
	spa_load_state_t state = SPA_LOAD_OPEN;
	int error;
	int locked = B_FALSE;

	*spapp = NULL;

	/* Decode the caller's rewind request (nvpolicy may be NULL). */
	zpool_get_rewind_policy(nvpolicy, &policy);
	if (policy.zrp_request & ZPOOL_DO_REWIND)
		state = SPA_LOAD_RECOVER;
	norewind = (policy.zrp_request == ZPOOL_NO_REWIND);
	extreme = ((policy.zrp_request & ZPOOL_EXTREME_REWIND) != 0);

	/*
	 * As disgusting as this is, we need to support recursive calls to this
	 * function because dsl_dir_open() is called during spa_load(), and ends
	 * up calling spa_open() again.  The real fix is to figure out how to
	 * avoid dsl_dir_open() calling this in the first place.
	 */
	if (mutex_owner(&spa_namespace_lock) != curthread) {
		mutex_enter(&spa_namespace_lock);
		locked = B_TRUE;
	}

	if ((spa = spa_lookup(pool)) == NULL) {
		if (locked)
			mutex_exit(&spa_namespace_lock);
		return (ENOENT);
	}

	if (spa->spa_state == POOL_STATE_UNINITIALIZED) {

		spa_activate(spa, spa_mode_global);

		/*
		 * A previous open failed and the caller forbids rewind:
		 * report the cached failure (and cached config) without
		 * re-attempting the load.
		 */
		if (spa->spa_last_open_failed && norewind) {
			if (config != NULL && spa->spa_config)
				VERIFY(nvlist_dup(spa->spa_config,
				    config, KM_SLEEP) == 0);
			spa_deactivate(spa);
			if (locked)
				mutex_exit(&spa_namespace_lock);
			return (spa->spa_last_open_failed);
		}

		if (state != SPA_LOAD_RECOVER)
			spa->spa_last_ubsync_txg = spa->spa_load_txg = 0;

		error = spa_load_best(spa, state, B_FALSE, policy.zrp_txg,
		    extreme);

		if (error == EBADF) {
			/*
			 * If vdev_validate() returns failure (indicated by
			 * EBADF), it indicates that one of the vdevs indicates
			 * that the pool has been exported or destroyed.  If
			 * this is the case, the config cache is out of sync and
			 * we should remove the pool from the namespace.
			 */
			spa_unload(spa);
			spa_deactivate(spa);
			spa_config_sync(spa, B_TRUE, B_TRUE);
			spa_remove(spa);
			if (locked)
				mutex_exit(&spa_namespace_lock);
			return (ENOENT);
		}

		if (error) {
			/*
			 * We can't open the pool, but we still have useful
			 * information: the state of each vdev after the
			 * attempted vdev_open().  Return this to the user.
			 */
			if (config != NULL && spa->spa_config)
				VERIFY(nvlist_dup(spa->spa_config, config,
				    KM_SLEEP) == 0);
			spa_unload(spa);
			spa_deactivate(spa);
			spa->spa_last_open_failed = error;
			if (locked)
				mutex_exit(&spa_namespace_lock);
			*spapp = NULL;
			return (error);
		}
	}

	spa_open_ref(spa, tag);

	if (config != NULL)
		*config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);

	/*
	 * Success: clear the cached failure/rewind state.  Only done when
	 * we took the namespace lock ourselves (i.e. not a recursive call).
	 */
	if (locked) {
		spa->spa_last_open_failed = 0;
		spa->spa_last_ubsync_txg = 0;
		spa->spa_load_txg = 0;
		mutex_exit(&spa_namespace_lock);
	}

	*spapp = spa;

	return (0);
}
return (spa_open_common(name, spapp, tag, policy, config)); 200510921STim.Haley@Sun.COM } 200610921STim.Haley@Sun.COM 200710921STim.Haley@Sun.COM int 2008789Sahrens spa_open(const char *name, spa_t **spapp, void *tag) 2009789Sahrens { 201010921STim.Haley@Sun.COM return (spa_open_common(name, spapp, tag, NULL, NULL)); 2011789Sahrens } 2012789Sahrens 20131544Seschrock /* 20141544Seschrock * Lookup the given spa_t, incrementing the inject count in the process, 20151544Seschrock * preventing it from being exported or destroyed. 20161544Seschrock */ 20171544Seschrock spa_t * 20181544Seschrock spa_inject_addref(char *name) 20191544Seschrock { 20201544Seschrock spa_t *spa; 20211544Seschrock 20221544Seschrock mutex_enter(&spa_namespace_lock); 20231544Seschrock if ((spa = spa_lookup(name)) == NULL) { 20241544Seschrock mutex_exit(&spa_namespace_lock); 20251544Seschrock return (NULL); 20261544Seschrock } 20271544Seschrock spa->spa_inject_ref++; 20281544Seschrock mutex_exit(&spa_namespace_lock); 20291544Seschrock 20301544Seschrock return (spa); 20311544Seschrock } 20321544Seschrock 20331544Seschrock void 20341544Seschrock spa_inject_delref(spa_t *spa) 20351544Seschrock { 20361544Seschrock mutex_enter(&spa_namespace_lock); 20371544Seschrock spa->spa_inject_ref--; 20381544Seschrock mutex_exit(&spa_namespace_lock); 20391544Seschrock } 20401544Seschrock 20415450Sbrendan /* 20425450Sbrendan * Add spares device information to the nvlist. 
/*
 * Add spares device information to the nvlist.
 *
 * Copies the pool's spare list (spa_spares.sav_config) into the
 * ZPOOL_CONFIG_VDEV_TREE of 'config', then marks any spare that is
 * currently in use by another pool as CANT_OPEN/SPARED so userland
 * reports it correctly.  Caller must hold SCL_CONFIG as reader.
 */
static void
spa_add_spares(spa_t *spa, nvlist_t *config)
{
	nvlist_t **spares;
	uint_t i, nspares;
	nvlist_t *nvroot;
	uint64_t guid;
	vdev_stat_t *vs;
	uint_t vsc;
	uint64_t pool;

	ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));

	if (spa->spa_spares.sav_count == 0)
		return;

	VERIFY(nvlist_lookup_nvlist(config,
	    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
	VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
	    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
	if (nspares != 0) {
		/*
		 * Add the array, then look it back up so 'spares' points
		 * at the copies now owned by 'config' (the add makes a
		 * copy; mutating the originals would not stick).
		 */
		VERIFY(nvlist_add_nvlist_array(nvroot,
		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
		VERIFY(nvlist_lookup_nvlist_array(nvroot,
		    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);

		/*
		 * Go through and find any spares which have since been
		 * repurposed as an active spare.  If this is the case, update
		 * their status appropriately.
		 */
		for (i = 0; i < nspares; i++) {
			VERIFY(nvlist_lookup_uint64(spares[i],
			    ZPOOL_CONFIG_GUID, &guid) == 0);
			/* pool != 0 means the spare is active elsewhere. */
			if (spa_spare_exists(guid, &pool, NULL) &&
			    pool != 0ULL) {
				VERIFY(nvlist_lookup_uint64_array(
				    spares[i], ZPOOL_CONFIG_STATS,
				    (uint64_t **)&vs, &vsc) == 0);
				vs->vs_state = VDEV_STATE_CANT_OPEN;
				vs->vs_aux = VDEV_AUX_SPARED;
			}
		}
	}
}

/*
 * Add l2cache device information to the nvlist, including vdev stats.
 *
 * Mirrors spa_add_spares(): copies the l2cache array into 'config' and
 * refreshes each entry's ZPOOL_CONFIG_STATS from the live vdev.
 * Caller must hold SCL_CONFIG as reader.
 */
static void
spa_add_l2cache(spa_t *spa, nvlist_t *config)
{
	nvlist_t **l2cache;
	uint_t i, j, nl2cache;
	nvlist_t *nvroot;
	uint64_t guid;
	vdev_t *vd;
	vdev_stat_t *vs;
	uint_t vsc;

	ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));

	if (spa->spa_l2cache.sav_count == 0)
		return;

	VERIFY(nvlist_lookup_nvlist(config,
	    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
	VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config,
	    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
	if (nl2cache != 0) {
		/* Add, then re-lookup to mutate the copies in 'config'. */
		VERIFY(nvlist_add_nvlist_array(nvroot,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
		VERIFY(nvlist_lookup_nvlist_array(nvroot,
		    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);

		/*
		 * Update level 2 cache device stats.
		 */
		for (i = 0; i < nl2cache; i++) {
			VERIFY(nvlist_lookup_uint64(l2cache[i],
			    ZPOOL_CONFIG_GUID, &guid) == 0);

			/* Match the nvlist entry to its in-core vdev. */
			vd = NULL;
			for (j = 0; j < spa->spa_l2cache.sav_count; j++) {
				if (guid ==
				    spa->spa_l2cache.sav_vdevs[j]->vdev_guid) {
					vd = spa->spa_l2cache.sav_vdevs[j];
					break;
				}
			}
			ASSERT(vd != NULL);

			VERIFY(nvlist_lookup_uint64_array(l2cache[i],
			    ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0);
			vdev_get_stats(vd, vs);
		}
	}
}
/*
 * Return the pool's configuration and status in *config, and its
 * alternate root (if any) in altroot[buflen].
 *
 * On a successful open the config is augmented with the error-log
 * size, suspension state, and spare/l2cache information.  The altroot
 * is fetched even for pools that failed to open, via a direct
 * spa_lookup().  Returns the error from spa_open_common(); *config
 * may be non-NULL (caller frees) even on failure.
 */
int
spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen)
{
	int error;
	spa_t *spa;

	*config = NULL;
	error = spa_open_common(name, &spa, FTAG, NULL, config);

	if (spa != NULL) {
		/*
		 * This still leaves a window of inconsistency where the spares
		 * or l2cache devices could change and the config would be
		 * self-inconsistent.
		 */
		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);

		if (*config != NULL) {
			VERIFY(nvlist_add_uint64(*config,
			    ZPOOL_CONFIG_ERRCOUNT,
			    spa_get_errlog_size(spa)) == 0);

			if (spa_suspended(spa))
				VERIFY(nvlist_add_uint64(*config,
				    ZPOOL_CONFIG_SUSPENDED,
				    spa->spa_failmode) == 0);

			spa_add_spares(spa, *config);
			spa_add_l2cache(spa, *config);
		}
	}

	/*
	 * We want to get the alternate root even for faulted pools, so we cheat
	 * and call spa_lookup() directly.
	 */
	if (altroot) {
		if (spa == NULL) {
			mutex_enter(&spa_namespace_lock);
			spa = spa_lookup(name);
			if (spa)
				spa_altroot(spa, altroot, buflen);
			else
				altroot[0] = '\0';
			/* Reset so we don't config_exit/close a pool we
			 * never opened. */
			spa = NULL;
			mutex_exit(&spa_namespace_lock);
		} else {
			spa_altroot(spa, altroot, buflen);
		}
	}

	if (spa != NULL) {
		spa_config_exit(spa, SCL_CONFIG, FTAG);
		spa_close(spa, FTAG);
	}

	return (error);
}
We must have an 22055450Sbrendan * array of nvlists, each which describes a valid leaf vdev. If this is an 22065450Sbrendan * import (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be 22075450Sbrendan * specified, as long as they are well-formed. 22082082Seschrock */ 22092082Seschrock static int 22105450Sbrendan spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode, 22115450Sbrendan spa_aux_vdev_t *sav, const char *config, uint64_t version, 22125450Sbrendan vdev_labeltype_t label) 22132082Seschrock { 22145450Sbrendan nvlist_t **dev; 22155450Sbrendan uint_t i, ndev; 22162082Seschrock vdev_t *vd; 22172082Seschrock int error; 22182082Seschrock 22197754SJeff.Bonwick@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 22207754SJeff.Bonwick@Sun.COM 22212082Seschrock /* 22225450Sbrendan * It's acceptable to have no devs specified. 22232082Seschrock */ 22245450Sbrendan if (nvlist_lookup_nvlist_array(nvroot, config, &dev, &ndev) != 0) 22252082Seschrock return (0); 22262082Seschrock 22275450Sbrendan if (ndev == 0) 22282082Seschrock return (EINVAL); 22292082Seschrock 22302082Seschrock /* 22315450Sbrendan * Make sure the pool is formatted with a version that supports this 22325450Sbrendan * device type. 22332082Seschrock */ 22345450Sbrendan if (spa_version(spa) < version) 22352082Seschrock return (ENOTSUP); 22362082Seschrock 22373377Seschrock /* 22385450Sbrendan * Set the pending device list so we correctly handle device in-use 22393377Seschrock * checking. 
22403377Seschrock */ 22415450Sbrendan sav->sav_pending = dev; 22425450Sbrendan sav->sav_npending = ndev; 22435450Sbrendan 22445450Sbrendan for (i = 0; i < ndev; i++) { 22455450Sbrendan if ((error = spa_config_parse(spa, &vd, dev[i], NULL, 0, 22462082Seschrock mode)) != 0) 22473377Seschrock goto out; 22482082Seschrock 22492082Seschrock if (!vd->vdev_ops->vdev_op_leaf) { 22502082Seschrock vdev_free(vd); 22513377Seschrock error = EINVAL; 22523377Seschrock goto out; 22532082Seschrock } 22542082Seschrock 22555450Sbrendan /* 22567754SJeff.Bonwick@Sun.COM * The L2ARC currently only supports disk devices in 22577754SJeff.Bonwick@Sun.COM * kernel context. For user-level testing, we allow it. 22585450Sbrendan */ 22597754SJeff.Bonwick@Sun.COM #ifdef _KERNEL 22605450Sbrendan if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) && 22615450Sbrendan strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) { 22625450Sbrendan error = ENOTBLK; 22635450Sbrendan goto out; 22645450Sbrendan } 22657754SJeff.Bonwick@Sun.COM #endif 22662082Seschrock vd->vdev_top = vd; 22673377Seschrock 22683377Seschrock if ((error = vdev_open(vd)) == 0 && 22695450Sbrendan (error = vdev_label_init(vd, crtxg, label)) == 0) { 22705450Sbrendan VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID, 22713377Seschrock vd->vdev_guid) == 0); 22722082Seschrock } 22732082Seschrock 22742082Seschrock vdev_free(vd); 22753377Seschrock 22765450Sbrendan if (error && 22775450Sbrendan (mode != VDEV_ALLOC_SPARE && mode != VDEV_ALLOC_L2CACHE)) 22783377Seschrock goto out; 22793377Seschrock else 22803377Seschrock error = 0; 22812082Seschrock } 22822082Seschrock 22833377Seschrock out: 22845450Sbrendan sav->sav_pending = NULL; 22855450Sbrendan sav->sav_npending = 0; 22863377Seschrock return (error); 22872082Seschrock } 22882082Seschrock 22895450Sbrendan static int 22905450Sbrendan spa_validate_aux(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode) 22915450Sbrendan { 22925450Sbrendan int error; 22935450Sbrendan 
22947754SJeff.Bonwick@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 22957754SJeff.Bonwick@Sun.COM 22965450Sbrendan if ((error = spa_validate_aux_devs(spa, nvroot, crtxg, mode, 22975450Sbrendan &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES, 22985450Sbrendan VDEV_LABEL_SPARE)) != 0) { 22995450Sbrendan return (error); 23005450Sbrendan } 23015450Sbrendan 23025450Sbrendan return (spa_validate_aux_devs(spa, nvroot, crtxg, mode, 23035450Sbrendan &spa->spa_l2cache, ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE, 23045450Sbrendan VDEV_LABEL_L2CACHE)); 23055450Sbrendan } 23065450Sbrendan 23075450Sbrendan static void 23085450Sbrendan spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs, 23095450Sbrendan const char *config) 23105450Sbrendan { 23115450Sbrendan int i; 23125450Sbrendan 23135450Sbrendan if (sav->sav_config != NULL) { 23145450Sbrendan nvlist_t **olddevs; 23155450Sbrendan uint_t oldndevs; 23165450Sbrendan nvlist_t **newdevs; 23175450Sbrendan 23185450Sbrendan /* 23195450Sbrendan * Generate new dev list by concatentating with the 23205450Sbrendan * current dev list. 
23215450Sbrendan */ 23225450Sbrendan VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config, 23235450Sbrendan &olddevs, &oldndevs) == 0); 23245450Sbrendan 23255450Sbrendan newdevs = kmem_alloc(sizeof (void *) * 23265450Sbrendan (ndevs + oldndevs), KM_SLEEP); 23275450Sbrendan for (i = 0; i < oldndevs; i++) 23285450Sbrendan VERIFY(nvlist_dup(olddevs[i], &newdevs[i], 23295450Sbrendan KM_SLEEP) == 0); 23305450Sbrendan for (i = 0; i < ndevs; i++) 23315450Sbrendan VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs], 23325450Sbrendan KM_SLEEP) == 0); 23335450Sbrendan 23345450Sbrendan VERIFY(nvlist_remove(sav->sav_config, config, 23355450Sbrendan DATA_TYPE_NVLIST_ARRAY) == 0); 23365450Sbrendan 23375450Sbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, 23385450Sbrendan config, newdevs, ndevs + oldndevs) == 0); 23395450Sbrendan for (i = 0; i < oldndevs + ndevs; i++) 23405450Sbrendan nvlist_free(newdevs[i]); 23415450Sbrendan kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *)); 23425450Sbrendan } else { 23435450Sbrendan /* 23445450Sbrendan * Generate a new dev list. 
23455450Sbrendan */ 23465450Sbrendan VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME, 23475450Sbrendan KM_SLEEP) == 0); 23485450Sbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, config, 23495450Sbrendan devs, ndevs) == 0); 23505450Sbrendan } 23515450Sbrendan } 23525450Sbrendan 23535450Sbrendan /* 23545450Sbrendan * Stop and drop level 2 ARC devices 23555450Sbrendan */ 23565450Sbrendan void 23575450Sbrendan spa_l2cache_drop(spa_t *spa) 23585450Sbrendan { 23595450Sbrendan vdev_t *vd; 23605450Sbrendan int i; 23615450Sbrendan spa_aux_vdev_t *sav = &spa->spa_l2cache; 23625450Sbrendan 23635450Sbrendan for (i = 0; i < sav->sav_count; i++) { 23645450Sbrendan uint64_t pool; 23655450Sbrendan 23665450Sbrendan vd = sav->sav_vdevs[i]; 23675450Sbrendan ASSERT(vd != NULL); 23685450Sbrendan 23698241SJeff.Bonwick@Sun.COM if (spa_l2cache_exists(vd->vdev_guid, &pool) && 23708241SJeff.Bonwick@Sun.COM pool != 0ULL && l2arc_vdev_present(vd)) 23715450Sbrendan l2arc_remove_vdev(vd); 23725450Sbrendan if (vd->vdev_isl2cache) 23735450Sbrendan spa_l2cache_remove(vd); 23745450Sbrendan vdev_clear_stats(vd); 23755450Sbrendan (void) vdev_close(vd); 23765450Sbrendan } 23775450Sbrendan } 23785450Sbrendan 23792082Seschrock /* 2380789Sahrens * Pool Creation 2381789Sahrens */ 2382789Sahrens int 23835094Slling spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, 23847184Stimh const char *history_str, nvlist_t *zplprops) 2385789Sahrens { 2386789Sahrens spa_t *spa; 23875094Slling char *altroot = NULL; 23881635Sbonwick vdev_t *rvd; 2389789Sahrens dsl_pool_t *dp; 2390789Sahrens dmu_tx_t *tx; 23919816SGeorge.Wilson@Sun.COM int error = 0; 2392789Sahrens uint64_t txg = TXG_INITIAL; 23935450Sbrendan nvlist_t **spares, **l2cache; 23945450Sbrendan uint_t nspares, nl2cache; 23955094Slling uint64_t version; 2396789Sahrens 2397789Sahrens /* 2398789Sahrens * If this pool already exists, return failure. 
2399789Sahrens */ 2400789Sahrens mutex_enter(&spa_namespace_lock); 2401789Sahrens if (spa_lookup(pool) != NULL) { 2402789Sahrens mutex_exit(&spa_namespace_lock); 2403789Sahrens return (EEXIST); 2404789Sahrens } 2405789Sahrens 2406789Sahrens /* 2407789Sahrens * Allocate a new spa_t structure. 2408789Sahrens */ 24095094Slling (void) nvlist_lookup_string(props, 24105094Slling zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); 241110921STim.Haley@Sun.COM spa = spa_add(pool, NULL, altroot); 24128241SJeff.Bonwick@Sun.COM spa_activate(spa, spa_mode_global); 2413789Sahrens 24145094Slling if (props && (error = spa_prop_validate(spa, props))) { 24155094Slling spa_deactivate(spa); 24165094Slling spa_remove(spa); 24176643Seschrock mutex_exit(&spa_namespace_lock); 24185094Slling return (error); 24195094Slling } 24205094Slling 24215094Slling if (nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), 24225094Slling &version) != 0) 24235094Slling version = SPA_VERSION; 24245094Slling ASSERT(version <= SPA_VERSION); 242510922SJeff.Bonwick@Sun.COM 242610922SJeff.Bonwick@Sun.COM spa->spa_first_txg = txg; 242710922SJeff.Bonwick@Sun.COM spa->spa_uberblock.ub_txg = txg - 1; 24285094Slling spa->spa_uberblock.ub_version = version; 2429789Sahrens spa->spa_ubsync = spa->spa_uberblock; 2430789Sahrens 24311635Sbonwick /* 24329234SGeorge.Wilson@Sun.COM * Create "The Godfather" zio to hold all async IOs 24339234SGeorge.Wilson@Sun.COM */ 24349630SJeff.Bonwick@Sun.COM spa->spa_async_zio_root = zio_root(spa, NULL, NULL, 24359630SJeff.Bonwick@Sun.COM ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); 24369234SGeorge.Wilson@Sun.COM 24379234SGeorge.Wilson@Sun.COM /* 24381635Sbonwick * Create the root vdev. 
24391635Sbonwick */ 24407754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 24411635Sbonwick 24422082Seschrock error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD); 24432082Seschrock 24442082Seschrock ASSERT(error != 0 || rvd != NULL); 24452082Seschrock ASSERT(error != 0 || spa->spa_root_vdev == rvd); 24462082Seschrock 24475913Sperrin if (error == 0 && !zfs_allocatable_devs(nvroot)) 24481635Sbonwick error = EINVAL; 24492082Seschrock 24502082Seschrock if (error == 0 && 24512082Seschrock (error = vdev_create(rvd, txg, B_FALSE)) == 0 && 24525450Sbrendan (error = spa_validate_aux(spa, nvroot, txg, 24532082Seschrock VDEV_ALLOC_ADD)) == 0) { 24549816SGeorge.Wilson@Sun.COM for (int c = 0; c < rvd->vdev_children; c++) { 24559816SGeorge.Wilson@Sun.COM vdev_metaslab_set_size(rvd->vdev_child[c]); 24569816SGeorge.Wilson@Sun.COM vdev_expand(rvd->vdev_child[c], txg); 24579816SGeorge.Wilson@Sun.COM } 24581635Sbonwick } 24591635Sbonwick 24607754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 2461789Sahrens 24622082Seschrock if (error != 0) { 2463789Sahrens spa_unload(spa); 2464789Sahrens spa_deactivate(spa); 2465789Sahrens spa_remove(spa); 2466789Sahrens mutex_exit(&spa_namespace_lock); 2467789Sahrens return (error); 2468789Sahrens } 2469789Sahrens 24702082Seschrock /* 24712082Seschrock * Get the list of spares, if specified. 
24722082Seschrock */ 24732082Seschrock if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 24742082Seschrock &spares, &nspares) == 0) { 24755450Sbrendan VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME, 24762082Seschrock KM_SLEEP) == 0); 24775450Sbrendan VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, 24782082Seschrock ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 24797754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 24802082Seschrock spa_load_spares(spa); 24817754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 24825450Sbrendan spa->spa_spares.sav_sync = B_TRUE; 24835450Sbrendan } 24845450Sbrendan 24855450Sbrendan /* 24865450Sbrendan * Get the list of level 2 cache devices, if specified. 24875450Sbrendan */ 24885450Sbrendan if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, 24895450Sbrendan &l2cache, &nl2cache) == 0) { 24905450Sbrendan VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, 24915450Sbrendan NV_UNIQUE_NAME, KM_SLEEP) == 0); 24925450Sbrendan VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, 24935450Sbrendan ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); 24947754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 24955450Sbrendan spa_load_l2cache(spa); 24967754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 24975450Sbrendan spa->spa_l2cache.sav_sync = B_TRUE; 24982082Seschrock } 24992082Seschrock 25007184Stimh spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg); 2501789Sahrens spa->spa_meta_objset = dp->dp_meta_objset; 2502789Sahrens 250310956SGeorge.Wilson@Sun.COM /* 250410956SGeorge.Wilson@Sun.COM * Create DDTs (dedup tables). 
250510956SGeorge.Wilson@Sun.COM */ 250610956SGeorge.Wilson@Sun.COM ddt_create(spa); 250710956SGeorge.Wilson@Sun.COM 250810956SGeorge.Wilson@Sun.COM spa_update_dspace(spa); 250910956SGeorge.Wilson@Sun.COM 2510789Sahrens tx = dmu_tx_create_assigned(dp, txg); 2511789Sahrens 2512789Sahrens /* 2513789Sahrens * Create the pool config object. 2514789Sahrens */ 2515789Sahrens spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset, 25167497STim.Haley@Sun.COM DMU_OT_PACKED_NVLIST, SPA_CONFIG_BLOCKSIZE, 2517789Sahrens DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx); 2518789Sahrens 25191544Seschrock if (zap_add(spa->spa_meta_objset, 2520789Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, 25211544Seschrock sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) { 25221544Seschrock cmn_err(CE_PANIC, "failed to add pool config"); 25231544Seschrock } 2524789Sahrens 25255094Slling /* Newly created pools with the right version are always deflated. */ 25265094Slling if (version >= SPA_VERSION_RAIDZ_DEFLATE) { 25275094Slling spa->spa_deflate = TRUE; 25285094Slling if (zap_add(spa->spa_meta_objset, 25295094Slling DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 25305094Slling sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) { 25315094Slling cmn_err(CE_PANIC, "failed to add deflate"); 25325094Slling } 25332082Seschrock } 25342082Seschrock 2535789Sahrens /* 2536789Sahrens * Create the deferred-free bplist object. Turn off compression 2537789Sahrens * because sync-to-convergence takes longer if the blocksize 2538789Sahrens * keeps changing. 
2539789Sahrens */ 254010922SJeff.Bonwick@Sun.COM spa->spa_deferred_bplist_obj = bplist_create(spa->spa_meta_objset, 2541789Sahrens 1 << 14, tx); 254210922SJeff.Bonwick@Sun.COM dmu_object_set_compress(spa->spa_meta_objset, 254310922SJeff.Bonwick@Sun.COM spa->spa_deferred_bplist_obj, ZIO_COMPRESS_OFF, tx); 2544789Sahrens 25451544Seschrock if (zap_add(spa->spa_meta_objset, 2546789Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, 254710922SJeff.Bonwick@Sun.COM sizeof (uint64_t), 1, &spa->spa_deferred_bplist_obj, tx) != 0) { 25481544Seschrock cmn_err(CE_PANIC, "failed to add bplist"); 25491544Seschrock } 2550789Sahrens 25512926Sek110237 /* 25522926Sek110237 * Create the pool's history object. 25532926Sek110237 */ 25545094Slling if (version >= SPA_VERSION_ZPOOL_HISTORY) 25555094Slling spa_history_create_obj(spa, tx); 25565094Slling 25575094Slling /* 25585094Slling * Set pool properties. 25595094Slling */ 25605094Slling spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS); 25615094Slling spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); 25625329Sgw25295 spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE); 25639816SGeorge.Wilson@Sun.COM spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND); 256410922SJeff.Bonwick@Sun.COM 25658525SEric.Schrock@Sun.COM if (props != NULL) { 25668525SEric.Schrock@Sun.COM spa_configfile_set(spa, props, B_FALSE); 25675094Slling spa_sync_props(spa, props, CRED(), tx); 25688525SEric.Schrock@Sun.COM } 25692926Sek110237 2570789Sahrens dmu_tx_commit(tx); 2571789Sahrens 2572789Sahrens spa->spa_sync_on = B_TRUE; 2573789Sahrens txg_sync_start(spa->spa_dsl_pool); 2574789Sahrens 2575789Sahrens /* 2576789Sahrens * We explicitly wait for the first transaction to complete so that our 2577789Sahrens * bean counters are appropriately updated. 
2578789Sahrens */ 2579789Sahrens txg_wait_synced(spa->spa_dsl_pool, txg); 2580789Sahrens 25816643Seschrock spa_config_sync(spa, B_FALSE, B_TRUE); 2582789Sahrens 25835094Slling if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL) 25844715Sek110237 (void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE); 25859946SMark.Musante@Sun.COM spa_history_log_version(spa, LOG_POOL_CREATE); 25864715Sek110237 25878667SGeorge.Wilson@Sun.COM spa->spa_minref = refcount_count(&spa->spa_refcount); 25888667SGeorge.Wilson@Sun.COM 2589789Sahrens mutex_exit(&spa_namespace_lock); 2590789Sahrens 2591789Sahrens return (0); 2592789Sahrens } 2593789Sahrens 25946423Sgw25295 #ifdef _KERNEL 25956423Sgw25295 /* 25969790SLin.Ling@Sun.COM * Get the root pool information from the root disk, then import the root pool 25979790SLin.Ling@Sun.COM * during the system boot up time. 25986423Sgw25295 */ 25999790SLin.Ling@Sun.COM extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **); 26009790SLin.Ling@Sun.COM 26019790SLin.Ling@Sun.COM static nvlist_t * 26029790SLin.Ling@Sun.COM spa_generate_rootconf(char *devpath, char *devid, uint64_t *guid) 26036423Sgw25295 { 26049790SLin.Ling@Sun.COM nvlist_t *config; 26056423Sgw25295 nvlist_t *nvtop, *nvroot; 26066423Sgw25295 uint64_t pgid; 26076423Sgw25295 26089790SLin.Ling@Sun.COM if (vdev_disk_read_rootlabel(devpath, devid, &config) != 0) 26099790SLin.Ling@Sun.COM return (NULL); 26109790SLin.Ling@Sun.COM 26116423Sgw25295 /* 26126423Sgw25295 * Add this top-level vdev to the child array. 
26136423Sgw25295 */ 26149790SLin.Ling@Sun.COM VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 26159790SLin.Ling@Sun.COM &nvtop) == 0); 26169790SLin.Ling@Sun.COM VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, 26179790SLin.Ling@Sun.COM &pgid) == 0); 26189790SLin.Ling@Sun.COM VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, guid) == 0); 26196423Sgw25295 26206423Sgw25295 /* 26216423Sgw25295 * Put this pool's top-level vdevs into a root vdev. 26226423Sgw25295 */ 26236423Sgw25295 VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); 26249790SLin.Ling@Sun.COM VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, 26259790SLin.Ling@Sun.COM VDEV_TYPE_ROOT) == 0); 26266423Sgw25295 VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0); 26276423Sgw25295 VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0); 26286423Sgw25295 VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 26296423Sgw25295 &nvtop, 1) == 0); 26306423Sgw25295 26316423Sgw25295 /* 26326423Sgw25295 * Replace the existing vdev_tree with the new root vdev in 26336423Sgw25295 * this pool's configuration (remove the old, add the new). 26346423Sgw25295 */ 26356423Sgw25295 VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); 26366423Sgw25295 nvlist_free(nvroot); 26379790SLin.Ling@Sun.COM return (config); 26386423Sgw25295 } 26396423Sgw25295 26406423Sgw25295 /* 26419790SLin.Ling@Sun.COM * Walk the vdev tree and see if we can find a device with "better" 26429790SLin.Ling@Sun.COM * configuration. A configuration is "better" if the label on that 26439790SLin.Ling@Sun.COM * device has a more recent txg. 
 */
static void
spa_alt_rootvdev(vdev_t *vd, vdev_t **avd, uint64_t *txg)
{
	/* Recurse over all children before examining this vdev itself. */
	for (int c = 0; c < vd->vdev_children; c++)
		spa_alt_rootvdev(vd->vdev_child[c], avd, txg);

	/* Only leaf vdevs carry an on-disk label we can read here. */
	if (vd->vdev_ops->vdev_op_leaf) {
		nvlist_t *label;
		uint64_t label_txg;

		/* Unreadable label: skip this device, keep the current best. */
		if (vdev_disk_read_rootlabel(vd->vdev_physpath, vd->vdev_devid,
		    &label) != 0)
			return;

		VERIFY(nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_TXG,
		    &label_txg) == 0);

		/*
		 * Do we have a better boot device?
		 */
		if (label_txg > *txg) {
			*txg = label_txg;
			*avd = vd;
		}
		nvlist_free(label);
	}
}

/*
 * Import a root pool.
 *
 * For x86. devpath_list will consist of devid and/or physpath name of
 * the vdev (e.g. "id1,sd@SSEAGATE..." or "/pci@1f,0/ide@d/disk@0,0:a").
 * The GRUB "findroot" command will return the vdev we should boot.
 *
 * For Sparc, devpath_list consists the physpath name of the booting device
 * no matter the rootpool is a single device pool or a mirrored pool.
 * e.g.
 * "/pci@1f,0/ide@d/disk@0,0:a"
 */
int
spa_import_rootpool(char *devpath, char *devid)
{
	spa_t *spa;
	vdev_t *rvd, *bvd, *avd = NULL;
	nvlist_t *config, *nvtop;
	uint64_t guid, txg;
	char *pname;
	int error;

	/*
	 * Read the label from the boot device and generate a configuration.
	 */
	config = spa_generate_rootconf(devpath, devid, &guid);
#if defined(_OBP) && defined(_KERNEL)
	if (config == NULL) {
		if (strstr(devpath, "/iscsi/ssd") != NULL) {
			/* iscsi boot: resolve the physical path, then retry */
			get_iscsi_bootpath_phy(devpath);
			config = spa_generate_rootconf(devpath, devid, &guid);
		}
	}
#endif
	if (config == NULL) {
		cmn_err(CE_NOTE, "Can not read the pool label from '%s'",
		    devpath);
		return (EIO);
	}

	VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
	    &pname) == 0);
	VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);

	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(pname)) != NULL) {
		/*
		 * Remove the existing root pool from the namespace so that we
		 * can replace it with the correct config we just read in.
		 */
		spa_remove(spa);
	}

	spa = spa_add(pname, config, NULL);
	spa->spa_is_root = B_TRUE;
	spa->spa_load_verbatim = B_TRUE;

	/*
	 * Build up a vdev tree based on the boot device's label config.
	 */
	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvtop) == 0);
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	error = spa_config_parse(spa, &rvd, nvtop, NULL, 0,
	    VDEV_ALLOC_ROOTPOOL);
	spa_config_exit(spa, SCL_ALL, FTAG);
	if (error) {
		mutex_exit(&spa_namespace_lock);
		nvlist_free(config);
		cmn_err(CE_NOTE, "Can not parse the config for pool '%s'",
		    pname);
		return (error);
	}

	/*
	 * Get the boot vdev.
	 */
	if ((bvd = vdev_lookup_by_guid(rvd, guid)) == NULL) {
		cmn_err(CE_NOTE, "Can not find the boot vdev for guid %llu",
		    (u_longlong_t)guid);
		error = ENOENT;
		goto out;
	}

	/*
	 * Determine if there is a better boot device.
	 */
	avd = bvd;
	spa_alt_rootvdev(rvd, &avd, &txg);
	if (avd != bvd) {
		cmn_err(CE_NOTE, "The boot device is 'degraded'. Please "
		    "try booting from '%s'", avd->vdev_path);
		error = EINVAL;
		goto out;
	}

	/*
	 * If the boot device is part of a spare vdev then ensure that
	 * we're booting off the active spare.
	 */
	if (bvd->vdev_parent->vdev_ops == &vdev_spare_ops &&
	    !bvd->vdev_isspare) {
		cmn_err(CE_NOTE, "The boot device is currently spared. Please "
		    "try booting from '%s'",
		    bvd->vdev_parent->vdev_child[1]->vdev_path);
		error = EINVAL;
		goto out;
	}

	error = 0;
	spa_history_log_version(spa, LOG_POOL_IMPORT);
out:
	/* Common cleanup: the parsed tree is always freed, success or not. */
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	vdev_free(rvd);
	spa_config_exit(spa, SCL_ALL, FTAG);
	mutex_exit(&spa_namespace_lock);

	nvlist_free(config);
	return (error);
}

#endif

/*
 * Take a pool and insert it into the namespace as if it had been loaded at
 * boot.
 */
int
spa_import_verbatim(const char *pool, nvlist_t *config, nvlist_t *props)
{
	spa_t *spa;
	zpool_rewind_policy_t policy;
	char *altroot = NULL;

	/* Fail if a pool with this name already exists. */
	mutex_enter(&spa_namespace_lock);
	if (spa_lookup(pool) != NULL) {
		mutex_exit(&spa_namespace_lock);
		return (EEXIST);
	}

	(void) nvlist_lookup_string(props,
	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
	spa = spa_add(pool, config, altroot);

	zpool_get_rewind_policy(config, &policy);
	spa->spa_load_max_txg = policy.zrp_txg;

	/* Trust the supplied config as-is; no spa_load() is performed. */
	spa->spa_load_verbatim = B_TRUE;

	if (props != NULL)
		spa_configfile_set(spa, props, B_FALSE);

	spa_config_sync(spa, B_FALSE, B_TRUE);

	mutex_exit(&spa_namespace_lock);
	spa_history_log_version(spa, LOG_POOL_IMPORT);

	return (0);
}

/*
 * Import a non-root pool into the system.
 */
int
spa_import(const char *pool, nvlist_t *config, nvlist_t *props)
{
	spa_t *spa;
	char *altroot = NULL;
	spa_load_state_t state = SPA_LOAD_IMPORT;
	zpool_rewind_policy_t policy;
	int error;
	nvlist_t *nvroot;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;

	/*
	 * If a pool with this name exists, return failure.
	 */
	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(pool)) != NULL) {
		mutex_exit(&spa_namespace_lock);
		return (EEXIST);
	}

	/* A rewind request changes the load state to recovery mode. */
	zpool_get_rewind_policy(config, &policy);
	if (policy.zrp_request & ZPOOL_DO_REWIND)
		state = SPA_LOAD_RECOVER;

	/*
	 * Create and initialize the spa structure.
	 */
	(void) nvlist_lookup_string(props,
	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
	spa = spa_add(pool, config, altroot);
	spa_activate(spa, spa_mode_global);

	/*
	 * Don't start async tasks until we know everything is healthy.
	 */
	spa_async_suspend(spa);

	/*
	 * Pass off the heavy lifting to spa_load().  Pass TRUE for mosconfig
	 * because the user-supplied config is actually the one to trust when
	 * doing an import.
	 */
	if (state != SPA_LOAD_RECOVER)
		spa->spa_last_ubsync_txg = spa->spa_load_txg = 0;
	error = spa_load_best(spa, state, B_TRUE, policy.zrp_txg,
	    ((policy.zrp_request & ZPOOL_EXTREME_REWIND) != 0));

	/*
	 * Propagate anything learned about failing or best txgs
	 * back to caller
	 */
	spa_rewind_data_to_nvlist(spa, config);

	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	/*
	 * Toss any existing sparelist, as it doesn't have any validity
	 * anymore, and conflicts with spa_has_spare().
	 */
	if (spa->spa_spares.sav_config) {
		nvlist_free(spa->spa_spares.sav_config);
		spa->spa_spares.sav_config = NULL;
		spa_load_spares(spa);
	}
	if (spa->spa_l2cache.sav_config) {
		nvlist_free(spa->spa_l2cache.sav_config);
		spa->spa_l2cache.sav_config = NULL;
		spa_load_l2cache(spa);
	}

	/* Validate the aux devices in the user-supplied tree (import mode). */
	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) == 0);
	if (error == 0)
		error = spa_validate_aux(spa, nvroot, -1ULL,
		    VDEV_ALLOC_SPARE);
	if (error == 0)
		error = spa_validate_aux(spa, nvroot, -1ULL,
		    VDEV_ALLOC_L2CACHE);
	spa_config_exit(spa, SCL_ALL, FTAG);

	if (props != NULL)
		spa_configfile_set(spa, props, B_FALSE);

	/* Unwind completely on load, validation, or property-set failure. */
	if (error != 0 || (props && spa_writeable(spa) &&
	    (error = spa_prop_set(spa, props)))) {
		spa_unload(spa);
		spa_deactivate(spa);
		spa_remove(spa);
		mutex_exit(&spa_namespace_lock);
		return (error);
	}

	spa_async_resume(spa);

	/*
	 * Override any spares and level 2 cache devices as specified by
	 * the user, as these may have correct device names/devids, etc.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) == 0) {
		if (spa->spa_spares.sav_config)
			VERIFY(nvlist_remove(spa->spa_spares.sav_config,
			    ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0);
		else
			VERIFY(nvlist_alloc(&spa->spa_spares.sav_config,
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_spares(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_spares.sav_sync = B_TRUE;
	}
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
	    &l2cache, &nl2cache) == 0) {
		if (spa->spa_l2cache.sav_config)
			VERIFY(nvlist_remove(spa->spa_l2cache.sav_config,
			    ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0);
		else
			VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_l2cache(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	/*
	 * Check for any removed devices.
	 */
	if (spa->spa_autoreplace) {
		spa_aux_check_removed(&spa->spa_spares);
		spa_aux_check_removed(&spa->spa_l2cache);
	}

	if (spa_writeable(spa)) {
		/*
		 * Update the config cache to include the newly-imported pool.
		 */
		spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
	}

	/*
	 * It's possible that the pool was expanded while it was exported.
	 * We kick off an async task to handle this for us.
	 */
	spa_async_request(spa, SPA_ASYNC_AUTOEXPAND);

	mutex_exit(&spa_namespace_lock);
	spa_history_log_version(spa, LOG_POOL_IMPORT);

	return (0);
}


/*
 * This (illegal) pool name is used when temporarily importing a spa_t in order
 * to get the vdev stats associated with the imported devices
 */
#define	TRYIMPORT_NAME	"$import"

/*
 * Temporarily import the pool described by 'tryconfig' (under the reserved
 * name TRYIMPORT_NAME, read-only) in order to generate and return a config
 * nvlist describing it, then tear the temporary import down again.
 *
 * Returns NULL if 'tryconfig' lacks a pool name or pool state; otherwise
 * returns a config nvlist that the caller owns and must free.
 */
nvlist_t *
spa_tryimport(nvlist_t *tryconfig)
{
	nvlist_t *config = NULL;
	char *poolname;
	spa_t *spa;
	uint64_t state;
	int error;

	/* Both the pool name and the pool state must be present. */
	if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname))
		return (NULL);

	if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state))
		return (NULL);

	/*
	 * Create and initialize the spa structure.
	 */
	mutex_enter(&spa_namespace_lock);
	spa = spa_add(TRYIMPORT_NAME, tryconfig, NULL);
	spa_activate(spa, FREAD);	/* read-only: we never write here */

	/*
	 * Pass off the heavy lifting to spa_load().
	 * Pass TRUE for mosconfig because the user-supplied config
	 * is actually the one to trust when doing an import.
	 */
	error = spa_load(spa, SPA_LOAD_TRYIMPORT, B_TRUE);

	/*
	 * If 'tryconfig' was at least parsable, return the current config.
	 */
	if (spa->spa_root_vdev != NULL) {
		config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
		/* Report the pool's real name and state, not TRYIMPORT_NAME. */
		VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
		    poolname) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
		    state) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP,
		    spa->spa_uberblock.ub_timestamp) == 0);

		/*
		 * If the bootfs property exists on this pool then we
		 * copy it out so that external consumers can tell which
		 * pools are bootable.
		 */
		if ((!error || error == EEXIST) && spa->spa_bootfs) {
			char *tmpname = kmem_alloc(MAXPATHLEN, KM_SLEEP);

			/*
			 * We have to play games with the name since the
			 * pool was opened as TRYIMPORT_NAME.
			 */
			if (dsl_dsobj_to_dsname(spa_name(spa),
			    spa->spa_bootfs, tmpname) == 0) {
				char *cp;
				char *dsname = kmem_alloc(MAXPATHLEN, KM_SLEEP);

				/*
				 * Splice the real pool name in front of the
				 * dataset component, if there is one.
				 */
				cp = strchr(tmpname, '/');
				if (cp == NULL) {
					(void) strlcpy(dsname, tmpname,
					    MAXPATHLEN);
				} else {
					(void) snprintf(dsname, MAXPATHLEN,
					    "%s/%s", poolname, ++cp);
				}
				VERIFY(nvlist_add_string(config,
				    ZPOOL_CONFIG_BOOTFS, dsname) == 0);
				kmem_free(dsname, MAXPATHLEN);
			}
			kmem_free(tmpname, MAXPATHLEN);
		}

		/*
		 * Add the list of hot spares and level 2 cache devices.
		 */
		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
		spa_add_spares(spa, config);
		spa_add_l2cache(spa, config);
		spa_config_exit(spa, SCL_CONFIG, FTAG);
	}

	/* Tear down the temporary import before returning. */
	spa_unload(spa);
	spa_deactivate(spa);
	spa_remove(spa);
	mutex_exit(&spa_namespace_lock);

	return (config);
}

/*
 * Pool export/destroy
 *
 * The act of destroying or exporting a pool is very simple. We make sure there
 * is no more pending I/O and any references to the pool are gone. Then, we
 * update the pool state and sync all the labels to disk, removing the
 * configuration from the cache afterwards. If the 'hardforce' flag is set, then
 * we don't sync the labels or remove the configuration cache.
 *
 * Common worker for spa_destroy(), spa_export() and spa_reset(); 'new_state'
 * selects among them.  On success the previous config is duplicated into
 * '*oldconfig' (if non-NULL).  Returns 0 on success, or EROFS, ENOENT,
 * EBUSY, or EXDEV on the failure paths below.
 */
static int
spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
    boolean_t force, boolean_t hardforce)
{
	spa_t *spa;

	if (oldconfig)
		*oldconfig = NULL;

	/* Exporting or destroying requires write access to pools. */
	if (!(spa_mode_global & FWRITE))
		return (EROFS);

	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(pool)) == NULL) {
		mutex_exit(&spa_namespace_lock);
		return (ENOENT);
	}

	/*
	 * Put a hold on the pool, drop the namespace lock, stop async tasks,
	 * reacquire the namespace lock, and see if we can export.
	 */
	spa_open_ref(spa, FTAG);
	mutex_exit(&spa_namespace_lock);
	spa_async_suspend(spa);
	mutex_enter(&spa_namespace_lock);
	spa_close(spa, FTAG);

	/*
	 * The pool will be in core if it's openable,
	 * in which case we can modify its state.
	 */
	if (spa->spa_state != POOL_STATE_UNINITIALIZED && spa->spa_sync_on) {
		/*
		 * Objsets may be open only because they're dirty, so we
		 * have to force it to sync before checking spa_refcnt.
		 */
		txg_wait_synced(spa->spa_dsl_pool, 0);

		/*
		 * A pool cannot be exported or destroyed if there are active
		 * references.  If we are resetting a pool, allow references by
		 * fault injection handlers.
		 */
		if (!spa_refcount_zero(spa) ||
		    (spa->spa_inject_ref != 0 &&
		    new_state != POOL_STATE_UNINITIALIZED)) {
			spa_async_resume(spa);
			mutex_exit(&spa_namespace_lock);
			return (EBUSY);
		}

		/*
		 * A pool cannot be exported if it has an active shared spare.
		 * This is to prevent other pools stealing the active spare
		 * from an exported pool. At user's own will, such pool can
		 * be forcedly exported.
		 */
		if (!force && new_state == POOL_STATE_EXPORTED &&
		    spa_has_active_shared_spare(spa)) {
			spa_async_resume(spa);
			mutex_exit(&spa_namespace_lock);
			return (EXDEV);
		}

		/*
		 * We want this to be reflected on every label,
		 * so mark them all dirty.  spa_unload() will do the
		 * final sync that pushes these changes out.
		 */
		if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) {
			spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
			spa->spa_state = new_state;
			spa->spa_final_txg = spa_last_synced_txg(spa) + 1;
			vdev_config_dirty(spa->spa_root_vdev);
			spa_config_exit(spa, SCL_ALL, FTAG);
		}
	}

	spa_event_notify(spa, NULL, ESC_ZFS_POOL_DESTROY);

	if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
		spa_unload(spa);
		spa_deactivate(spa);
	}

	/* Hand the caller a copy of the outgoing config, if requested. */
	if (oldconfig && spa->spa_config)
		VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0);

	if (new_state != POOL_STATE_UNINITIALIZED) {
		if (!hardforce)
			spa_config_sync(spa, B_TRUE, B_TRUE);
		spa_remove(spa);
	}
	mutex_exit(&spa_namespace_lock);

	return (0);
}

/*
 * Destroy a storage pool.
 */
int
spa_destroy(char *pool)
{
	return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL,
	    B_FALSE, B_FALSE));
}

/*
 * Export a storage pool.
 * Thin wrapper around spa_export_common(): 'force' permits export even
 * with an active shared spare; 'hardforce' skips the final label sync
 * and the config cache update.
 */
int
spa_export(char *pool, nvlist_t **oldconfig, boolean_t force,
    boolean_t hardforce)
{
	return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig,
	    force, hardforce));
}

/*
 * Similar to spa_export(), this unloads the spa_t without actually removing it
 * from the namespace in any way.
 */
int
spa_reset(char *pool)
{
	return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL,
	    B_FALSE, B_FALSE));
}

/*
 * ==========================================================================
 * Device manipulation
 * ==========================================================================
 */

/*
 * Add a device to a storage pool.
 * 'nvroot' may name new top-level (data) vdevs as well as hot spare and
 * level 2 cache (l2cache) auxiliary devices.  Returns 0 on success or an
 * errno-style code via spa_vdev_exit().
 */
int
spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
{
	uint64_t txg, id;
	int error;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *vd, *tvd;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;

	txg = spa_vdev_enter(spa);

	if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0,
	    VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, NULL, txg, error));

	spa->spa_pending_vdev = vd;	/* spa_vdev_exit() will clear this */

	/* Auxiliary device lists are optional in 'nvroot'. */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
	    &nspares) != 0)
		nspares = 0;

	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache,
	    &nl2cache) != 0)
		nl2cache = 0;

	/* Reject a request that adds nothing at all. */
	if (vd->vdev_children == 0 && nspares == 0 && nl2cache == 0)
		return (spa_vdev_exit(spa, vd, txg, EINVAL));

	if (vd->vdev_children != 0 &&
	    (error = vdev_create(vd, txg, B_FALSE)) != 0)
		return (spa_vdev_exit(spa, vd, txg, error));

	/*
	 * We must validate the spares and l2cache devices after checking the
	 * children.  Otherwise, vdev_inuse() will blindly overwrite the spare.
	 */
	if ((error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, vd, txg, error));

	/*
	 * Transfer each new top-level vdev from vd to rvd.
	 */
	for (int c = 0; c < vd->vdev_children; c++) {

		/*
		 * Set the vdev id to the first hole, if one exists.
		 */
		for (id = 0; id < rvd->vdev_children; id++) {
			if (rvd->vdev_child[id]->vdev_ishole) {
				vdev_free(rvd->vdev_child[id]);
				break;
			}
		}
		/* No hole: 'id' is rvd->vdev_children, i.e. append. */
		tvd = vd->vdev_child[c];
		vdev_remove_child(vd, tvd);
		tvd->vdev_id = id;
		vdev_add_child(rvd, tvd);
		vdev_config_dirty(tvd);
	}

	if (nspares != 0) {
		spa_set_aux_vdevs(&spa->spa_spares, spares, nspares,
		    ZPOOL_CONFIG_SPARES);
		spa_load_spares(spa);
		spa->spa_spares.sav_sync = B_TRUE;
	}

	if (nl2cache != 0) {
		spa_set_aux_vdevs(&spa->spa_l2cache, l2cache, nl2cache,
		    ZPOOL_CONFIG_L2CACHE);
		spa_load_l2cache(spa);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	/*
	 * We have to be careful when adding new vdevs to an existing pool.
	 * If other threads start allocating from these vdevs before we
	 * sync the config cache, and we lose power, then upon reboot we may
	 * fail to open the pool because there are DVAs that the config cache
	 * can't translate.  Therefore, we first add the vdevs without
	 * initializing metaslabs; sync the config cache (via spa_vdev_exit());
	 * and then let spa_config_update() initialize the new metaslabs.
	 *
	 * spa_load() checks for added-but-not-initialized vdevs, so that
	 * if we lose power at any point in this sequence, the remaining
	 * steps will be completed the next time we load the pool.
	 */
	(void) spa_vdev_exit(spa, vd, txg, 0);

	mutex_enter(&spa_namespace_lock);
	spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
	mutex_exit(&spa_namespace_lock);

	return (0);
}

/*
 * Attach a device to a mirror.  The arguments are the path to any device
 * in the mirror, and the nvroot for the new device.  If the path specifies
 * a device that is not mirrored, we automatically insert the mirror vdev.
 *
 * If 'replacing' is specified, the new device is intended to replace the
 * existing device; in this case the two devices are made into their own
 * mirror using the 'replacing' vdev, which is functionally identical to
 * the mirror vdev (it actually reuses all the same ops) but has a few
 * extra rules: you can't attach to it after it's been created, and upon
 * completion of resilvering, the first disk (the one being replaced)
 * is automatically detached.
 * Returns 0 on success or an errno-style code via spa_vdev_exit().
 * NOTE(review): config-parse failures are reported as EINVAL rather than
 * the parser's own error code -- see below.
 */
int
spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
{
	uint64_t txg, open_txg;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd;
	vdev_ops_t *pvops;
	char *oldvdpath, *newvdpath;
	int newvd_isspare;
	int error;

	txg = spa_vdev_enter(spa);

	oldvd = spa_lookup_by_guid(spa, guid, B_FALSE);

	if (oldvd == NULL)
		return (spa_vdev_exit(spa, NULL, txg, ENODEV));

	/* Only leaf vdevs (actual devices) can be attached to. */
	if (!oldvd->vdev_ops->vdev_op_leaf)
		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));

	pvd = oldvd->vdev_parent;

	if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0,
	    VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, NULL, txg, EINVAL));

	/* The caller must supply exactly one new device. */
	if (newrootvd->vdev_children != 1)
		return (spa_vdev_exit(spa, newrootvd, txg, EINVAL));

	newvd = newrootvd->vdev_child[0];

	if (!newvd->vdev_ops->vdev_op_leaf)
		return (spa_vdev_exit(spa, newrootvd, txg, EINVAL));

	if ((error = vdev_create(newrootvd, txg, replacing)) != 0)
		return (spa_vdev_exit(spa, newrootvd, txg, error));

	/*
	 * Spares can't replace logs
	 */
	if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare)
		return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));

	if (!replacing) {
		/*
		 * For attach, the only allowable parent is a mirror or the root
		 * vdev.
		 */
		if (pvd->vdev_ops != &vdev_mirror_ops &&
		    pvd->vdev_ops != &vdev_root_ops)
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));

		pvops = &vdev_mirror_ops;
	} else {
		/*
		 * Active hot spares can only be replaced by inactive hot
		 * spares.
		 */
		if (pvd->vdev_ops == &vdev_spare_ops &&
		    pvd->vdev_child[1] == oldvd &&
		    !spa_has_spare(spa, newvd->vdev_guid))
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));

		/*
		 * If the source is a hot spare, and the parent isn't already a
		 * spare, then we want to create a new hot spare.  Otherwise, we
		 * want to create a replacing vdev.  The user is not allowed to
		 * attach to a spared vdev child unless the 'isspare' state is
		 * the same (spare replaces spare, non-spare replaces
		 * non-spare).
		 */
		if (pvd->vdev_ops == &vdev_replacing_ops)
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
		else if (pvd->vdev_ops == &vdev_spare_ops &&
		    newvd->vdev_isspare != oldvd->vdev_isspare)
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
		else if (pvd->vdev_ops != &vdev_spare_ops &&
		    newvd->vdev_isspare)
			pvops = &vdev_spare_ops;
		else
			pvops = &vdev_replacing_ops;
	}

	/*
	 * Make sure the new device is big enough.
	 */
	if (newvd->vdev_asize < vdev_get_min_asize(oldvd))
		return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW));

	/*
	 * The new device cannot have a higher alignment requirement
	 * than the top-level vdev.
	 */
	if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift)
		return (spa_vdev_exit(spa, newrootvd, txg, EDOM));

	/*
	 * If this is an in-place replacement, update oldvd's path and devid
	 * to make it distinguishable from newvd, and unopenable from now on.
	 */
	if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) {
		spa_strfree(oldvd->vdev_path);
		/* +5 covers the appended "/old" plus the terminating NUL. */
		oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5,
		    KM_SLEEP);
		(void) sprintf(oldvd->vdev_path, "%s/%s",
		    newvd->vdev_path, "old");
		if (oldvd->vdev_devid != NULL) {
			spa_strfree(oldvd->vdev_devid);
			oldvd->vdev_devid = NULL;
		}
	}

	/*
	 * If the parent is not a mirror, or if we're replacing, insert the new
	 * mirror/replacing/spare vdev above oldvd.
	 */
	if (pvd->vdev_ops != pvops)
		pvd = vdev_add_parent(oldvd, pvops);

	ASSERT(pvd->vdev_top->vdev_parent == rvd);
	ASSERT(pvd->vdev_ops == pvops);
	ASSERT(oldvd->vdev_parent == pvd);

	/*
	 * Extract the new device from its root and add it to pvd.
	 */
	vdev_remove_child(newrootvd, newvd);
	newvd->vdev_id = pvd->vdev_children;
	newvd->vdev_crtxg = oldvd->vdev_crtxg;
	vdev_add_child(pvd, newvd);

	tvd = newvd->vdev_top;
	ASSERT(pvd->vdev_top == tvd);
	ASSERT(tvd->vdev_parent == rvd);

	vdev_config_dirty(tvd);

	/*
	 * Set newvd's DTL to [TXG_INITIAL, open_txg].  It will propagate
	 * upward when spa_vdev_exit() calls vdev_dtl_reassess().
	 */
	open_txg = txg + TXG_CONCURRENT_STATES - 1;

	vdev_dtl_dirty(newvd, DTL_MISSING,
	    TXG_INITIAL, open_txg - TXG_INITIAL + 1);

	if (newvd->vdev_isspare) {
		spa_spare_activate(newvd);
		spa_event_notify(spa, newvd, ESC_ZFS_VDEV_SPARE);
	}

	/* Copy the paths now; the vdevs may be freed once we drop the locks. */
	oldvdpath = spa_strdup(oldvd->vdev_path);
	newvdpath = spa_strdup(newvd->vdev_path);
	newvd_isspare = newvd->vdev_isspare;

	/*
	 * Mark newvd's DTL dirty in this txg.
	 */
	vdev_dirty(tvd, VDD_DTL, newvd, txg);

	(void) spa_vdev_exit(spa, newrootvd, open_txg, 0);

	spa_history_internal_log(LOG_POOL_VDEV_ATTACH, spa, NULL,
	    CRED(), "%s vdev=%s %s vdev=%s",
	    replacing && newvd_isspare ? "spare in" :
	    replacing ? "replace" : "attach", newvdpath,
	    replacing ? "for" : "to", oldvdpath);

	spa_strfree(oldvdpath);
	spa_strfree(newvdpath);

	/*
	 * Kick off a resilver to update newvd.
	 */
	VERIFY3U(spa_scrub(spa, POOL_SCRUB_RESILVER), ==, 0);

	return (0);
}

/*
 * Detach a device from a mirror or replacing vdev.
 * If 'replace_done' is specified, only detach if the parent
 * is a replacing vdev.
 */
int
spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
{
	uint64_t txg;
	int error;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *vd, *pvd, *cvd, *tvd;
	boolean_t unspare = B_FALSE;
	uint64_t unspare_guid;
	size_t len;

	txg = spa_vdev_enter(spa);

	vd = spa_lookup_by_guid(spa, guid, B_FALSE);

	if (vd == NULL)
		return (spa_vdev_exit(spa, NULL, txg, ENODEV));

	if (!vd->vdev_ops->vdev_op_leaf)
		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));

	pvd = vd->vdev_parent;

	/*
	 * If the parent/child relationship is not as expected, don't do it.
	 * Consider M(A,R(B,C)) -- that is, a mirror of A with a replacing
	 * vdev that's replacing B with C.  The user's intent in replacing
	 * is to go from M(A,B) to M(A,C).
If the user decides to cancel 35628241SJeff.Bonwick@Sun.COM * the replace by detaching C, the expected behavior is to end up 35638241SJeff.Bonwick@Sun.COM * M(A,B). But suppose that right after deciding to detach C, 35648241SJeff.Bonwick@Sun.COM * the replacement of B completes. We would have M(A,C), and then 35658241SJeff.Bonwick@Sun.COM * ask to detach C, which would leave us with just A -- not what 35668241SJeff.Bonwick@Sun.COM * the user wanted. To prevent this, we make sure that the 35678241SJeff.Bonwick@Sun.COM * parent/child relationship hasn't changed -- in this example, 35688241SJeff.Bonwick@Sun.COM * that C's parent is still the replacing vdev R. 35698241SJeff.Bonwick@Sun.COM */ 35708241SJeff.Bonwick@Sun.COM if (pvd->vdev_guid != pguid && pguid != 0) 35718241SJeff.Bonwick@Sun.COM return (spa_vdev_exit(spa, NULL, txg, EBUSY)); 35728241SJeff.Bonwick@Sun.COM 35738241SJeff.Bonwick@Sun.COM /* 3574789Sahrens * If replace_done is specified, only remove this device if it's 35752082Seschrock * the first child of a replacing vdev. For the 'spare' vdev, either 35762082Seschrock * disk can be removed. 3577789Sahrens */ 35782082Seschrock if (replace_done) { 35792082Seschrock if (pvd->vdev_ops == &vdev_replacing_ops) { 35802082Seschrock if (vd->vdev_id != 0) 35812082Seschrock return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 35822082Seschrock } else if (pvd->vdev_ops != &vdev_spare_ops) { 35832082Seschrock return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 35842082Seschrock } 35852082Seschrock } 35862082Seschrock 35872082Seschrock ASSERT(pvd->vdev_ops != &vdev_spare_ops || 35884577Sahrens spa_version(spa) >= SPA_VERSION_SPARES); 3589789Sahrens 3590789Sahrens /* 35912082Seschrock * Only mirror, replacing, and spare vdevs support detach. 
3592789Sahrens */ 3593789Sahrens if (pvd->vdev_ops != &vdev_replacing_ops && 35942082Seschrock pvd->vdev_ops != &vdev_mirror_ops && 35952082Seschrock pvd->vdev_ops != &vdev_spare_ops) 3596789Sahrens return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 3597789Sahrens 3598789Sahrens /* 35998241SJeff.Bonwick@Sun.COM * If this device has the only valid copy of some data, 36008241SJeff.Bonwick@Sun.COM * we cannot safely detach it. 3601789Sahrens */ 36028241SJeff.Bonwick@Sun.COM if (vdev_dtl_required(vd)) 3603789Sahrens return (spa_vdev_exit(spa, NULL, txg, EBUSY)); 3604789Sahrens 36058241SJeff.Bonwick@Sun.COM ASSERT(pvd->vdev_children >= 2); 36068241SJeff.Bonwick@Sun.COM 3607789Sahrens /* 36086673Seschrock * If we are detaching the second disk from a replacing vdev, then 36096673Seschrock * check to see if we changed the original vdev's path to have "/old" 36106673Seschrock * at the end in spa_vdev_attach(). If so, undo that change now. 36116673Seschrock */ 36126673Seschrock if (pvd->vdev_ops == &vdev_replacing_ops && vd->vdev_id == 1 && 36136673Seschrock pvd->vdev_child[0]->vdev_path != NULL && 36146673Seschrock pvd->vdev_child[1]->vdev_path != NULL) { 36156673Seschrock ASSERT(pvd->vdev_child[1] == vd); 36166673Seschrock cvd = pvd->vdev_child[0]; 36176673Seschrock len = strlen(vd->vdev_path); 36186673Seschrock if (strncmp(cvd->vdev_path, vd->vdev_path, len) == 0 && 36196673Seschrock strcmp(cvd->vdev_path + len, "/old") == 0) { 36206673Seschrock spa_strfree(cvd->vdev_path); 36216673Seschrock cvd->vdev_path = spa_strdup(vd->vdev_path); 36226673Seschrock } 36236673Seschrock } 36246673Seschrock 36256673Seschrock /* 36262082Seschrock * If we are detaching the original disk from a spare, then it implies 36272082Seschrock * that the spare should become a real disk, and be removed from the 36282082Seschrock * active spare list for the pool. 
36292082Seschrock */ 36302082Seschrock if (pvd->vdev_ops == &vdev_spare_ops && 36318241SJeff.Bonwick@Sun.COM vd->vdev_id == 0 && pvd->vdev_child[1]->vdev_isspare) 36322082Seschrock unspare = B_TRUE; 36332082Seschrock 36342082Seschrock /* 3635789Sahrens * Erase the disk labels so the disk can be used for other things. 3636789Sahrens * This must be done after all other error cases are handled, 3637789Sahrens * but before we disembowel vd (so we can still do I/O to it). 3638789Sahrens * But if we can't do it, don't treat the error as fatal -- 3639789Sahrens * it may be that the unwritability of the disk is the reason 3640789Sahrens * it's being detached! 3641789Sahrens */ 36423377Seschrock error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); 3643789Sahrens 3644789Sahrens /* 3645789Sahrens * Remove vd from its parent and compact the parent's children. 3646789Sahrens */ 3647789Sahrens vdev_remove_child(pvd, vd); 3648789Sahrens vdev_compact_children(pvd); 3649789Sahrens 3650789Sahrens /* 3651789Sahrens * Remember one of the remaining children so we can get tvd below. 3652789Sahrens */ 3653789Sahrens cvd = pvd->vdev_child[0]; 3654789Sahrens 3655789Sahrens /* 36562082Seschrock * If we need to remove the remaining child from the list of hot spares, 36578241SJeff.Bonwick@Sun.COM * do it now, marking the vdev as no longer a spare in the process. 36588241SJeff.Bonwick@Sun.COM * We must do this before vdev_remove_parent(), because that can 36598241SJeff.Bonwick@Sun.COM * change the GUID if it creates a new toplevel GUID. For a similar 36608241SJeff.Bonwick@Sun.COM * reason, we must remove the spare now, in the same txg as the detach; 36618241SJeff.Bonwick@Sun.COM * otherwise someone could attach a new sibling, change the GUID, and 36628241SJeff.Bonwick@Sun.COM * the subsequent attempt to spa_vdev_remove(unspare_guid) would fail. 
36632082Seschrock */ 36642082Seschrock if (unspare) { 36652082Seschrock ASSERT(cvd->vdev_isspare); 36663377Seschrock spa_spare_remove(cvd); 36672082Seschrock unspare_guid = cvd->vdev_guid; 36688241SJeff.Bonwick@Sun.COM (void) spa_vdev_remove(spa, unspare_guid, B_TRUE); 36692082Seschrock } 36702082Seschrock 36712082Seschrock /* 3672789Sahrens * If the parent mirror/replacing vdev only has one child, 3673789Sahrens * the parent is no longer needed. Remove it from the tree. 3674789Sahrens */ 3675789Sahrens if (pvd->vdev_children == 1) 3676789Sahrens vdev_remove_parent(cvd); 3677789Sahrens 3678789Sahrens /* 3679789Sahrens * We don't set tvd until now because the parent we just removed 3680789Sahrens * may have been the previous top-level vdev. 3681789Sahrens */ 3682789Sahrens tvd = cvd->vdev_top; 3683789Sahrens ASSERT(tvd->vdev_parent == rvd); 3684789Sahrens 3685789Sahrens /* 36863377Seschrock * Reevaluate the parent vdev state. 3687789Sahrens */ 36884451Seschrock vdev_propagate_state(cvd); 3689789Sahrens 3690789Sahrens /* 36919816SGeorge.Wilson@Sun.COM * If the 'autoexpand' property is set on the pool then automatically 36929816SGeorge.Wilson@Sun.COM * try to expand the size of the pool. For example if the device we 36939816SGeorge.Wilson@Sun.COM * just detached was smaller than the others, it may be possible to 36949816SGeorge.Wilson@Sun.COM * add metaslabs (i.e. grow the pool). We need to reopen the vdev 36959816SGeorge.Wilson@Sun.COM * first so that we can obtain the updated sizes of the leaf vdevs. 3696789Sahrens */ 36979816SGeorge.Wilson@Sun.COM if (spa->spa_autoexpand) { 36989816SGeorge.Wilson@Sun.COM vdev_reopen(tvd); 36999816SGeorge.Wilson@Sun.COM vdev_expand(tvd, txg); 37009816SGeorge.Wilson@Sun.COM } 3701789Sahrens 3702789Sahrens vdev_config_dirty(tvd); 3703789Sahrens 3704789Sahrens /* 37053377Seschrock * Mark vd's DTL as dirty in this txg. vdev_dtl_sync() will see that 37063377Seschrock * vd->vdev_detached is set and free vd's DTL object in syncing context. 
 * But first make sure we're not on any *other* txg's DTL list, to
 * prevent vd from being accessed after it's freed.
 */
	for (int t = 0; t < TXG_SIZE; t++)
		(void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t);
	vd->vdev_detached = B_TRUE;
	vdev_dirty(tvd, VDD_DTL, vd, txg);

	/* Post a removal sysevent so userland (e.g. FMA) hears about it. */
	spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE);

	error = spa_vdev_exit(spa, vd, txg, 0);

	/*
	 * If this was the removal of the original device in a hot spare vdev,
	 * then we want to go through and remove the device from the hot spare
	 * list of every other pool.
	 */
	if (unspare) {
		spa_t *myspa = spa;
		spa = NULL;
		mutex_enter(&spa_namespace_lock);
		while ((spa = spa_next(spa)) != NULL) {
			if (spa->spa_state != POOL_STATE_ACTIVE)
				continue;
			if (spa == myspa)
				continue;
			/*
			 * Hold a reference and drop the namespace lock while
			 * removing, since spa_vdev_remove() re-acquires it.
			 */
			spa_open_ref(spa, FTAG);
			mutex_exit(&spa_namespace_lock);
			(void) spa_vdev_remove(spa, unspare_guid, B_TRUE);
			mutex_enter(&spa_namespace_lock);
			spa_close(spa, FTAG);
		}
		mutex_exit(&spa_namespace_lock);
	}

	return (error);
}

/*
 * Return the element of nvpp[0..count-1] whose ZPOOL_CONFIG_GUID equals
 * target_guid, or NULL if no element matches.  Every element is expected
 * to carry a GUID (VERIFY'd).
 */
static nvlist_t *
spa_nvlist_lookup_by_guid(nvlist_t **nvpp, int count, uint64_t target_guid)
{
	for (int i = 0; i < count; i++) {
		uint64_t guid;

		VERIFY(nvlist_lookup_uint64(nvpp[i], ZPOOL_CONFIG_GUID,
		    &guid) == 0);

		if (guid == target_guid)
			return (nvpp[i]);
	}

	return (NULL);
}

/*
 * Replace the 'name' nvlist array in 'config' with a duplicate of
 * dev[0..count-1] minus dev_to_remove.  Caller guarantees dev_to_remove
 * is present, so the new array has exactly count - 1 entries.
 */
static void
spa_vdev_remove_aux(nvlist_t *config, char *name, nvlist_t **dev, int count,
    nvlist_t *dev_to_remove)
{
	nvlist_t **newdev = NULL;

	/* count == 1 means the array becomes empty; no allocation needed */
	if (count > 1)
		newdev = kmem_alloc((count - 1) * sizeof (void *), KM_SLEEP);

	for (int i = 0, j = 0; i < count; i++) {
		if (dev[i] == dev_to_remove)
			continue;
		VERIFY(nvlist_dup(dev[i], &newdev[j++], KM_SLEEP) == 0);
	}

	VERIFY(nvlist_remove(config, name, DATA_TYPE_NVLIST_ARRAY) == 0);
	VERIFY(nvlist_add_nvlist_array(config, name, newdev, count - 1) == 0);

	for (int i = 0; i < count - 1; i++)
		nvlist_free(newdev[i]);

	if (count > 1)
		kmem_free(newdev, (count - 1) * sizeof (void *));
}

/*
 * Removing a device from the vdev namespace requires several steps
 * and can take a significant amount of time.
As a result we use 378910594SGeorge.Wilson@Sun.COM * the spa_vdev_config_[enter/exit] functions which allow us to 379010594SGeorge.Wilson@Sun.COM * grab and release the spa_config_lock while still holding the namespace 379110594SGeorge.Wilson@Sun.COM * lock. During each step the configuration is synced out. 379210594SGeorge.Wilson@Sun.COM */ 379310594SGeorge.Wilson@Sun.COM 379410594SGeorge.Wilson@Sun.COM /* 379510594SGeorge.Wilson@Sun.COM * Evacuate the device. 379610594SGeorge.Wilson@Sun.COM */ 379710594SGeorge.Wilson@Sun.COM int 379810594SGeorge.Wilson@Sun.COM spa_vdev_remove_evacuate(spa_t *spa, vdev_t *vd) 379910594SGeorge.Wilson@Sun.COM { 380010974SJeff.Bonwick@Sun.COM int error = 0; 380110594SGeorge.Wilson@Sun.COM uint64_t txg; 380210594SGeorge.Wilson@Sun.COM 380310594SGeorge.Wilson@Sun.COM ASSERT(MUTEX_HELD(&spa_namespace_lock)); 380410594SGeorge.Wilson@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); 380510922SJeff.Bonwick@Sun.COM ASSERT(vd == vd->vdev_top); 380610594SGeorge.Wilson@Sun.COM 380710594SGeorge.Wilson@Sun.COM /* 380810594SGeorge.Wilson@Sun.COM * Evacuate the device. We don't hold the config lock as writer 380910594SGeorge.Wilson@Sun.COM * since we need to do I/O but we do keep the 381010594SGeorge.Wilson@Sun.COM * spa_namespace_lock held. Once this completes the device 381110594SGeorge.Wilson@Sun.COM * should no longer have any blocks allocated on it. 
381210594SGeorge.Wilson@Sun.COM */ 381310594SGeorge.Wilson@Sun.COM if (vd->vdev_islog) { 381410974SJeff.Bonwick@Sun.COM error = dmu_objset_find(spa_name(spa), zil_vdev_offline, 381510974SJeff.Bonwick@Sun.COM NULL, DS_FIND_CHILDREN); 381610974SJeff.Bonwick@Sun.COM } else { 381710974SJeff.Bonwick@Sun.COM error = ENOTSUP; /* until we have bp rewrite */ 381810594SGeorge.Wilson@Sun.COM } 381910594SGeorge.Wilson@Sun.COM 382010974SJeff.Bonwick@Sun.COM txg_wait_synced(spa_get_dsl(spa), 0); 382110974SJeff.Bonwick@Sun.COM 382210974SJeff.Bonwick@Sun.COM if (error) 382310974SJeff.Bonwick@Sun.COM return (error); 382410974SJeff.Bonwick@Sun.COM 382510594SGeorge.Wilson@Sun.COM /* 382610974SJeff.Bonwick@Sun.COM * The evacuation succeeded. Remove any remaining MOS metadata 382710974SJeff.Bonwick@Sun.COM * associated with this vdev, and wait for these changes to sync. 382810594SGeorge.Wilson@Sun.COM */ 382910594SGeorge.Wilson@Sun.COM txg = spa_vdev_config_enter(spa); 383010594SGeorge.Wilson@Sun.COM vd->vdev_removing = B_TRUE; 383110594SGeorge.Wilson@Sun.COM vdev_dirty(vd, 0, NULL, txg); 383210594SGeorge.Wilson@Sun.COM vdev_config_dirty(vd); 383310594SGeorge.Wilson@Sun.COM spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); 383410594SGeorge.Wilson@Sun.COM 383510594SGeorge.Wilson@Sun.COM return (0); 383610594SGeorge.Wilson@Sun.COM } 383710594SGeorge.Wilson@Sun.COM 383810594SGeorge.Wilson@Sun.COM /* 383910594SGeorge.Wilson@Sun.COM * Complete the removal by cleaning up the namespace. 
384010594SGeorge.Wilson@Sun.COM */ 384110594SGeorge.Wilson@Sun.COM void 384210974SJeff.Bonwick@Sun.COM spa_vdev_remove_from_namespace(spa_t *spa, vdev_t *vd) 384310594SGeorge.Wilson@Sun.COM { 384410594SGeorge.Wilson@Sun.COM vdev_t *rvd = spa->spa_root_vdev; 384510594SGeorge.Wilson@Sun.COM uint64_t id = vd->vdev_id; 384610594SGeorge.Wilson@Sun.COM boolean_t last_vdev = (id == (rvd->vdev_children - 1)); 384710594SGeorge.Wilson@Sun.COM 384810594SGeorge.Wilson@Sun.COM ASSERT(MUTEX_HELD(&spa_namespace_lock)); 384910594SGeorge.Wilson@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 385010922SJeff.Bonwick@Sun.COM ASSERT(vd == vd->vdev_top); 385110594SGeorge.Wilson@Sun.COM 385210594SGeorge.Wilson@Sun.COM (void) vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); 385310922SJeff.Bonwick@Sun.COM 385410922SJeff.Bonwick@Sun.COM if (list_link_active(&vd->vdev_state_dirty_node)) 385510922SJeff.Bonwick@Sun.COM vdev_state_clean(vd); 385610922SJeff.Bonwick@Sun.COM if (list_link_active(&vd->vdev_config_dirty_node)) 385710922SJeff.Bonwick@Sun.COM vdev_config_clean(vd); 385810922SJeff.Bonwick@Sun.COM 385910594SGeorge.Wilson@Sun.COM vdev_free(vd); 386010594SGeorge.Wilson@Sun.COM 386110594SGeorge.Wilson@Sun.COM if (last_vdev) { 386210594SGeorge.Wilson@Sun.COM vdev_compact_children(rvd); 386310594SGeorge.Wilson@Sun.COM } else { 386410594SGeorge.Wilson@Sun.COM vd = vdev_alloc_common(spa, id, 0, &vdev_hole_ops); 386510594SGeorge.Wilson@Sun.COM vdev_add_child(rvd, vd); 386610594SGeorge.Wilson@Sun.COM } 386710594SGeorge.Wilson@Sun.COM vdev_config_dirty(rvd); 386810594SGeorge.Wilson@Sun.COM 386910594SGeorge.Wilson@Sun.COM /* 387010594SGeorge.Wilson@Sun.COM * Reassess the health of our root vdev. 387110594SGeorge.Wilson@Sun.COM */ 387210594SGeorge.Wilson@Sun.COM vdev_reopen(rvd); 387310594SGeorge.Wilson@Sun.COM } 387410594SGeorge.Wilson@Sun.COM 387510594SGeorge.Wilson@Sun.COM /* 38765450Sbrendan * Remove a device from the pool. 
 * Currently, this supports removing only hot
 * spares, slogs, and level 2 ARC devices.
 */
int
spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
{
	vdev_t *vd;
	metaslab_group_t *mg;
	nvlist_t **spares, **l2cache, *nv;
	uint64_t txg = 0;
	uint_t nspares, nl2cache;
	int error = 0;
	/* May be called with the namespace lock already held (recursion). */
	boolean_t locked = MUTEX_HELD(&spa_namespace_lock);

	if (!locked)
		txg = spa_vdev_enter(spa);

	vd = spa_lookup_by_guid(spa, guid, B_FALSE);

	if (spa->spa_spares.sav_vdevs != NULL &&
	    nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
	    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0 &&
	    (nv = spa_nvlist_lookup_by_guid(spares, nspares, guid)) != NULL) {
		/*
		 * Only remove the hot spare if it's not currently in use
		 * in this pool.
		 */
		if (vd == NULL || unspare) {
			spa_vdev_remove_aux(spa->spa_spares.sav_config,
			    ZPOOL_CONFIG_SPARES, spares, nspares, nv);
			spa_load_spares(spa);
			spa->spa_spares.sav_sync = B_TRUE;
		} else {
			error = EBUSY;
		}
	} else if (spa->spa_l2cache.sav_vdevs != NULL &&
	    nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config,
	    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0 &&
	    (nv = spa_nvlist_lookup_by_guid(l2cache, nl2cache, guid)) != NULL) {
		/*
		 * Cache devices can always be removed.
		 */
		spa_vdev_remove_aux(spa->spa_l2cache.sav_config,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache, nv);
		spa_load_l2cache(spa);
		spa->spa_l2cache.sav_sync = B_TRUE;
	} else if (vd != NULL && vd->vdev_islog) {
		ASSERT(!locked);
		ASSERT(vd == vd->vdev_top);

		/*
		 * XXX - Once we have bp-rewrite this should
		 * become the common case.
		 */

		mg = vd->vdev_mg;

		/*
		 * Stop allocating from this vdev.
		 */
		metaslab_group_passivate(mg);

		/*
		 * Wait for the youngest allocations and frees to sync,
		 * and then wait for the deferral of those frees to finish.
		 */
		spa_vdev_config_exit(spa, NULL,
		    txg + TXG_CONCURRENT_STATES + TXG_DEFER_SIZE, 0, FTAG);

		/*
		 * Attempt to evacuate the vdev.
		 */
		error = spa_vdev_remove_evacuate(spa, vd);

		txg = spa_vdev_config_enter(spa);

		/*
		 * If we couldn't evacuate the vdev, unwind.
		 */
		if (error) {
			metaslab_group_activate(mg);
			return (spa_vdev_exit(spa, NULL, txg, error));
		}

		/*
		 * Clean up the vdev namespace.
		 */
		spa_vdev_remove_from_namespace(spa, vd);

	} else if (vd != NULL) {
		/*
		 * Normal vdevs cannot be removed (yet).
		 */
		error = ENOTSUP;
	} else {
		/*
		 * There is no vdev of any kind with the specified guid.
		 */
		error = ENOENT;
	}

	if (!locked)
		return (spa_vdev_exit(spa, NULL, txg, error));

	return (error);
}

/*
 * Find any device that's done replacing, or a vdev marked 'unspare' that's
 * currently spared, so we can detach it.
 */
static vdev_t *
spa_vdev_resilver_done_hunt(vdev_t *vd)
{
	vdev_t *newvd, *oldvd;

	/* Depth-first: report the first candidate found in any child. */
	for (int c = 0; c < vd->vdev_children; c++) {
		oldvd = spa_vdev_resilver_done_hunt(vd->vdev_child[c]);
		if (oldvd != NULL)
			return (oldvd);
	}

	/*
	 * Check for a completed replacement.
	 */
	if (vd->vdev_ops == &vdev_replacing_ops && vd->vdev_children == 2) {
		oldvd = vd->vdev_child[0];
		newvd = vd->vdev_child[1];

		if (vdev_dtl_empty(newvd, DTL_MISSING) &&
		    !vdev_dtl_required(oldvd))
			return (oldvd);
	}

	/*
	 * Check for a completed resilver with the 'unspare' flag set.
	 * Note: for a spare vdev the roles are reversed -- child[0] is
	 * the original device and child[1] is the spare.
	 */
	if (vd->vdev_ops == &vdev_spare_ops && vd->vdev_children == 2) {
		newvd = vd->vdev_child[0];
		oldvd = vd->vdev_child[1];

		if (newvd->vdev_unspare &&
		    vdev_dtl_empty(newvd, DTL_MISSING) &&
		    !vdev_dtl_required(oldvd)) {
			newvd->vdev_unspare = 0;
			return (oldvd);
		}
	}

	return (NULL);
}

/*
 * Detach every device found by spa_vdev_resilver_done_hunt(), dropping
 * the config lock around each spa_vdev_detach() call.
 */
static void
spa_vdev_resilver_done(spa_t *spa)
{
	vdev_t *vd, *pvd, *ppvd;
	uint64_t guid, sguid, pguid, ppguid;

	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	while ((vd = spa_vdev_resilver_done_hunt(spa->spa_root_vdev)) != NULL) {
		/* Capture guids now; vd may vanish once the lock is dropped. */
		pvd = vd->vdev_parent;
		ppvd = pvd->vdev_parent;
		guid = vd->vdev_guid;
		pguid = pvd->vdev_guid;
		ppguid = ppvd->vdev_guid;
		sguid = 0;
		/*
		 * If we have just finished replacing a hot spared device, then
		 * we need to detach the parent's first child (the original hot
		 * spare) as well.
		 */
		if (ppvd->vdev_ops == &vdev_spare_ops && pvd->vdev_id == 0) {
			ASSERT(pvd->vdev_ops == &vdev_replacing_ops);
			ASSERT(ppvd->vdev_children == 2);
			sguid = ppvd->vdev_child[1]->vdev_guid;
		}
		spa_config_exit(spa, SCL_ALL, FTAG);
		if (spa_vdev_detach(spa, guid, pguid, B_TRUE) != 0)
			return;
		if (sguid && spa_vdev_detach(spa, sguid, ppguid, B_TRUE) != 0)
			return;
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	}

	spa_config_exit(spa, SCL_ALL, FTAG);
}

/*
 * Update the stored path or FRU for this vdev.  Dirty the vdev configuration,
 * relying on spa_vdev_enter/exit() to synchronize the labels and cache.
40671354Seschrock */ 40681354Seschrock int 40699425SEric.Schrock@Sun.COM spa_vdev_set_common(spa_t *spa, uint64_t guid, const char *value, 40709425SEric.Schrock@Sun.COM boolean_t ispath) 40711354Seschrock { 40726643Seschrock vdev_t *vd; 40731354Seschrock uint64_t txg; 40741354Seschrock 40751354Seschrock txg = spa_vdev_enter(spa); 40761354Seschrock 40779425SEric.Schrock@Sun.COM if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) 40785450Sbrendan return (spa_vdev_exit(spa, NULL, txg, ENOENT)); 40791354Seschrock 40801585Sbonwick if (!vd->vdev_ops->vdev_op_leaf) 40811585Sbonwick return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 40821585Sbonwick 40839425SEric.Schrock@Sun.COM if (ispath) { 40849425SEric.Schrock@Sun.COM spa_strfree(vd->vdev_path); 40859425SEric.Schrock@Sun.COM vd->vdev_path = spa_strdup(value); 40869425SEric.Schrock@Sun.COM } else { 40879425SEric.Schrock@Sun.COM if (vd->vdev_fru != NULL) 40889425SEric.Schrock@Sun.COM spa_strfree(vd->vdev_fru); 40899425SEric.Schrock@Sun.COM vd->vdev_fru = spa_strdup(value); 40909425SEric.Schrock@Sun.COM } 40911354Seschrock 40921354Seschrock vdev_config_dirty(vd->vdev_top); 40931354Seschrock 40941354Seschrock return (spa_vdev_exit(spa, NULL, txg, 0)); 40951354Seschrock } 40961354Seschrock 40979425SEric.Schrock@Sun.COM int 40989425SEric.Schrock@Sun.COM spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath) 40999425SEric.Schrock@Sun.COM { 41009425SEric.Schrock@Sun.COM return (spa_vdev_set_common(spa, guid, newpath, B_TRUE)); 41019425SEric.Schrock@Sun.COM } 41029425SEric.Schrock@Sun.COM 41039425SEric.Schrock@Sun.COM int 41049425SEric.Schrock@Sun.COM spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru) 41059425SEric.Schrock@Sun.COM { 41069425SEric.Schrock@Sun.COM return (spa_vdev_set_common(spa, guid, newfru, B_FALSE)); 41079425SEric.Schrock@Sun.COM } 41089425SEric.Schrock@Sun.COM 41091354Seschrock /* 4110789Sahrens * ========================================================================== 
 * SPA Scrubbing
 * ==========================================================================
 */

int
spa_scrub(spa_t *spa, pool_scrub_type_t type)
{
	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);

	if ((uint_t)type >= POOL_SCRUB_TYPES)
		return (ENOTSUP);

	/*
	 * If a resilver was requested, but there is no DTL on a
	 * writeable leaf device, we have nothing to do.
	 */
	if (type == POOL_SCRUB_RESILVER &&
	    !vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) {
		spa_async_request(spa, SPA_ASYNC_RESILVER_DONE);
		return (0);
	}

	/* A full scrub cannot preempt an in-progress resilver. */
	if (type == POOL_SCRUB_EVERYTHING &&
	    spa->spa_dsl_pool->dp_scrub_func != SCRUB_FUNC_NONE &&
	    spa->spa_dsl_pool->dp_scrub_isresilver)
		return (EBUSY);

	if (type == POOL_SCRUB_EVERYTHING || type == POOL_SCRUB_RESILVER) {
		return (dsl_pool_scrub_clean(spa->spa_dsl_pool));
	} else if (type == POOL_SCRUB_NONE) {
		return (dsl_pool_scrub_cancel(spa->spa_dsl_pool));
	} else {
		return (EINVAL);
	}
}

/*
 * ==========================================================================
 * SPA async task processing
 * ==========================================================================
 */

/*
 * Recursively mark any vdev with vdev_remove_wanted as REMOVED.
 */
static void
spa_async_remove(spa_t *spa, vdev_t *vd)
{
	if (vd->vdev_remove_wanted) {
		vd->vdev_remove_wanted = 0;
		vdev_set_state(vd, B_FALSE, VDEV_STATE_REMOVED, VDEV_AUX_NONE);

		/*
		 * We want to clear the stats, but we don't want to do a full
		 * vdev_clear() as that will cause us to throw away
		 * degraded/faulted state as well as attempt to reopen the
		 * device, all of which is a waste.
		 */
		vd->vdev_stat.vs_read_errors = 0;
		vd->vdev_stat.vs_write_errors = 0;
		vd->vdev_stat.vs_checksum_errors = 0;

		vdev_state_dirty(vd->vdev_top);
	}

	for (int c = 0; c < vd->vdev_children; c++)
		spa_async_remove(spa, vd->vdev_child[c]);
}

/*
 * Recursively reopen (and thereby re-probe) any vdev with
 * vdev_probe_wanted set.
 */
static void
spa_async_probe(spa_t *spa, vdev_t *vd)
{
	if (vd->vdev_probe_wanted) {
		vd->vdev_probe_wanted = 0;
		vdev_reopen(vd);	/* vdev_open() does the actual probe */
	}

	for (int c = 0; c < vd->vdev_children; c++)
		spa_async_probe(spa, vd->vdev_child[c]);
}

/*
 * Post a device-level-event (DLE) sysevent for every leaf vdev with a
 * known physical path, prompting the system to re-read device sizes.
 */
static void
spa_async_autoexpand(spa_t *spa, vdev_t *vd)
{
	sysevent_id_t eid;
	nvlist_t *attr;
	char *physpath;

	if (!spa->spa_autoexpand)
		return;

	for (int c = 0; c < vd->vdev_children; c++) {
		vdev_t *cvd = vd->vdev_child[c];
		spa_async_autoexpand(spa, cvd);
	}

	if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_physpath == NULL)
		return;

	physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	(void) snprintf(physpath, MAXPATHLEN, "/devices%s", vd->vdev_physpath);

	VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0);

	(void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS,
	    ESC_DEV_DLE, attr, &eid, DDI_SLEEP);

	nvlist_free(attr);
	kmem_free(physpath, MAXPATHLEN);
}

/*
 * Worker thread that drains spa_async_tasks, then exits.  Re-dispatched
 * on demand by spa_async_dispatch().
 */
static void
spa_async_thread(spa_t *spa)
{
	int tasks;

	ASSERT(spa->spa_sync_on);

	/* Atomically claim all pending tasks. */
	mutex_enter(&spa->spa_async_lock);
	tasks = spa->spa_async_tasks;
	spa->spa_async_tasks = 0;
	mutex_exit(&spa->spa_async_lock);

	/*
	 * See if the config needs to be updated.
	 */
	if (tasks & SPA_ASYNC_CONFIG_UPDATE) {
		uint64_t old_space, new_space;

		mutex_enter(&spa_namespace_lock);
		old_space = metaslab_class_get_space(spa_normal_class(spa));
		spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
		new_space = metaslab_class_get_space(spa_normal_class(spa));
		mutex_exit(&spa_namespace_lock);

		/*
		 * If the pool grew as a result of the config update,
		 * then log an internal history event.
		 */
		if (new_space != old_space) {
			spa_history_internal_log(LOG_POOL_VDEV_ONLINE,
			    spa, NULL, CRED(),
			    "pool '%s' size: %llu(+%llu)",
			    spa_name(spa), new_space, new_space - old_space);
		}
	}

	/*
	 * See if any devices need to be marked REMOVED.
	 */
	if (tasks & SPA_ASYNC_REMOVE) {
		spa_vdev_state_enter(spa, SCL_NONE);
		spa_async_remove(spa, spa->spa_root_vdev);
		for (int i = 0; i < spa->spa_l2cache.sav_count; i++)
			spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i]);
		for (int i = 0; i < spa->spa_spares.sav_count; i++)
			spa_async_remove(spa, spa->spa_spares.sav_vdevs[i]);
		(void) spa_vdev_state_exit(spa, NULL, 0);
	}

	if ((tasks & SPA_ASYNC_AUTOEXPAND) && !spa_suspended(spa)) {
		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
		spa_async_autoexpand(spa, spa->spa_root_vdev);
		spa_config_exit(spa, SCL_CONFIG, FTAG);
	}

	/*
	 * See if any devices need to be probed.
	 */
	if (tasks & SPA_ASYNC_PROBE) {
		spa_vdev_state_enter(spa, SCL_NONE);
		spa_async_probe(spa, spa->spa_root_vdev);
		(void) spa_vdev_state_exit(spa, NULL, 0);
	}

	/*
	 * If any devices are done replacing, detach them.
	 */
	if (tasks & SPA_ASYNC_RESILVER_DONE)
		spa_vdev_resilver_done(spa);

	/*
	 * Kick off a resilver.
	 */
	if (tasks & SPA_ASYNC_RESILVER)
		VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER) == 0);

	/*
	 * Let the world know that we're done.
	 */
	mutex_enter(&spa->spa_async_lock);
	spa->spa_async_thread = NULL;
	cv_broadcast(&spa->spa_async_cv);
	mutex_exit(&spa->spa_async_lock);
	thread_exit();
}

/*
 * Block new async work and wait for any running async thread to finish.
 * May be called multiple times; each call must be balanced by
 * spa_async_resume().
 */
void
spa_async_suspend(spa_t *spa)
{
	mutex_enter(&spa->spa_async_lock);
	spa->spa_async_suspended++;
	while (spa->spa_async_thread != NULL)
		cv_wait(&spa->spa_async_cv, &spa->spa_async_lock);
	mutex_exit(&spa->spa_async_lock);
}

void
spa_async_resume(spa_t *spa)
{
	mutex_enter(&spa->spa_async_lock);
	ASSERT(spa->spa_async_suspended != 0);
	spa->spa_async_suspended--;
	mutex_exit(&spa->spa_async_lock);
}

/*
 * Start the async worker if there is pending work, we are not suspended,
 * no worker is already running, and the root filesystem is writeable.
 */
static void
spa_async_dispatch(spa_t *spa)
{
	mutex_enter(&spa->spa_async_lock);
	if (spa->spa_async_tasks && !spa->spa_async_suspended &&
	    spa->spa_async_thread == NULL &&
	    rootdir != NULL && !vn_is_readonly(rootdir))
		spa->spa_async_thread = thread_create(NULL, 0,
		    spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri);
	mutex_exit(&spa->spa_async_lock);
}

/*
 * Queue one or more SPA_ASYNC_* tasks; actual dispatch happens later.
 */
void
spa_async_request(spa_t *spa, int task)
{
	mutex_enter(&spa->spa_async_lock);
	spa->spa_async_tasks |= task;
	mutex_exit(&spa->spa_async_lock);
}

/*
 * ==========================================================================
 * SPA syncing routines
 * ==========================================================================
 */
static void
spa_sync_deferred_bplist(spa_t *spa, bplist_t *bpl, dmu_tx_t *tx, uint64_t txg)
{
	blkptr_t blk;
	uint64_t itor = 0;
	uint8_t c = 1;

	/* Free every deferred block pointer accumulated in the bplist. */
	while (bplist_iterate(bpl, &itor, &blk) == 0) {
		ASSERT(blk.blk_birth < txg);
		zio_free(spa, txg, &blk);
	}

	bplist_vacate(bpl, tx);

	/*
	 * Pre-dirty the first block so we sync to convergence faster.
	 * (Usually only the first block is needed.)
4367789Sahrens */ 436810922SJeff.Bonwick@Sun.COM dmu_write(bpl->bpl_mos, spa->spa_deferred_bplist_obj, 0, 1, &c, tx); 436910922SJeff.Bonwick@Sun.COM } 437010922SJeff.Bonwick@Sun.COM 437110922SJeff.Bonwick@Sun.COM static void 437210922SJeff.Bonwick@Sun.COM spa_sync_free(void *arg, const blkptr_t *bp, dmu_tx_t *tx) 437310922SJeff.Bonwick@Sun.COM { 437410922SJeff.Bonwick@Sun.COM zio_t *zio = arg; 437510922SJeff.Bonwick@Sun.COM 437610922SJeff.Bonwick@Sun.COM zio_nowait(zio_free_sync(zio, zio->io_spa, dmu_tx_get_txg(tx), bp, 437710922SJeff.Bonwick@Sun.COM zio->io_flags)); 4378789Sahrens } 4379789Sahrens 4380789Sahrens static void 43812082Seschrock spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx) 43822082Seschrock { 43832082Seschrock char *packed = NULL; 43847497STim.Haley@Sun.COM size_t bufsize; 43852082Seschrock size_t nvsize = 0; 43862082Seschrock dmu_buf_t *db; 43872082Seschrock 43882082Seschrock VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0); 43892082Seschrock 43907497STim.Haley@Sun.COM /* 43917497STim.Haley@Sun.COM * Write full (SPA_CONFIG_BLOCKSIZE) blocks of configuration 43927497STim.Haley@Sun.COM * information. This avoids the dbuf_will_dirty() path and 43937497STim.Haley@Sun.COM * saves us a pre-read to get data we don't actually care about. 
43947497STim.Haley@Sun.COM */ 43957497STim.Haley@Sun.COM bufsize = P2ROUNDUP(nvsize, SPA_CONFIG_BLOCKSIZE); 43967497STim.Haley@Sun.COM packed = kmem_alloc(bufsize, KM_SLEEP); 43972082Seschrock 43982082Seschrock VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR, 43992082Seschrock KM_SLEEP) == 0); 44007497STim.Haley@Sun.COM bzero(packed + nvsize, bufsize - nvsize); 44017497STim.Haley@Sun.COM 44027497STim.Haley@Sun.COM dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx); 44037497STim.Haley@Sun.COM 44047497STim.Haley@Sun.COM kmem_free(packed, bufsize); 44052082Seschrock 44062082Seschrock VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); 44072082Seschrock dmu_buf_will_dirty(db, tx); 44082082Seschrock *(uint64_t *)db->db_data = nvsize; 44092082Seschrock dmu_buf_rele(db, FTAG); 44102082Seschrock } 44112082Seschrock 44122082Seschrock static void 44135450Sbrendan spa_sync_aux_dev(spa_t *spa, spa_aux_vdev_t *sav, dmu_tx_t *tx, 44145450Sbrendan const char *config, const char *entry) 44152082Seschrock { 44162082Seschrock nvlist_t *nvroot; 44175450Sbrendan nvlist_t **list; 44182082Seschrock int i; 44192082Seschrock 44205450Sbrendan if (!sav->sav_sync) 44212082Seschrock return; 44222082Seschrock 44232082Seschrock /* 44245450Sbrendan * Update the MOS nvlist describing the list of available devices. 44255450Sbrendan * spa_validate_aux() will have already made sure this nvlist is 44264451Seschrock * valid and the vdevs are labeled appropriately. 
44272082Seschrock */ 44285450Sbrendan if (sav->sav_object == 0) { 44295450Sbrendan sav->sav_object = dmu_object_alloc(spa->spa_meta_objset, 44305450Sbrendan DMU_OT_PACKED_NVLIST, 1 << 14, DMU_OT_PACKED_NVLIST_SIZE, 44315450Sbrendan sizeof (uint64_t), tx); 44322082Seschrock VERIFY(zap_update(spa->spa_meta_objset, 44335450Sbrendan DMU_POOL_DIRECTORY_OBJECT, entry, sizeof (uint64_t), 1, 44345450Sbrendan &sav->sav_object, tx) == 0); 44352082Seschrock } 44362082Seschrock 44372082Seschrock VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); 44385450Sbrendan if (sav->sav_count == 0) { 44395450Sbrendan VERIFY(nvlist_add_nvlist_array(nvroot, config, NULL, 0) == 0); 44402082Seschrock } else { 44415450Sbrendan list = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); 44425450Sbrendan for (i = 0; i < sav->sav_count; i++) 44435450Sbrendan list[i] = vdev_config_generate(spa, sav->sav_vdevs[i], 44445450Sbrendan B_FALSE, B_FALSE, B_TRUE); 44455450Sbrendan VERIFY(nvlist_add_nvlist_array(nvroot, config, list, 44465450Sbrendan sav->sav_count) == 0); 44475450Sbrendan for (i = 0; i < sav->sav_count; i++) 44485450Sbrendan nvlist_free(list[i]); 44495450Sbrendan kmem_free(list, sav->sav_count * sizeof (void *)); 44502082Seschrock } 44512082Seschrock 44525450Sbrendan spa_sync_nvlist(spa, sav->sav_object, nvroot, tx); 44532926Sek110237 nvlist_free(nvroot); 44542082Seschrock 44555450Sbrendan sav->sav_sync = B_FALSE; 44562082Seschrock } 44572082Seschrock 44582082Seschrock static void 4459789Sahrens spa_sync_config_object(spa_t *spa, dmu_tx_t *tx) 4460789Sahrens { 4461789Sahrens nvlist_t *config; 4462789Sahrens 44637754SJeff.Bonwick@Sun.COM if (list_is_empty(&spa->spa_config_dirty_list)) 4464789Sahrens return; 4465789Sahrens 44667754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 44677754SJeff.Bonwick@Sun.COM 44687754SJeff.Bonwick@Sun.COM config = spa_config_generate(spa, spa->spa_root_vdev, 44697754SJeff.Bonwick@Sun.COM dmu_tx_get_txg(tx), B_FALSE); 
44707754SJeff.Bonwick@Sun.COM 44717754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_STATE, FTAG); 4472789Sahrens 44731635Sbonwick if (spa->spa_config_syncing) 44741635Sbonwick nvlist_free(spa->spa_config_syncing); 44751635Sbonwick spa->spa_config_syncing = config; 4476789Sahrens 44772082Seschrock spa_sync_nvlist(spa, spa->spa_config_object, config, tx); 4478789Sahrens } 4479789Sahrens 44805094Slling /* 44815094Slling * Set zpool properties. 44825094Slling */ 44833912Slling static void 44844543Smarks spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 44853912Slling { 44863912Slling spa_t *spa = arg1; 44875094Slling objset_t *mos = spa->spa_meta_objset; 44883912Slling nvlist_t *nvp = arg2; 44895094Slling nvpair_t *elem; 44904451Seschrock uint64_t intval; 44916643Seschrock char *strval; 44925094Slling zpool_prop_t prop; 44935094Slling const char *propname; 44945094Slling zprop_type_t proptype; 44955094Slling 44967754SJeff.Bonwick@Sun.COM mutex_enter(&spa->spa_props_lock); 44977754SJeff.Bonwick@Sun.COM 44985094Slling elem = NULL; 44995094Slling while ((elem = nvlist_next_nvpair(nvp, elem))) { 45005094Slling switch (prop = zpool_name_to_prop(nvpair_name(elem))) { 45015094Slling case ZPOOL_PROP_VERSION: 45025094Slling /* 45035094Slling * Only set version for non-zpool-creation cases 45045094Slling * (set/import). spa_create() needs special care 45055094Slling * for version setting. 45065094Slling */ 45075094Slling if (tx->tx_txg != TXG_INITIAL) { 45085094Slling VERIFY(nvpair_value_uint64(elem, 45095094Slling &intval) == 0); 45105094Slling ASSERT(intval <= SPA_VERSION); 45115094Slling ASSERT(intval >= spa_version(spa)); 45125094Slling spa->spa_uberblock.ub_version = intval; 45135094Slling vdev_config_dirty(spa->spa_root_vdev); 45145094Slling } 45155094Slling break; 45165094Slling 45175094Slling case ZPOOL_PROP_ALTROOT: 45185094Slling /* 45195094Slling * 'altroot' is a non-persistent property. 
It should 45205094Slling * have been set temporarily at creation or import time. 45215094Slling */ 45225094Slling ASSERT(spa->spa_root != NULL); 45235094Slling break; 45245094Slling 45255363Seschrock case ZPOOL_PROP_CACHEFILE: 45265094Slling /* 45278525SEric.Schrock@Sun.COM * 'cachefile' is also a non-persisitent property. 45285094Slling */ 45294543Smarks break; 45305094Slling default: 45315094Slling /* 45325094Slling * Set pool property values in the poolprops mos object. 45335094Slling */ 45345094Slling if (spa->spa_pool_props_object == 0) { 45355094Slling VERIFY((spa->spa_pool_props_object = 45365094Slling zap_create(mos, DMU_OT_POOL_PROPS, 45375094Slling DMU_OT_NONE, 0, tx)) > 0); 45385094Slling 45395094Slling VERIFY(zap_update(mos, 45405094Slling DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS, 45415094Slling 8, 1, &spa->spa_pool_props_object, tx) 45425094Slling == 0); 45435094Slling } 45445094Slling 45455094Slling /* normalize the property name */ 45465094Slling propname = zpool_prop_to_name(prop); 45475094Slling proptype = zpool_prop_get_type(prop); 45485094Slling 45495094Slling if (nvpair_type(elem) == DATA_TYPE_STRING) { 45505094Slling ASSERT(proptype == PROP_TYPE_STRING); 45515094Slling VERIFY(nvpair_value_string(elem, &strval) == 0); 45525094Slling VERIFY(zap_update(mos, 45535094Slling spa->spa_pool_props_object, propname, 45545094Slling 1, strlen(strval) + 1, strval, tx) == 0); 45555094Slling 45565094Slling } else if (nvpair_type(elem) == DATA_TYPE_UINT64) { 45575094Slling VERIFY(nvpair_value_uint64(elem, &intval) == 0); 45585094Slling 45595094Slling if (proptype == PROP_TYPE_INDEX) { 45605094Slling const char *unused; 45615094Slling VERIFY(zpool_prop_index_to_string( 45625094Slling prop, intval, &unused) == 0); 45635094Slling } 45645094Slling VERIFY(zap_update(mos, 45655094Slling spa->spa_pool_props_object, propname, 45665094Slling 8, 1, &intval, tx) == 0); 45675094Slling } else { 45685094Slling ASSERT(0); /* not allowed */ 45695094Slling } 45705094Slling 
45715329Sgw25295 switch (prop) { 45725329Sgw25295 case ZPOOL_PROP_DELEGATION: 45735094Slling spa->spa_delegation = intval; 45745329Sgw25295 break; 45755329Sgw25295 case ZPOOL_PROP_BOOTFS: 45765094Slling spa->spa_bootfs = intval; 45775329Sgw25295 break; 45785329Sgw25295 case ZPOOL_PROP_FAILUREMODE: 45795329Sgw25295 spa->spa_failmode = intval; 45805329Sgw25295 break; 45819816SGeorge.Wilson@Sun.COM case ZPOOL_PROP_AUTOEXPAND: 45829816SGeorge.Wilson@Sun.COM spa->spa_autoexpand = intval; 45839816SGeorge.Wilson@Sun.COM spa_async_request(spa, SPA_ASYNC_AUTOEXPAND); 45849816SGeorge.Wilson@Sun.COM break; 458510922SJeff.Bonwick@Sun.COM case ZPOOL_PROP_DEDUPDITTO: 458610922SJeff.Bonwick@Sun.COM spa->spa_dedup_ditto = intval; 458710922SJeff.Bonwick@Sun.COM break; 45885329Sgw25295 default: 45895329Sgw25295 break; 45905329Sgw25295 } 45913912Slling } 45925094Slling 45935094Slling /* log internal history if this is not a zpool create */ 45945094Slling if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY && 45955094Slling tx->tx_txg != TXG_INITIAL) { 45965094Slling spa_history_internal_log(LOG_POOL_PROPSET, 45975094Slling spa, tx, cr, "%s %lld %s", 45987754SJeff.Bonwick@Sun.COM nvpair_name(elem), intval, spa_name(spa)); 45995094Slling } 46003912Slling } 46017754SJeff.Bonwick@Sun.COM 46027754SJeff.Bonwick@Sun.COM mutex_exit(&spa->spa_props_lock); 46033912Slling } 46043912Slling 4605789Sahrens /* 4606789Sahrens * Sync the specified transaction group. New blocks may be dirtied as 4607789Sahrens * part of the process, so we iterate until it converges. 
4608789Sahrens */ 4609789Sahrens void 4610789Sahrens spa_sync(spa_t *spa, uint64_t txg) 4611789Sahrens { 4612789Sahrens dsl_pool_t *dp = spa->spa_dsl_pool; 4613789Sahrens objset_t *mos = spa->spa_meta_objset; 461410922SJeff.Bonwick@Sun.COM bplist_t *defer_bpl = &spa->spa_deferred_bplist; 461510922SJeff.Bonwick@Sun.COM bplist_t *free_bpl = &spa->spa_free_bplist[txg & TXG_MASK]; 46161635Sbonwick vdev_t *rvd = spa->spa_root_vdev; 4617789Sahrens vdev_t *vd; 4618789Sahrens dmu_tx_t *tx; 46197754SJeff.Bonwick@Sun.COM int error; 4620789Sahrens 4621789Sahrens /* 4622789Sahrens * Lock out configuration changes. 4623789Sahrens */ 46247754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 4625789Sahrens 4626789Sahrens spa->spa_syncing_txg = txg; 4627789Sahrens spa->spa_sync_pass = 0; 4628789Sahrens 46297754SJeff.Bonwick@Sun.COM /* 46307754SJeff.Bonwick@Sun.COM * If there are any pending vdev state changes, convert them 46317754SJeff.Bonwick@Sun.COM * into config changes that go out with this transaction group. 46327754SJeff.Bonwick@Sun.COM */ 46337754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 46348241SJeff.Bonwick@Sun.COM while (list_head(&spa->spa_state_dirty_list) != NULL) { 46358241SJeff.Bonwick@Sun.COM /* 46368241SJeff.Bonwick@Sun.COM * We need the write lock here because, for aux vdevs, 46378241SJeff.Bonwick@Sun.COM * calling vdev_config_dirty() modifies sav_config. 46388241SJeff.Bonwick@Sun.COM * This is ugly and will become unnecessary when we 46398241SJeff.Bonwick@Sun.COM * eliminate the aux vdev wart by integrating all vdevs 46408241SJeff.Bonwick@Sun.COM * into the root vdev tree. 
46418241SJeff.Bonwick@Sun.COM */ 46428241SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); 46438241SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_WRITER); 46448241SJeff.Bonwick@Sun.COM while ((vd = list_head(&spa->spa_state_dirty_list)) != NULL) { 46458241SJeff.Bonwick@Sun.COM vdev_state_clean(vd); 46468241SJeff.Bonwick@Sun.COM vdev_config_dirty(vd); 46478241SJeff.Bonwick@Sun.COM } 46488241SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); 46498241SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER); 46507754SJeff.Bonwick@Sun.COM } 46517754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_STATE, FTAG); 46527754SJeff.Bonwick@Sun.COM 465310922SJeff.Bonwick@Sun.COM VERIFY(0 == bplist_open(defer_bpl, mos, spa->spa_deferred_bplist_obj)); 4654789Sahrens 46552082Seschrock tx = dmu_tx_create_assigned(dp, txg); 46562082Seschrock 46572082Seschrock /* 46584577Sahrens * If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg, 46592082Seschrock * set spa_deflate if we have no raid-z vdevs. 
46602082Seschrock */ 46614577Sahrens if (spa->spa_ubsync.ub_version < SPA_VERSION_RAIDZ_DEFLATE && 46624577Sahrens spa->spa_uberblock.ub_version >= SPA_VERSION_RAIDZ_DEFLATE) { 46632082Seschrock int i; 46642082Seschrock 46652082Seschrock for (i = 0; i < rvd->vdev_children; i++) { 46662082Seschrock vd = rvd->vdev_child[i]; 46672082Seschrock if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE) 46682082Seschrock break; 46692082Seschrock } 46702082Seschrock if (i == rvd->vdev_children) { 46712082Seschrock spa->spa_deflate = TRUE; 46722082Seschrock VERIFY(0 == zap_add(spa->spa_meta_objset, 46732082Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 46742082Seschrock sizeof (uint64_t), 1, &spa->spa_deflate, tx)); 46752082Seschrock } 46762082Seschrock } 46772082Seschrock 46787046Sahrens if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN && 46797046Sahrens spa->spa_uberblock.ub_version >= SPA_VERSION_ORIGIN) { 46807046Sahrens dsl_pool_create_origin(dp, tx); 46817046Sahrens 46827046Sahrens /* Keeping the origin open increases spa_minref */ 46837046Sahrens spa->spa_minref += 3; 46847046Sahrens } 46857046Sahrens 46867046Sahrens if (spa->spa_ubsync.ub_version < SPA_VERSION_NEXT_CLONES && 46877046Sahrens spa->spa_uberblock.ub_version >= SPA_VERSION_NEXT_CLONES) { 46887046Sahrens dsl_pool_upgrade_clones(dp, tx); 46897046Sahrens } 46907046Sahrens 4691789Sahrens /* 4692789Sahrens * If anything has changed in this txg, push the deferred frees 4693789Sahrens * from the previous txg. If not, leave them alone so that we 4694789Sahrens * don't generate work on an otherwise idle system. 4695789Sahrens */ 4696789Sahrens if (!txg_list_empty(&dp->dp_dirty_datasets, txg) || 46972329Sek110237 !txg_list_empty(&dp->dp_dirty_dirs, txg) || 46982329Sek110237 !txg_list_empty(&dp->dp_sync_tasks, txg)) 469910922SJeff.Bonwick@Sun.COM spa_sync_deferred_bplist(spa, defer_bpl, tx, txg); 4700789Sahrens 4701789Sahrens /* 4702789Sahrens * Iterate to convergence. 
4703789Sahrens */ 4704789Sahrens do { 470510922SJeff.Bonwick@Sun.COM int pass = ++spa->spa_sync_pass; 4706789Sahrens 4707789Sahrens spa_sync_config_object(spa, tx); 47085450Sbrendan spa_sync_aux_dev(spa, &spa->spa_spares, tx, 47095450Sbrendan ZPOOL_CONFIG_SPARES, DMU_POOL_SPARES); 47105450Sbrendan spa_sync_aux_dev(spa, &spa->spa_l2cache, tx, 47115450Sbrendan ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE); 47121544Seschrock spa_errlog_sync(spa, txg); 4713789Sahrens dsl_pool_sync(dp, txg); 4714789Sahrens 471510922SJeff.Bonwick@Sun.COM if (pass <= SYNC_PASS_DEFERRED_FREE) { 471610922SJeff.Bonwick@Sun.COM zio_t *zio = zio_root(spa, NULL, NULL, 0); 471710922SJeff.Bonwick@Sun.COM bplist_sync(free_bpl, spa_sync_free, zio, tx); 471810922SJeff.Bonwick@Sun.COM VERIFY(zio_wait(zio) == 0); 471910922SJeff.Bonwick@Sun.COM } else { 472010922SJeff.Bonwick@Sun.COM bplist_sync(free_bpl, bplist_enqueue_cb, defer_bpl, tx); 4721789Sahrens } 4722789Sahrens 472310922SJeff.Bonwick@Sun.COM ddt_sync(spa, txg); 472410922SJeff.Bonwick@Sun.COM 472510922SJeff.Bonwick@Sun.COM while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg)) 472610922SJeff.Bonwick@Sun.COM vdev_sync(vd, txg); 472710922SJeff.Bonwick@Sun.COM 472810922SJeff.Bonwick@Sun.COM } while (dmu_objset_is_dirty(mos, txg)); 472910922SJeff.Bonwick@Sun.COM 473010922SJeff.Bonwick@Sun.COM ASSERT(free_bpl->bpl_queue == NULL); 473110922SJeff.Bonwick@Sun.COM 473210922SJeff.Bonwick@Sun.COM bplist_close(defer_bpl); 4733789Sahrens 4734789Sahrens /* 4735789Sahrens * Rewrite the vdev configuration (which includes the uberblock) 4736789Sahrens * to commit the transaction group. 47371635Sbonwick * 47385688Sbonwick * If there are no dirty vdevs, we sync the uberblock to a few 47395688Sbonwick * random top-level vdevs that are known to be visible in the 47407754SJeff.Bonwick@Sun.COM * config cache (see spa_vdev_add() for a complete description). 47417754SJeff.Bonwick@Sun.COM * If there *are* dirty vdevs, sync the uberblock to all vdevs. 
4742789Sahrens */ 47437754SJeff.Bonwick@Sun.COM for (;;) { 47447754SJeff.Bonwick@Sun.COM /* 47457754SJeff.Bonwick@Sun.COM * We hold SCL_STATE to prevent vdev open/close/etc. 47467754SJeff.Bonwick@Sun.COM * while we're attempting to write the vdev labels. 47477754SJeff.Bonwick@Sun.COM */ 47487754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 47497754SJeff.Bonwick@Sun.COM 47507754SJeff.Bonwick@Sun.COM if (list_is_empty(&spa->spa_config_dirty_list)) { 47517754SJeff.Bonwick@Sun.COM vdev_t *svd[SPA_DVAS_PER_BP]; 47527754SJeff.Bonwick@Sun.COM int svdcount = 0; 47537754SJeff.Bonwick@Sun.COM int children = rvd->vdev_children; 47547754SJeff.Bonwick@Sun.COM int c0 = spa_get_random(children); 47559816SGeorge.Wilson@Sun.COM 47569816SGeorge.Wilson@Sun.COM for (int c = 0; c < children; c++) { 47577754SJeff.Bonwick@Sun.COM vd = rvd->vdev_child[(c0 + c) % children]; 47587754SJeff.Bonwick@Sun.COM if (vd->vdev_ms_array == 0 || vd->vdev_islog) 47597754SJeff.Bonwick@Sun.COM continue; 47607754SJeff.Bonwick@Sun.COM svd[svdcount++] = vd; 47617754SJeff.Bonwick@Sun.COM if (svdcount == SPA_DVAS_PER_BP) 47627754SJeff.Bonwick@Sun.COM break; 47637754SJeff.Bonwick@Sun.COM } 47649725SEric.Schrock@Sun.COM error = vdev_config_sync(svd, svdcount, txg, B_FALSE); 47659725SEric.Schrock@Sun.COM if (error != 0) 47669725SEric.Schrock@Sun.COM error = vdev_config_sync(svd, svdcount, txg, 47679725SEric.Schrock@Sun.COM B_TRUE); 47687754SJeff.Bonwick@Sun.COM } else { 47697754SJeff.Bonwick@Sun.COM error = vdev_config_sync(rvd->vdev_child, 47709725SEric.Schrock@Sun.COM rvd->vdev_children, txg, B_FALSE); 47719725SEric.Schrock@Sun.COM if (error != 0) 47729725SEric.Schrock@Sun.COM error = vdev_config_sync(rvd->vdev_child, 47739725SEric.Schrock@Sun.COM rvd->vdev_children, txg, B_TRUE); 47741635Sbonwick } 47757754SJeff.Bonwick@Sun.COM 47767754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_STATE, FTAG); 47777754SJeff.Bonwick@Sun.COM 47787754SJeff.Bonwick@Sun.COM if (error == 0) 
47797754SJeff.Bonwick@Sun.COM break; 47807754SJeff.Bonwick@Sun.COM zio_suspend(spa, NULL); 47817754SJeff.Bonwick@Sun.COM zio_resume_wait(spa); 47821635Sbonwick } 47832082Seschrock dmu_tx_commit(tx); 47842082Seschrock 47851635Sbonwick /* 47861635Sbonwick * Clear the dirty config list. 47871635Sbonwick */ 47887754SJeff.Bonwick@Sun.COM while ((vd = list_head(&spa->spa_config_dirty_list)) != NULL) 47891635Sbonwick vdev_config_clean(vd); 47901635Sbonwick 47911635Sbonwick /* 47921635Sbonwick * Now that the new config has synced transactionally, 47931635Sbonwick * let it become visible to the config cache. 47941635Sbonwick */ 47951635Sbonwick if (spa->spa_config_syncing != NULL) { 47961635Sbonwick spa_config_set(spa, spa->spa_config_syncing); 47971635Sbonwick spa->spa_config_txg = txg; 47981635Sbonwick spa->spa_config_syncing = NULL; 47991635Sbonwick } 4800789Sahrens 4801789Sahrens spa->spa_ubsync = spa->spa_uberblock; 4802789Sahrens 480310922SJeff.Bonwick@Sun.COM dsl_pool_sync_done(dp, txg); 4804789Sahrens 4805789Sahrens /* 4806789Sahrens * Update usable space statistics. 4807789Sahrens */ 4808789Sahrens while (vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg))) 4809789Sahrens vdev_sync_done(vd, txg); 4810789Sahrens 481110956SGeorge.Wilson@Sun.COM spa_update_dspace(spa); 481210956SGeorge.Wilson@Sun.COM 4813789Sahrens /* 4814789Sahrens * It had better be the case that we didn't dirty anything 48152082Seschrock * since vdev_config_sync(). 
4816789Sahrens */ 4817789Sahrens ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg)); 4818789Sahrens ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg)); 4819789Sahrens ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg)); 482010922SJeff.Bonwick@Sun.COM ASSERT(defer_bpl->bpl_queue == NULL); 482110922SJeff.Bonwick@Sun.COM ASSERT(free_bpl->bpl_queue == NULL); 482210922SJeff.Bonwick@Sun.COM 482310922SJeff.Bonwick@Sun.COM spa->spa_sync_pass = 0; 4824789Sahrens 48257754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_CONFIG, FTAG); 48261544Seschrock 482710921STim.Haley@Sun.COM spa_handle_ignored_writes(spa); 482810921STim.Haley@Sun.COM 48291544Seschrock /* 48301544Seschrock * If any async tasks have been requested, kick them off. 48311544Seschrock */ 48321544Seschrock spa_async_dispatch(spa); 4833789Sahrens } 4834789Sahrens 4835789Sahrens /* 4836789Sahrens * Sync all pools. We don't want to hold the namespace lock across these 4837789Sahrens * operations, so we take a reference on the spa_t and drop the lock during the 4838789Sahrens * sync. 
4839789Sahrens */ 4840789Sahrens void 4841789Sahrens spa_sync_allpools(void) 4842789Sahrens { 4843789Sahrens spa_t *spa = NULL; 4844789Sahrens mutex_enter(&spa_namespace_lock); 4845789Sahrens while ((spa = spa_next(spa)) != NULL) { 48467754SJeff.Bonwick@Sun.COM if (spa_state(spa) != POOL_STATE_ACTIVE || spa_suspended(spa)) 4847789Sahrens continue; 4848789Sahrens spa_open_ref(spa, FTAG); 4849789Sahrens mutex_exit(&spa_namespace_lock); 4850789Sahrens txg_wait_synced(spa_get_dsl(spa), 0); 4851789Sahrens mutex_enter(&spa_namespace_lock); 4852789Sahrens spa_close(spa, FTAG); 4853789Sahrens } 4854789Sahrens mutex_exit(&spa_namespace_lock); 4855789Sahrens } 4856789Sahrens 4857789Sahrens /* 4858789Sahrens * ========================================================================== 4859789Sahrens * Miscellaneous routines 4860789Sahrens * ========================================================================== 4861789Sahrens */ 4862789Sahrens 4863789Sahrens /* 4864789Sahrens * Remove all pools in the system. 4865789Sahrens */ 4866789Sahrens void 4867789Sahrens spa_evict_all(void) 4868789Sahrens { 4869789Sahrens spa_t *spa; 4870789Sahrens 4871789Sahrens /* 4872789Sahrens * Remove all cached state. All pools should be closed now, 4873789Sahrens * so every spa in the AVL tree should be unreferenced. 4874789Sahrens */ 4875789Sahrens mutex_enter(&spa_namespace_lock); 4876789Sahrens while ((spa = spa_next(NULL)) != NULL) { 4877789Sahrens /* 48781544Seschrock * Stop async tasks. The async thread may need to detach 48791544Seschrock * a device that's been replaced, which requires grabbing 48801544Seschrock * spa_namespace_lock, so we must drop it here. 
4881789Sahrens */ 4882789Sahrens spa_open_ref(spa, FTAG); 4883789Sahrens mutex_exit(&spa_namespace_lock); 48841544Seschrock spa_async_suspend(spa); 48854808Sek110237 mutex_enter(&spa_namespace_lock); 4886789Sahrens spa_close(spa, FTAG); 4887789Sahrens 4888789Sahrens if (spa->spa_state != POOL_STATE_UNINITIALIZED) { 4889789Sahrens spa_unload(spa); 4890789Sahrens spa_deactivate(spa); 4891789Sahrens } 4892789Sahrens spa_remove(spa); 4893789Sahrens } 4894789Sahrens mutex_exit(&spa_namespace_lock); 4895789Sahrens } 48961544Seschrock 48971544Seschrock vdev_t * 48989425SEric.Schrock@Sun.COM spa_lookup_by_guid(spa_t *spa, uint64_t guid, boolean_t aux) 48991544Seschrock { 49006643Seschrock vdev_t *vd; 49016643Seschrock int i; 49026643Seschrock 49036643Seschrock if ((vd = vdev_lookup_by_guid(spa->spa_root_vdev, guid)) != NULL) 49046643Seschrock return (vd); 49056643Seschrock 49069425SEric.Schrock@Sun.COM if (aux) { 49076643Seschrock for (i = 0; i < spa->spa_l2cache.sav_count; i++) { 49086643Seschrock vd = spa->spa_l2cache.sav_vdevs[i]; 49096643Seschrock if (vd->vdev_guid == guid) 49106643Seschrock return (vd); 49116643Seschrock } 49129425SEric.Schrock@Sun.COM 49139425SEric.Schrock@Sun.COM for (i = 0; i < spa->spa_spares.sav_count; i++) { 49149425SEric.Schrock@Sun.COM vd = spa->spa_spares.sav_vdevs[i]; 49159425SEric.Schrock@Sun.COM if (vd->vdev_guid == guid) 49169425SEric.Schrock@Sun.COM return (vd); 49179425SEric.Schrock@Sun.COM } 49186643Seschrock } 49196643Seschrock 49206643Seschrock return (NULL); 49211544Seschrock } 49221760Seschrock 49231760Seschrock void 49245094Slling spa_upgrade(spa_t *spa, uint64_t version) 49251760Seschrock { 49267754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 49271760Seschrock 49281760Seschrock /* 49291760Seschrock * This should only be called for a non-faulted pool, and since a 49301760Seschrock * future version would result in an unopenable pool, this shouldn't be 49311760Seschrock * possible. 
49321760Seschrock */ 49334577Sahrens ASSERT(spa->spa_uberblock.ub_version <= SPA_VERSION); 49345094Slling ASSERT(version >= spa->spa_uberblock.ub_version); 49355094Slling 49365094Slling spa->spa_uberblock.ub_version = version; 49371760Seschrock vdev_config_dirty(spa->spa_root_vdev); 49381760Seschrock 49397754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 49402082Seschrock 49412082Seschrock txg_wait_synced(spa_get_dsl(spa), 0); 49421760Seschrock } 49432082Seschrock 49442082Seschrock boolean_t 49452082Seschrock spa_has_spare(spa_t *spa, uint64_t guid) 49462082Seschrock { 49472082Seschrock int i; 49483377Seschrock uint64_t spareguid; 49495450Sbrendan spa_aux_vdev_t *sav = &spa->spa_spares; 49505450Sbrendan 49515450Sbrendan for (i = 0; i < sav->sav_count; i++) 49525450Sbrendan if (sav->sav_vdevs[i]->vdev_guid == guid) 49532082Seschrock return (B_TRUE); 49542082Seschrock 49555450Sbrendan for (i = 0; i < sav->sav_npending; i++) { 49565450Sbrendan if (nvlist_lookup_uint64(sav->sav_pending[i], ZPOOL_CONFIG_GUID, 49575450Sbrendan &spareguid) == 0 && spareguid == guid) 49583377Seschrock return (B_TRUE); 49593377Seschrock } 49603377Seschrock 49612082Seschrock return (B_FALSE); 49622082Seschrock } 49633912Slling 49644451Seschrock /* 49657214Slling * Check if a pool has an active shared spare device. 
49667214Slling * Note: reference count of an active spare is 2, as a spare and as a replace 49677214Slling */ 49687214Slling static boolean_t 49697214Slling spa_has_active_shared_spare(spa_t *spa) 49707214Slling { 49717214Slling int i, refcnt; 49727214Slling uint64_t pool; 49737214Slling spa_aux_vdev_t *sav = &spa->spa_spares; 49747214Slling 49757214Slling for (i = 0; i < sav->sav_count; i++) { 49767214Slling if (spa_spare_exists(sav->sav_vdevs[i]->vdev_guid, &pool, 49777214Slling &refcnt) && pool != 0ULL && pool == spa_guid(spa) && 49787214Slling refcnt > 2) 49797214Slling return (B_TRUE); 49807214Slling } 49817214Slling 49827214Slling return (B_FALSE); 49837214Slling } 49847214Slling 49857214Slling /* 49864451Seschrock * Post a sysevent corresponding to the given event. The 'name' must be one of 49874451Seschrock * the event definitions in sys/sysevent/eventdefs.h. The payload will be 49884451Seschrock * filled in from the spa and (optionally) the vdev. This doesn't do anything 49894451Seschrock * in the userland libzpool, as we don't want consumers to misinterpret ztest 49904451Seschrock * or zdb as real changes. 
49914451Seschrock */ 49924451Seschrock void 49934451Seschrock spa_event_notify(spa_t *spa, vdev_t *vd, const char *name) 49944451Seschrock { 49954451Seschrock #ifdef _KERNEL 49964451Seschrock sysevent_t *ev; 49974451Seschrock sysevent_attr_list_t *attr = NULL; 49984451Seschrock sysevent_value_t value; 49994451Seschrock sysevent_id_t eid; 50004451Seschrock 50014451Seschrock ev = sysevent_alloc(EC_ZFS, (char *)name, SUNW_KERN_PUB "zfs", 50024451Seschrock SE_SLEEP); 50034451Seschrock 50044451Seschrock value.value_type = SE_DATA_TYPE_STRING; 50054451Seschrock value.value.sv_string = spa_name(spa); 50064451Seschrock if (sysevent_add_attr(&attr, ZFS_EV_POOL_NAME, &value, SE_SLEEP) != 0) 50074451Seschrock goto done; 50084451Seschrock 50094451Seschrock value.value_type = SE_DATA_TYPE_UINT64; 50104451Seschrock value.value.sv_uint64 = spa_guid(spa); 50114451Seschrock if (sysevent_add_attr(&attr, ZFS_EV_POOL_GUID, &value, SE_SLEEP) != 0) 50124451Seschrock goto done; 50134451Seschrock 50144451Seschrock if (vd) { 50154451Seschrock value.value_type = SE_DATA_TYPE_UINT64; 50164451Seschrock value.value.sv_uint64 = vd->vdev_guid; 50174451Seschrock if (sysevent_add_attr(&attr, ZFS_EV_VDEV_GUID, &value, 50184451Seschrock SE_SLEEP) != 0) 50194451Seschrock goto done; 50204451Seschrock 50214451Seschrock if (vd->vdev_path) { 50224451Seschrock value.value_type = SE_DATA_TYPE_STRING; 50234451Seschrock value.value.sv_string = vd->vdev_path; 50244451Seschrock if (sysevent_add_attr(&attr, ZFS_EV_VDEV_PATH, 50254451Seschrock &value, SE_SLEEP) != 0) 50264451Seschrock goto done; 50274451Seschrock } 50284451Seschrock } 50294451Seschrock 50305756Seschrock if (sysevent_attach_attributes(ev, attr) != 0) 50315756Seschrock goto done; 50325756Seschrock attr = NULL; 50335756Seschrock 50344451Seschrock (void) log_sysevent(ev, SE_SLEEP, &eid); 50354451Seschrock 50364451Seschrock done: 50374451Seschrock if (attr) 50384451Seschrock sysevent_free_attr(attr); 50394451Seschrock sysevent_free(ev); 
50404451Seschrock #endif 50414451Seschrock } 5042