1789Sahrens /* 2789Sahrens * CDDL HEADER START 3789Sahrens * 4789Sahrens * The contents of this file are subject to the terms of the 51544Seschrock * Common Development and Distribution License (the "License"). 61544Seschrock * You may not use this file except in compliance with the License. 7789Sahrens * 8789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9789Sahrens * or http://www.opensolaris.org/os/licensing. 10789Sahrens * See the License for the specific language governing permissions 11789Sahrens * and limitations under the License. 12789Sahrens * 13789Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15789Sahrens * If applicable, add the following below this CDDL HEADER, with the 16789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18789Sahrens * 19789Sahrens * CDDL HEADER END 20789Sahrens */ 212082Seschrock 22789Sahrens /* 238525SEric.Schrock@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24789Sahrens * Use is subject to license terms. 25789Sahrens */ 26789Sahrens 27789Sahrens /* 28789Sahrens * This file contains all the routines used when modifying on-disk SPA state. 29789Sahrens * This includes opening, importing, destroying, exporting a pool, and syncing a 30789Sahrens * pool. 
 */

#include <sys/zfs_context.h>
#include <sys/fm/fs/zfs.h>
#include <sys/spa_impl.h>
#include <sys/zio.h>
#include <sys/zio_checksum.h>
#include <sys/zio_compress.h>
#include <sys/dmu.h>
#include <sys/dmu_tx.h>
#include <sys/zap.h>
#include <sys/zil.h>
#include <sys/vdev_impl.h>
#include <sys/metaslab.h>
#include <sys/uberblock_impl.h>
#include <sys/txg.h>
#include <sys/avl.h>
#include <sys/dmu_traverse.h>
#include <sys/dmu_objset.h>
#include <sys/unique.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_synctask.h>
#include <sys/fs/zfs.h>
#include <sys/arc.h>
#include <sys/callb.h>
#include <sys/systeminfo.h>
#include <sys/sunddi.h>
#include <sys/spa_boot.h>
#include <sys/zfs_ioctl.h>

#ifdef	_KERNEL
#include <sys/zone.h>
#endif	/* _KERNEL */

#include "zfs_prop.h"
#include "zfs_comutil.h"

/*
 * Ways of sizing a zio taskq's thread count.  zti_mode_tune entries are
 * resolved at spa_activate() time from the zio_taskq_tune_* globals below.
 */
enum zti_modes {
	zti_mode_fixed,			/* value is # of threads (min 1) */
	zti_mode_online_percent,	/* value is % of online CPUs */
	zti_mode_tune,			/* fill from zio_taskq_tune_* */
	zti_nmodes
};

#define	ZTI_THREAD_FIX(n)	{ zti_mode_fixed, (n) }
#define	ZTI_THREAD_PCT(n)	{ zti_mode_online_percent, (n) }
#define	ZTI_THREAD_TUNE		{ zti_mode_tune, 0 }

#define	ZTI_THREAD_ONE		ZTI_THREAD_FIX(1)

/*
 * Per-zio-type taskq configuration: a name prefix plus one (mode, value)
 * pair for each taskq type (issue, intr).
 */
typedef struct zio_taskq_info {
	const char *zti_name;
	struct {
		enum zti_modes zti_mode;
		uint_t zti_value;
	} zti_nthreads[ZIO_TASKQ_TYPES];
} zio_taskq_info_t;

/* Suffixes appended to zti_name when the taskqs are created. */
static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = {
	"issue", "intr"
};

/*
 * Thread-count table indexed by zio type; rows must stay in ZIO_TYPES
 * enum order.
 */
const zio_taskq_info_t zio_taskqs[ZIO_TYPES] = {
	/*			ISSUE			INTR		*/
	{ "spa_zio_null",	{ ZTI_THREAD_ONE,	ZTI_THREAD_ONE } },
	{ "spa_zio_read",	{ ZTI_THREAD_FIX(8),	ZTI_THREAD_TUNE } },
	{ "spa_zio_write",	{ ZTI_THREAD_TUNE,	ZTI_THREAD_FIX(8) } },
	{ "spa_zio_free",	{ ZTI_THREAD_ONE,	ZTI_THREAD_ONE } },
	{ "spa_zio_claim",	{ ZTI_THREAD_ONE,	ZTI_THREAD_ONE } },
	{ "spa_zio_ioctl",	{ ZTI_THREAD_ONE,	ZTI_THREAD_ONE } },
};

/* Tunables backing ZTI_THREAD_TUNE entries; overridable at boot. */
enum zti_modes zio_taskq_tune_mode = zti_mode_online_percent;
uint_t zio_taskq_tune_value = 80;	/* #threads = 80% of # online CPUs */

static void spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx);
static boolean_t spa_has_active_shared_spare(spa_t *spa);

/* 1135094Slling * ========================================================================== 1145094Slling * SPA properties routines 1155094Slling * ========================================================================== 1165094Slling */ 1175094Slling 1185094Slling /* 1195094Slling * Add a (source=src, propname=propval) list to an nvlist. 1205094Slling */ 1215949Slling static void 1225094Slling spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval, 1235094Slling uint64_t intval, zprop_source_t src) 1245094Slling { 1255094Slling const char *propname = zpool_prop_to_name(prop); 1265094Slling nvlist_t *propval; 1275949Slling 1285949Slling VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); 1295949Slling VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0); 1305949Slling 1315949Slling if (strval != NULL) 1325949Slling VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0); 1335949Slling else 1345949Slling VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0); 1355949Slling 1365949Slling VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0); 1375094Slling nvlist_free(propval); 1385094Slling } 1395094Slling 1405094Slling /* 1415094Slling * Get property values from the spa configuration. 
1425094Slling */ 1435949Slling static void 1445094Slling spa_prop_get_config(spa_t *spa, nvlist_t **nvp) 1455094Slling { 1468525SEric.Schrock@Sun.COM uint64_t size; 1478525SEric.Schrock@Sun.COM uint64_t used; 1485094Slling uint64_t cap, version; 1495094Slling zprop_source_t src = ZPROP_SRC_NONE; 1506643Seschrock spa_config_dirent_t *dp; 1515094Slling 1527754SJeff.Bonwick@Sun.COM ASSERT(MUTEX_HELD(&spa->spa_props_lock)); 1537754SJeff.Bonwick@Sun.COM 1548525SEric.Schrock@Sun.COM if (spa->spa_root_vdev != NULL) { 1558525SEric.Schrock@Sun.COM size = spa_get_space(spa); 1568525SEric.Schrock@Sun.COM used = spa_get_alloc(spa); 1578525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src); 1588525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src); 1598525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_USED, NULL, used, src); 1608525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_AVAILABLE, NULL, 1618525SEric.Schrock@Sun.COM size - used, src); 1628525SEric.Schrock@Sun.COM 1638525SEric.Schrock@Sun.COM cap = (size == 0) ? 
0 : (used * 100 / size); 1648525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src); 1658525SEric.Schrock@Sun.COM 1668525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL, 1678525SEric.Schrock@Sun.COM spa->spa_root_vdev->vdev_state, src); 1688525SEric.Schrock@Sun.COM 1698525SEric.Schrock@Sun.COM version = spa_version(spa); 1708525SEric.Schrock@Sun.COM if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) 1718525SEric.Schrock@Sun.COM src = ZPROP_SRC_DEFAULT; 1728525SEric.Schrock@Sun.COM else 1738525SEric.Schrock@Sun.COM src = ZPROP_SRC_LOCAL; 1748525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src); 1758525SEric.Schrock@Sun.COM } 1765949Slling 1775949Slling spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src); 1785949Slling 1795949Slling if (spa->spa_root != NULL) 1805949Slling spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root, 1815949Slling 0, ZPROP_SRC_LOCAL); 1825094Slling 1836643Seschrock if ((dp = list_head(&spa->spa_config_list)) != NULL) { 1846643Seschrock if (dp->scd_path == NULL) { 1855949Slling spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 1866643Seschrock "none", 0, ZPROP_SRC_LOCAL); 1876643Seschrock } else if (strcmp(dp->scd_path, spa_config_path) != 0) { 1885949Slling spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 1896643Seschrock dp->scd_path, 0, ZPROP_SRC_LOCAL); 1905363Seschrock } 1915363Seschrock } 1925094Slling } 1935094Slling 1945094Slling /* 1955094Slling * Get zpool property values. 
 */
int
spa_prop_get(spa_t *spa, nvlist_t **nvp)
{
	zap_cursor_t zc;
	zap_attribute_t za;
	objset_t *mos = spa->spa_meta_objset;
	int err;

	/* Caller owns *nvp on success; we free it on failure below. */
	VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);

	mutex_enter(&spa->spa_props_lock);

	/*
	 * Get properties from the spa config.
	 */
	spa_prop_get_config(spa, nvp);

	/* If no pool property object, no more prop to get. */
	if (spa->spa_pool_props_object == 0) {
		mutex_exit(&spa->spa_props_lock);
		return (0);
	}

	/*
	 * Get properties from the MOS pool property object.
	 *
	 * NOTE(review): errors raised inside the switch (e.g. a failed
	 * dataset hold or zap_lookup) break out of the switch only; the
	 * cursor loop continues and 'err' is overwritten by the next
	 * zap_cursor_retrieve(), so such entries are silently skipped.
	 * Confirm this best-effort behavior is intended.
	 */
	for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object);
	    (err = zap_cursor_retrieve(&zc, &za)) == 0;
	    zap_cursor_advance(&zc)) {
		uint64_t intval = 0;
		char *strval = NULL;
		zprop_source_t src = ZPROP_SRC_DEFAULT;
		zpool_prop_t prop;

		/* Skip ZAP entries that are not known pool properties. */
		if ((prop = zpool_name_to_prop(za.za_name)) == ZPROP_INVAL)
			continue;

		switch (za.za_integer_length) {
		case 8:
			/* integer property */
			if (za.za_first_integer !=
			    zpool_prop_default_numeric(prop))
				src = ZPROP_SRC_LOCAL;

			if (prop == ZPOOL_PROP_BOOTFS) {
				/*
				 * bootfs is stored as an object number;
				 * translate it back to a dataset name.
				 */
				dsl_pool_t *dp;
				dsl_dataset_t *ds = NULL;

				dp = spa_get_dsl(spa);
				rw_enter(&dp->dp_config_rwlock, RW_READER);
				if (err = dsl_dataset_hold_obj(dp,
				    za.za_first_integer, FTAG, &ds)) {
					rw_exit(&dp->dp_config_rwlock);
					break;
				}

				strval = kmem_alloc(
				    MAXNAMELEN + strlen(MOS_DIR_NAME) + 1,
				    KM_SLEEP);
				dsl_dataset_name(ds, strval);
				dsl_dataset_rele(ds, FTAG);
				rw_exit(&dp->dp_config_rwlock);
			} else {
				strval = NULL;
				intval = za.za_first_integer;
			}

			spa_prop_add_list(*nvp, prop, strval, intval, src);

			if (strval != NULL)
				kmem_free(strval,
				    MAXNAMELEN + strlen(MOS_DIR_NAME) + 1);

			break;

		case 1:
			/* string property */
			strval = kmem_alloc(za.za_num_integers, KM_SLEEP);
			err = zap_lookup(mos, spa->spa_pool_props_object,
			    za.za_name, 1, za.za_num_integers, strval);
			if (err) {
				kmem_free(strval, za.za_num_integers);
				break;
			}
			spa_prop_add_list(*nvp, prop, strval, 0, src);
			kmem_free(strval, za.za_num_integers);
			break;

		default:
			break;
		}
	}
	zap_cursor_fini(&zc);
	mutex_exit(&spa->spa_props_lock);
	/* NOTE(review): no goto targets 'out' here; label looks vestigial. */
out:
	/* ENOENT simply means the cursor ran off the end of the object. */
	if (err && err != ENOENT) {
		nvlist_free(*nvp);
		*nvp = NULL;
		return (err);
	}

	return (0);
}

/*
 * Validate the given pool properties nvlist and modify the list
 * for the property values to be set.
 */
static int
spa_prop_validate(spa_t *spa, nvlist_t *props)
{
	nvpair_t *elem;
	int error = 0, reset_bootfs = 0;
	uint64_t objnum;

	elem = NULL;
	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
		zpool_prop_t prop;
		char *propname, *strval;
		uint64_t intval;
		objset_t *os;
		char *slash;

		propname = nvpair_name(elem);

		if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL)
			return (EINVAL);

		switch (prop) {
		case ZPOOL_PROP_VERSION:
			/* Version may only move forward, up to SPA_VERSION. */
			error = nvpair_value_uint64(elem, &intval);
			if (!error &&
			    (intval < spa_version(spa) || intval > SPA_VERSION))
				error = EINVAL;
			break;

		case ZPOOL_PROP_DELEGATION:
		case ZPOOL_PROP_AUTOREPLACE:
		case ZPOOL_PROP_LISTSNAPS:
		case ZPOOL_PROP_AUTOEXPAND:
			/* Boolean properties: only 0 or 1 are valid. */
			error = nvpair_value_uint64(elem, &intval);
			if (!error && intval > 1)
				error = EINVAL;
			break;

		case ZPOOL_PROP_BOOTFS:
			/*
			 * If the pool version is less than SPA_VERSION_BOOTFS,
			 * or the pool is still being created (version == 0),
			 * the bootfs property cannot be set.
			 */
			if (spa_version(spa) < SPA_VERSION_BOOTFS) {
				error = ENOTSUP;
				break;
			}

			/*
			 * Make sure the vdev config is bootable
			 */
			if (!vdev_is_bootable(spa->spa_root_vdev)) {
				error = ENOTSUP;
				break;
			}

			/*
			 * The name is replaced below with the dataset's
			 * object number (see the end of this function).
			 */
			reset_bootfs = 1;

			error = nvpair_value_string(elem, &strval);

			if (!error) {
				uint64_t compress;

				/* An empty name clears bootfs to default. */
				if (strval == NULL || strval[0] == '\0') {
					objnum = zpool_prop_default_numeric(
					    ZPOOL_PROP_BOOTFS);
					break;
				}

				if (error = dmu_objset_hold(strval, FTAG, &os))
					break;

				/* Must be ZPL and not gzip compressed. */

				if (dmu_objset_type(os) != DMU_OST_ZFS) {
					error = ENOTSUP;
				} else if ((error = dsl_prop_get_integer(strval,
				    zfs_prop_to_name(ZFS_PROP_COMPRESSION),
				    &compress, NULL)) == 0 &&
				    !BOOTFS_COMPRESS_VALID(compress)) {
					error = ENOTSUP;
				} else {
					objnum = dmu_objset_id(os);
				}
				dmu_objset_rele(os, FTAG);
			}
			break;

		case ZPOOL_PROP_FAILUREMODE:
			error = nvpair_value_uint64(elem, &intval);
			if (!error && (intval < ZIO_FAILURE_MODE_WAIT ||
			    intval > ZIO_FAILURE_MODE_PANIC))
				error = EINVAL;

			/*
			 * This is a special case which only occurs when
			 * the pool has completely failed. This allows
			 * the user to change the in-core failmode property
			 * without syncing it out to disk (I/Os might
			 * currently be blocked). We do this by returning
			 * EIO to the caller (spa_prop_set) to trick it
			 * into thinking we encountered a property validation
			 * error.
			 */
			if (!error && spa_suspended(spa)) {
				spa->spa_failmode = intval;
				error = EIO;
			}
			break;

		case ZPOOL_PROP_CACHEFILE:
			if ((error = nvpair_value_string(elem, &strval)) != 0)
				break;

			/* Empty string and "none" are always acceptable. */
			if (strval[0] == '\0')
				break;

			if (strcmp(strval, "none") == 0)
				break;

			/* Otherwise it must be an absolute path ... */
			if (strval[0] != '/') {
				error = EINVAL;
				break;
			}

			/* ... whose final component is a real file name. */
			slash = strrchr(strval, '/');
			ASSERT(slash != NULL);

			if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
			    strcmp(slash, "/..") == 0)
				error = EINVAL;
			break;
		}

		if (error)
			break;
	}

	/*
	 * Replace the bootfs name pair with the resolved object number so
	 * the sync task stores a stable identifier, not a name.
	 */
	if (!error && reset_bootfs) {
		error = nvlist_remove(props,
		    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING);

		if (!error) {
			error = nvlist_add_uint64(props,
			    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum);
		}
	}

	return (error);
}

/*
 * Apply a cachefile property setting from 'nvp' to the in-core config
 * dirent list; optionally schedule an async config update to sync it.
 */
void
spa_configfile_set(spa_t *spa, nvlist_t *nvp, boolean_t need_sync)
{
	char *cachefile;
	spa_config_dirent_t *dp;

	if (nvlist_lookup_string(nvp, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE),
	    &cachefile) != 0)
		return;

	dp = kmem_alloc(sizeof (spa_config_dirent_t),
	    KM_SLEEP);

	/* "" means the default cache file; "none" disables caching. */
	if (cachefile[0] == '\0')
		dp->scd_path = spa_strdup(spa_config_path);
	else if (strcmp(cachefile, "none") == 0)
		dp->scd_path = NULL;
	else
		dp->scd_path = spa_strdup(cachefile);

	list_insert_head(&spa->spa_config_list, dp);
	if (need_sync)
		spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
}

/*
 * Validate and set pool properties.  Properties that only affect in-core
 * state (cachefile, altroot) need no sync task; anything else is pushed
 * through a DSL sync task (spa_sync_props).
 */
int
spa_prop_set(spa_t *spa, nvlist_t *nvp)
{
	int error;
	nvpair_t *elem;
	boolean_t need_sync = B_FALSE;
	zpool_prop_t prop;

	if ((error = spa_prop_validate(spa, nvp)) != 0)
		return (error);

	/* Decide whether any property actually requires an on-disk sync. */
	elem = NULL;
	while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) {
		if ((prop = zpool_name_to_prop(
		    nvpair_name(elem))) == ZPROP_INVAL)
			return (EINVAL);

		if (prop == ZPOOL_PROP_CACHEFILE || prop == ZPOOL_PROP_ALTROOT)
			continue;

		need_sync = B_TRUE;
		break;
	}

	if (need_sync)
		return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props,
		    spa, nvp, 3));
	else
		return (0);
}

/*
 * If the bootfs property value is dsobj, clear it.
 */
void
spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx)
{
	if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) {
		VERIFY(zap_remove(spa->spa_meta_objset,
		    spa->spa_pool_props_object,
		    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0);
		spa->spa_bootfs = 0;
	}
}

/*
 * ==========================================================================
 * SPA state manipulation (open/create/destroy/import/export)
 * ==========================================================================
 */

/*
 * AVL comparator for error-log entries, ordered by raw bookmark bytes.
 */
static int
spa_error_entry_compare(const void *a, const void *b)
{
	spa_error_entry_t *sa = (spa_error_entry_t *)a;
	spa_error_entry_t *sb = (spa_error_entry_t *)b;
	int ret;

	ret = bcmp(&sa->se_bookmark, &sb->se_bookmark,
	    sizeof (zbookmark_t));

	if (ret < 0)
		return (-1);
	else if (ret > 0)
		return (1);
	else
		return (0);
}

/*
 * Utility function which retrieves
copies of the current logs and
 * re-initializes them in the process.
 */
void
spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub)
{
	ASSERT(MUTEX_HELD(&spa->spa_errlist_lock));

	/* Hand the live trees to the caller by structure copy ... */
	bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t));
	bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t));

	/* ... then start the in-core lists over from empty. */
	avl_create(&spa->spa_errlist_scrub,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
	avl_create(&spa->spa_errlist_last,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
}

/*
 * Activate an uninitialized pool.
 */
static void
spa_activate(spa_t *spa, int mode)
{
	ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);

	spa->spa_state = POOL_STATE_ACTIVE;
	spa->spa_mode = mode;

	spa->spa_normal_class = metaslab_class_create(zfs_metaslab_ops);
	spa->spa_log_class = metaslab_class_create(zfs_metaslab_ops);

	/*
	 * Create the issue/intr taskq pair for every zio type, sized per
	 * the zio_taskqs table.
	 */
	for (int t = 0; t < ZIO_TYPES; t++) {
		const zio_taskq_info_t *ztip = &zio_taskqs[t];
		for (int q = 0; q < ZIO_TASKQ_TYPES; q++) {
			enum zti_modes mode = ztip->zti_nthreads[q].zti_mode;
			uint_t value = ztip->zti_nthreads[q].zti_value;
			char name[32];

			(void) snprintf(name, sizeof (name),
			    "%s_%s", ztip->zti_name, zio_taskq_types[q]);

			/*
			 * Resolve "tune" entries from the globals; guard
			 * against the tunable itself being set to tune.
			 */
			if (mode == zti_mode_tune) {
				mode = zio_taskq_tune_mode;
				value = zio_taskq_tune_value;
				if (mode == zti_mode_tune)
					mode = zti_mode_online_percent;
			}

			switch (mode) {
			case zti_mode_fixed:
				ASSERT3U(value, >=, 1);
				value = MAX(value, 1);

				spa->spa_zio_taskq[t][q] = taskq_create(name,
				    value, maxclsyspri, 50, INT_MAX,
				    TASKQ_PREPOPULATE);
				break;

			case zti_mode_online_percent:
				spa->spa_zio_taskq[t][q] = taskq_create(name,
				    value, maxclsyspri, 50, INT_MAX,
				    TASKQ_PREPOPULATE | TASKQ_THREADS_CPU_PCT);
				break;

			case zti_mode_tune:
			default:
				panic("unrecognized mode for "
				    "zio_taskqs[%u]->zti_nthreads[%u] (%u:%u) "
				    "in spa_activate()",
				    t, q, mode, value);
				break;
			}
		}
	}

	list_create(&spa->spa_config_dirty_list, sizeof (vdev_t),
	    offsetof(vdev_t, vdev_config_dirty_node));
	list_create(&spa->spa_state_dirty_list, sizeof (vdev_t),
	    offsetof(vdev_t, vdev_state_dirty_node));

	txg_list_create(&spa->spa_vdev_txg_list,
	    offsetof(struct vdev, vdev_txg_node));

	avl_create(&spa->spa_errlist_scrub,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
	avl_create(&spa->spa_errlist_last,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
}

/*
 * Opposite of spa_activate().
 */
static void
spa_deactivate(spa_t *spa)
{
	ASSERT(spa->spa_sync_on == B_FALSE);
	ASSERT(spa->spa_dsl_pool == NULL);
	ASSERT(spa->spa_root_vdev == NULL);
	ASSERT(spa->spa_async_zio_root == NULL);
	ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED);

	txg_list_destroy(&spa->spa_vdev_txg_list);

	list_destroy(&spa->spa_config_dirty_list);
	list_destroy(&spa->spa_state_dirty_list);

	/* Tear down the taskqs created in spa_activate(). */
	for (int t = 0; t < ZIO_TYPES; t++) {
		for (int q = 0; q < ZIO_TASKQ_TYPES; q++) {
			taskq_destroy(spa->spa_zio_taskq[t][q]);
			spa->spa_zio_taskq[t][q] = NULL;
		}
	}

	metaslab_class_destroy(spa->spa_normal_class);
	spa->spa_normal_class = NULL;

	metaslab_class_destroy(spa->spa_log_class);
	spa->spa_log_class = NULL;

	/*
	 * If this was part of an import or the
open otherwise failed, we may
	 * still have errors left in the queues.  Empty them just in case.
	 */
	spa_errlog_drain(spa);

	avl_destroy(&spa->spa_errlist_scrub);
	avl_destroy(&spa->spa_errlist_last);

	spa->spa_state = POOL_STATE_UNINITIALIZED;
}

/*
 * Verify a pool configuration, and construct the vdev tree appropriately.  This
 * will create all the necessary vdevs in the appropriate layout, with each vdev
 * in the CLOSED state.  This will prep the pool before open/creation/import.
 * All vdev validation is done by the vdev_alloc() routine.
 */
static int
spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent,
    uint_t id, int atype)
{
	nvlist_t **child;
	uint_t children;
	int error;

	if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0)
		return (error);

	/* Leaf vdevs have no children; nothing further to parse. */
	if ((*vdp)->vdev_ops->vdev_op_leaf)
		return (0);

	error = nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &child, &children);

	/* An interior vdev with no children array is acceptable. */
	if (error == ENOENT)
		return (0);

	if (error) {
		vdev_free(*vdp);
		*vdp = NULL;
		return (EINVAL);
	}

	/*
	 * Recursively parse each child; on any failure free the partial
	 * tree (vdev_free() tears down children already attached).
	 */
	for (int c = 0; c < children; c++) {
		vdev_t *vd;
		if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c,
		    atype)) != 0) {
			vdev_free(*vdp);
			*vdp = NULL;
			return (error);
		}
	}

	ASSERT(*vdp != NULL);

	return (0);
}

/*
 * Opposite of spa_load().
 */
static void
spa_unload(spa_t *spa)
{
	int i;

	ASSERT(MUTEX_HELD(&spa_namespace_lock));

	/*
	 * Stop async tasks.
	 */
	spa_async_suspend(spa);

	/*
	 * Stop syncing.
	 */
	if (spa->spa_sync_on) {
		txg_sync_stop(spa->spa_dsl_pool);
		spa->spa_sync_on = B_FALSE;
	}

	/*
	 * Wait for any outstanding async I/O to complete.
	 */
	if (spa->spa_async_zio_root != NULL) {
		(void) zio_wait(spa->spa_async_zio_root);
		spa->spa_async_zio_root = NULL;
	}

	/*
	 * Close the dsl pool.
	 */
	if (spa->spa_dsl_pool) {
		dsl_pool_close(spa->spa_dsl_pool);
		spa->spa_dsl_pool = NULL;
	}

	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	/*
	 * Drop and purge level 2 cache
	 */
	spa_l2cache_drop(spa);

	/*
	 * Close all vdevs.
781789Sahrens */ 7821585Sbonwick if (spa->spa_root_vdev) 783789Sahrens vdev_free(spa->spa_root_vdev); 7841585Sbonwick ASSERT(spa->spa_root_vdev == NULL); 7851544Seschrock 7865450Sbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) 7875450Sbrendan vdev_free(spa->spa_spares.sav_vdevs[i]); 7885450Sbrendan if (spa->spa_spares.sav_vdevs) { 7895450Sbrendan kmem_free(spa->spa_spares.sav_vdevs, 7905450Sbrendan spa->spa_spares.sav_count * sizeof (void *)); 7915450Sbrendan spa->spa_spares.sav_vdevs = NULL; 7925450Sbrendan } 7935450Sbrendan if (spa->spa_spares.sav_config) { 7945450Sbrendan nvlist_free(spa->spa_spares.sav_config); 7955450Sbrendan spa->spa_spares.sav_config = NULL; 7962082Seschrock } 7977377SEric.Schrock@Sun.COM spa->spa_spares.sav_count = 0; 7985450Sbrendan 7995450Sbrendan for (i = 0; i < spa->spa_l2cache.sav_count; i++) 8005450Sbrendan vdev_free(spa->spa_l2cache.sav_vdevs[i]); 8015450Sbrendan if (spa->spa_l2cache.sav_vdevs) { 8025450Sbrendan kmem_free(spa->spa_l2cache.sav_vdevs, 8035450Sbrendan spa->spa_l2cache.sav_count * sizeof (void *)); 8045450Sbrendan spa->spa_l2cache.sav_vdevs = NULL; 8055450Sbrendan } 8065450Sbrendan if (spa->spa_l2cache.sav_config) { 8075450Sbrendan nvlist_free(spa->spa_l2cache.sav_config); 8085450Sbrendan spa->spa_l2cache.sav_config = NULL; 8092082Seschrock } 8107377SEric.Schrock@Sun.COM spa->spa_l2cache.sav_count = 0; 8112082Seschrock 8121544Seschrock spa->spa_async_suspended = 0; 8138241SJeff.Bonwick@Sun.COM 8148241SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 815789Sahrens } 816789Sahrens 817789Sahrens /* 8182082Seschrock * Load (or re-load) the current list of vdevs describing the active spares for 8192082Seschrock * this pool. When this is called, we have some form of basic information in 8205450Sbrendan * 'spa_spares.sav_config'. We parse this into vdevs, try to open them, and 8215450Sbrendan * then re-generate a more complete list including status information. 
 */
static void
spa_load_spares(spa_t *spa)
{
	nvlist_t **spares;
	uint_t nspares;
	int i;
	vdev_t *vd, *tvd;

	/* All config locks must already be held as writer by the caller. */
	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);

	/*
	 * First, close and free any existing spare vdevs.
	 */
	for (i = 0; i < spa->spa_spares.sav_count; i++) {
		vd = spa->spa_spares.sav_vdevs[i];

		/* Undo the call to spa_spare_activate() below */
		if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid,
		    B_FALSE)) != NULL && tvd->vdev_isspare)
			spa_spare_remove(tvd);
		vdev_close(vd);
		vdev_free(vd);
	}

	if (spa->spa_spares.sav_vdevs)
		kmem_free(spa->spa_spares.sav_vdevs,
		    spa->spa_spares.sav_count * sizeof (void *));

	if (spa->spa_spares.sav_config == NULL)
		nspares = 0;
	else
		VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
		    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);

	spa->spa_spares.sav_count = (int)nspares;
	spa->spa_spares.sav_vdevs = NULL;

	if (nspares == 0)
		return;

	/*
	 * Construct the array of vdevs, opening them to get status in the
	 * process.  For each spare, there are potentially two different vdev_t
	 * structures associated with it: one in the list of spares (used only
	 * for basic validation purposes) and one in the active vdev
	 * configuration (if it's spared in).  During this phase we open and
	 * validate each vdev on the spare list.  If the vdev also exists in
	 * the active configuration, then we also mark this vdev as an active
	 * spare.
	 */
	spa->spa_spares.sav_vdevs = kmem_alloc(nspares * sizeof (void *),
	    KM_SLEEP);
	for (i = 0; i < spa->spa_spares.sav_count; i++) {
		VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0,
		    VDEV_ALLOC_SPARE) == 0);
		ASSERT(vd != NULL);

		spa->spa_spares.sav_vdevs[i] = vd;

		if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid,
		    B_FALSE)) != NULL) {
			if (!tvd->vdev_isspare)
				spa_spare_add(tvd);

			/*
			 * We only mark the spare active if we were
			 * successfully able to load the vdev.  Otherwise,
			 * importing a pool with a bad active spare would
			 * result in strange behavior, because multiple pools
			 * would think the spare is actively in use.
			 *
			 * There is a vulnerability here to an equally bizarre
			 * circumstance, where a dead active spare is later
			 * brought back to life (onlined or otherwise).  Given
			 * the rarity of this scenario, and the extra
			 * complexity it adds, we ignore the possibility.
			 */
			if (!vdev_is_dead(tvd))
				spa_spare_activate(tvd);
		}

		/* Spares are their own top-level vdevs in the aux tree. */
		vd->vdev_top = vd;
		vd->vdev_aux = &spa->spa_spares;

		if (vdev_open(vd) != 0)
			continue;

		if (vdev_validate_aux(vd) == 0)
			spa_spare_add(vd);
	}

	/*
	 * Recompute the stashed list of spares, with status information
	 * this time.
	 */
	VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES,
	    DATA_TYPE_NVLIST_ARRAY) == 0);

	spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *),
	    KM_SLEEP);
	for (i = 0; i < spa->spa_spares.sav_count; i++)
		spares[i] = vdev_config_generate(spa,
		    spa->spa_spares.sav_vdevs[i], B_TRUE, B_TRUE, B_FALSE);
	VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
	    ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0);
	/* nvlist_add_nvlist_array() copies, so free our generated nvlists. */
	for (i = 0; i < spa->spa_spares.sav_count; i++)
		nvlist_free(spares[i]);
	kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *));
}

/*
 * Load (or re-load) the current list of vdevs describing the active l2cache for
 * this pool.  When this is called, we have some form of basic information in
 * 'spa_l2cache.sav_config'.  We parse this into vdevs, try to open them, and
 * then re-generate a more complete list including status information.
 * Devices which are already active have their details maintained, and are
 * not re-opened.
 */
static void
spa_load_l2cache(spa_t *spa)
{
	nvlist_t **l2cache;
	uint_t nl2cache;
	int i, j, oldnvdevs;
	uint64_t guid;
	vdev_t *vd, **oldvdevs, **newvdevs;
	spa_aux_vdev_t *sav = &spa->spa_l2cache;

	/* All config locks must already be held as writer by the caller. */
	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);

	if (sav->sav_config != NULL) {
		VERIFY(nvlist_lookup_nvlist_array(sav->sav_config,
		    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
		newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP);
	} else {
		/*
		 * No config: nl2cache stays 0 and both l2cache and newvdevs
		 * remain uninitialized; the code below must not touch them
		 * in that case (the sav_config == NULL check handles this).
		 */
		nl2cache = 0;
	}

	oldvdevs = sav->sav_vdevs;
	oldnvdevs = sav->sav_count;
	sav->sav_vdevs = NULL;
	sav->sav_count = 0;

	/*
	 * Process new nvlist of vdevs.
	 */
	for (i = 0; i < nl2cache; i++) {
		VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID,
		    &guid) == 0);

		newvdevs[i] = NULL;
		for (j = 0; j < oldnvdevs; j++) {
			vd = oldvdevs[j];
			if (vd != NULL && guid == vd->vdev_guid) {
				/*
				 * Retain previous vdev for add/remove ops.
				 */
				newvdevs[i] = vd;
				oldvdevs[j] = NULL;
				break;
			}
		}

		if (newvdevs[i] == NULL) {
			/*
			 * Create new vdev
			 */
			VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0,
			    VDEV_ALLOC_L2CACHE) == 0);
			ASSERT(vd != NULL);
			newvdevs[i] = vd;

			/*
			 * Commit this vdev as an l2cache device,
			 * even if it fails to open.
			 */
			spa_l2cache_add(vd);

			/* L2cache devices are their own top-level vdevs. */
			vd->vdev_top = vd;
			vd->vdev_aux = sav;

			spa_l2cache_activate(vd);

			if (vdev_open(vd) != 0)
				continue;

			(void) vdev_validate_aux(vd);

			/* Only hand healthy devices to the L2ARC. */
			if (!vdev_is_dead(vd))
				l2arc_add_vdev(spa, vd);
		}
	}

	/*
	 * Purge vdevs that were dropped
	 */
	for (i = 0; i < oldnvdevs; i++) {
		uint64_t pool;

		vd = oldvdevs[i];
		if (vd != NULL) {
			if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
			    pool != 0ULL && l2arc_vdev_present(vd))
				l2arc_remove_vdev(vd);
			(void) vdev_close(vd);
			spa_l2cache_remove(vd);
		}
	}

	if (oldvdevs)
		kmem_free(oldvdevs, oldnvdevs * sizeof (void *));

	/*
	 * With no config, sav_count is still 0 here, so the loop and
	 * kmem_free() after the 'out' label are no-ops and never touch
	 * the uninitialized l2cache pointer.
	 */
	if (sav->sav_config == NULL)
		goto out;

	sav->sav_vdevs = newvdevs;
	sav->sav_count = (int)nl2cache;

	/*
	 * Recompute the stashed list of l2cache devices, with status
	 * information this time.
	 */
	VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE,
	    DATA_TYPE_NVLIST_ARRAY) == 0);

	l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP);
	for (i = 0; i < sav->sav_count; i++)
		l2cache[i] = vdev_config_generate(spa,
		    sav->sav_vdevs[i], B_TRUE, B_FALSE, B_TRUE);
	VERIFY(nvlist_add_nvlist_array(sav->sav_config,
	    ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0);
out:
	for (i = 0; i < sav->sav_count; i++)
		nvlist_free(l2cache[i]);
	if (sav->sav_count)
		kmem_free(l2cache, sav->sav_count * sizeof (void *));
}

/*
 * Read a packed nvlist from MOS object 'obj' and unpack it into *value.
 * The object's bonus buffer holds the packed size (a uint64_t).
 * Returns 0 on success or an error from dmu_read()/nvlist_unpack();
 * *value is set to NULL before the read is attempted.
 */
static int
load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value)
{
	dmu_buf_t *db;
	char *packed = NULL;
	size_t nvsize = 0;
	int error;
	*value = NULL;

	VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
	nvsize = *(uint64_t *)db->db_data;
	dmu_buf_rele(db, FTAG);

	packed = kmem_alloc(nvsize, KM_SLEEP);
	error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed,
	    DMU_READ_PREFETCH);
	if (error == 0)
		error = nvlist_unpack(packed, nvsize, value, 0);
	kmem_free(packed, nvsize);

	return (error);
}

/*
 * Checks
 * to see if the given vdev could not be opened, in which case we post a
 * sysevent to notify the autoreplace code that the device has been removed.
 */
static void
spa_check_removed(vdev_t *vd)
{
	/* Recurse depth-first over the whole subtree rooted at vd. */
	for (int c = 0; c < vd->vdev_children; c++)
		spa_check_removed(vd->vdev_child[c]);

	if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) {
		zfs_post_autoreplace(vd->vdev_spa, vd);
		spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK);
	}
}

/*
 * Load the slog device state from the config object since it's possible
 * that the label does not contain the most up-to-date information.
 */
void
spa_load_log_state(spa_t *spa)
{
	nvlist_t *nv, *nvroot, **child;
	uint64_t is_log;
	uint_t children;
	vdev_t *rvd = spa->spa_root_vdev;

	VERIFY(load_nvlist(spa, spa->spa_config_object, &nv) == 0);
	VERIFY(nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
	VERIFY(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) == 0);

	/*
	 * NOTE(review): assumes the config's child array lines up 1:1 with
	 * rvd's top-level children -- confirm against the callers.
	 */
	for (int c = 0; c < children; c++) {
		vdev_t *tvd = rvd->vdev_child[c];

		if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
		    &is_log) == 0 && is_log)
			vdev_load_log_state(tvd, child[c]);
	}
	nvlist_free(nv);
}

/*
 * Check for missing log devices.  Returns 1 (and records SPA_LOG_MISSING)
 * if any log chain cannot be verified, 0 otherwise.
 */
int
spa_check_logs(spa_t *spa)
{
	switch (spa->spa_log_state) {
	case SPA_LOG_MISSING:
		/* need to recheck in case slog has been restored */
	case SPA_LOG_UNKNOWN:
		if (dmu_objset_find(spa->spa_name, zil_check_log_chain, NULL,
		    DS_FIND_CHILDREN)) {
			spa->spa_log_state = SPA_LOG_MISSING;
			return (1);
		}
		break;
	}
	return (0);
}

/*
 * Load an existing storage pool, using the pool's builtin spa_config as a
 * source of configuration information.
 */
static int
spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
{
	int error = 0;
	nvlist_t *nvroot = NULL;
	vdev_t *rvd;
	uberblock_t *ub = &spa->spa_uberblock;
	uint64_t config_cache_txg = spa->spa_config_txg;
	uint64_t pool_guid;
	uint64_t version;
	uint64_t autoreplace = 0;
	int orig_mode = spa->spa_mode;
	char *ereport = FM_EREPORT_ZFS_POOL;

	/*
	 * If this is an untrusted config, access the pool in read-only mode.
	 * This prevents things like resilvering recently removed devices.
11678241SJeff.Bonwick@Sun.COM */ 11688241SJeff.Bonwick@Sun.COM if (!mosconfig) 11698241SJeff.Bonwick@Sun.COM spa->spa_mode = FREAD; 11708241SJeff.Bonwick@Sun.COM 11717754SJeff.Bonwick@Sun.COM ASSERT(MUTEX_HELD(&spa_namespace_lock)); 11727754SJeff.Bonwick@Sun.COM 11731544Seschrock spa->spa_load_state = state; 11741635Sbonwick 1175789Sahrens if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) || 11761733Sbonwick nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) { 11771544Seschrock error = EINVAL; 11781544Seschrock goto out; 11791544Seschrock } 1180789Sahrens 11812082Seschrock /* 11822082Seschrock * Versioning wasn't explicitly added to the label until later, so if 11832082Seschrock * it's not present treat it as the initial version. 11842082Seschrock */ 11852082Seschrock if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) != 0) 11864577Sahrens version = SPA_VERSION_INITIAL; 11872082Seschrock 11881733Sbonwick (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, 11891733Sbonwick &spa->spa_config_txg); 11901733Sbonwick 11911635Sbonwick if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) && 11921544Seschrock spa_guid_exists(pool_guid, 0)) { 11931544Seschrock error = EEXIST; 11941544Seschrock goto out; 11951544Seschrock } 1196789Sahrens 11972174Seschrock spa->spa_load_guid = pool_guid; 11982174Seschrock 1199789Sahrens /* 12009234SGeorge.Wilson@Sun.COM * Create "The Godfather" zio to hold all async IOs 12019234SGeorge.Wilson@Sun.COM */ 12029630SJeff.Bonwick@Sun.COM spa->spa_async_zio_root = zio_root(spa, NULL, NULL, 12039630SJeff.Bonwick@Sun.COM ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); 12049234SGeorge.Wilson@Sun.COM 12059234SGeorge.Wilson@Sun.COM /* 12062082Seschrock * Parse the configuration into a vdev tree. We explicitly set the 12072082Seschrock * value that will be returned by spa_version() since parsing the 12082082Seschrock * configuration requires knowing the version number. 
1209789Sahrens */ 12107754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 12112082Seschrock spa->spa_ubsync.ub_version = version; 12122082Seschrock error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_LOAD); 12137754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 1214789Sahrens 12152082Seschrock if (error != 0) 12161544Seschrock goto out; 1217789Sahrens 12181585Sbonwick ASSERT(spa->spa_root_vdev == rvd); 1219789Sahrens ASSERT(spa_guid(spa) == pool_guid); 1220789Sahrens 1221789Sahrens /* 1222789Sahrens * Try to open all vdevs, loading each label in the process. 1223789Sahrens */ 12247754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 12254070Smc142369 error = vdev_open(rvd); 12267754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 12274070Smc142369 if (error != 0) 12281544Seschrock goto out; 1229789Sahrens 1230789Sahrens /* 12319276SMark.Musante@Sun.COM * We need to validate the vdev labels against the configuration that 12329276SMark.Musante@Sun.COM * we have in hand, which is dependent on the setting of mosconfig. If 12339276SMark.Musante@Sun.COM * mosconfig is true then we're validating the vdev labels based on 12349276SMark.Musante@Sun.COM * that config. Otherwise, we're validating against the cached config 12359276SMark.Musante@Sun.COM * (zpool.cache) that was read when we loaded the zfs module, and then 12369276SMark.Musante@Sun.COM * later we will recursively call spa_load() and validate against 12379276SMark.Musante@Sun.COM * the vdev config. 
12381986Seschrock */ 12399276SMark.Musante@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 12409276SMark.Musante@Sun.COM error = vdev_validate(rvd); 12419276SMark.Musante@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 12429276SMark.Musante@Sun.COM if (error != 0) 12439276SMark.Musante@Sun.COM goto out; 12441986Seschrock 12451986Seschrock if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { 12461986Seschrock error = ENXIO; 12471986Seschrock goto out; 12481986Seschrock } 12491986Seschrock 12501986Seschrock /* 1251789Sahrens * Find the best uberblock. 1252789Sahrens */ 12537754SJeff.Bonwick@Sun.COM vdev_uberblock_load(NULL, rvd, ub); 1254789Sahrens 1255789Sahrens /* 1256789Sahrens * If we weren't able to find a single valid uberblock, return failure. 1257789Sahrens */ 1258789Sahrens if (ub->ub_txg == 0) { 12591760Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 12601760Seschrock VDEV_AUX_CORRUPT_DATA); 12611544Seschrock error = ENXIO; 12621544Seschrock goto out; 12631544Seschrock } 12641544Seschrock 12651544Seschrock /* 12661544Seschrock * If the pool is newer than the code, we can't open it. 12671544Seschrock */ 12684577Sahrens if (ub->ub_version > SPA_VERSION) { 12691760Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 12701760Seschrock VDEV_AUX_VERSION_NEWER); 12711544Seschrock error = ENOTSUP; 12721544Seschrock goto out; 1273789Sahrens } 1274789Sahrens 1275789Sahrens /* 1276789Sahrens * If the vdev guid sum doesn't match the uberblock, we have an 1277789Sahrens * incomplete configuration. 1278789Sahrens */ 12791732Sbonwick if (rvd->vdev_guid_sum != ub->ub_guid_sum && mosconfig) { 12801544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 12811544Seschrock VDEV_AUX_BAD_GUID_SUM); 12821544Seschrock error = ENXIO; 12831544Seschrock goto out; 1284789Sahrens } 1285789Sahrens 1286789Sahrens /* 1287789Sahrens * Initialize internal SPA structures. 
1288789Sahrens */ 1289789Sahrens spa->spa_state = POOL_STATE_ACTIVE; 1290789Sahrens spa->spa_ubsync = spa->spa_uberblock; 1291789Sahrens spa->spa_first_txg = spa_last_synced_txg(spa) + 1; 12921544Seschrock error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool); 12931544Seschrock if (error) { 12941544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 12951544Seschrock VDEV_AUX_CORRUPT_DATA); 12961544Seschrock goto out; 12971544Seschrock } 1298789Sahrens spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset; 1299789Sahrens 13001544Seschrock if (zap_lookup(spa->spa_meta_objset, 1301789Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, 13021544Seschrock sizeof (uint64_t), 1, &spa->spa_config_object) != 0) { 13031544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 13041544Seschrock VDEV_AUX_CORRUPT_DATA); 13051544Seschrock error = EIO; 13061544Seschrock goto out; 13071544Seschrock } 1308789Sahrens 1309789Sahrens if (!mosconfig) { 13102082Seschrock nvlist_t *newconfig; 13113975Sek110237 uint64_t hostid; 13122082Seschrock 13132082Seschrock if (load_nvlist(spa, spa->spa_config_object, &newconfig) != 0) { 13141544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 13151544Seschrock VDEV_AUX_CORRUPT_DATA); 13161544Seschrock error = EIO; 13171544Seschrock goto out; 13181544Seschrock } 1319789Sahrens 13207706SLin.Ling@Sun.COM if (!spa_is_root(spa) && nvlist_lookup_uint64(newconfig, 13217706SLin.Ling@Sun.COM ZPOOL_CONFIG_HOSTID, &hostid) == 0) { 13223975Sek110237 char *hostname; 13233975Sek110237 unsigned long myhostid = 0; 13243975Sek110237 13253975Sek110237 VERIFY(nvlist_lookup_string(newconfig, 13263975Sek110237 ZPOOL_CONFIG_HOSTNAME, &hostname) == 0); 13273975Sek110237 13288662SJordan.Vaughan@Sun.com #ifdef _KERNEL 13298662SJordan.Vaughan@Sun.com myhostid = zone_get_hostid(NULL); 13308662SJordan.Vaughan@Sun.com #else /* _KERNEL */ 13318662SJordan.Vaughan@Sun.com /* 13328662SJordan.Vaughan@Sun.com * We're emulating the system's 
hostid in userland, so 13338662SJordan.Vaughan@Sun.com * we can't use zone_get_hostid(). 13348662SJordan.Vaughan@Sun.com */ 13353975Sek110237 (void) ddi_strtoul(hw_serial, NULL, 10, &myhostid); 13368662SJordan.Vaughan@Sun.com #endif /* _KERNEL */ 13374178Slling if (hostid != 0 && myhostid != 0 && 13388662SJordan.Vaughan@Sun.com hostid != myhostid) { 13393975Sek110237 cmn_err(CE_WARN, "pool '%s' could not be " 13403975Sek110237 "loaded as it was last accessed by " 13417706SLin.Ling@Sun.COM "another system (host: %s hostid: 0x%lx). " 13423975Sek110237 "See: http://www.sun.com/msg/ZFS-8000-EY", 13437754SJeff.Bonwick@Sun.COM spa_name(spa), hostname, 13443975Sek110237 (unsigned long)hostid); 13453975Sek110237 error = EBADF; 13463975Sek110237 goto out; 13473975Sek110237 } 13483975Sek110237 } 13493975Sek110237 1350789Sahrens spa_config_set(spa, newconfig); 1351789Sahrens spa_unload(spa); 1352789Sahrens spa_deactivate(spa); 13538241SJeff.Bonwick@Sun.COM spa_activate(spa, orig_mode); 1354789Sahrens 13551544Seschrock return (spa_load(spa, newconfig, state, B_TRUE)); 13561544Seschrock } 13571544Seschrock 13581544Seschrock if (zap_lookup(spa->spa_meta_objset, 13591544Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, 13601544Seschrock sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj) != 0) { 13611544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 13621544Seschrock VDEV_AUX_CORRUPT_DATA); 13631544Seschrock error = EIO; 13641544Seschrock goto out; 1365789Sahrens } 1366789Sahrens 13671544Seschrock /* 13682082Seschrock * Load the bit that tells us to use the new accounting function 13692082Seschrock * (raid-z deflation). If we have an older pool, this will not 13702082Seschrock * be present. 
13712082Seschrock */ 13722082Seschrock error = zap_lookup(spa->spa_meta_objset, 13732082Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 13742082Seschrock sizeof (uint64_t), 1, &spa->spa_deflate); 13752082Seschrock if (error != 0 && error != ENOENT) { 13762082Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 13772082Seschrock VDEV_AUX_CORRUPT_DATA); 13782082Seschrock error = EIO; 13792082Seschrock goto out; 13802082Seschrock } 13812082Seschrock 13822082Seschrock /* 13831544Seschrock * Load the persistent error log. If we have an older pool, this will 13841544Seschrock * not be present. 13851544Seschrock */ 13861544Seschrock error = zap_lookup(spa->spa_meta_objset, 13871544Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_LAST, 13881544Seschrock sizeof (uint64_t), 1, &spa->spa_errlog_last); 13891807Sbonwick if (error != 0 && error != ENOENT) { 13901544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 13911544Seschrock VDEV_AUX_CORRUPT_DATA); 13921544Seschrock error = EIO; 13931544Seschrock goto out; 13941544Seschrock } 13951544Seschrock 13961544Seschrock error = zap_lookup(spa->spa_meta_objset, 13971544Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_SCRUB, 13981544Seschrock sizeof (uint64_t), 1, &spa->spa_errlog_scrub); 13991544Seschrock if (error != 0 && error != ENOENT) { 14001544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 14011544Seschrock VDEV_AUX_CORRUPT_DATA); 14021544Seschrock error = EIO; 14031544Seschrock goto out; 14041544Seschrock } 1405789Sahrens 1406789Sahrens /* 14072926Sek110237 * Load the history object. If we have an older pool, this 14082926Sek110237 * will not be present. 
14092926Sek110237 */ 14102926Sek110237 error = zap_lookup(spa->spa_meta_objset, 14112926Sek110237 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_HISTORY, 14122926Sek110237 sizeof (uint64_t), 1, &spa->spa_history); 14132926Sek110237 if (error != 0 && error != ENOENT) { 14142926Sek110237 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 14152926Sek110237 VDEV_AUX_CORRUPT_DATA); 14162926Sek110237 error = EIO; 14172926Sek110237 goto out; 14182926Sek110237 } 14192926Sek110237 14202926Sek110237 /* 14212082Seschrock * Load any hot spares for this pool. 14222082Seschrock */ 14232082Seschrock error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 14245450Sbrendan DMU_POOL_SPARES, sizeof (uint64_t), 1, &spa->spa_spares.sav_object); 14252082Seschrock if (error != 0 && error != ENOENT) { 14262082Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 14272082Seschrock VDEV_AUX_CORRUPT_DATA); 14282082Seschrock error = EIO; 14292082Seschrock goto out; 14302082Seschrock } 14312082Seschrock if (error == 0) { 14324577Sahrens ASSERT(spa_version(spa) >= SPA_VERSION_SPARES); 14335450Sbrendan if (load_nvlist(spa, spa->spa_spares.sav_object, 14345450Sbrendan &spa->spa_spares.sav_config) != 0) { 14352082Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 14362082Seschrock VDEV_AUX_CORRUPT_DATA); 14372082Seschrock error = EIO; 14382082Seschrock goto out; 14392082Seschrock } 14402082Seschrock 14417754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 14422082Seschrock spa_load_spares(spa); 14437754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 14442082Seschrock } 14452082Seschrock 14465450Sbrendan /* 14475450Sbrendan * Load any level 2 ARC devices for this pool. 
14485450Sbrendan */ 14495450Sbrendan error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 14505450Sbrendan DMU_POOL_L2CACHE, sizeof (uint64_t), 1, 14515450Sbrendan &spa->spa_l2cache.sav_object); 14525450Sbrendan if (error != 0 && error != ENOENT) { 14535450Sbrendan vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 14545450Sbrendan VDEV_AUX_CORRUPT_DATA); 14555450Sbrendan error = EIO; 14565450Sbrendan goto out; 14575450Sbrendan } 14585450Sbrendan if (error == 0) { 14595450Sbrendan ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE); 14605450Sbrendan if (load_nvlist(spa, spa->spa_l2cache.sav_object, 14615450Sbrendan &spa->spa_l2cache.sav_config) != 0) { 14625450Sbrendan vdev_set_state(rvd, B_TRUE, 14635450Sbrendan VDEV_STATE_CANT_OPEN, 14645450Sbrendan VDEV_AUX_CORRUPT_DATA); 14655450Sbrendan error = EIO; 14665450Sbrendan goto out; 14675450Sbrendan } 14685450Sbrendan 14697754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 14705450Sbrendan spa_load_l2cache(spa); 14717754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 14725450Sbrendan } 14735450Sbrendan 14749701SGeorge.Wilson@Sun.COM spa_load_log_state(spa); 14759701SGeorge.Wilson@Sun.COM 14767294Sperrin if (spa_check_logs(spa)) { 14777294Sperrin vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 14787294Sperrin VDEV_AUX_BAD_LOG); 14797294Sperrin error = ENXIO; 14807294Sperrin ereport = FM_EREPORT_ZFS_LOG_REPLAY; 14817294Sperrin goto out; 14827294Sperrin } 14837294Sperrin 14847294Sperrin 14855094Slling spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); 14864543Smarks 14873912Slling error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 14883912Slling DMU_POOL_PROPS, sizeof (uint64_t), 1, &spa->spa_pool_props_object); 14893912Slling 14903912Slling if (error && error != ENOENT) { 14913912Slling vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 14923912Slling VDEV_AUX_CORRUPT_DATA); 14933912Slling error = EIO; 14943912Slling goto out; 
14953912Slling } 14963912Slling 14973912Slling if (error == 0) { 14983912Slling (void) zap_lookup(spa->spa_meta_objset, 14993912Slling spa->spa_pool_props_object, 15004451Seschrock zpool_prop_to_name(ZPOOL_PROP_BOOTFS), 15013912Slling sizeof (uint64_t), 1, &spa->spa_bootfs); 15024451Seschrock (void) zap_lookup(spa->spa_meta_objset, 15034451Seschrock spa->spa_pool_props_object, 15044451Seschrock zpool_prop_to_name(ZPOOL_PROP_AUTOREPLACE), 15054451Seschrock sizeof (uint64_t), 1, &autoreplace); 15064543Smarks (void) zap_lookup(spa->spa_meta_objset, 15074543Smarks spa->spa_pool_props_object, 15084543Smarks zpool_prop_to_name(ZPOOL_PROP_DELEGATION), 15094543Smarks sizeof (uint64_t), 1, &spa->spa_delegation); 15105329Sgw25295 (void) zap_lookup(spa->spa_meta_objset, 15115329Sgw25295 spa->spa_pool_props_object, 15125329Sgw25295 zpool_prop_to_name(ZPOOL_PROP_FAILUREMODE), 15135329Sgw25295 sizeof (uint64_t), 1, &spa->spa_failmode); 15149816SGeorge.Wilson@Sun.COM (void) zap_lookup(spa->spa_meta_objset, 15159816SGeorge.Wilson@Sun.COM spa->spa_pool_props_object, 15169816SGeorge.Wilson@Sun.COM zpool_prop_to_name(ZPOOL_PROP_AUTOEXPAND), 15179816SGeorge.Wilson@Sun.COM sizeof (uint64_t), 1, &spa->spa_autoexpand); 15183912Slling } 15193912Slling 15202082Seschrock /* 15214451Seschrock * If the 'autoreplace' property is set, then post a resource notifying 15224451Seschrock * the ZFS DE that it should not issue any faults for unopenable 15234451Seschrock * devices. We also iterate over the vdevs, and post a sysevent for any 15244451Seschrock * unopenable vdevs so that the normal autoreplace handler can take 15254451Seschrock * over. 15264451Seschrock */ 15275756Seschrock if (autoreplace && state != SPA_LOAD_TRYIMPORT) 15284451Seschrock spa_check_removed(spa->spa_root_vdev); 15294451Seschrock 15304451Seschrock /* 15311986Seschrock * Load the vdev state for all toplevel vdevs. 
1532789Sahrens */ 15331986Seschrock vdev_load(rvd); 1534789Sahrens 1535789Sahrens /* 1536789Sahrens * Propagate the leaf DTLs we just loaded all the way up the tree. 1537789Sahrens */ 15387754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1539789Sahrens vdev_dtl_reassess(rvd, 0, 0, B_FALSE); 15407754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 1541789Sahrens 1542789Sahrens /* 1543789Sahrens * Check the state of the root vdev. If it can't be opened, it 1544789Sahrens * indicates one or more toplevel vdevs are faulted. 1545789Sahrens */ 15461544Seschrock if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { 15471544Seschrock error = ENXIO; 15481544Seschrock goto out; 15491544Seschrock } 1550789Sahrens 15518241SJeff.Bonwick@Sun.COM if (spa_writeable(spa)) { 15521635Sbonwick dmu_tx_t *tx; 15531635Sbonwick int need_update = B_FALSE; 15548241SJeff.Bonwick@Sun.COM 15558241SJeff.Bonwick@Sun.COM ASSERT(state != SPA_LOAD_TRYIMPORT); 15561601Sbonwick 15571635Sbonwick /* 15581635Sbonwick * Claim log blocks that haven't been committed yet. 15591635Sbonwick * This must all happen in a single txg. 15601635Sbonwick */ 15611601Sbonwick tx = dmu_tx_create_assigned(spa_get_dsl(spa), 1562789Sahrens spa_first_txg(spa)); 15637754SJeff.Bonwick@Sun.COM (void) dmu_objset_find(spa_name(spa), 15642417Sahrens zil_claim, tx, DS_FIND_CHILDREN); 1565789Sahrens dmu_tx_commit(tx); 1566789Sahrens 15679701SGeorge.Wilson@Sun.COM spa->spa_log_state = SPA_LOG_GOOD; 1568789Sahrens spa->spa_sync_on = B_TRUE; 1569789Sahrens txg_sync_start(spa->spa_dsl_pool); 1570789Sahrens 1571789Sahrens /* 1572789Sahrens * Wait for all claims to sync. 1573789Sahrens */ 1574789Sahrens txg_wait_synced(spa->spa_dsl_pool, 0); 15751585Sbonwick 15761585Sbonwick /* 15771635Sbonwick * If the config cache is stale, or we have uninitialized 15781635Sbonwick * metaslabs (see spa_vdev_add()), then update the config. 
157910100SLin.Ling@Sun.COM * 158010100SLin.Ling@Sun.COM * If spa_load_verbatim is true, trust the current 158110100SLin.Ling@Sun.COM * in-core spa_config and update the disk labels. 15821585Sbonwick */ 15831635Sbonwick if (config_cache_txg != spa->spa_config_txg || 158410100SLin.Ling@Sun.COM state == SPA_LOAD_IMPORT || spa->spa_load_verbatim) 15851635Sbonwick need_update = B_TRUE; 15861635Sbonwick 15878241SJeff.Bonwick@Sun.COM for (int c = 0; c < rvd->vdev_children; c++) 15881635Sbonwick if (rvd->vdev_child[c]->vdev_ms_array == 0) 15891635Sbonwick need_update = B_TRUE; 15901585Sbonwick 15911585Sbonwick /* 15921635Sbonwick * Update the config cache asychronously in case we're the 15931635Sbonwick * root pool, in which case the config cache isn't writable yet. 15941585Sbonwick */ 15951635Sbonwick if (need_update) 15961635Sbonwick spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); 15978241SJeff.Bonwick@Sun.COM 15988241SJeff.Bonwick@Sun.COM /* 15998241SJeff.Bonwick@Sun.COM * Check all DTLs to see if anything needs resilvering. 16008241SJeff.Bonwick@Sun.COM */ 16018241SJeff.Bonwick@Sun.COM if (vdev_resilver_needed(rvd, NULL, NULL)) 16028241SJeff.Bonwick@Sun.COM spa_async_request(spa, SPA_ASYNC_RESILVER); 160310298SMatthew.Ahrens@Sun.COM 160410298SMatthew.Ahrens@Sun.COM /* 160510298SMatthew.Ahrens@Sun.COM * Delete any inconsistent datasets. 160610298SMatthew.Ahrens@Sun.COM */ 160710298SMatthew.Ahrens@Sun.COM (void) dmu_objset_find(spa_name(spa), 160810298SMatthew.Ahrens@Sun.COM dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN); 160910342Schris.kirby@sun.com 161010342Schris.kirby@sun.com /* 161110342Schris.kirby@sun.com * Clean up any stale temporary dataset userrefs. 
161210342Schris.kirby@sun.com */ 161310342Schris.kirby@sun.com dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool); 1614789Sahrens } 1615789Sahrens 16161544Seschrock error = 0; 16171544Seschrock out: 16187046Sahrens spa->spa_minref = refcount_count(&spa->spa_refcount); 16192082Seschrock if (error && error != EBADF) 16207294Sperrin zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0); 16211544Seschrock spa->spa_load_state = SPA_LOAD_NONE; 16221544Seschrock spa->spa_ena = 0; 16231544Seschrock 16241544Seschrock return (error); 1625789Sahrens } 1626789Sahrens 1627789Sahrens /* 1628789Sahrens * Pool Open/Import 1629789Sahrens * 1630789Sahrens * The import case is identical to an open except that the configuration is sent 1631789Sahrens * down from userland, instead of grabbed from the configuration cache. For the 1632789Sahrens * case of an open, the pool configuration will exist in the 16334451Seschrock * POOL_STATE_UNINITIALIZED state. 1634789Sahrens * 1635789Sahrens * The stats information (gen/count/ustats) is used to gather vdev statistics at 1636789Sahrens * the same time open the pool, without having to keep around the spa_t in some 1637789Sahrens * ambiguous state. 1638789Sahrens */ 1639789Sahrens static int 1640789Sahrens spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t **config) 1641789Sahrens { 1642789Sahrens spa_t *spa; 1643789Sahrens int error; 1644789Sahrens int locked = B_FALSE; 1645789Sahrens 1646789Sahrens *spapp = NULL; 1647789Sahrens 1648789Sahrens /* 1649789Sahrens * As disgusting as this is, we need to support recursive calls to this 1650789Sahrens * function because dsl_dir_open() is called during spa_load(), and ends 1651789Sahrens * up calling spa_open() again. The real fix is to figure out how to 1652789Sahrens * avoid dsl_dir_open() calling this in the first place. 
1653789Sahrens */ 1654789Sahrens if (mutex_owner(&spa_namespace_lock) != curthread) { 1655789Sahrens mutex_enter(&spa_namespace_lock); 1656789Sahrens locked = B_TRUE; 1657789Sahrens } 1658789Sahrens 1659789Sahrens if ((spa = spa_lookup(pool)) == NULL) { 1660789Sahrens if (locked) 1661789Sahrens mutex_exit(&spa_namespace_lock); 1662789Sahrens return (ENOENT); 1663789Sahrens } 1664789Sahrens if (spa->spa_state == POOL_STATE_UNINITIALIZED) { 1665789Sahrens 16668241SJeff.Bonwick@Sun.COM spa_activate(spa, spa_mode_global); 1667789Sahrens 16681635Sbonwick error = spa_load(spa, spa->spa_config, SPA_LOAD_OPEN, B_FALSE); 1669789Sahrens 1670789Sahrens if (error == EBADF) { 1671789Sahrens /* 16721986Seschrock * If vdev_validate() returns failure (indicated by 16731986Seschrock * EBADF), it indicates that one of the vdevs indicates 16741986Seschrock * that the pool has been exported or destroyed. If 16751986Seschrock * this is the case, the config cache is out of sync and 16761986Seschrock * we should remove the pool from the namespace. 1677789Sahrens */ 1678789Sahrens spa_unload(spa); 1679789Sahrens spa_deactivate(spa); 16806643Seschrock spa_config_sync(spa, B_TRUE, B_TRUE); 1681789Sahrens spa_remove(spa); 1682789Sahrens if (locked) 1683789Sahrens mutex_exit(&spa_namespace_lock); 1684789Sahrens return (ENOENT); 16851544Seschrock } 16861544Seschrock 16871544Seschrock if (error) { 1688789Sahrens /* 1689789Sahrens * We can't open the pool, but we still have useful 1690789Sahrens * information: the state of each vdev after the 1691789Sahrens * attempted vdev_open(). Return this to the user. 
1692789Sahrens */ 16937754SJeff.Bonwick@Sun.COM if (config != NULL && spa->spa_root_vdev != NULL) 1694789Sahrens *config = spa_config_generate(spa, NULL, -1ULL, 1695789Sahrens B_TRUE); 1696789Sahrens spa_unload(spa); 1697789Sahrens spa_deactivate(spa); 16981544Seschrock spa->spa_last_open_failed = B_TRUE; 1699789Sahrens if (locked) 1700789Sahrens mutex_exit(&spa_namespace_lock); 1701789Sahrens *spapp = NULL; 1702789Sahrens return (error); 17031544Seschrock } else { 17041544Seschrock spa->spa_last_open_failed = B_FALSE; 1705789Sahrens } 1706789Sahrens } 1707789Sahrens 1708789Sahrens spa_open_ref(spa, tag); 17094451Seschrock 1710789Sahrens if (locked) 1711789Sahrens mutex_exit(&spa_namespace_lock); 1712789Sahrens 1713789Sahrens *spapp = spa; 1714789Sahrens 17157754SJeff.Bonwick@Sun.COM if (config != NULL) 1716789Sahrens *config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); 1717789Sahrens 1718789Sahrens return (0); 1719789Sahrens } 1720789Sahrens 1721789Sahrens int 1722789Sahrens spa_open(const char *name, spa_t **spapp, void *tag) 1723789Sahrens { 1724789Sahrens return (spa_open_common(name, spapp, tag, NULL)); 1725789Sahrens } 1726789Sahrens 17271544Seschrock /* 17281544Seschrock * Lookup the given spa_t, incrementing the inject count in the process, 17291544Seschrock * preventing it from being exported or destroyed. 
17301544Seschrock */ 17311544Seschrock spa_t * 17321544Seschrock spa_inject_addref(char *name) 17331544Seschrock { 17341544Seschrock spa_t *spa; 17351544Seschrock 17361544Seschrock mutex_enter(&spa_namespace_lock); 17371544Seschrock if ((spa = spa_lookup(name)) == NULL) { 17381544Seschrock mutex_exit(&spa_namespace_lock); 17391544Seschrock return (NULL); 17401544Seschrock } 17411544Seschrock spa->spa_inject_ref++; 17421544Seschrock mutex_exit(&spa_namespace_lock); 17431544Seschrock 17441544Seschrock return (spa); 17451544Seschrock } 17461544Seschrock 17471544Seschrock void 17481544Seschrock spa_inject_delref(spa_t *spa) 17491544Seschrock { 17501544Seschrock mutex_enter(&spa_namespace_lock); 17511544Seschrock spa->spa_inject_ref--; 17521544Seschrock mutex_exit(&spa_namespace_lock); 17531544Seschrock } 17541544Seschrock 17555450Sbrendan /* 17565450Sbrendan * Add spares device information to the nvlist. 17575450Sbrendan */ 17582082Seschrock static void 17592082Seschrock spa_add_spares(spa_t *spa, nvlist_t *config) 17602082Seschrock { 17612082Seschrock nvlist_t **spares; 17622082Seschrock uint_t i, nspares; 17632082Seschrock nvlist_t *nvroot; 17642082Seschrock uint64_t guid; 17652082Seschrock vdev_stat_t *vs; 17662082Seschrock uint_t vsc; 17673377Seschrock uint64_t pool; 17682082Seschrock 17699425SEric.Schrock@Sun.COM ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); 17709425SEric.Schrock@Sun.COM 17715450Sbrendan if (spa->spa_spares.sav_count == 0) 17722082Seschrock return; 17732082Seschrock 17742082Seschrock VERIFY(nvlist_lookup_nvlist(config, 17752082Seschrock ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 17765450Sbrendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 17772082Seschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 17782082Seschrock if (nspares != 0) { 17792082Seschrock VERIFY(nvlist_add_nvlist_array(nvroot, 17802082Seschrock ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 17812082Seschrock VERIFY(nvlist_lookup_nvlist_array(nvroot, 
17822082Seschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 17832082Seschrock 17842082Seschrock /* 17852082Seschrock * Go through and find any spares which have since been 17862082Seschrock * repurposed as an active spare. If this is the case, update 17872082Seschrock * their status appropriately. 17882082Seschrock */ 17892082Seschrock for (i = 0; i < nspares; i++) { 17902082Seschrock VERIFY(nvlist_lookup_uint64(spares[i], 17912082Seschrock ZPOOL_CONFIG_GUID, &guid) == 0); 17927214Slling if (spa_spare_exists(guid, &pool, NULL) && 17937214Slling pool != 0ULL) { 17942082Seschrock VERIFY(nvlist_lookup_uint64_array( 17952082Seschrock spares[i], ZPOOL_CONFIG_STATS, 17962082Seschrock (uint64_t **)&vs, &vsc) == 0); 17972082Seschrock vs->vs_state = VDEV_STATE_CANT_OPEN; 17982082Seschrock vs->vs_aux = VDEV_AUX_SPARED; 17992082Seschrock } 18002082Seschrock } 18012082Seschrock } 18022082Seschrock } 18032082Seschrock 18045450Sbrendan /* 18055450Sbrendan * Add l2cache device information to the nvlist, including vdev stats. 
 */
static void
spa_add_l2cache(spa_t *spa, nvlist_t *config)
{
	nvlist_t **l2cache;
	uint_t i, j, nl2cache;
	nvlist_t *nvroot;
	uint64_t guid;
	vdev_t *vd;
	vdev_stat_t *vs;
	uint_t vsc;

	ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));

	if (spa->spa_l2cache.sav_count == 0)
		return;

	VERIFY(nvlist_lookup_nvlist(config,
	    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
	VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config,
	    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
	if (nl2cache != 0) {
		VERIFY(nvlist_add_nvlist_array(nvroot,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
		/*
		 * Re-lookup so 'l2cache' refers to the copies now owned by
		 * 'nvroot'; we update their stats arrays in place below.
		 */
		VERIFY(nvlist_lookup_nvlist_array(nvroot,
		    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);

		/*
		 * Update level 2 cache device stats.
		 */

		for (i = 0; i < nl2cache; i++) {
			VERIFY(nvlist_lookup_uint64(l2cache[i],
			    ZPOOL_CONFIG_GUID, &guid) == 0);

			/*
			 * Find the in-core l2cache vdev matching this guid.
			 * Every entry in the config is expected to have a
			 * corresponding sav_vdevs entry (hence the ASSERT).
			 */
			vd = NULL;
			for (j = 0; j < spa->spa_l2cache.sav_count; j++) {
				if (guid ==
				    spa->spa_l2cache.sav_vdevs[j]->vdev_guid) {
					vd = spa->spa_l2cache.sav_vdevs[j];
					break;
				}
			}
			ASSERT(vd != NULL);

			VERIFY(nvlist_lookup_uint64_array(l2cache[i],
			    ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0);
			vdev_get_stats(vd, vs);
		}
	}
}

/*
 * Fetch the pool's status: its config nvlist (augmented with the error-log
 * count, suspension state, and spare/l2cache information) and its alternate
 * root.  The altroot is returned even for pools that failed to open.
 */
int
spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen)
{
	int error;
	spa_t *spa;

	*config = NULL;
	error = spa_open_common(name, &spa, FTAG, config);

	if (spa != NULL) {
		/*
		 * This still leaves a window of inconsistency where the spares
		 * or l2cache devices could change and the config would be
		 * self-inconsistent.
		 */
		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);

		if (*config != NULL) {
			VERIFY(nvlist_add_uint64(*config,
			    ZPOOL_CONFIG_ERRCOUNT,
			    spa_get_errlog_size(spa)) == 0);

			if (spa_suspended(spa))
				VERIFY(nvlist_add_uint64(*config,
				    ZPOOL_CONFIG_SUSPENDED,
				    spa->spa_failmode) == 0);

			spa_add_spares(spa, *config);
			spa_add_l2cache(spa, *config);
		}
	}

	/*
	 * We want to get the alternate root even for faulted pools, so we cheat
	 * and call spa_lookup() directly.
	 */
	if (altroot) {
		if (spa == NULL) {
			mutex_enter(&spa_namespace_lock);
			spa = spa_lookup(name);
			if (spa)
				spa_altroot(spa, altroot, buflen);
			else
				altroot[0] = '\0';
			spa = NULL;	/* don't hold; not opened via ref */
			mutex_exit(&spa_namespace_lock);
		} else {
			spa_altroot(spa, altroot, buflen);
		}
	}

	if (spa != NULL) {
		spa_config_exit(spa, SCL_CONFIG, FTAG);
		spa_close(spa, FTAG);
	}

	return (error);
}

/*
 * Validate that the auxiliary device array is well formed.
 * We must have an
 * array of nvlists, each which describes a valid leaf vdev.  If this is an
 * import (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be
 * specified, as long as they are well-formed.
 */
static int
spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode,
    spa_aux_vdev_t *sav, const char *config, uint64_t version,
    vdev_labeltype_t label)
{
	nvlist_t **dev;
	uint_t i, ndev;
	vdev_t *vd;
	int error;

	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);

	/*
	 * It's acceptable to have no devs specified.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, config, &dev, &ndev) != 0)
		return (0);

	if (ndev == 0)
		return (EINVAL);

	/*
	 * Make sure the pool is formatted with a version that supports this
	 * device type.
	 */
	if (spa_version(spa) < version)
		return (ENOTSUP);

	/*
	 * Set the pending device list so we correctly handle device in-use
	 * checking.
	 */
	sav->sav_pending = dev;
	sav->sav_npending = ndev;

	for (i = 0; i < ndev; i++) {
		if ((error = spa_config_parse(spa, &vd, dev[i], NULL, 0,
		    mode)) != 0)
			goto out;

		if (!vd->vdev_ops->vdev_op_leaf) {
			vdev_free(vd);
			error = EINVAL;
			goto out;
		}

		/*
		 * The L2ARC currently only supports disk devices in
		 * kernel context.  For user-level testing, we allow it.
		 */
#ifdef _KERNEL
		if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) &&
		    strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) {
			/*
			 * NOTE(review): unlike the EINVAL path above, 'vd'
			 * is not vdev_free()'d before this goto -- looks
			 * like a vdev leak; confirm against later revisions.
			 */
			error = ENOTBLK;
			goto out;
		}
#endif
		vd->vdev_top = vd;

		if ((error = vdev_open(vd)) == 0 &&
		    (error = vdev_label_init(vd, crtxg, label)) == 0) {
			VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID,
			    vd->vdev_guid) == 0);
		}

		vdev_free(vd);

		/*
		 * For spare/l2cache imports, tolerate per-device failures
		 * (the device may be corrupted); otherwise fail the whole
		 * validation.
		 */
		if (error &&
		    (mode != VDEV_ALLOC_SPARE && mode != VDEV_ALLOC_L2CACHE))
			goto out;
		else
			error = 0;
	}

out:
	sav->sav_pending = NULL;
	sav->sav_npending = 0;
	return (error);
}

/*
 * Validate both the spare and the l2cache device arrays in 'nvroot'.
 */
static int
spa_validate_aux(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode)
{
	int error;

	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);

	if ((error = spa_validate_aux_devs(spa, nvroot, crtxg, mode,
	    &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES,
	    VDEV_LABEL_SPARE)) != 0) {
		return (error);
	}

	return (spa_validate_aux_devs(spa, nvroot, crtxg, mode,
	    &spa->spa_l2cache, ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE,
	    VDEV_LABEL_L2CACHE));
}

/*
 * Replace the 'config' array in sav->sav_config with the concatenation of
 * the existing entries and the 'devs' passed in; allocates sav_config if it
 * does not exist yet.
 */
static void
spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs,
    const char *config)
{
	int i;

	if (sav->sav_config != NULL) {
		nvlist_t **olddevs;
		uint_t oldndevs;
		nvlist_t **newdevs;

		/*
		 * Generate new dev list by concatentating with the
		 * current dev list.
		 */
		VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config,
		    &olddevs, &oldndevs) == 0);

		newdevs = kmem_alloc(sizeof (void *) *
		    (ndevs + oldndevs), KM_SLEEP);
		for (i = 0; i < oldndevs; i++)
			VERIFY(nvlist_dup(olddevs[i], &newdevs[i],
			    KM_SLEEP) == 0);
		for (i = 0; i < ndevs; i++)
			VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs],
			    KM_SLEEP) == 0);

		VERIFY(nvlist_remove(sav->sav_config, config,
		    DATA_TYPE_NVLIST_ARRAY) == 0);

		/*
		 * nvlist_add_nvlist_array() copies its input, so the
		 * temporary duplicates can be freed afterwards.
		 */
		VERIFY(nvlist_add_nvlist_array(sav->sav_config,
		    config, newdevs, ndevs + oldndevs) == 0);
		for (i = 0; i < oldndevs + ndevs; i++)
			nvlist_free(newdevs[i]);
		kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *));
	} else {
		/*
		 * Generate a new dev list.
		 */
		VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME,
		    KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(sav->sav_config, config,
		    devs, ndevs) == 0);
	}
}

/*
 * Stop and drop level 2 ARC devices
 */
void
spa_l2cache_drop(spa_t *spa)
{
	vdev_t *vd;
	int i;
	spa_aux_vdev_t *sav = &spa->spa_l2cache;

	for (i = 0; i < sav->sav_count; i++) {
		uint64_t pool;

		vd = sav->sav_vdevs[i];
		ASSERT(vd != NULL);

		if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
		    pool != 0ULL && l2arc_vdev_present(vd))
			l2arc_remove_vdev(vd);
		if (vd->vdev_isl2cache)
			spa_l2cache_remove(vd);
		vdev_clear_stats(vd);
		(void) vdev_close(vd);
	}
}

/*
 * Pool Creation
 */
int
spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
    const char *history_str, nvlist_t *zplprops)
{
	spa_t *spa;
	char *altroot = NULL;
	vdev_t *rvd;
	dsl_pool_t *dp;
	dmu_tx_t *tx;
	int error = 0;
	uint64_t txg = TXG_INITIAL;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;
	uint64_t version;

	/*
	 * If this pool already exists, return failure.
	 */
	mutex_enter(&spa_namespace_lock);
	if (spa_lookup(pool) != NULL) {
		mutex_exit(&spa_namespace_lock);
		return (EEXIST);
	}

	/*
	 * Allocate a new spa_t structure.
	 */
	(void) nvlist_lookup_string(props,
	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
	spa = spa_add(pool, altroot);
	spa_activate(spa, spa_mode_global);

	spa->spa_uberblock.ub_txg = txg - 1;

	if (props && (error = spa_prop_validate(spa, props))) {
		spa_deactivate(spa);
		spa_remove(spa);
		mutex_exit(&spa_namespace_lock);
		return (error);
	}

	/*
	 * Default to the current SPA_VERSION when the caller did not
	 * request a specific on-disk version.
	 */
	if (nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION),
	    &version) != 0)
		version = SPA_VERSION;
	ASSERT(version <= SPA_VERSION);
	spa->spa_uberblock.ub_version = version;
	spa->spa_ubsync = spa->spa_uberblock;

	/*
	 * Create "The Godfather" zio to hold all async IOs
	 */
	spa->spa_async_zio_root = zio_root(spa, NULL, NULL,
	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER);

	/*
	 * Create the root vdev.
	 */
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD);

	ASSERT(error != 0 || rvd != NULL);
	ASSERT(error != 0 || spa->spa_root_vdev == rvd);

	if (error == 0 && !zfs_allocatable_devs(nvroot))
		error = EINVAL;

	if (error == 0 &&
	    (error = vdev_create(rvd, txg, B_FALSE)) == 0 &&
	    (error = spa_validate_aux(spa, nvroot, txg,
	    VDEV_ALLOC_ADD)) == 0) {
		for (int c = 0; c < rvd->vdev_children; c++) {
			vdev_metaslab_set_size(rvd->vdev_child[c]);
			vdev_expand(rvd->vdev_child[c], txg);
		}
	}

	spa_config_exit(spa, SCL_ALL, FTAG);

	if (error != 0) {
		spa_unload(spa);
		spa_deactivate(spa);
		spa_remove(spa);
		mutex_exit(&spa_namespace_lock);
		return (error);
	}

	/*
	 * Get the list of spares, if specified.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) == 0) {
		VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME,
		    KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_spares(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_spares.sav_sync = B_TRUE;
	}

	/*
	 * Get the list of level 2 cache devices, if specified.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
	    &l2cache, &nl2cache) == 0) {
		VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
		    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_l2cache(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg);
	spa->spa_meta_objset = dp->dp_meta_objset;

	tx = dmu_tx_create_assigned(dp, txg);

	/*
	 * Create the pool config object.
	 */
	spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset,
	    DMU_OT_PACKED_NVLIST, SPA_CONFIG_BLOCKSIZE,
	    DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx);

	if (zap_add(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG,
	    sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) {
		cmn_err(CE_PANIC, "failed to add pool config");
	}

	/* Newly created pools with the right version are always deflated. */
	if (version >= SPA_VERSION_RAIDZ_DEFLATE) {
		spa->spa_deflate = TRUE;
		if (zap_add(spa->spa_meta_objset,
		    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
		    sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) {
			cmn_err(CE_PANIC, "failed to add deflate");
		}
	}

	/*
	 * Create the deferred-free bplist object.  Turn off compression
	 * because sync-to-convergence takes longer if the blocksize
	 * keeps changing.
	 */
	spa->spa_sync_bplist_obj = bplist_create(spa->spa_meta_objset,
	    1 << 14, tx);
	dmu_object_set_compress(spa->spa_meta_objset, spa->spa_sync_bplist_obj,
	    ZIO_COMPRESS_OFF, tx);

	if (zap_add(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST,
	    sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj, tx) != 0) {
		cmn_err(CE_PANIC, "failed to add bplist");
	}

	/*
	 * Create the pool's history object.
	 */
	if (version >= SPA_VERSION_ZPOOL_HISTORY)
		spa_history_create_obj(spa, tx);

	/*
	 * Set pool properties.
	 */
	spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS);
	spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION);
	spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE);
	spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND);
	if (props != NULL) {
		spa_configfile_set(spa, props, B_FALSE);
		spa_sync_props(spa, props, CRED(), tx);
	}

	dmu_tx_commit(tx);

	spa->spa_sync_on = B_TRUE;
	txg_sync_start(spa->spa_dsl_pool);

	/*
	 * We explicitly wait for the first transaction to complete so that our
	 * bean counters are appropriately updated.
	 */
	txg_wait_synced(spa->spa_dsl_pool, txg);

	spa_config_sync(spa, B_FALSE, B_TRUE);

	if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL)
		(void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE);
	spa_history_log_version(spa, LOG_POOL_CREATE);

	spa->spa_minref = refcount_count(&spa->spa_refcount);

	mutex_exit(&spa_namespace_lock);

	return (0);
}

#ifdef _KERNEL
/*
 * Get the root pool information from the root disk, then import the root pool
 * during the system boot up time.
23036423Sgw25295 */ 23049790SLin.Ling@Sun.COM extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **); 23059790SLin.Ling@Sun.COM 23069790SLin.Ling@Sun.COM static nvlist_t * 23079790SLin.Ling@Sun.COM spa_generate_rootconf(char *devpath, char *devid, uint64_t *guid) 23086423Sgw25295 { 23099790SLin.Ling@Sun.COM nvlist_t *config; 23106423Sgw25295 nvlist_t *nvtop, *nvroot; 23116423Sgw25295 uint64_t pgid; 23126423Sgw25295 23139790SLin.Ling@Sun.COM if (vdev_disk_read_rootlabel(devpath, devid, &config) != 0) 23149790SLin.Ling@Sun.COM return (NULL); 23159790SLin.Ling@Sun.COM 23166423Sgw25295 /* 23176423Sgw25295 * Add this top-level vdev to the child array. 23186423Sgw25295 */ 23199790SLin.Ling@Sun.COM VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 23209790SLin.Ling@Sun.COM &nvtop) == 0); 23219790SLin.Ling@Sun.COM VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, 23229790SLin.Ling@Sun.COM &pgid) == 0); 23239790SLin.Ling@Sun.COM VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, guid) == 0); 23246423Sgw25295 23256423Sgw25295 /* 23266423Sgw25295 * Put this pool's top-level vdevs into a root vdev. 23276423Sgw25295 */ 23286423Sgw25295 VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); 23299790SLin.Ling@Sun.COM VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, 23309790SLin.Ling@Sun.COM VDEV_TYPE_ROOT) == 0); 23316423Sgw25295 VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0); 23326423Sgw25295 VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0); 23336423Sgw25295 VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 23346423Sgw25295 &nvtop, 1) == 0); 23356423Sgw25295 23366423Sgw25295 /* 23376423Sgw25295 * Replace the existing vdev_tree with the new root vdev in 23386423Sgw25295 * this pool's configuration (remove the old, add the new). 
23396423Sgw25295 */ 23406423Sgw25295 VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); 23416423Sgw25295 nvlist_free(nvroot); 23429790SLin.Ling@Sun.COM return (config); 23436423Sgw25295 } 23446423Sgw25295 23456423Sgw25295 /* 23469790SLin.Ling@Sun.COM * Walk the vdev tree and see if we can find a device with "better" 23479790SLin.Ling@Sun.COM * configuration. A configuration is "better" if the label on that 23489790SLin.Ling@Sun.COM * device has a more recent txg. 23496423Sgw25295 */ 23509790SLin.Ling@Sun.COM static void 23519790SLin.Ling@Sun.COM spa_alt_rootvdev(vdev_t *vd, vdev_t **avd, uint64_t *txg) 23527147Staylor { 23539816SGeorge.Wilson@Sun.COM for (int c = 0; c < vd->vdev_children; c++) 23549790SLin.Ling@Sun.COM spa_alt_rootvdev(vd->vdev_child[c], avd, txg); 23559790SLin.Ling@Sun.COM 23569790SLin.Ling@Sun.COM if (vd->vdev_ops->vdev_op_leaf) { 23579790SLin.Ling@Sun.COM nvlist_t *label; 23589790SLin.Ling@Sun.COM uint64_t label_txg; 23599790SLin.Ling@Sun.COM 23609790SLin.Ling@Sun.COM if (vdev_disk_read_rootlabel(vd->vdev_physpath, vd->vdev_devid, 23619790SLin.Ling@Sun.COM &label) != 0) 23629790SLin.Ling@Sun.COM return; 23639790SLin.Ling@Sun.COM 23649790SLin.Ling@Sun.COM VERIFY(nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_TXG, 23659790SLin.Ling@Sun.COM &label_txg) == 0); 23669790SLin.Ling@Sun.COM 23679790SLin.Ling@Sun.COM /* 23689790SLin.Ling@Sun.COM * Do we have a better boot device? 23699790SLin.Ling@Sun.COM */ 23709790SLin.Ling@Sun.COM if (label_txg > *txg) { 23719790SLin.Ling@Sun.COM *txg = label_txg; 23729790SLin.Ling@Sun.COM *avd = vd; 23737147Staylor } 23749790SLin.Ling@Sun.COM nvlist_free(label); 23757147Staylor } 23767147Staylor } 23777147Staylor 23786423Sgw25295 /* 23796423Sgw25295 * Import a root pool. 23806423Sgw25295 * 23817147Staylor * For x86. devpath_list will consist of devid and/or physpath name of 23827147Staylor * the vdev (e.g. "id1,sd@SSEAGATE..." or "/pci@1f,0/ide@d/disk@0,0:a"). 
23837147Staylor * The GRUB "findroot" command will return the vdev we should boot. 23846423Sgw25295 * 23856423Sgw25295 * For Sparc, devpath_list consists the physpath name of the booting device 23866423Sgw25295 * no matter the rootpool is a single device pool or a mirrored pool. 23876423Sgw25295 * e.g. 23886423Sgw25295 * "/pci@1f,0/ide@d/disk@0,0:a" 23896423Sgw25295 */ 23906423Sgw25295 int 23917147Staylor spa_import_rootpool(char *devpath, char *devid) 23926423Sgw25295 { 23939790SLin.Ling@Sun.COM spa_t *spa; 23949790SLin.Ling@Sun.COM vdev_t *rvd, *bvd, *avd = NULL; 23959790SLin.Ling@Sun.COM nvlist_t *config, *nvtop; 23969790SLin.Ling@Sun.COM uint64_t guid, txg; 23976423Sgw25295 char *pname; 23986423Sgw25295 int error; 23996423Sgw25295 24006423Sgw25295 /* 24019790SLin.Ling@Sun.COM * Read the label from the boot device and generate a configuration. 24026423Sgw25295 */ 24039790SLin.Ling@Sun.COM if ((config = spa_generate_rootconf(devpath, devid, &guid)) == NULL) { 24049790SLin.Ling@Sun.COM cmn_err(CE_NOTE, "Can not read the pool label from '%s'", 24059790SLin.Ling@Sun.COM devpath); 24069790SLin.Ling@Sun.COM return (EIO); 24079790SLin.Ling@Sun.COM } 24089790SLin.Ling@Sun.COM 24099790SLin.Ling@Sun.COM VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, 24109790SLin.Ling@Sun.COM &pname) == 0); 24119790SLin.Ling@Sun.COM VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) == 0); 24126423Sgw25295 24139425SEric.Schrock@Sun.COM mutex_enter(&spa_namespace_lock); 24149425SEric.Schrock@Sun.COM if ((spa = spa_lookup(pname)) != NULL) { 24159425SEric.Schrock@Sun.COM /* 24169425SEric.Schrock@Sun.COM * Remove the existing root pool from the namespace so that we 24179425SEric.Schrock@Sun.COM * can replace it with the correct config we just read in. 
24189425SEric.Schrock@Sun.COM */ 24199425SEric.Schrock@Sun.COM spa_remove(spa); 24209425SEric.Schrock@Sun.COM } 24219425SEric.Schrock@Sun.COM 24229425SEric.Schrock@Sun.COM spa = spa_add(pname, NULL); 24239425SEric.Schrock@Sun.COM spa->spa_is_root = B_TRUE; 242410100SLin.Ling@Sun.COM spa->spa_load_verbatim = B_TRUE; 24259790SLin.Ling@Sun.COM 24269790SLin.Ling@Sun.COM /* 24279790SLin.Ling@Sun.COM * Build up a vdev tree based on the boot device's label config. 24289790SLin.Ling@Sun.COM */ 24299790SLin.Ling@Sun.COM VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 24309790SLin.Ling@Sun.COM &nvtop) == 0); 24319790SLin.Ling@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 24329790SLin.Ling@Sun.COM error = spa_config_parse(spa, &rvd, nvtop, NULL, 0, 24339790SLin.Ling@Sun.COM VDEV_ALLOC_ROOTPOOL); 24349790SLin.Ling@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 24359790SLin.Ling@Sun.COM if (error) { 24369790SLin.Ling@Sun.COM mutex_exit(&spa_namespace_lock); 24379790SLin.Ling@Sun.COM nvlist_free(config); 24389790SLin.Ling@Sun.COM cmn_err(CE_NOTE, "Can not parse the config for pool '%s'", 24399790SLin.Ling@Sun.COM pname); 24409790SLin.Ling@Sun.COM return (error); 24419790SLin.Ling@Sun.COM } 24429790SLin.Ling@Sun.COM 24439790SLin.Ling@Sun.COM /* 24449790SLin.Ling@Sun.COM * Get the boot vdev. 24459790SLin.Ling@Sun.COM */ 24469790SLin.Ling@Sun.COM if ((bvd = vdev_lookup_by_guid(rvd, guid)) == NULL) { 24479790SLin.Ling@Sun.COM cmn_err(CE_NOTE, "Can not find the boot vdev for guid %llu", 24489790SLin.Ling@Sun.COM (u_longlong_t)guid); 24499790SLin.Ling@Sun.COM error = ENOENT; 24509790SLin.Ling@Sun.COM goto out; 24519790SLin.Ling@Sun.COM } 24529790SLin.Ling@Sun.COM 24539790SLin.Ling@Sun.COM /* 24549790SLin.Ling@Sun.COM * Determine if there is a better boot device. 
24559790SLin.Ling@Sun.COM */ 24569790SLin.Ling@Sun.COM avd = bvd; 24579790SLin.Ling@Sun.COM spa_alt_rootvdev(rvd, &avd, &txg); 24589790SLin.Ling@Sun.COM if (avd != bvd) { 24599790SLin.Ling@Sun.COM cmn_err(CE_NOTE, "The boot device is 'degraded'. Please " 24609790SLin.Ling@Sun.COM "try booting from '%s'", avd->vdev_path); 24619790SLin.Ling@Sun.COM error = EINVAL; 24629790SLin.Ling@Sun.COM goto out; 24639790SLin.Ling@Sun.COM } 24649790SLin.Ling@Sun.COM 24659790SLin.Ling@Sun.COM /* 24669790SLin.Ling@Sun.COM * If the boot device is part of a spare vdev then ensure that 24679790SLin.Ling@Sun.COM * we're booting off the active spare. 24689790SLin.Ling@Sun.COM */ 24699790SLin.Ling@Sun.COM if (bvd->vdev_parent->vdev_ops == &vdev_spare_ops && 24709790SLin.Ling@Sun.COM !bvd->vdev_isspare) { 24719790SLin.Ling@Sun.COM cmn_err(CE_NOTE, "The boot device is currently spared. Please " 24729790SLin.Ling@Sun.COM "try booting from '%s'", 24739790SLin.Ling@Sun.COM bvd->vdev_parent->vdev_child[1]->vdev_path); 24749790SLin.Ling@Sun.COM error = EINVAL; 24759790SLin.Ling@Sun.COM goto out; 24769790SLin.Ling@Sun.COM } 24779790SLin.Ling@Sun.COM 24789790SLin.Ling@Sun.COM VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0); 24799790SLin.Ling@Sun.COM error = 0; 24809946SMark.Musante@Sun.COM spa_history_log_version(spa, LOG_POOL_IMPORT); 24819790SLin.Ling@Sun.COM out: 24829790SLin.Ling@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 24839790SLin.Ling@Sun.COM vdev_free(rvd); 24849790SLin.Ling@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 24859425SEric.Schrock@Sun.COM mutex_exit(&spa_namespace_lock); 24866423Sgw25295 24879790SLin.Ling@Sun.COM nvlist_free(config); 24886423Sgw25295 return (error); 24896423Sgw25295 } 24909790SLin.Ling@Sun.COM 24916423Sgw25295 #endif 24926423Sgw25295 24936423Sgw25295 /* 24949425SEric.Schrock@Sun.COM * Take a pool and insert it into the namespace as if it had been loaded at 24959425SEric.Schrock@Sun.COM * boot. 
24969425SEric.Schrock@Sun.COM */ 24979425SEric.Schrock@Sun.COM int 24989425SEric.Schrock@Sun.COM spa_import_verbatim(const char *pool, nvlist_t *config, nvlist_t *props) 24999425SEric.Schrock@Sun.COM { 25009425SEric.Schrock@Sun.COM spa_t *spa; 25019425SEric.Schrock@Sun.COM char *altroot = NULL; 25029425SEric.Schrock@Sun.COM 25039425SEric.Schrock@Sun.COM mutex_enter(&spa_namespace_lock); 25049425SEric.Schrock@Sun.COM if (spa_lookup(pool) != NULL) { 25059425SEric.Schrock@Sun.COM mutex_exit(&spa_namespace_lock); 25069425SEric.Schrock@Sun.COM return (EEXIST); 25079425SEric.Schrock@Sun.COM } 25089425SEric.Schrock@Sun.COM 25099425SEric.Schrock@Sun.COM (void) nvlist_lookup_string(props, 25109425SEric.Schrock@Sun.COM zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); 25119425SEric.Schrock@Sun.COM spa = spa_add(pool, altroot); 25129425SEric.Schrock@Sun.COM 251310100SLin.Ling@Sun.COM spa->spa_load_verbatim = B_TRUE; 251410000SVictor.Latushkin@Sun.COM 25159425SEric.Schrock@Sun.COM VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0); 25169425SEric.Schrock@Sun.COM 25179425SEric.Schrock@Sun.COM if (props != NULL) 25189425SEric.Schrock@Sun.COM spa_configfile_set(spa, props, B_FALSE); 25199425SEric.Schrock@Sun.COM 25209425SEric.Schrock@Sun.COM spa_config_sync(spa, B_FALSE, B_TRUE); 25219425SEric.Schrock@Sun.COM 25229425SEric.Schrock@Sun.COM mutex_exit(&spa_namespace_lock); 25239946SMark.Musante@Sun.COM spa_history_log_version(spa, LOG_POOL_IMPORT); 25249425SEric.Schrock@Sun.COM 25259425SEric.Schrock@Sun.COM return (0); 25269425SEric.Schrock@Sun.COM } 25279425SEric.Schrock@Sun.COM 25289425SEric.Schrock@Sun.COM /* 25296423Sgw25295 * Import a non-root pool into the system. 
25306423Sgw25295 */ 25316423Sgw25295 int 25326423Sgw25295 spa_import(const char *pool, nvlist_t *config, nvlist_t *props) 25336423Sgw25295 { 25349425SEric.Schrock@Sun.COM spa_t *spa; 25359425SEric.Schrock@Sun.COM char *altroot = NULL; 25369425SEric.Schrock@Sun.COM int error; 25379425SEric.Schrock@Sun.COM nvlist_t *nvroot; 25389425SEric.Schrock@Sun.COM nvlist_t **spares, **l2cache; 25399425SEric.Schrock@Sun.COM uint_t nspares, nl2cache; 25409425SEric.Schrock@Sun.COM 25419425SEric.Schrock@Sun.COM /* 25429425SEric.Schrock@Sun.COM * If a pool with this name exists, return failure. 25439425SEric.Schrock@Sun.COM */ 25449425SEric.Schrock@Sun.COM mutex_enter(&spa_namespace_lock); 25459425SEric.Schrock@Sun.COM if ((spa = spa_lookup(pool)) != NULL) { 25469425SEric.Schrock@Sun.COM mutex_exit(&spa_namespace_lock); 25479425SEric.Schrock@Sun.COM return (EEXIST); 25489425SEric.Schrock@Sun.COM } 25499425SEric.Schrock@Sun.COM 25509425SEric.Schrock@Sun.COM /* 25519425SEric.Schrock@Sun.COM * Create and initialize the spa structure. 25529425SEric.Schrock@Sun.COM */ 25539425SEric.Schrock@Sun.COM (void) nvlist_lookup_string(props, 25549425SEric.Schrock@Sun.COM zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); 25559425SEric.Schrock@Sun.COM spa = spa_add(pool, altroot); 25569425SEric.Schrock@Sun.COM spa_activate(spa, spa_mode_global); 25579425SEric.Schrock@Sun.COM 25589425SEric.Schrock@Sun.COM /* 25599630SJeff.Bonwick@Sun.COM * Don't start async tasks until we know everything is healthy. 25609630SJeff.Bonwick@Sun.COM */ 25619630SJeff.Bonwick@Sun.COM spa_async_suspend(spa); 25629630SJeff.Bonwick@Sun.COM 25639630SJeff.Bonwick@Sun.COM /* 25649425SEric.Schrock@Sun.COM * Pass off the heavy lifting to spa_load(). Pass TRUE for mosconfig 25659425SEric.Schrock@Sun.COM * because the user-supplied config is actually the one to trust when 25669425SEric.Schrock@Sun.COM * doing an import. 
25679425SEric.Schrock@Sun.COM */ 25689425SEric.Schrock@Sun.COM error = spa_load(spa, config, SPA_LOAD_IMPORT, B_TRUE); 25699425SEric.Schrock@Sun.COM 25709425SEric.Schrock@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 25719425SEric.Schrock@Sun.COM /* 25729425SEric.Schrock@Sun.COM * Toss any existing sparelist, as it doesn't have any validity 25739425SEric.Schrock@Sun.COM * anymore, and conflicts with spa_has_spare(). 25749425SEric.Schrock@Sun.COM */ 25759425SEric.Schrock@Sun.COM if (spa->spa_spares.sav_config) { 25769425SEric.Schrock@Sun.COM nvlist_free(spa->spa_spares.sav_config); 25779425SEric.Schrock@Sun.COM spa->spa_spares.sav_config = NULL; 25789425SEric.Schrock@Sun.COM spa_load_spares(spa); 25799425SEric.Schrock@Sun.COM } 25809425SEric.Schrock@Sun.COM if (spa->spa_l2cache.sav_config) { 25819425SEric.Schrock@Sun.COM nvlist_free(spa->spa_l2cache.sav_config); 25829425SEric.Schrock@Sun.COM spa->spa_l2cache.sav_config = NULL; 25839425SEric.Schrock@Sun.COM spa_load_l2cache(spa); 25849425SEric.Schrock@Sun.COM } 25859425SEric.Schrock@Sun.COM 25869425SEric.Schrock@Sun.COM VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 25879425SEric.Schrock@Sun.COM &nvroot) == 0); 25889425SEric.Schrock@Sun.COM if (error == 0) 25899425SEric.Schrock@Sun.COM error = spa_validate_aux(spa, nvroot, -1ULL, 25909425SEric.Schrock@Sun.COM VDEV_ALLOC_SPARE); 25919425SEric.Schrock@Sun.COM if (error == 0) 25929425SEric.Schrock@Sun.COM error = spa_validate_aux(spa, nvroot, -1ULL, 25939425SEric.Schrock@Sun.COM VDEV_ALLOC_L2CACHE); 25949425SEric.Schrock@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 25959425SEric.Schrock@Sun.COM 25969425SEric.Schrock@Sun.COM if (props != NULL) 25979425SEric.Schrock@Sun.COM spa_configfile_set(spa, props, B_FALSE); 25989425SEric.Schrock@Sun.COM 25999425SEric.Schrock@Sun.COM if (error != 0 || (props && spa_writeable(spa) && 26009425SEric.Schrock@Sun.COM (error = spa_prop_set(spa, props)))) { 26019425SEric.Schrock@Sun.COM spa_unload(spa); 
26029425SEric.Schrock@Sun.COM spa_deactivate(spa); 26039425SEric.Schrock@Sun.COM spa_remove(spa); 26049425SEric.Schrock@Sun.COM mutex_exit(&spa_namespace_lock); 26059425SEric.Schrock@Sun.COM return (error); 26069425SEric.Schrock@Sun.COM } 26079425SEric.Schrock@Sun.COM 26089630SJeff.Bonwick@Sun.COM spa_async_resume(spa); 26099630SJeff.Bonwick@Sun.COM 26109425SEric.Schrock@Sun.COM /* 26119425SEric.Schrock@Sun.COM * Override any spares and level 2 cache devices as specified by 26129425SEric.Schrock@Sun.COM * the user, as these may have correct device names/devids, etc. 26139425SEric.Schrock@Sun.COM */ 26149425SEric.Schrock@Sun.COM if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 26159425SEric.Schrock@Sun.COM &spares, &nspares) == 0) { 26169425SEric.Schrock@Sun.COM if (spa->spa_spares.sav_config) 26179425SEric.Schrock@Sun.COM VERIFY(nvlist_remove(spa->spa_spares.sav_config, 26189425SEric.Schrock@Sun.COM ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0); 26199425SEric.Schrock@Sun.COM else 26209425SEric.Schrock@Sun.COM VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, 26219425SEric.Schrock@Sun.COM NV_UNIQUE_NAME, KM_SLEEP) == 0); 26229425SEric.Schrock@Sun.COM VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, 26239425SEric.Schrock@Sun.COM ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 26249425SEric.Schrock@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 26259425SEric.Schrock@Sun.COM spa_load_spares(spa); 26269425SEric.Schrock@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 26279425SEric.Schrock@Sun.COM spa->spa_spares.sav_sync = B_TRUE; 26289425SEric.Schrock@Sun.COM } 26299425SEric.Schrock@Sun.COM if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, 26309425SEric.Schrock@Sun.COM &l2cache, &nl2cache) == 0) { 26319425SEric.Schrock@Sun.COM if (spa->spa_l2cache.sav_config) 26329425SEric.Schrock@Sun.COM VERIFY(nvlist_remove(spa->spa_l2cache.sav_config, 26339425SEric.Schrock@Sun.COM ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0); 
26349425SEric.Schrock@Sun.COM else 26359425SEric.Schrock@Sun.COM VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, 26369425SEric.Schrock@Sun.COM NV_UNIQUE_NAME, KM_SLEEP) == 0); 26379425SEric.Schrock@Sun.COM VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, 26389425SEric.Schrock@Sun.COM ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); 26399425SEric.Schrock@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 26409425SEric.Schrock@Sun.COM spa_load_l2cache(spa); 26419425SEric.Schrock@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 26429425SEric.Schrock@Sun.COM spa->spa_l2cache.sav_sync = B_TRUE; 26439425SEric.Schrock@Sun.COM } 26449425SEric.Schrock@Sun.COM 26459425SEric.Schrock@Sun.COM if (spa_writeable(spa)) { 26469425SEric.Schrock@Sun.COM /* 26479425SEric.Schrock@Sun.COM * Update the config cache to include the newly-imported pool. 26489425SEric.Schrock@Sun.COM */ 264910100SLin.Ling@Sun.COM spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); 26509425SEric.Schrock@Sun.COM } 26519425SEric.Schrock@Sun.COM 26529816SGeorge.Wilson@Sun.COM /* 26539816SGeorge.Wilson@Sun.COM * It's possible that the pool was expanded while it was exported. 26549816SGeorge.Wilson@Sun.COM * We kick off an async task to handle this for us. 26559816SGeorge.Wilson@Sun.COM */ 26569816SGeorge.Wilson@Sun.COM spa_async_request(spa, SPA_ASYNC_AUTOEXPAND); 26579816SGeorge.Wilson@Sun.COM 26589425SEric.Schrock@Sun.COM mutex_exit(&spa_namespace_lock); 26599946SMark.Musante@Sun.COM spa_history_log_version(spa, LOG_POOL_IMPORT); 26609425SEric.Schrock@Sun.COM 26619425SEric.Schrock@Sun.COM return (0); 26626643Seschrock } 26636643Seschrock 26646643Seschrock 2665789Sahrens /* 2666789Sahrens * This (illegal) pool name is used when temporarily importing a spa_t in order 2667789Sahrens * to get the vdev stats associated with the imported devices. 
2668789Sahrens */ 2669789Sahrens #define TRYIMPORT_NAME "$import" 2670789Sahrens 2671789Sahrens nvlist_t * 2672789Sahrens spa_tryimport(nvlist_t *tryconfig) 2673789Sahrens { 2674789Sahrens nvlist_t *config = NULL; 2675789Sahrens char *poolname; 2676789Sahrens spa_t *spa; 2677789Sahrens uint64_t state; 26788680SLin.Ling@Sun.COM int error; 2679789Sahrens 2680789Sahrens if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname)) 2681789Sahrens return (NULL); 2682789Sahrens 2683789Sahrens if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state)) 2684789Sahrens return (NULL); 2685789Sahrens 26861635Sbonwick /* 26871635Sbonwick * Create and initialize the spa structure. 26881635Sbonwick */ 2689789Sahrens mutex_enter(&spa_namespace_lock); 26901635Sbonwick spa = spa_add(TRYIMPORT_NAME, NULL); 26918241SJeff.Bonwick@Sun.COM spa_activate(spa, FREAD); 2692789Sahrens 2693789Sahrens /* 26941635Sbonwick * Pass off the heavy lifting to spa_load(). 26951732Sbonwick * Pass TRUE for mosconfig because the user-supplied config 26961732Sbonwick * is actually the one to trust when doing an import. 2697789Sahrens */ 26988680SLin.Ling@Sun.COM error = spa_load(spa, tryconfig, SPA_LOAD_TRYIMPORT, B_TRUE); 2699789Sahrens 2700789Sahrens /* 2701789Sahrens * If 'tryconfig' was at least parsable, return the current config. 
2702789Sahrens */ 2703789Sahrens if (spa->spa_root_vdev != NULL) { 2704789Sahrens config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); 2705789Sahrens VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, 2706789Sahrens poolname) == 0); 2707789Sahrens VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, 2708789Sahrens state) == 0); 27093975Sek110237 VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP, 27103975Sek110237 spa->spa_uberblock.ub_timestamp) == 0); 27112082Seschrock 27122082Seschrock /* 27136423Sgw25295 * If the bootfs property exists on this pool then we 27146423Sgw25295 * copy it out so that external consumers can tell which 27156423Sgw25295 * pools are bootable. 27166423Sgw25295 */ 27178680SLin.Ling@Sun.COM if ((!error || error == EEXIST) && spa->spa_bootfs) { 27186423Sgw25295 char *tmpname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 27196423Sgw25295 27206423Sgw25295 /* 27216423Sgw25295 * We have to play games with the name since the 27226423Sgw25295 * pool was opened as TRYIMPORT_NAME. 27236423Sgw25295 */ 27247754SJeff.Bonwick@Sun.COM if (dsl_dsobj_to_dsname(spa_name(spa), 27256423Sgw25295 spa->spa_bootfs, tmpname) == 0) { 27266423Sgw25295 char *cp; 27276423Sgw25295 char *dsname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 27286423Sgw25295 27296423Sgw25295 cp = strchr(tmpname, '/'); 27306423Sgw25295 if (cp == NULL) { 27316423Sgw25295 (void) strlcpy(dsname, tmpname, 27326423Sgw25295 MAXPATHLEN); 27336423Sgw25295 } else { 27346423Sgw25295 (void) snprintf(dsname, MAXPATHLEN, 27356423Sgw25295 "%s/%s", poolname, ++cp); 27366423Sgw25295 } 27376423Sgw25295 VERIFY(nvlist_add_string(config, 27386423Sgw25295 ZPOOL_CONFIG_BOOTFS, dsname) == 0); 27396423Sgw25295 kmem_free(dsname, MAXPATHLEN); 27406423Sgw25295 } 27416423Sgw25295 kmem_free(tmpname, MAXPATHLEN); 27426423Sgw25295 } 27436423Sgw25295 27446423Sgw25295 /* 27455450Sbrendan * Add the list of hot spares and level 2 cache devices. 
27462082Seschrock */ 27479425SEric.Schrock@Sun.COM spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 27482082Seschrock spa_add_spares(spa, config); 27495450Sbrendan spa_add_l2cache(spa, config); 27509425SEric.Schrock@Sun.COM spa_config_exit(spa, SCL_CONFIG, FTAG); 2751789Sahrens } 2752789Sahrens 2753789Sahrens spa_unload(spa); 2754789Sahrens spa_deactivate(spa); 2755789Sahrens spa_remove(spa); 2756789Sahrens mutex_exit(&spa_namespace_lock); 2757789Sahrens 2758789Sahrens return (config); 2759789Sahrens } 2760789Sahrens 2761789Sahrens /* 2762789Sahrens * Pool export/destroy 2763789Sahrens * 2764789Sahrens * The act of destroying or exporting a pool is very simple. We make sure there 2765789Sahrens * is no more pending I/O and any references to the pool are gone. Then, we 2766789Sahrens * update the pool state and sync all the labels to disk, removing the 27678211SGeorge.Wilson@Sun.COM * configuration from the cache afterwards. If the 'hardforce' flag is set, then 27688211SGeorge.Wilson@Sun.COM * we don't sync the labels or remove the configuration cache. 2769789Sahrens */ 2770789Sahrens static int 27717214Slling spa_export_common(char *pool, int new_state, nvlist_t **oldconfig, 27728211SGeorge.Wilson@Sun.COM boolean_t force, boolean_t hardforce) 2773789Sahrens { 2774789Sahrens spa_t *spa; 2775789Sahrens 27761775Sbillm if (oldconfig) 27771775Sbillm *oldconfig = NULL; 27781775Sbillm 27798241SJeff.Bonwick@Sun.COM if (!(spa_mode_global & FWRITE)) 2780789Sahrens return (EROFS); 2781789Sahrens 2782789Sahrens mutex_enter(&spa_namespace_lock); 2783789Sahrens if ((spa = spa_lookup(pool)) == NULL) { 2784789Sahrens mutex_exit(&spa_namespace_lock); 2785789Sahrens return (ENOENT); 2786789Sahrens } 2787789Sahrens 2788789Sahrens /* 27891544Seschrock * Put a hold on the pool, drop the namespace lock, stop async tasks, 27901544Seschrock * reacquire the namespace lock, and see if we can export. 
27911544Seschrock */ 27921544Seschrock spa_open_ref(spa, FTAG); 27931544Seschrock mutex_exit(&spa_namespace_lock); 27941544Seschrock spa_async_suspend(spa); 27951544Seschrock mutex_enter(&spa_namespace_lock); 27961544Seschrock spa_close(spa, FTAG); 27971544Seschrock 27981544Seschrock /* 2799789Sahrens * The pool will be in core if it's openable, 2800789Sahrens * in which case we can modify its state. 2801789Sahrens */ 2802789Sahrens if (spa->spa_state != POOL_STATE_UNINITIALIZED && spa->spa_sync_on) { 2803789Sahrens /* 2804789Sahrens * Objsets may be open only because they're dirty, so we 2805789Sahrens * have to force it to sync before checking spa_refcnt. 2806789Sahrens */ 2807789Sahrens txg_wait_synced(spa->spa_dsl_pool, 0); 2808789Sahrens 28091544Seschrock /* 28101544Seschrock * A pool cannot be exported or destroyed if there are active 28111544Seschrock * references. If we are resetting a pool, allow references by 28121544Seschrock * fault injection handlers. 28131544Seschrock */ 28141544Seschrock if (!spa_refcount_zero(spa) || 28151544Seschrock (spa->spa_inject_ref != 0 && 28161544Seschrock new_state != POOL_STATE_UNINITIALIZED)) { 28171544Seschrock spa_async_resume(spa); 2818789Sahrens mutex_exit(&spa_namespace_lock); 2819789Sahrens return (EBUSY); 2820789Sahrens } 2821789Sahrens 2822789Sahrens /* 28237214Slling * A pool cannot be exported if it has an active shared spare. 28247214Slling * This is to prevent other pools stealing the active spare 28257214Slling * from an exported pool. At user's own will, such pool can 28267214Slling * be forcedly exported. 28277214Slling */ 28287214Slling if (!force && new_state == POOL_STATE_EXPORTED && 28297214Slling spa_has_active_shared_spare(spa)) { 28307214Slling spa_async_resume(spa); 28317214Slling mutex_exit(&spa_namespace_lock); 28327214Slling return (EXDEV); 28337214Slling } 28347214Slling 28357214Slling /* 2836789Sahrens * We want this to be reflected on every label, 2837789Sahrens * so mark them all dirty. 
spa_unload() will do the 2838789Sahrens * final sync that pushes these changes out. 2839789Sahrens */ 28408211SGeorge.Wilson@Sun.COM if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) { 28417754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 28421544Seschrock spa->spa_state = new_state; 28431635Sbonwick spa->spa_final_txg = spa_last_synced_txg(spa) + 1; 28441544Seschrock vdev_config_dirty(spa->spa_root_vdev); 28457754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 28461544Seschrock } 2847789Sahrens } 2848789Sahrens 28494451Seschrock spa_event_notify(spa, NULL, ESC_ZFS_POOL_DESTROY); 28504451Seschrock 2851789Sahrens if (spa->spa_state != POOL_STATE_UNINITIALIZED) { 2852789Sahrens spa_unload(spa); 2853789Sahrens spa_deactivate(spa); 2854789Sahrens } 2855789Sahrens 28561775Sbillm if (oldconfig && spa->spa_config) 28571775Sbillm VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0); 28581775Sbillm 28591544Seschrock if (new_state != POOL_STATE_UNINITIALIZED) { 28608211SGeorge.Wilson@Sun.COM if (!hardforce) 28618211SGeorge.Wilson@Sun.COM spa_config_sync(spa, B_TRUE, B_TRUE); 28621544Seschrock spa_remove(spa); 28631544Seschrock } 2864789Sahrens mutex_exit(&spa_namespace_lock); 2865789Sahrens 2866789Sahrens return (0); 2867789Sahrens } 2868789Sahrens 2869789Sahrens /* 2870789Sahrens * Destroy a storage pool. 2871789Sahrens */ 2872789Sahrens int 2873789Sahrens spa_destroy(char *pool) 2874789Sahrens { 28758211SGeorge.Wilson@Sun.COM return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL, 28768211SGeorge.Wilson@Sun.COM B_FALSE, B_FALSE)); 2877789Sahrens } 2878789Sahrens 2879789Sahrens /* 2880789Sahrens * Export a storage pool. 
2881789Sahrens */ 2882789Sahrens int 28838211SGeorge.Wilson@Sun.COM spa_export(char *pool, nvlist_t **oldconfig, boolean_t force, 28848211SGeorge.Wilson@Sun.COM boolean_t hardforce) 2885789Sahrens { 28868211SGeorge.Wilson@Sun.COM return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig, 28878211SGeorge.Wilson@Sun.COM force, hardforce)); 2888789Sahrens } 2889789Sahrens 2890789Sahrens /* 28911544Seschrock * Similar to spa_export(), this unloads the spa_t without actually removing it 28921544Seschrock * from the namespace in any way. 28931544Seschrock */ 28941544Seschrock int 28951544Seschrock spa_reset(char *pool) 28961544Seschrock { 28977214Slling return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL, 28988211SGeorge.Wilson@Sun.COM B_FALSE, B_FALSE)); 28991544Seschrock } 29001544Seschrock 29011544Seschrock /* 2902789Sahrens * ========================================================================== 2903789Sahrens * Device manipulation 2904789Sahrens * ========================================================================== 2905789Sahrens */ 2906789Sahrens 2907789Sahrens /* 29084527Sperrin * Add a device to a storage pool. 
 */
int
spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
{
	uint64_t txg;
	int error;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *vd, *tvd;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;

	/*
	 * Take the vdev lock; every exit path below must go through
	 * spa_vdev_exit() so that the lock is dropped and the config synced.
	 */
	txg = spa_vdev_enter(spa);

	if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0,
	    VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, NULL, txg, error));

	spa->spa_pending_vdev = vd;	/* spa_vdev_exit() will clear this */

	/*
	 * nvroot may optionally carry spare and l2cache child lists;
	 * their absence is not an error.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
	    &nspares) != 0)
		nspares = 0;

	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache,
	    &nl2cache) != 0)
		nl2cache = 0;

	/*
	 * Reject a request that adds nothing at all.
	 */
	if (vd->vdev_children == 0 && nspares == 0 && nl2cache == 0)
		return (spa_vdev_exit(spa, vd, txg, EINVAL));

	if (vd->vdev_children != 0 &&
	    (error = vdev_create(vd, txg, B_FALSE)) != 0)
		return (spa_vdev_exit(spa, vd, txg, error));

	/*
	 * We must validate the spares and l2cache devices after checking the
	 * children.  Otherwise, vdev_inuse() will blindly overwrite the spare.
	 */
	if ((error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, vd, txg, error));

	/*
	 * Transfer each new top-level vdev from vd to rvd.
	 */
	for (int c = 0; c < vd->vdev_children; c++) {
		tvd = vd->vdev_child[c];
		vdev_remove_child(vd, tvd);
		tvd->vdev_id = rvd->vdev_children;
		vdev_add_child(rvd, tvd);
		vdev_config_dirty(tvd);
	}

	if (nspares != 0) {
		spa_set_aux_vdevs(&spa->spa_spares, spares, nspares,
		    ZPOOL_CONFIG_SPARES);
		spa_load_spares(spa);
		spa->spa_spares.sav_sync = B_TRUE;
	}

	if (nl2cache != 0) {
		spa_set_aux_vdevs(&spa->spa_l2cache, l2cache, nl2cache,
		    ZPOOL_CONFIG_L2CACHE);
		spa_load_l2cache(spa);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	/*
	 * We have to be careful when adding new vdevs to an existing pool.
	 * If other threads start allocating from these vdevs before we
	 * sync the config cache, and we lose power, then upon reboot we may
	 * fail to open the pool because there are DVAs that the config cache
	 * can't translate.  Therefore, we first add the vdevs without
	 * initializing metaslabs; sync the config cache (via spa_vdev_exit());
	 * and then let spa_config_update() initialize the new metaslabs.
	 *
	 * spa_load() checks for added-but-not-initialized vdevs, so that
	 * if we lose power at any point in this sequence, the remaining
	 * steps will be completed the next time we load the pool.
	 */
	(void) spa_vdev_exit(spa, vd, txg, 0);

	mutex_enter(&spa_namespace_lock);
	spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
	mutex_exit(&spa_namespace_lock);

	return (0);
}

/*
 * Attach a device to a mirror.  The arguments are the path to any device
 * in the mirror, and the nvroot for the new device.  If the path specifies
 * a device that is not mirrored, we automatically insert the mirror vdev.
 *
 * If 'replacing' is specified, the new device is intended to replace the
 * existing device; in this case the two devices are made into their own
 * mirror using the 'replacing' vdev, which is functionally identical to
 * the mirror vdev (it actually reuses all the same ops) but has a few
 * extra rules: you can't attach to it after it's been created, and upon
 * completion of resilvering, the first disk (the one being replaced)
 * is automatically detached.
 */
int
spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
{
	uint64_t txg, open_txg;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd;
	vdev_ops_t *pvops;
	char *oldvdpath, *newvdpath;
	int newvd_isspare;
	int error;

	txg = spa_vdev_enter(spa);

	/*
	 * Find the device we are attaching to / replacing; it must be a leaf.
	 */
	oldvd = spa_lookup_by_guid(spa, guid, B_FALSE);

	if (oldvd == NULL)
		return (spa_vdev_exit(spa, NULL, txg, ENODEV));

	if (!oldvd->vdev_ops->vdev_op_leaf)
		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));

	pvd = oldvd->vdev_parent;

	/*
	 * Parse the caller-supplied config; it must describe exactly one
	 * new leaf device.
	 */
	if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0,
	    VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, NULL, txg, EINVAL));

	if (newrootvd->vdev_children != 1)
		return (spa_vdev_exit(spa, newrootvd, txg, EINVAL));

	newvd = newrootvd->vdev_child[0];

	if (!newvd->vdev_ops->vdev_op_leaf)
		return (spa_vdev_exit(spa, newrootvd, txg, EINVAL));

	if ((error = vdev_create(newrootvd, txg, replacing)) != 0)
		return (spa_vdev_exit(spa, newrootvd, txg, error));

	/*
	 * Spares can't replace logs
	 */
	if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare)
		return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));

	if (!replacing) {
		/*
		 * For attach, the only allowable parent is a mirror or the root
		 * vdev.
		 */
		if (pvd->vdev_ops != &vdev_mirror_ops &&
		    pvd->vdev_ops != &vdev_root_ops)
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));

		pvops = &vdev_mirror_ops;
	} else {
		/*
		 * Active hot spares can only be replaced by inactive hot
		 * spares.
		 */
		if (pvd->vdev_ops == &vdev_spare_ops &&
		    pvd->vdev_child[1] == oldvd &&
		    !spa_has_spare(spa, newvd->vdev_guid))
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));

		/*
		 * If the source is a hot spare, and the parent isn't already a
		 * spare, then we want to create a new hot spare.  Otherwise, we
		 * want to create a replacing vdev.  The user is not allowed to
		 * attach to a spared vdev child unless the 'isspare' state is
		 * the same (spare replaces spare, non-spare replaces
		 * non-spare).
		 */
		if (pvd->vdev_ops == &vdev_replacing_ops)
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
		else if (pvd->vdev_ops == &vdev_spare_ops &&
		    newvd->vdev_isspare != oldvd->vdev_isspare)
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
		else if (pvd->vdev_ops != &vdev_spare_ops &&
		    newvd->vdev_isspare)
			pvops = &vdev_spare_ops;
		else
			pvops = &vdev_replacing_ops;
	}

	/*
	 * Make sure the new device is big enough.
	 */
	if (newvd->vdev_asize < vdev_get_min_asize(oldvd))
		return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW));

	/*
	 * The new device cannot have a higher alignment requirement
	 * than the top-level vdev.
	 */
	if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift)
		return (spa_vdev_exit(spa, newrootvd, txg, EDOM));

	/*
	 * If this is an in-place replacement, update oldvd's path and devid
	 * to make it distinguishable from newvd, and unopenable from now on.
	 *
	 * NOTE(review): this assumes both oldvd->vdev_path and
	 * newvd->vdev_path are non-NULL at this point -- confirm for device
	 * types that may lack a path.
	 */
	if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) {
		spa_strfree(oldvd->vdev_path);
		/* "+ 5" covers the appended "/old" plus the NUL terminator. */
		oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5,
		    KM_SLEEP);
		(void) sprintf(oldvd->vdev_path, "%s/%s",
		    newvd->vdev_path, "old");
		if (oldvd->vdev_devid != NULL) {
			spa_strfree(oldvd->vdev_devid);
			oldvd->vdev_devid = NULL;
		}
	}

	/*
	 * If the parent is not a mirror, or if we're replacing, insert the new
	 * mirror/replacing/spare vdev above oldvd.
	 */
	if (pvd->vdev_ops != pvops)
		pvd = vdev_add_parent(oldvd, pvops);

	ASSERT(pvd->vdev_top->vdev_parent == rvd);
	ASSERT(pvd->vdev_ops == pvops);
	ASSERT(oldvd->vdev_parent == pvd);

	/*
	 * Extract the new device from its root and add it to pvd.
	 */
	vdev_remove_child(newrootvd, newvd);
	newvd->vdev_id = pvd->vdev_children;
	vdev_add_child(pvd, newvd);

	tvd = newvd->vdev_top;
	ASSERT(pvd->vdev_top == tvd);
	ASSERT(tvd->vdev_parent == rvd);

	vdev_config_dirty(tvd);

	/*
	 * Set newvd's DTL to [TXG_INITIAL, open_txg].  It will propagate
	 * upward when spa_vdev_exit() calls vdev_dtl_reassess().
	 */
	open_txg = txg + TXG_CONCURRENT_STATES - 1;

	vdev_dtl_dirty(newvd, DTL_MISSING,
	    TXG_INITIAL, open_txg - TXG_INITIAL + 1);

	if (newvd->vdev_isspare) {
		spa_spare_activate(newvd);
		spa_event_notify(spa, newvd, ESC_ZFS_VDEV_SPARE);
	}

	/*
	 * Capture the paths and spare state now; the history log below runs
	 * after spa_vdev_exit() has dropped the vdev lock.
	 */
	oldvdpath = spa_strdup(oldvd->vdev_path);
	newvdpath = spa_strdup(newvd->vdev_path);
	newvd_isspare = newvd->vdev_isspare;

	/*
	 * Mark newvd's DTL dirty in this txg.
	 */
	vdev_dirty(tvd, VDD_DTL, newvd, txg);

	(void) spa_vdev_exit(spa, newrootvd, open_txg, 0);

	spa_history_internal_log(LOG_POOL_VDEV_ATTACH, spa, NULL,
	    CRED(), "%s vdev=%s %s vdev=%s",
	    replacing && newvd_isspare ? "spare in" :
	    replacing ? "replace" : "attach", newvdpath,
	    replacing ? "for" : "to", oldvdpath);

	spa_strfree(oldvdpath);
	spa_strfree(newvdpath);

	/*
	 * Kick off a resilver to update newvd.
	 */
	VERIFY3U(spa_scrub(spa, POOL_SCRUB_RESILVER), ==, 0);

	return (0);
}

/*
 * Detach a device from a mirror or replacing vdev.
 * If 'replace_done' is specified, only detach if the parent
 * is a replacing vdev.
 */
int
spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
{
	uint64_t txg;
	int error;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *vd, *pvd, *cvd, *tvd;
	boolean_t unspare = B_FALSE;
	uint64_t unspare_guid;
	size_t len;

	txg = spa_vdev_enter(spa);

	vd = spa_lookup_by_guid(spa, guid, B_FALSE);

	if (vd == NULL)
		return (spa_vdev_exit(spa, NULL, txg, ENODEV));

	if (!vd->vdev_ops->vdev_op_leaf)
		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));

	pvd = vd->vdev_parent;

	/*
	 * If the parent/child relationship is not as expected, don't do it.
	 * Consider M(A,R(B,C)) -- that is, a mirror of A with a replacing
	 * vdev that's replacing B with C.  The user's intent in replacing
	 * is to go from M(A,B) to M(A,C).
	 * If the user decides to cancel
	 * the replace by detaching C, the expected behavior is to end up
	 * M(A,B).  But suppose that right after deciding to detach C,
	 * the replacement of B completes.  We would have M(A,C), and then
	 * ask to detach C, which would leave us with just A -- not what
	 * the user wanted.  To prevent this, we make sure that the
	 * parent/child relationship hasn't changed -- in this example,
	 * that C's parent is still the replacing vdev R.
	 */
	if (pvd->vdev_guid != pguid && pguid != 0)
		return (spa_vdev_exit(spa, NULL, txg, EBUSY));

	/*
	 * If replace_done is specified, only remove this device if it's
	 * the first child of a replacing vdev.  For the 'spare' vdev, either
	 * disk can be removed.
	 */
	if (replace_done) {
		if (pvd->vdev_ops == &vdev_replacing_ops) {
			if (vd->vdev_id != 0)
				return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
		} else if (pvd->vdev_ops != &vdev_spare_ops) {
			return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
		}
	}

	ASSERT(pvd->vdev_ops != &vdev_spare_ops ||
	    spa_version(spa) >= SPA_VERSION_SPARES);

	/*
	 * Only mirror, replacing, and spare vdevs support detach.
	 */
	if (pvd->vdev_ops != &vdev_replacing_ops &&
	    pvd->vdev_ops != &vdev_mirror_ops &&
	    pvd->vdev_ops != &vdev_spare_ops)
		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));

	/*
	 * If this device has the only valid copy of some data,
	 * we cannot safely detach it.
	 */
	if (vdev_dtl_required(vd))
		return (spa_vdev_exit(spa, NULL, txg, EBUSY));

	ASSERT(pvd->vdev_children >= 2);

	/*
	 * If we are detaching the second disk from a replacing vdev, then
	 * check to see if we changed the original vdev's path to have "/old"
	 * at the end in spa_vdev_attach().  If so, undo that change now.
	 */
	if (pvd->vdev_ops == &vdev_replacing_ops && vd->vdev_id == 1 &&
	    pvd->vdev_child[0]->vdev_path != NULL &&
	    pvd->vdev_child[1]->vdev_path != NULL) {
		ASSERT(pvd->vdev_child[1] == vd);
		cvd = pvd->vdev_child[0];
		len = strlen(vd->vdev_path);
		if (strncmp(cvd->vdev_path, vd->vdev_path, len) == 0 &&
		    strcmp(cvd->vdev_path + len, "/old") == 0) {
			spa_strfree(cvd->vdev_path);
			cvd->vdev_path = spa_strdup(vd->vdev_path);
		}
	}

	/*
	 * If we are detaching the original disk from a spare, then it implies
	 * that the spare should become a real disk, and be removed from the
	 * active spare list for the pool.
	 */
	if (pvd->vdev_ops == &vdev_spare_ops &&
	    vd->vdev_id == 0 && pvd->vdev_child[1]->vdev_isspare)
		unspare = B_TRUE;

	/*
	 * Erase the disk labels so the disk can be used for other things.
	 * This must be done after all other error cases are handled,
	 * but before we disembowel vd (so we can still do I/O to it).
	 * But if we can't do it, don't treat the error as fatal --
	 * it may be that the unwritability of the disk is the reason
	 * it's being detached!
	 */
	error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE);

	/*
	 * Remove vd from its parent and compact the parent's children.
	 */
	vdev_remove_child(pvd, vd);
	vdev_compact_children(pvd);

	/*
	 * Remember one of the remaining children so we can get tvd below.
	 */
	cvd = pvd->vdev_child[0];

	/*
	 * If we need to remove the remaining child from the list of hot spares,
	 * do it now, marking the vdev as no longer a spare in the process.
	 * We must do this before vdev_remove_parent(), because that can
	 * change the GUID if it creates a new toplevel GUID.  For a similar
	 * reason, we must remove the spare now, in the same txg as the detach;
	 * otherwise someone could attach a new sibling, change the GUID, and
	 * the subsequent attempt to spa_vdev_remove(unspare_guid) would fail.
	 */
	if (unspare) {
		ASSERT(cvd->vdev_isspare);
		spa_spare_remove(cvd);
		unspare_guid = cvd->vdev_guid;
		(void) spa_vdev_remove(spa, unspare_guid, B_TRUE);
	}

	/*
	 * If the parent mirror/replacing vdev only has one child,
	 * the parent is no longer needed.  Remove it from the tree.
	 */
	if (pvd->vdev_children == 1)
		vdev_remove_parent(cvd);

	/*
	 * We don't set tvd until now because the parent we just removed
	 * may have been the previous top-level vdev.
	 */
	tvd = cvd->vdev_top;
	ASSERT(tvd->vdev_parent == rvd);

	/*
	 * Reevaluate the parent vdev state.
	 */
	vdev_propagate_state(cvd);

	/*
	 * If the 'autoexpand' property is set on the pool then automatically
	 * try to expand the size of the pool. For example if the device we
	 * just detached was smaller than the others, it may be possible to
	 * add metaslabs (i.e. grow the pool).  We need to reopen the vdev
	 * first so that we can obtain the updated sizes of the leaf vdevs.
	 */
	if (spa->spa_autoexpand) {
		vdev_reopen(tvd);
		vdev_expand(tvd, txg);
	}

	vdev_config_dirty(tvd);

	/*
	 * Mark vd's DTL as dirty in this txg.  vdev_dtl_sync() will see that
	 * vd->vdev_detached is set and free vd's DTL object in syncing context.
	 * But first make sure we're not on any *other* txg's DTL list, to
	 * prevent vd from being accessed after it's freed.
	 */
	for (int t = 0; t < TXG_SIZE; t++)
		(void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t);
	vd->vdev_detached = B_TRUE;
	vdev_dirty(tvd, VDD_DTL, vd, txg);

	spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE);

	error = spa_vdev_exit(spa, vd, txg, 0);

	/*
	 * If this was the removal of the original device in a hot spare vdev,
	 * then we want to go through and remove the device from the hot spare
	 * list of every other pool.
	 */
	if (unspare) {
		spa_t *myspa = spa;
		spa = NULL;
		mutex_enter(&spa_namespace_lock);
		/*
		 * Walk every other active pool and remove this guid from its
		 * spare list.  The namespace lock is dropped around each
		 * spa_vdev_remove() call; the open ref taken here keeps the
		 * pool from going away in the meantime.
		 */
		while ((spa = spa_next(spa)) != NULL) {
			if (spa->spa_state != POOL_STATE_ACTIVE)
				continue;
			if (spa == myspa)
				continue;
			spa_open_ref(spa, FTAG);
			mutex_exit(&spa_namespace_lock);
			(void) spa_vdev_remove(spa, unspare_guid, B_TRUE);
			mutex_enter(&spa_namespace_lock);
			spa_close(spa, FTAG);
		}
		mutex_exit(&spa_namespace_lock);
	}

	return (error);
}

/*
 * Return the nvlist in the 'nvpp' array (of length 'count') whose
 * ZPOOL_CONFIG_GUID matches 'target_guid', or NULL if no entry matches.
 */
static nvlist_t *
spa_nvlist_lookup_by_guid(nvlist_t **nvpp, int count, uint64_t target_guid)
{
	for (int i = 0; i < count; i++) {
		uint64_t guid;

		VERIFY(nvlist_lookup_uint64(nvpp[i], ZPOOL_CONFIG_GUID,
		    &guid) == 0);

		if (guid == target_guid)
			return (nvpp[i]);
	}

	return (NULL);
}

/*
 * Remove 'dev_to_remove' from the 'dev' array (of length 'count') stored
 * under 'name' in 'config'.  The surviving entries are duplicated into a
 * new array which replaces the old one in the nvlist; the duplicates are
 * then freed, since the nvlist takes its own copy.
 *
 * NOTE(review): when count == 1, newdev stays NULL and a (NULL, 0) array is
 * written back -- presumably nvlist_add_nvlist_array() accepts an empty
 * array; confirm.
 */
static void
spa_vdev_remove_aux(nvlist_t *config, char *name, nvlist_t **dev, int count,
    nvlist_t *dev_to_remove)
{
	nvlist_t **newdev = NULL;

	if (count > 1)
		newdev = kmem_alloc((count - 1) * sizeof (void *), KM_SLEEP);

	for (int i = 0, j = 0; i < count; i++) {
		if (dev[i] == dev_to_remove)
			continue;
		VERIFY(nvlist_dup(dev[i], &newdev[j++], KM_SLEEP) == 0);
	}

	VERIFY(nvlist_remove(config, name, DATA_TYPE_NVLIST_ARRAY) == 0);
	VERIFY(nvlist_add_nvlist_array(config, name, newdev, count - 1) == 0);

	for (int i = 0; i < count - 1; i++)
		nvlist_free(newdev[i]);

	if (count > 1)
		kmem_free(newdev, (count - 1) * sizeof (void *));
}

/*
 * Remove a device from the pool.  Currently, this supports removing only hot
 * spares and level 2 ARC devices.
 */
int
spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
{
	vdev_t *vd;
	nvlist_t **spares, **l2cache, *nv;
	uint_t nspares, nl2cache;
	uint64_t txg = 0;
	int error = 0;
	/*
	 * This may be called with the namespace lock already held (e.g. from
	 * spa_vdev_detach()); only take and drop the vdev lock ourselves
	 * when it is not.
	 */
	boolean_t locked = MUTEX_HELD(&spa_namespace_lock);

	if (!locked)
		txg = spa_vdev_enter(spa);

	vd = spa_lookup_by_guid(spa, guid, B_FALSE);

	if (spa->spa_spares.sav_vdevs != NULL &&
	    nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
	    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0 &&
	    (nv = spa_nvlist_lookup_by_guid(spares, nspares, guid)) != NULL) {
		/*
		 * Only remove the hot spare if it's not currently in use
		 * in this pool.
		 */
		if (vd == NULL || unspare) {
			spa_vdev_remove_aux(spa->spa_spares.sav_config,
			    ZPOOL_CONFIG_SPARES, spares, nspares, nv);
			spa_load_spares(spa);
			spa->spa_spares.sav_sync = B_TRUE;
		} else {
			error = EBUSY;
		}
	} else if (spa->spa_l2cache.sav_vdevs != NULL &&
	    nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config,
	    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0 &&
	    (nv = spa_nvlist_lookup_by_guid(l2cache, nl2cache, guid)) != NULL) {
		/*
		 * Cache devices can always be removed.
		 */
		spa_vdev_remove_aux(spa->spa_l2cache.sav_config,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache, nv);
		spa_load_l2cache(spa);
		spa->spa_l2cache.sav_sync = B_TRUE;
	} else if (vd != NULL) {
		/*
		 * Normal vdevs cannot be removed (yet).
		 */
		error = ENOTSUP;
	} else {
		/*
		 * There is no vdev of any kind with the specified guid.
		 */
		error = ENOENT;
	}

	if (!locked)
		return (spa_vdev_exit(spa, NULL, txg, error));

	return (error);
}

/*
 * Find any device that's done replacing, or a vdev marked 'unspare' that's
 * current spared, so we can detach it.
 */
static vdev_t *
spa_vdev_resilver_done_hunt(vdev_t *vd)
{
	vdev_t *newvd, *oldvd;

	/*
	 * Depth-first: return the first candidate found in any subtree.
	 */
	for (int c = 0; c < vd->vdev_children; c++) {
		oldvd = spa_vdev_resilver_done_hunt(vd->vdev_child[c]);
		if (oldvd != NULL)
			return (oldvd);
	}

	/*
	 * Check for a completed replacement.
	 */
	if (vd->vdev_ops == &vdev_replacing_ops && vd->vdev_children == 2) {
		oldvd = vd->vdev_child[0];
		newvd = vd->vdev_child[1];

		if (vdev_dtl_empty(newvd, DTL_MISSING) &&
		    !vdev_dtl_required(oldvd))
			return (oldvd);
	}

	/*
	 * Check for a completed resilver with the 'unspare' flag set.
	 */
	if (vd->vdev_ops == &vdev_spare_ops && vd->vdev_children == 2) {
		newvd = vd->vdev_child[0];
		oldvd = vd->vdev_child[1];

		if (newvd->vdev_unspare &&
		    vdev_dtl_empty(newvd, DTL_MISSING) &&
		    !vdev_dtl_required(oldvd)) {
			newvd->vdev_unspare = 0;
			return (oldvd);
		}
	}

	return (NULL);
}

/*
 * Detach every device that spa_vdev_resilver_done_hunt() reports as done.
 * The SCL_ALL config lock is dropped around each spa_vdev_detach() call
 * (which takes its own locks) and re-taken before hunting again.
 */
static void
spa_vdev_resilver_done(spa_t *spa)
{
	vdev_t *vd, *pvd, *ppvd;
	uint64_t guid, sguid, pguid, ppguid;

	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	while ((vd = spa_vdev_resilver_done_hunt(spa->spa_root_vdev)) != NULL) {
		pvd = vd->vdev_parent;
		ppvd = pvd->vdev_parent;
		/*
		 * Capture the guids before dropping the config lock; the
		 * tree may change once it is released.
		 */
		guid = vd->vdev_guid;
		pguid = pvd->vdev_guid;
		ppguid = ppvd->vdev_guid;
		sguid = 0;
		/*
		 * If we have just finished replacing a hot spared device, then
		 * we need to detach the parent's first child (the original hot
		 * spare) as well.
		 */
		if (ppvd->vdev_ops == &vdev_spare_ops && pvd->vdev_id == 0) {
			ASSERT(pvd->vdev_ops == &vdev_replacing_ops);
			ASSERT(ppvd->vdev_children == 2);
			sguid = ppvd->vdev_child[1]->vdev_guid;
		}
		spa_config_exit(spa, SCL_ALL, FTAG);
		if (spa_vdev_detach(spa, guid, pguid, B_TRUE) != 0)
			return;
		if (sguid && spa_vdev_detach(spa, sguid, ppguid, B_TRUE) != 0)
			return;
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	}

	spa_config_exit(spa, SCL_ALL, FTAG);
}

/*
 * Update the stored path or FRU for this vdev.  Dirty the vdev configuration,
 * relying on spa_vdev_enter/exit() to synchronize the labels and cache.
 */
int
spa_vdev_set_common(spa_t *spa, uint64_t guid, const char *value,
    boolean_t ispath)
{
	vdev_t *vd;
	uint64_t txg;

	txg = spa_vdev_enter(spa);

	if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL)
		return (spa_vdev_exit(spa, NULL, txg, ENOENT));

	if (!vd->vdev_ops->vdev_op_leaf)
		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));

	if (ispath) {
		/*
		 * NOTE(review): vdev_path is freed without a NULL check,
		 * unlike vdev_fru below -- presumably a leaf vdev always
		 * has a path by the time this is reachable; confirm.
		 */
		spa_strfree(vd->vdev_path);
		vd->vdev_path = spa_strdup(value);
	} else {
		if (vd->vdev_fru != NULL)
			spa_strfree(vd->vdev_fru);
		vd->vdev_fru = spa_strdup(value);
	}

	vdev_config_dirty(vd->vdev_top);

	return (spa_vdev_exit(spa, NULL, txg, 0));
}

/*
 * Update the stored path for the leaf vdev with the given guid.
 */
int
spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath)
{
	return (spa_vdev_set_common(spa, guid, newpath, B_TRUE));
}

/*
 * Update the stored FRU for the leaf vdev with the given guid.
 */
int
spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru)
{
	return (spa_vdev_set_common(spa, guid, newfru, B_FALSE));
}

/*
 * ==========================================================================
 * SPA Scrubbing
 * ==========================================================================
 */

/*
 * Start (POOL_SCRUB_EVERYTHING / POOL_SCRUB_RESILVER) or cancel
 * (POOL_SCRUB_NONE) a scrub of the pool.  Returns 0 on success or an errno.
 */
int
spa_scrub(spa_t *spa, pool_scrub_type_t type)
{
	/* Caller must not already hold any config lock as writer. */
	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);

	if ((uint_t)type >= POOL_SCRUB_TYPES)
		return (ENOTSUP);

	/*
	 * If a resilver was requested, but there is no DTL on a
	 * writeable leaf device, we have nothing to do.
	 */
	if (type == POOL_SCRUB_RESILVER &&
	    !vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) {
		spa_async_request(spa, SPA_ASYNC_RESILVER_DONE);
		return (0);
	}

	/*
	 * A full scrub cannot preempt a resilver that is already running.
	 */
	if (type == POOL_SCRUB_EVERYTHING &&
	    spa->spa_dsl_pool->dp_scrub_func != SCRUB_FUNC_NONE &&
	    spa->spa_dsl_pool->dp_scrub_isresilver)
		return (EBUSY);

	if (type == POOL_SCRUB_EVERYTHING || type == POOL_SCRUB_RESILVER) {
		return (dsl_pool_scrub_clean(spa->spa_dsl_pool));
	} else if (type == POOL_SCRUB_NONE) {
		return (dsl_pool_scrub_cancel(spa->spa_dsl_pool));
	} else {
		return (EINVAL);
	}
}

/*
 * ==========================================================================
 * SPA async task processing
 * ==========================================================================
 */

/*
 * Act on any pending 'remove' request on vd and, recursively, its children:
 * mark the vdev REMOVED, reset its error counters, and dirty its top-level
 * vdev state.
 */
static void
spa_async_remove(spa_t *spa, vdev_t *vd)
{
	if (vd->vdev_remove_wanted) {
vd->vdev_remove_wanted = 0; 36857361SBrendan.Gregg@Sun.COM vdev_set_state(vd, B_FALSE, VDEV_STATE_REMOVED, VDEV_AUX_NONE); 3686*10575SEric.Schrock@Sun.COM 3687*10575SEric.Schrock@Sun.COM /* 3688*10575SEric.Schrock@Sun.COM * We want to clear the stats, but we don't want to do a full 3689*10575SEric.Schrock@Sun.COM * vdev_clear() as that will cause us to throw away 3690*10575SEric.Schrock@Sun.COM * degraded/faulted state as well as attempt to reopen the 3691*10575SEric.Schrock@Sun.COM * device, all of which is a waste. 3692*10575SEric.Schrock@Sun.COM */ 3693*10575SEric.Schrock@Sun.COM vd->vdev_stat.vs_read_errors = 0; 3694*10575SEric.Schrock@Sun.COM vd->vdev_stat.vs_write_errors = 0; 3695*10575SEric.Schrock@Sun.COM vd->vdev_stat.vs_checksum_errors = 0; 3696*10575SEric.Schrock@Sun.COM 36977754SJeff.Bonwick@Sun.COM vdev_state_dirty(vd->vdev_top); 36981544Seschrock } 36997361SBrendan.Gregg@Sun.COM 37007754SJeff.Bonwick@Sun.COM for (int c = 0; c < vd->vdev_children; c++) 37017361SBrendan.Gregg@Sun.COM spa_async_remove(spa, vd->vdev_child[c]); 37021544Seschrock } 37031544Seschrock 37041544Seschrock static void 37057754SJeff.Bonwick@Sun.COM spa_async_probe(spa_t *spa, vdev_t *vd) 37067754SJeff.Bonwick@Sun.COM { 37077754SJeff.Bonwick@Sun.COM if (vd->vdev_probe_wanted) { 37087754SJeff.Bonwick@Sun.COM vd->vdev_probe_wanted = 0; 37097754SJeff.Bonwick@Sun.COM vdev_reopen(vd); /* vdev_open() does the actual probe */ 37107754SJeff.Bonwick@Sun.COM } 37117754SJeff.Bonwick@Sun.COM 37127754SJeff.Bonwick@Sun.COM for (int c = 0; c < vd->vdev_children; c++) 37137754SJeff.Bonwick@Sun.COM spa_async_probe(spa, vd->vdev_child[c]); 37147754SJeff.Bonwick@Sun.COM } 37157754SJeff.Bonwick@Sun.COM 37167754SJeff.Bonwick@Sun.COM static void 37179816SGeorge.Wilson@Sun.COM spa_async_autoexpand(spa_t *spa, vdev_t *vd) 37189816SGeorge.Wilson@Sun.COM { 37199816SGeorge.Wilson@Sun.COM sysevent_id_t eid; 37209816SGeorge.Wilson@Sun.COM nvlist_t *attr; 37219816SGeorge.Wilson@Sun.COM char *physpath; 
37229816SGeorge.Wilson@Sun.COM 37239816SGeorge.Wilson@Sun.COM if (!spa->spa_autoexpand) 37249816SGeorge.Wilson@Sun.COM return; 37259816SGeorge.Wilson@Sun.COM 37269816SGeorge.Wilson@Sun.COM for (int c = 0; c < vd->vdev_children; c++) { 37279816SGeorge.Wilson@Sun.COM vdev_t *cvd = vd->vdev_child[c]; 37289816SGeorge.Wilson@Sun.COM spa_async_autoexpand(spa, cvd); 37299816SGeorge.Wilson@Sun.COM } 37309816SGeorge.Wilson@Sun.COM 37319816SGeorge.Wilson@Sun.COM if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_physpath == NULL) 37329816SGeorge.Wilson@Sun.COM return; 37339816SGeorge.Wilson@Sun.COM 37349816SGeorge.Wilson@Sun.COM physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 37359816SGeorge.Wilson@Sun.COM (void) snprintf(physpath, MAXPATHLEN, "/devices%s", vd->vdev_physpath); 37369816SGeorge.Wilson@Sun.COM 37379816SGeorge.Wilson@Sun.COM VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0); 37389816SGeorge.Wilson@Sun.COM VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0); 37399816SGeorge.Wilson@Sun.COM 37409816SGeorge.Wilson@Sun.COM (void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS, 37419816SGeorge.Wilson@Sun.COM ESC_DEV_DLE, attr, &eid, DDI_SLEEP); 37429816SGeorge.Wilson@Sun.COM 37439816SGeorge.Wilson@Sun.COM nvlist_free(attr); 37449816SGeorge.Wilson@Sun.COM kmem_free(physpath, MAXPATHLEN); 37459816SGeorge.Wilson@Sun.COM } 37469816SGeorge.Wilson@Sun.COM 37479816SGeorge.Wilson@Sun.COM static void 37481544Seschrock spa_async_thread(spa_t *spa) 37491544Seschrock { 37507754SJeff.Bonwick@Sun.COM int tasks; 37511544Seschrock 37521544Seschrock ASSERT(spa->spa_sync_on); 3753789Sahrens 37541544Seschrock mutex_enter(&spa->spa_async_lock); 37551544Seschrock tasks = spa->spa_async_tasks; 37561544Seschrock spa->spa_async_tasks = 0; 37571544Seschrock mutex_exit(&spa->spa_async_lock); 37581544Seschrock 37591544Seschrock /* 37601635Sbonwick * See if the config needs to be updated. 
37611635Sbonwick */ 37621635Sbonwick if (tasks & SPA_ASYNC_CONFIG_UPDATE) { 37639816SGeorge.Wilson@Sun.COM uint64_t oldsz, space_update; 37649816SGeorge.Wilson@Sun.COM 37651635Sbonwick mutex_enter(&spa_namespace_lock); 37669816SGeorge.Wilson@Sun.COM oldsz = spa_get_space(spa); 37671635Sbonwick spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); 37689816SGeorge.Wilson@Sun.COM space_update = spa_get_space(spa) - oldsz; 37691635Sbonwick mutex_exit(&spa_namespace_lock); 37709816SGeorge.Wilson@Sun.COM 37719816SGeorge.Wilson@Sun.COM /* 37729816SGeorge.Wilson@Sun.COM * If the pool grew as a result of the config update, 37739816SGeorge.Wilson@Sun.COM * then log an internal history event. 37749816SGeorge.Wilson@Sun.COM */ 37759816SGeorge.Wilson@Sun.COM if (space_update) { 37769946SMark.Musante@Sun.COM spa_history_internal_log(LOG_POOL_VDEV_ONLINE, 37779946SMark.Musante@Sun.COM spa, NULL, CRED(), 37789946SMark.Musante@Sun.COM "pool '%s' size: %llu(+%llu)", 37799946SMark.Musante@Sun.COM spa_name(spa), spa_get_space(spa), 37809946SMark.Musante@Sun.COM space_update); 37819816SGeorge.Wilson@Sun.COM } 37821635Sbonwick } 37831635Sbonwick 37841635Sbonwick /* 37854451Seschrock * See if any devices need to be marked REMOVED. 
37861544Seschrock */ 37877754SJeff.Bonwick@Sun.COM if (tasks & SPA_ASYNC_REMOVE) { 37887754SJeff.Bonwick@Sun.COM spa_vdev_state_enter(spa); 37894451Seschrock spa_async_remove(spa, spa->spa_root_vdev); 37907754SJeff.Bonwick@Sun.COM for (int i = 0; i < spa->spa_l2cache.sav_count; i++) 37917361SBrendan.Gregg@Sun.COM spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i]); 37927754SJeff.Bonwick@Sun.COM for (int i = 0; i < spa->spa_spares.sav_count; i++) 37937361SBrendan.Gregg@Sun.COM spa_async_remove(spa, spa->spa_spares.sav_vdevs[i]); 37947754SJeff.Bonwick@Sun.COM (void) spa_vdev_state_exit(spa, NULL, 0); 37957754SJeff.Bonwick@Sun.COM } 37967754SJeff.Bonwick@Sun.COM 37979816SGeorge.Wilson@Sun.COM if ((tasks & SPA_ASYNC_AUTOEXPAND) && !spa_suspended(spa)) { 37989816SGeorge.Wilson@Sun.COM spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 37999816SGeorge.Wilson@Sun.COM spa_async_autoexpand(spa, spa->spa_root_vdev); 38009816SGeorge.Wilson@Sun.COM spa_config_exit(spa, SCL_CONFIG, FTAG); 38019816SGeorge.Wilson@Sun.COM } 38029816SGeorge.Wilson@Sun.COM 38037754SJeff.Bonwick@Sun.COM /* 38047754SJeff.Bonwick@Sun.COM * See if any devices need to be probed. 38057754SJeff.Bonwick@Sun.COM */ 38067754SJeff.Bonwick@Sun.COM if (tasks & SPA_ASYNC_PROBE) { 38077754SJeff.Bonwick@Sun.COM spa_vdev_state_enter(spa); 38087754SJeff.Bonwick@Sun.COM spa_async_probe(spa, spa->spa_root_vdev); 38097754SJeff.Bonwick@Sun.COM (void) spa_vdev_state_exit(spa, NULL, 0); 38104451Seschrock } 38111544Seschrock 38121544Seschrock /* 38131544Seschrock * If any devices are done replacing, detach them. 38141544Seschrock */ 38154451Seschrock if (tasks & SPA_ASYNC_RESILVER_DONE) 38164451Seschrock spa_vdev_resilver_done(spa); 3817789Sahrens 38181544Seschrock /* 38191544Seschrock * Kick off a resilver. 
38201544Seschrock */ 38217046Sahrens if (tasks & SPA_ASYNC_RESILVER) 38227046Sahrens VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER) == 0); 38231544Seschrock 38241544Seschrock /* 38251544Seschrock * Let the world know that we're done. 38261544Seschrock */ 38271544Seschrock mutex_enter(&spa->spa_async_lock); 38281544Seschrock spa->spa_async_thread = NULL; 38291544Seschrock cv_broadcast(&spa->spa_async_cv); 38301544Seschrock mutex_exit(&spa->spa_async_lock); 38311544Seschrock thread_exit(); 38321544Seschrock } 38331544Seschrock 38341544Seschrock void 38351544Seschrock spa_async_suspend(spa_t *spa) 38361544Seschrock { 38371544Seschrock mutex_enter(&spa->spa_async_lock); 38381544Seschrock spa->spa_async_suspended++; 38391544Seschrock while (spa->spa_async_thread != NULL) 38401544Seschrock cv_wait(&spa->spa_async_cv, &spa->spa_async_lock); 38411544Seschrock mutex_exit(&spa->spa_async_lock); 38421544Seschrock } 38431544Seschrock 38441544Seschrock void 38451544Seschrock spa_async_resume(spa_t *spa) 38461544Seschrock { 38471544Seschrock mutex_enter(&spa->spa_async_lock); 38481544Seschrock ASSERT(spa->spa_async_suspended != 0); 38491544Seschrock spa->spa_async_suspended--; 38501544Seschrock mutex_exit(&spa->spa_async_lock); 38511544Seschrock } 38521544Seschrock 38531544Seschrock static void 38541544Seschrock spa_async_dispatch(spa_t *spa) 38551544Seschrock { 38561544Seschrock mutex_enter(&spa->spa_async_lock); 38571544Seschrock if (spa->spa_async_tasks && !spa->spa_async_suspended && 38581635Sbonwick spa->spa_async_thread == NULL && 38591635Sbonwick rootdir != NULL && !vn_is_readonly(rootdir)) 38601544Seschrock spa->spa_async_thread = thread_create(NULL, 0, 38611544Seschrock spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri); 38621544Seschrock mutex_exit(&spa->spa_async_lock); 38631544Seschrock } 38641544Seschrock 38651544Seschrock void 38661544Seschrock spa_async_request(spa_t *spa, int task) 38671544Seschrock { 38681544Seschrock mutex_enter(&spa->spa_async_lock); 
38691544Seschrock spa->spa_async_tasks |= task; 38701544Seschrock mutex_exit(&spa->spa_async_lock); 3871789Sahrens } 3872789Sahrens 3873789Sahrens /* 3874789Sahrens * ========================================================================== 3875789Sahrens * SPA syncing routines 3876789Sahrens * ========================================================================== 3877789Sahrens */ 3878789Sahrens 3879789Sahrens static void 3880789Sahrens spa_sync_deferred_frees(spa_t *spa, uint64_t txg) 3881789Sahrens { 3882789Sahrens bplist_t *bpl = &spa->spa_sync_bplist; 3883789Sahrens dmu_tx_t *tx; 3884789Sahrens blkptr_t blk; 3885789Sahrens uint64_t itor = 0; 3886789Sahrens zio_t *zio; 3887789Sahrens int error; 3888789Sahrens uint8_t c = 1; 3889789Sahrens 38907754SJeff.Bonwick@Sun.COM zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); 38917754SJeff.Bonwick@Sun.COM 38927754SJeff.Bonwick@Sun.COM while (bplist_iterate(bpl, &itor, &blk) == 0) { 38937754SJeff.Bonwick@Sun.COM ASSERT(blk.blk_birth < txg); 38947754SJeff.Bonwick@Sun.COM zio_nowait(zio_free(zio, spa, txg, &blk, NULL, NULL, 38957754SJeff.Bonwick@Sun.COM ZIO_FLAG_MUSTSUCCEED)); 38967754SJeff.Bonwick@Sun.COM } 3897789Sahrens 3898789Sahrens error = zio_wait(zio); 3899789Sahrens ASSERT3U(error, ==, 0); 3900789Sahrens 3901789Sahrens tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); 3902789Sahrens bplist_vacate(bpl, tx); 3903789Sahrens 3904789Sahrens /* 3905789Sahrens * Pre-dirty the first block so we sync to convergence faster. 3906789Sahrens * (Usually only the first block is needed.) 
3907789Sahrens */ 3908789Sahrens dmu_write(spa->spa_meta_objset, spa->spa_sync_bplist_obj, 0, 1, &c, tx); 3909789Sahrens dmu_tx_commit(tx); 3910789Sahrens } 3911789Sahrens 3912789Sahrens static void 39132082Seschrock spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx) 39142082Seschrock { 39152082Seschrock char *packed = NULL; 39167497STim.Haley@Sun.COM size_t bufsize; 39172082Seschrock size_t nvsize = 0; 39182082Seschrock dmu_buf_t *db; 39192082Seschrock 39202082Seschrock VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0); 39212082Seschrock 39227497STim.Haley@Sun.COM /* 39237497STim.Haley@Sun.COM * Write full (SPA_CONFIG_BLOCKSIZE) blocks of configuration 39247497STim.Haley@Sun.COM * information. This avoids the dbuf_will_dirty() path and 39257497STim.Haley@Sun.COM * saves us a pre-read to get data we don't actually care about. 39267497STim.Haley@Sun.COM */ 39277497STim.Haley@Sun.COM bufsize = P2ROUNDUP(nvsize, SPA_CONFIG_BLOCKSIZE); 39287497STim.Haley@Sun.COM packed = kmem_alloc(bufsize, KM_SLEEP); 39292082Seschrock 39302082Seschrock VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR, 39312082Seschrock KM_SLEEP) == 0); 39327497STim.Haley@Sun.COM bzero(packed + nvsize, bufsize - nvsize); 39337497STim.Haley@Sun.COM 39347497STim.Haley@Sun.COM dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx); 39357497STim.Haley@Sun.COM 39367497STim.Haley@Sun.COM kmem_free(packed, bufsize); 39372082Seschrock 39382082Seschrock VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); 39392082Seschrock dmu_buf_will_dirty(db, tx); 39402082Seschrock *(uint64_t *)db->db_data = nvsize; 39412082Seschrock dmu_buf_rele(db, FTAG); 39422082Seschrock } 39432082Seschrock 39442082Seschrock static void 39455450Sbrendan spa_sync_aux_dev(spa_t *spa, spa_aux_vdev_t *sav, dmu_tx_t *tx, 39465450Sbrendan const char *config, const char *entry) 39472082Seschrock { 39482082Seschrock nvlist_t *nvroot; 39495450Sbrendan nvlist_t **list; 39502082Seschrock int i; 
39512082Seschrock 39525450Sbrendan if (!sav->sav_sync) 39532082Seschrock return; 39542082Seschrock 39552082Seschrock /* 39565450Sbrendan * Update the MOS nvlist describing the list of available devices. 39575450Sbrendan * spa_validate_aux() will have already made sure this nvlist is 39584451Seschrock * valid and the vdevs are labeled appropriately. 39592082Seschrock */ 39605450Sbrendan if (sav->sav_object == 0) { 39615450Sbrendan sav->sav_object = dmu_object_alloc(spa->spa_meta_objset, 39625450Sbrendan DMU_OT_PACKED_NVLIST, 1 << 14, DMU_OT_PACKED_NVLIST_SIZE, 39635450Sbrendan sizeof (uint64_t), tx); 39642082Seschrock VERIFY(zap_update(spa->spa_meta_objset, 39655450Sbrendan DMU_POOL_DIRECTORY_OBJECT, entry, sizeof (uint64_t), 1, 39665450Sbrendan &sav->sav_object, tx) == 0); 39672082Seschrock } 39682082Seschrock 39692082Seschrock VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); 39705450Sbrendan if (sav->sav_count == 0) { 39715450Sbrendan VERIFY(nvlist_add_nvlist_array(nvroot, config, NULL, 0) == 0); 39722082Seschrock } else { 39735450Sbrendan list = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); 39745450Sbrendan for (i = 0; i < sav->sav_count; i++) 39755450Sbrendan list[i] = vdev_config_generate(spa, sav->sav_vdevs[i], 39765450Sbrendan B_FALSE, B_FALSE, B_TRUE); 39775450Sbrendan VERIFY(nvlist_add_nvlist_array(nvroot, config, list, 39785450Sbrendan sav->sav_count) == 0); 39795450Sbrendan for (i = 0; i < sav->sav_count; i++) 39805450Sbrendan nvlist_free(list[i]); 39815450Sbrendan kmem_free(list, sav->sav_count * sizeof (void *)); 39822082Seschrock } 39832082Seschrock 39845450Sbrendan spa_sync_nvlist(spa, sav->sav_object, nvroot, tx); 39852926Sek110237 nvlist_free(nvroot); 39862082Seschrock 39875450Sbrendan sav->sav_sync = B_FALSE; 39882082Seschrock } 39892082Seschrock 39902082Seschrock static void 3991789Sahrens spa_sync_config_object(spa_t *spa, dmu_tx_t *tx) 3992789Sahrens { 3993789Sahrens nvlist_t *config; 3994789Sahrens 
39957754SJeff.Bonwick@Sun.COM if (list_is_empty(&spa->spa_config_dirty_list)) 3996789Sahrens return; 3997789Sahrens 39987754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 39997754SJeff.Bonwick@Sun.COM 40007754SJeff.Bonwick@Sun.COM config = spa_config_generate(spa, spa->spa_root_vdev, 40017754SJeff.Bonwick@Sun.COM dmu_tx_get_txg(tx), B_FALSE); 40027754SJeff.Bonwick@Sun.COM 40037754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_STATE, FTAG); 4004789Sahrens 40051635Sbonwick if (spa->spa_config_syncing) 40061635Sbonwick nvlist_free(spa->spa_config_syncing); 40071635Sbonwick spa->spa_config_syncing = config; 4008789Sahrens 40092082Seschrock spa_sync_nvlist(spa, spa->spa_config_object, config, tx); 4010789Sahrens } 4011789Sahrens 40125094Slling /* 40135094Slling * Set zpool properties. 40145094Slling */ 40153912Slling static void 40164543Smarks spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 40173912Slling { 40183912Slling spa_t *spa = arg1; 40195094Slling objset_t *mos = spa->spa_meta_objset; 40203912Slling nvlist_t *nvp = arg2; 40215094Slling nvpair_t *elem; 40224451Seschrock uint64_t intval; 40236643Seschrock char *strval; 40245094Slling zpool_prop_t prop; 40255094Slling const char *propname; 40265094Slling zprop_type_t proptype; 40275094Slling 40287754SJeff.Bonwick@Sun.COM mutex_enter(&spa->spa_props_lock); 40297754SJeff.Bonwick@Sun.COM 40305094Slling elem = NULL; 40315094Slling while ((elem = nvlist_next_nvpair(nvp, elem))) { 40325094Slling switch (prop = zpool_name_to_prop(nvpair_name(elem))) { 40335094Slling case ZPOOL_PROP_VERSION: 40345094Slling /* 40355094Slling * Only set version for non-zpool-creation cases 40365094Slling * (set/import). spa_create() needs special care 40375094Slling * for version setting. 
40385094Slling */ 40395094Slling if (tx->tx_txg != TXG_INITIAL) { 40405094Slling VERIFY(nvpair_value_uint64(elem, 40415094Slling &intval) == 0); 40425094Slling ASSERT(intval <= SPA_VERSION); 40435094Slling ASSERT(intval >= spa_version(spa)); 40445094Slling spa->spa_uberblock.ub_version = intval; 40455094Slling vdev_config_dirty(spa->spa_root_vdev); 40465094Slling } 40475094Slling break; 40485094Slling 40495094Slling case ZPOOL_PROP_ALTROOT: 40505094Slling /* 40515094Slling * 'altroot' is a non-persistent property. It should 40525094Slling * have been set temporarily at creation or import time. 40535094Slling */ 40545094Slling ASSERT(spa->spa_root != NULL); 40555094Slling break; 40565094Slling 40575363Seschrock case ZPOOL_PROP_CACHEFILE: 40585094Slling /* 40598525SEric.Schrock@Sun.COM * 'cachefile' is also a non-persisitent property. 40605094Slling */ 40614543Smarks break; 40625094Slling default: 40635094Slling /* 40645094Slling * Set pool property values in the poolprops mos object. 40655094Slling */ 40665094Slling if (spa->spa_pool_props_object == 0) { 40675094Slling objset_t *mos = spa->spa_meta_objset; 40685094Slling 40695094Slling VERIFY((spa->spa_pool_props_object = 40705094Slling zap_create(mos, DMU_OT_POOL_PROPS, 40715094Slling DMU_OT_NONE, 0, tx)) > 0); 40725094Slling 40735094Slling VERIFY(zap_update(mos, 40745094Slling DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS, 40755094Slling 8, 1, &spa->spa_pool_props_object, tx) 40765094Slling == 0); 40775094Slling } 40785094Slling 40795094Slling /* normalize the property name */ 40805094Slling propname = zpool_prop_to_name(prop); 40815094Slling proptype = zpool_prop_get_type(prop); 40825094Slling 40835094Slling if (nvpair_type(elem) == DATA_TYPE_STRING) { 40845094Slling ASSERT(proptype == PROP_TYPE_STRING); 40855094Slling VERIFY(nvpair_value_string(elem, &strval) == 0); 40865094Slling VERIFY(zap_update(mos, 40875094Slling spa->spa_pool_props_object, propname, 40885094Slling 1, strlen(strval) + 1, strval, tx) == 0); 
40895094Slling 40905094Slling } else if (nvpair_type(elem) == DATA_TYPE_UINT64) { 40915094Slling VERIFY(nvpair_value_uint64(elem, &intval) == 0); 40925094Slling 40935094Slling if (proptype == PROP_TYPE_INDEX) { 40945094Slling const char *unused; 40955094Slling VERIFY(zpool_prop_index_to_string( 40965094Slling prop, intval, &unused) == 0); 40975094Slling } 40985094Slling VERIFY(zap_update(mos, 40995094Slling spa->spa_pool_props_object, propname, 41005094Slling 8, 1, &intval, tx) == 0); 41015094Slling } else { 41025094Slling ASSERT(0); /* not allowed */ 41035094Slling } 41045094Slling 41055329Sgw25295 switch (prop) { 41065329Sgw25295 case ZPOOL_PROP_DELEGATION: 41075094Slling spa->spa_delegation = intval; 41085329Sgw25295 break; 41095329Sgw25295 case ZPOOL_PROP_BOOTFS: 41105094Slling spa->spa_bootfs = intval; 41115329Sgw25295 break; 41125329Sgw25295 case ZPOOL_PROP_FAILUREMODE: 41135329Sgw25295 spa->spa_failmode = intval; 41145329Sgw25295 break; 41159816SGeorge.Wilson@Sun.COM case ZPOOL_PROP_AUTOEXPAND: 41169816SGeorge.Wilson@Sun.COM spa->spa_autoexpand = intval; 41179816SGeorge.Wilson@Sun.COM spa_async_request(spa, SPA_ASYNC_AUTOEXPAND); 41189816SGeorge.Wilson@Sun.COM break; 41195329Sgw25295 default: 41205329Sgw25295 break; 41215329Sgw25295 } 41223912Slling } 41235094Slling 41245094Slling /* log internal history if this is not a zpool create */ 41255094Slling if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY && 41265094Slling tx->tx_txg != TXG_INITIAL) { 41275094Slling spa_history_internal_log(LOG_POOL_PROPSET, 41285094Slling spa, tx, cr, "%s %lld %s", 41297754SJeff.Bonwick@Sun.COM nvpair_name(elem), intval, spa_name(spa)); 41305094Slling } 41313912Slling } 41327754SJeff.Bonwick@Sun.COM 41337754SJeff.Bonwick@Sun.COM mutex_exit(&spa->spa_props_lock); 41343912Slling } 41353912Slling 4136789Sahrens /* 4137789Sahrens * Sync the specified transaction group. New blocks may be dirtied as 4138789Sahrens * part of the process, so we iterate until it converges. 
 */
void
spa_sync(spa_t *spa, uint64_t txg)
{
	dsl_pool_t *dp = spa->spa_dsl_pool;
	objset_t *mos = spa->spa_meta_objset;
	bplist_t *bpl = &spa->spa_sync_bplist;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *vd;
	dmu_tx_t *tx;
	int dirty_vdevs;
	int error;

	/*
	 * Lock out configuration changes.
	 */
	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);

	spa->spa_syncing_txg = txg;
	spa->spa_sync_pass = 0;

	/*
	 * If there are any pending vdev state changes, convert them
	 * into config changes that go out with this transaction group.
	 */
	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
	while (list_head(&spa->spa_state_dirty_list) != NULL) {
		/*
		 * We need the write lock here because, for aux vdevs,
		 * calling vdev_config_dirty() modifies sav_config.
		 * This is ugly and will become unnecessary when we
		 * eliminate the aux vdev wart by integrating all vdevs
		 * into the root vdev tree.
		 */
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_WRITER);
		while ((vd = list_head(&spa->spa_state_dirty_list)) != NULL) {
			vdev_state_clean(vd);
			vdev_config_dirty(vd);
		}
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);
	}
	spa_config_exit(spa, SCL_STATE, FTAG);

	VERIFY(0 == bplist_open(bpl, mos, spa->spa_sync_bplist_obj));

	tx = dmu_tx_create_assigned(dp, txg);

	/*
	 * If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg,
	 * set spa_deflate if we have no raid-z vdevs.
	 */
	if (spa->spa_ubsync.ub_version < SPA_VERSION_RAIDZ_DEFLATE &&
	    spa->spa_uberblock.ub_version >= SPA_VERSION_RAIDZ_DEFLATE) {
		int i;

		for (i = 0; i < rvd->vdev_children; i++) {
			vd = rvd->vdev_child[i];
			if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE)
				break;
		}
		if (i == rvd->vdev_children) {
			spa->spa_deflate = TRUE;
			VERIFY(0 == zap_add(spa->spa_meta_objset,
			    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
			    sizeof (uint64_t), 1, &spa->spa_deflate, tx));
		}
	}

	/* One-time upgrade work when crossing SPA_VERSION_ORIGIN. */
	if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN &&
	    spa->spa_uberblock.ub_version >= SPA_VERSION_ORIGIN) {
		dsl_pool_create_origin(dp, tx);

		/* Keeping the origin open increases spa_minref */
		spa->spa_minref += 3;
	}

	/* One-time upgrade work when crossing SPA_VERSION_NEXT_CLONES. */
	if (spa->spa_ubsync.ub_version < SPA_VERSION_NEXT_CLONES &&
	    spa->spa_uberblock.ub_version >= SPA_VERSION_NEXT_CLONES) {
		dsl_pool_upgrade_clones(dp, tx);
	}

	/*
	 * If anything has changed in this txg, push the deferred frees
	 * from the previous txg. If not, leave them alone so that we
	 * don't generate work on an otherwise idle system.
	 */
	if (!txg_list_empty(&dp->dp_dirty_datasets, txg) ||
	    !txg_list_empty(&dp->dp_dirty_dirs, txg) ||
	    !txg_list_empty(&dp->dp_sync_tasks, txg))
		spa_sync_deferred_frees(spa, txg);

	/*
	 * Iterate to convergence.
	 */
	do {
		spa->spa_sync_pass++;

		spa_sync_config_object(spa, tx);
		spa_sync_aux_dev(spa, &spa->spa_spares, tx,
		    ZPOOL_CONFIG_SPARES, DMU_POOL_SPARES);
		spa_sync_aux_dev(spa, &spa->spa_l2cache, tx,
		    ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE);
		spa_errlog_sync(spa, txg);
		dsl_pool_sync(dp, txg);

		dirty_vdevs = 0;
		/* Intentional assignment-in-condition: drain the txg list. */
		while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg)) {
			vdev_sync(vd, txg);
			dirty_vdevs++;
		}

		bplist_sync(bpl, tx);
	} while (dirty_vdevs);

	bplist_close(bpl);

	dprintf("txg %llu passes %d\n", txg, spa->spa_sync_pass);

	/*
	 * Rewrite the vdev configuration (which includes the uberblock)
	 * to commit the transaction group.
	 *
	 * If there are no dirty vdevs, we sync the uberblock to a few
	 * random top-level vdevs that are known to be visible in the
	 * config cache (see spa_vdev_add() for a complete description).
	 * If there *are* dirty vdevs, sync the uberblock to all vdevs.
	 */
	for (;;) {
		/*
		 * We hold SCL_STATE to prevent vdev open/close/etc.
		 * while we're attempting to write the vdev labels.
		 */
		spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);

		if (list_is_empty(&spa->spa_config_dirty_list)) {
			vdev_t *svd[SPA_DVAS_PER_BP];
			int svdcount = 0;
			int children = rvd->vdev_children;
			int c0 = spa_get_random(children);

			/* Pick up to SPA_DVAS_PER_BP eligible top-levels. */
			for (int c = 0; c < children; c++) {
				vd = rvd->vdev_child[(c0 + c) % children];
				if (vd->vdev_ms_array == 0 || vd->vdev_islog)
					continue;
				svd[svdcount++] = vd;
				if (svdcount == SPA_DVAS_PER_BP)
					break;
			}
			error = vdev_config_sync(svd, svdcount, txg, B_FALSE);
			if (error != 0)
				error = vdev_config_sync(svd, svdcount, txg,
				    B_TRUE);
		} else {
			error = vdev_config_sync(rvd->vdev_child,
			    rvd->vdev_children, txg, B_FALSE);
			if (error != 0)
				error = vdev_config_sync(rvd->vdev_child,
				    rvd->vdev_children, txg, B_TRUE);
		}

		spa_config_exit(spa, SCL_STATE, FTAG);

		if (error == 0)
			break;
		/* Label write failed: suspend I/O and retry once resumed. */
		zio_suspend(spa, NULL);
		zio_resume_wait(spa);
	}
	dmu_tx_commit(tx);

	/*
	 * Clear the dirty config list.
	 */
	while ((vd = list_head(&spa->spa_config_dirty_list)) != NULL)
		vdev_config_clean(vd);

	/*
	 * Now that the new config has synced transactionally,
	 * let it become visible to the config cache.
	 */
	if (spa->spa_config_syncing != NULL) {
		spa_config_set(spa, spa->spa_config_syncing);
		spa->spa_config_txg = txg;
		spa->spa_config_syncing = NULL;
	}

	spa->spa_ubsync = spa->spa_uberblock;

	/*
	 * Clean up the ZIL records for the synced txg.
	 */
	dsl_pool_zil_clean(dp);

	/*
	 * Update usable space statistics.
	 */
	while (vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)))
		vdev_sync_done(vd, txg);

	/*
	 * It had better be the case that we didn't dirty anything
	 * since vdev_config_sync().
	 */
	ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg));
	ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg));
	ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg));
	ASSERT(bpl->bpl_queue == NULL);

	spa_config_exit(spa, SCL_CONFIG, FTAG);

	/*
	 * If any async tasks have been requested, kick them off.
	 */
	spa_async_dispatch(spa);
}

/*
 * Sync all pools. We don't want to hold the namespace lock across these
 * operations, so we take a reference on the spa_t and drop the lock during the
 * sync.
 */
void
spa_sync_allpools(void)
{
	spa_t *spa = NULL;
	mutex_enter(&spa_namespace_lock);
	while ((spa = spa_next(spa)) != NULL) {
		/* Skip pools that aren't active or have suspended I/O. */
		if (spa_state(spa) != POOL_STATE_ACTIVE || spa_suspended(spa))
			continue;
		spa_open_ref(spa, FTAG);
		mutex_exit(&spa_namespace_lock);
		txg_wait_synced(spa_get_dsl(spa), 0);
		mutex_enter(&spa_namespace_lock);
		spa_close(spa, FTAG);
	}
	mutex_exit(&spa_namespace_lock);
}

/*
 * ==========================================================================
 * Miscellaneous routines
 * ==========================================================================
 */

/*
 * Remove all pools in the system.
 */
void
spa_evict_all(void)
{
	spa_t *spa;

	/*
	 * Remove all cached state. All pools should be closed now,
	 * so every spa in the AVL tree should be unreferenced.
	 */
	mutex_enter(&spa_namespace_lock);
	while ((spa = spa_next(NULL)) != NULL) {
		/*
		 * Stop async tasks. The async thread may need to detach
		 * a device that's been replaced, which requires grabbing
		 * spa_namespace_lock, so we must drop it here.
		 */
		spa_open_ref(spa, FTAG);
		mutex_exit(&spa_namespace_lock);
		spa_async_suspend(spa);
		mutex_enter(&spa_namespace_lock);
		spa_close(spa, FTAG);

		if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
			spa_unload(spa);
			spa_deactivate(spa);
		}
		spa_remove(spa);
	}
	mutex_exit(&spa_namespace_lock);
}

/*
 * Find the vdev with the given guid in the pool's vdev tree; if aux is
 * B_TRUE, also search the l2cache and spare vdev lists.  Returns NULL if
 * no match is found.
 */
vdev_t *
spa_lookup_by_guid(spa_t *spa, uint64_t guid, boolean_t aux)
{
	vdev_t *vd;
	int i;

	if ((vd = vdev_lookup_by_guid(spa->spa_root_vdev, guid)) != NULL)
		return (vd);

	if (aux) {
		for (i = 0; i < spa->spa_l2cache.sav_count; i++) {
			vd = spa->spa_l2cache.sav_vdevs[i];
			if (vd->vdev_guid == guid)
				return (vd);
		}

		for (i = 0; i < spa->spa_spares.sav_count; i++) {
			vd = spa->spa_spares.sav_vdevs[i];
			if (vd->vdev_guid == guid)
				return (vd);
		}
	}

	return (NULL);
}

/*
 * Raise the pool's on-disk version to 'version', dirty the config, and
 * wait for the change to sync out.
 */
void
spa_upgrade(spa_t *spa, uint64_t version)
{
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	/*
	 * This should only be called for a non-faulted pool, and since a
	 * future version would result in an unopenable pool, this shouldn't be
	 * possible.
	 */
	ASSERT(spa->spa_uberblock.ub_version <= SPA_VERSION);
	ASSERT(version >= spa->spa_uberblock.ub_version);

	spa->spa_uberblock.ub_version = version;
	vdev_config_dirty(spa->spa_root_vdev);

	spa_config_exit(spa, SCL_ALL, FTAG);

	txg_wait_synced(spa_get_dsl(spa), 0);
}

/*
 * Return B_TRUE if the pool has a hot spare (configured or pending) with
 * the given guid.
 */
boolean_t
spa_has_spare(spa_t *spa, uint64_t guid)
{
	int i;
	uint64_t spareguid;
	spa_aux_vdev_t *sav = &spa->spa_spares;

	for (i = 0; i < sav->sav_count; i++)
		if (sav->sav_vdevs[i]->vdev_guid == guid)
			return (B_TRUE);

	/* Also check spares that are pending (not yet fully added). */
	for (i = 0; i < sav->sav_npending; i++) {
		if (nvlist_lookup_uint64(sav->sav_pending[i], ZPOOL_CONFIG_GUID,
		    &spareguid) == 0 && spareguid == guid)
			return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * Check if a pool has an active shared spare device.
44877214Slling * Note: reference count of an active spare is 2, as a spare and as a replace 44887214Slling */ 44897214Slling static boolean_t 44907214Slling spa_has_active_shared_spare(spa_t *spa) 44917214Slling { 44927214Slling int i, refcnt; 44937214Slling uint64_t pool; 44947214Slling spa_aux_vdev_t *sav = &spa->spa_spares; 44957214Slling 44967214Slling for (i = 0; i < sav->sav_count; i++) { 44977214Slling if (spa_spare_exists(sav->sav_vdevs[i]->vdev_guid, &pool, 44987214Slling &refcnt) && pool != 0ULL && pool == spa_guid(spa) && 44997214Slling refcnt > 2) 45007214Slling return (B_TRUE); 45017214Slling } 45027214Slling 45037214Slling return (B_FALSE); 45047214Slling } 45057214Slling 45067214Slling /* 45074451Seschrock * Post a sysevent corresponding to the given event. The 'name' must be one of 45084451Seschrock * the event definitions in sys/sysevent/eventdefs.h. The payload will be 45094451Seschrock * filled in from the spa and (optionally) the vdev. This doesn't do anything 45104451Seschrock * in the userland libzpool, as we don't want consumers to misinterpret ztest 45114451Seschrock * or zdb as real changes. 
45124451Seschrock */ 45134451Seschrock void 45144451Seschrock spa_event_notify(spa_t *spa, vdev_t *vd, const char *name) 45154451Seschrock { 45164451Seschrock #ifdef _KERNEL 45174451Seschrock sysevent_t *ev; 45184451Seschrock sysevent_attr_list_t *attr = NULL; 45194451Seschrock sysevent_value_t value; 45204451Seschrock sysevent_id_t eid; 45214451Seschrock 45224451Seschrock ev = sysevent_alloc(EC_ZFS, (char *)name, SUNW_KERN_PUB "zfs", 45234451Seschrock SE_SLEEP); 45244451Seschrock 45254451Seschrock value.value_type = SE_DATA_TYPE_STRING; 45264451Seschrock value.value.sv_string = spa_name(spa); 45274451Seschrock if (sysevent_add_attr(&attr, ZFS_EV_POOL_NAME, &value, SE_SLEEP) != 0) 45284451Seschrock goto done; 45294451Seschrock 45304451Seschrock value.value_type = SE_DATA_TYPE_UINT64; 45314451Seschrock value.value.sv_uint64 = spa_guid(spa); 45324451Seschrock if (sysevent_add_attr(&attr, ZFS_EV_POOL_GUID, &value, SE_SLEEP) != 0) 45334451Seschrock goto done; 45344451Seschrock 45354451Seschrock if (vd) { 45364451Seschrock value.value_type = SE_DATA_TYPE_UINT64; 45374451Seschrock value.value.sv_uint64 = vd->vdev_guid; 45384451Seschrock if (sysevent_add_attr(&attr, ZFS_EV_VDEV_GUID, &value, 45394451Seschrock SE_SLEEP) != 0) 45404451Seschrock goto done; 45414451Seschrock 45424451Seschrock if (vd->vdev_path) { 45434451Seschrock value.value_type = SE_DATA_TYPE_STRING; 45444451Seschrock value.value.sv_string = vd->vdev_path; 45454451Seschrock if (sysevent_add_attr(&attr, ZFS_EV_VDEV_PATH, 45464451Seschrock &value, SE_SLEEP) != 0) 45474451Seschrock goto done; 45484451Seschrock } 45494451Seschrock } 45504451Seschrock 45515756Seschrock if (sysevent_attach_attributes(ev, attr) != 0) 45525756Seschrock goto done; 45535756Seschrock attr = NULL; 45545756Seschrock 45554451Seschrock (void) log_sysevent(ev, SE_SLEEP, &eid); 45564451Seschrock 45574451Seschrock done: 45584451Seschrock if (attr) 45594451Seschrock sysevent_free_attr(attr); 45604451Seschrock sysevent_free(ev); 
45614451Seschrock #endif 45624451Seschrock } 4563