1789Sahrens /* 2789Sahrens * CDDL HEADER START 3789Sahrens * 4789Sahrens * The contents of this file are subject to the terms of the 51544Seschrock * Common Development and Distribution License (the "License"). 61544Seschrock * You may not use this file except in compliance with the License. 7789Sahrens * 8789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9789Sahrens * or http://www.opensolaris.org/os/licensing. 10789Sahrens * See the License for the specific language governing permissions 11789Sahrens * and limitations under the License. 12789Sahrens * 13789Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15789Sahrens * If applicable, add the following below this CDDL HEADER, with the 16789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18789Sahrens * 19789Sahrens * CDDL HEADER END 20789Sahrens */ 212082Seschrock 22789Sahrens /* 238525SEric.Schrock@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24789Sahrens * Use is subject to license terms. 25789Sahrens */ 26789Sahrens 27789Sahrens /* 28789Sahrens * This file contains all the routines used when modifying on-disk SPA state. 29789Sahrens * This includes opening, importing, destroying, exporting a pool, and syncing a 30789Sahrens * pool. 
31789Sahrens */ 32789Sahrens 33789Sahrens #include <sys/zfs_context.h> 341544Seschrock #include <sys/fm/fs/zfs.h> 35789Sahrens #include <sys/spa_impl.h> 36789Sahrens #include <sys/zio.h> 37789Sahrens #include <sys/zio_checksum.h> 38789Sahrens #include <sys/dmu.h> 39789Sahrens #include <sys/dmu_tx.h> 40789Sahrens #include <sys/zap.h> 41789Sahrens #include <sys/zil.h> 4210922SJeff.Bonwick@Sun.COM #include <sys/ddt.h> 43789Sahrens #include <sys/vdev_impl.h> 44789Sahrens #include <sys/metaslab.h> 4510594SGeorge.Wilson@Sun.COM #include <sys/metaslab_impl.h> 46789Sahrens #include <sys/uberblock_impl.h> 47789Sahrens #include <sys/txg.h> 48789Sahrens #include <sys/avl.h> 49789Sahrens #include <sys/dmu_traverse.h> 503912Slling #include <sys/dmu_objset.h> 51789Sahrens #include <sys/unique.h> 52789Sahrens #include <sys/dsl_pool.h> 533912Slling #include <sys/dsl_dataset.h> 54789Sahrens #include <sys/dsl_dir.h> 55789Sahrens #include <sys/dsl_prop.h> 563912Slling #include <sys/dsl_synctask.h> 57789Sahrens #include <sys/fs/zfs.h> 585450Sbrendan #include <sys/arc.h> 59789Sahrens #include <sys/callb.h> 603975Sek110237 #include <sys/systeminfo.h> 616423Sgw25295 #include <sys/spa_boot.h> 629816SGeorge.Wilson@Sun.COM #include <sys/zfs_ioctl.h> 63789Sahrens 648662SJordan.Vaughan@Sun.com #ifdef _KERNEL 65*11173SJonathan.Adams@Sun.COM #include <sys/bootprops.h> 66*11173SJonathan.Adams@Sun.COM #include <sys/callb.h> 67*11173SJonathan.Adams@Sun.COM #include <sys/cpupart.h> 68*11173SJonathan.Adams@Sun.COM #include <sys/pool.h> 69*11173SJonathan.Adams@Sun.COM #include <sys/sysdc.h> 708662SJordan.Vaughan@Sun.com #include <sys/zone.h> 718662SJordan.Vaughan@Sun.com #endif /* _KERNEL */ 728662SJordan.Vaughan@Sun.com 735094Slling #include "zfs_prop.h" 745913Sperrin #include "zfs_comutil.h" 755094Slling 76*11173SJonathan.Adams@Sun.COM typedef enum zti_modes { 779515SJonathan.Adams@Sun.COM zti_mode_fixed, /* value is # of threads (min 1) */ 789515SJonathan.Adams@Sun.COM zti_mode_online_percent, /* 
value is % of online CPUs */ 79*11173SJonathan.Adams@Sun.COM zti_mode_batch, /* cpu-intensive; value is ignored */ 8011146SGeorge.Wilson@Sun.COM zti_mode_null, /* don't create a taskq */ 819515SJonathan.Adams@Sun.COM zti_nmodes 82*11173SJonathan.Adams@Sun.COM } zti_modes_t; 832986Sek110237 8411146SGeorge.Wilson@Sun.COM #define ZTI_FIX(n) { zti_mode_fixed, (n) } 8511146SGeorge.Wilson@Sun.COM #define ZTI_PCT(n) { zti_mode_online_percent, (n) } 86*11173SJonathan.Adams@Sun.COM #define ZTI_BATCH { zti_mode_batch, 0 } 8711146SGeorge.Wilson@Sun.COM #define ZTI_NULL { zti_mode_null, 0 } 8811146SGeorge.Wilson@Sun.COM 8911146SGeorge.Wilson@Sun.COM #define ZTI_ONE ZTI_FIX(1) 909515SJonathan.Adams@Sun.COM 919515SJonathan.Adams@Sun.COM typedef struct zio_taskq_info { 9211146SGeorge.Wilson@Sun.COM enum zti_modes zti_mode; 9311146SGeorge.Wilson@Sun.COM uint_t zti_value; 949515SJonathan.Adams@Sun.COM } zio_taskq_info_t; 959515SJonathan.Adams@Sun.COM 969515SJonathan.Adams@Sun.COM static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = { 97*11173SJonathan.Adams@Sun.COM "issue", "issue_high", "intr", "intr_high" 989515SJonathan.Adams@Sun.COM }; 999515SJonathan.Adams@Sun.COM 10011146SGeorge.Wilson@Sun.COM /* 10111146SGeorge.Wilson@Sun.COM * Define the taskq threads for the following I/O types: 10211146SGeorge.Wilson@Sun.COM * NULL, READ, WRITE, FREE, CLAIM, and IOCTL 10311146SGeorge.Wilson@Sun.COM */ 10411146SGeorge.Wilson@Sun.COM const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = { 10511146SGeorge.Wilson@Sun.COM /* ISSUE ISSUE_HIGH INTR INTR_HIGH */ 10611146SGeorge.Wilson@Sun.COM { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, 107*11173SJonathan.Adams@Sun.COM { ZTI_FIX(8), ZTI_NULL, ZTI_BATCH, ZTI_NULL }, 108*11173SJonathan.Adams@Sun.COM { ZTI_BATCH, ZTI_FIX(5), ZTI_FIX(8), ZTI_FIX(5) }, 10911146SGeorge.Wilson@Sun.COM { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, 11011146SGeorge.Wilson@Sun.COM { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, 11111146SGeorge.Wilson@Sun.COM { ZTI_ONE, 
ZTI_NULL, ZTI_ONE, ZTI_NULL }, 1129515SJonathan.Adams@Sun.COM }; 1139515SJonathan.Adams@Sun.COM 1145094Slling static void spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx); 1157214Slling static boolean_t spa_has_active_shared_spare(spa_t *spa); 1165094Slling 117*11173SJonathan.Adams@Sun.COM uint_t zio_taskq_batch_pct = 100; /* 1 thread per cpu in pset */ 118*11173SJonathan.Adams@Sun.COM id_t zio_taskq_psrset_bind = PS_NONE; 119*11173SJonathan.Adams@Sun.COM boolean_t zio_taskq_sysdc = B_TRUE; /* use SDC scheduling class */ 120*11173SJonathan.Adams@Sun.COM uint_t zio_taskq_basedc = 80; /* base duty cycle */ 121*11173SJonathan.Adams@Sun.COM 122*11173SJonathan.Adams@Sun.COM boolean_t spa_create_process = B_TRUE; /* no process ==> no sysdc */ 123*11173SJonathan.Adams@Sun.COM 124*11173SJonathan.Adams@Sun.COM /* 125*11173SJonathan.Adams@Sun.COM * This (illegal) pool name is used when temporarily importing a spa_t in order 126*11173SJonathan.Adams@Sun.COM * to get the vdev stats associated with the imported devices. 127*11173SJonathan.Adams@Sun.COM */ 128*11173SJonathan.Adams@Sun.COM #define TRYIMPORT_NAME "$import" 129*11173SJonathan.Adams@Sun.COM 1305094Slling /* 1315094Slling * ========================================================================== 1325094Slling * SPA properties routines 1335094Slling * ========================================================================== 1345094Slling */ 1355094Slling 1365094Slling /* 1375094Slling * Add a (source=src, propname=propval) list to an nvlist. 
1385094Slling */ 1395949Slling static void 1405094Slling spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval, 1415094Slling uint64_t intval, zprop_source_t src) 1425094Slling { 1435094Slling const char *propname = zpool_prop_to_name(prop); 1445094Slling nvlist_t *propval; 1455949Slling 1465949Slling VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); 1475949Slling VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0); 1485949Slling 1495949Slling if (strval != NULL) 1505949Slling VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0); 1515949Slling else 1525949Slling VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0); 1535949Slling 1545949Slling VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0); 1555094Slling nvlist_free(propval); 1565094Slling } 1575094Slling 1585094Slling /* 1595094Slling * Get property values from the spa configuration. 1605094Slling */ 1615949Slling static void 1625094Slling spa_prop_get_config(spa_t *spa, nvlist_t **nvp) 1635094Slling { 1648525SEric.Schrock@Sun.COM uint64_t size; 16510956SGeorge.Wilson@Sun.COM uint64_t alloc; 1665094Slling uint64_t cap, version; 1675094Slling zprop_source_t src = ZPROP_SRC_NONE; 1686643Seschrock spa_config_dirent_t *dp; 1695094Slling 1707754SJeff.Bonwick@Sun.COM ASSERT(MUTEX_HELD(&spa->spa_props_lock)); 1717754SJeff.Bonwick@Sun.COM 1728525SEric.Schrock@Sun.COM if (spa->spa_root_vdev != NULL) { 17310956SGeorge.Wilson@Sun.COM alloc = metaslab_class_get_alloc(spa_normal_class(spa)); 17410922SJeff.Bonwick@Sun.COM size = metaslab_class_get_space(spa_normal_class(spa)); 1758525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src); 1768525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src); 17710956SGeorge.Wilson@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_ALLOCATED, NULL, alloc, src); 17810956SGeorge.Wilson@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_FREE, NULL, 17910956SGeorge.Wilson@Sun.COM size - alloc, 
src); 18010956SGeorge.Wilson@Sun.COM 18110956SGeorge.Wilson@Sun.COM cap = (size == 0) ? 0 : (alloc * 100 / size); 1828525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src); 1838525SEric.Schrock@Sun.COM 18410922SJeff.Bonwick@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_DEDUPRATIO, NULL, 18510922SJeff.Bonwick@Sun.COM ddt_get_pool_dedup_ratio(spa), src); 18610922SJeff.Bonwick@Sun.COM 1878525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL, 1888525SEric.Schrock@Sun.COM spa->spa_root_vdev->vdev_state, src); 1898525SEric.Schrock@Sun.COM 1908525SEric.Schrock@Sun.COM version = spa_version(spa); 1918525SEric.Schrock@Sun.COM if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) 1928525SEric.Schrock@Sun.COM src = ZPROP_SRC_DEFAULT; 1938525SEric.Schrock@Sun.COM else 1948525SEric.Schrock@Sun.COM src = ZPROP_SRC_LOCAL; 1958525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src); 1968525SEric.Schrock@Sun.COM } 1975949Slling 1985949Slling spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src); 1995949Slling 2005949Slling if (spa->spa_root != NULL) 2015949Slling spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root, 2025949Slling 0, ZPROP_SRC_LOCAL); 2035094Slling 2046643Seschrock if ((dp = list_head(&spa->spa_config_list)) != NULL) { 2056643Seschrock if (dp->scd_path == NULL) { 2065949Slling spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 2076643Seschrock "none", 0, ZPROP_SRC_LOCAL); 2086643Seschrock } else if (strcmp(dp->scd_path, spa_config_path) != 0) { 2095949Slling spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 2106643Seschrock dp->scd_path, 0, ZPROP_SRC_LOCAL); 2115363Seschrock } 2125363Seschrock } 2135094Slling } 2145094Slling 2155094Slling /* 2165094Slling * Get zpool property values. 
2175094Slling */ 2185094Slling int 2195094Slling spa_prop_get(spa_t *spa, nvlist_t **nvp) 2205094Slling { 22110922SJeff.Bonwick@Sun.COM objset_t *mos = spa->spa_meta_objset; 2225094Slling zap_cursor_t zc; 2235094Slling zap_attribute_t za; 2245094Slling int err; 2255094Slling 2265949Slling VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); 2275094Slling 2287754SJeff.Bonwick@Sun.COM mutex_enter(&spa->spa_props_lock); 2297754SJeff.Bonwick@Sun.COM 2305094Slling /* 2315094Slling * Get properties from the spa config. 2325094Slling */ 2335949Slling spa_prop_get_config(spa, nvp); 2345094Slling 2355094Slling /* If no pool property object, no more prop to get. */ 2365094Slling if (spa->spa_pool_props_object == 0) { 2375094Slling mutex_exit(&spa->spa_props_lock); 2385094Slling return (0); 2395094Slling } 2405094Slling 2415094Slling /* 2425094Slling * Get properties from the MOS pool property object. 2435094Slling */ 2445094Slling for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object); 2455094Slling (err = zap_cursor_retrieve(&zc, &za)) == 0; 2465094Slling zap_cursor_advance(&zc)) { 2475094Slling uint64_t intval = 0; 2485094Slling char *strval = NULL; 2495094Slling zprop_source_t src = ZPROP_SRC_DEFAULT; 2505094Slling zpool_prop_t prop; 2515094Slling 2525094Slling if ((prop = zpool_name_to_prop(za.za_name)) == ZPROP_INVAL) 2535094Slling continue; 2545094Slling 2555094Slling switch (za.za_integer_length) { 2565094Slling case 8: 2575094Slling /* integer property */ 2585094Slling if (za.za_first_integer != 2595094Slling zpool_prop_default_numeric(prop)) 2605094Slling src = ZPROP_SRC_LOCAL; 2615094Slling 2625094Slling if (prop == ZPOOL_PROP_BOOTFS) { 2635094Slling dsl_pool_t *dp; 2645094Slling dsl_dataset_t *ds = NULL; 2655094Slling 2665094Slling dp = spa_get_dsl(spa); 2675094Slling rw_enter(&dp->dp_config_rwlock, RW_READER); 2686689Smaybee if (err = dsl_dataset_hold_obj(dp, 2696689Smaybee za.za_first_integer, FTAG, &ds)) { 2705094Slling rw_exit(&dp->dp_config_rwlock); 
2715094Slling break; 2725094Slling } 2735094Slling 2745094Slling strval = kmem_alloc( 2755094Slling MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, 2765094Slling KM_SLEEP); 2775094Slling dsl_dataset_name(ds, strval); 2786689Smaybee dsl_dataset_rele(ds, FTAG); 2795094Slling rw_exit(&dp->dp_config_rwlock); 2805094Slling } else { 2815094Slling strval = NULL; 2825094Slling intval = za.za_first_integer; 2835094Slling } 2845094Slling 2855949Slling spa_prop_add_list(*nvp, prop, strval, intval, src); 2865094Slling 2875094Slling if (strval != NULL) 2885094Slling kmem_free(strval, 2895094Slling MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); 2905094Slling 2915094Slling break; 2925094Slling 2935094Slling case 1: 2945094Slling /* string property */ 2955094Slling strval = kmem_alloc(za.za_num_integers, KM_SLEEP); 2965094Slling err = zap_lookup(mos, spa->spa_pool_props_object, 2975094Slling za.za_name, 1, za.za_num_integers, strval); 2985094Slling if (err) { 2995094Slling kmem_free(strval, za.za_num_integers); 3005094Slling break; 3015094Slling } 3025949Slling spa_prop_add_list(*nvp, prop, strval, 0, src); 3035094Slling kmem_free(strval, za.za_num_integers); 3045094Slling break; 3055094Slling 3065094Slling default: 3075094Slling break; 3085094Slling } 3095094Slling } 3105094Slling zap_cursor_fini(&zc); 3115094Slling mutex_exit(&spa->spa_props_lock); 3125094Slling out: 3135094Slling if (err && err != ENOENT) { 3145094Slling nvlist_free(*nvp); 3155949Slling *nvp = NULL; 3165094Slling return (err); 3175094Slling } 3185094Slling 3195094Slling return (0); 3205094Slling } 3215094Slling 3225094Slling /* 3235094Slling * Validate the given pool properties nvlist and modify the list 3245094Slling * for the property values to be set. 
3255094Slling */ 3265094Slling static int 3275094Slling spa_prop_validate(spa_t *spa, nvlist_t *props) 3285094Slling { 3295094Slling nvpair_t *elem; 3305094Slling int error = 0, reset_bootfs = 0; 3315094Slling uint64_t objnum; 3325094Slling 3335094Slling elem = NULL; 3345094Slling while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { 3355094Slling zpool_prop_t prop; 3365094Slling char *propname, *strval; 3375094Slling uint64_t intval; 3385094Slling objset_t *os; 3395363Seschrock char *slash; 3405094Slling 3415094Slling propname = nvpair_name(elem); 3425094Slling 3435094Slling if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) 3445094Slling return (EINVAL); 3455094Slling 3465094Slling switch (prop) { 3475094Slling case ZPOOL_PROP_VERSION: 3485094Slling error = nvpair_value_uint64(elem, &intval); 3495094Slling if (!error && 3505094Slling (intval < spa_version(spa) || intval > SPA_VERSION)) 3515094Slling error = EINVAL; 3525094Slling break; 3535094Slling 3545094Slling case ZPOOL_PROP_DELEGATION: 3555094Slling case ZPOOL_PROP_AUTOREPLACE: 3567538SRichard.Morris@Sun.COM case ZPOOL_PROP_LISTSNAPS: 3579816SGeorge.Wilson@Sun.COM case ZPOOL_PROP_AUTOEXPAND: 3585094Slling error = nvpair_value_uint64(elem, &intval); 3595094Slling if (!error && intval > 1) 3605094Slling error = EINVAL; 3615094Slling break; 3625094Slling 3635094Slling case ZPOOL_PROP_BOOTFS: 3649630SJeff.Bonwick@Sun.COM /* 3659630SJeff.Bonwick@Sun.COM * If the pool version is less than SPA_VERSION_BOOTFS, 3669630SJeff.Bonwick@Sun.COM * or the pool is still being created (version == 0), 3679630SJeff.Bonwick@Sun.COM * the bootfs property cannot be set. 
3689630SJeff.Bonwick@Sun.COM */ 3695094Slling if (spa_version(spa) < SPA_VERSION_BOOTFS) { 3705094Slling error = ENOTSUP; 3715094Slling break; 3725094Slling } 3735094Slling 3745094Slling /* 3757042Sgw25295 * Make sure the vdev config is bootable 3765094Slling */ 3777042Sgw25295 if (!vdev_is_bootable(spa->spa_root_vdev)) { 3785094Slling error = ENOTSUP; 3795094Slling break; 3805094Slling } 3815094Slling 3825094Slling reset_bootfs = 1; 3835094Slling 3845094Slling error = nvpair_value_string(elem, &strval); 3855094Slling 3865094Slling if (!error) { 3877042Sgw25295 uint64_t compress; 3887042Sgw25295 3895094Slling if (strval == NULL || strval[0] == '\0') { 3905094Slling objnum = zpool_prop_default_numeric( 3915094Slling ZPOOL_PROP_BOOTFS); 3925094Slling break; 3935094Slling } 3945094Slling 39510298SMatthew.Ahrens@Sun.COM if (error = dmu_objset_hold(strval, FTAG, &os)) 3965094Slling break; 3977042Sgw25295 39810298SMatthew.Ahrens@Sun.COM /* Must be ZPL and not gzip compressed. */ 39910298SMatthew.Ahrens@Sun.COM 40010298SMatthew.Ahrens@Sun.COM if (dmu_objset_type(os) != DMU_OST_ZFS) { 40110298SMatthew.Ahrens@Sun.COM error = ENOTSUP; 40210298SMatthew.Ahrens@Sun.COM } else if ((error = dsl_prop_get_integer(strval, 4037042Sgw25295 zfs_prop_to_name(ZFS_PROP_COMPRESSION), 4047042Sgw25295 &compress, NULL)) == 0 && 4057042Sgw25295 !BOOTFS_COMPRESS_VALID(compress)) { 4067042Sgw25295 error = ENOTSUP; 4077042Sgw25295 } else { 4087042Sgw25295 objnum = dmu_objset_id(os); 4097042Sgw25295 } 41010298SMatthew.Ahrens@Sun.COM dmu_objset_rele(os, FTAG); 4115094Slling } 4125094Slling break; 4137754SJeff.Bonwick@Sun.COM 4145329Sgw25295 case ZPOOL_PROP_FAILUREMODE: 4155329Sgw25295 error = nvpair_value_uint64(elem, &intval); 4165329Sgw25295 if (!error && (intval < ZIO_FAILURE_MODE_WAIT || 4175329Sgw25295 intval > ZIO_FAILURE_MODE_PANIC)) 4185329Sgw25295 error = EINVAL; 4195329Sgw25295 4205329Sgw25295 /* 4215329Sgw25295 * This is a special case which only occurs when 4225329Sgw25295 * the pool 
has completely failed. This allows 4235329Sgw25295 * the user to change the in-core failmode property 4245329Sgw25295 * without syncing it out to disk (I/Os might 4255329Sgw25295 * currently be blocked). We do this by returning 4265329Sgw25295 * EIO to the caller (spa_prop_set) to trick it 4275329Sgw25295 * into thinking we encountered a property validation 4285329Sgw25295 * error. 4295329Sgw25295 */ 4307754SJeff.Bonwick@Sun.COM if (!error && spa_suspended(spa)) { 4315329Sgw25295 spa->spa_failmode = intval; 4325329Sgw25295 error = EIO; 4335329Sgw25295 } 4345329Sgw25295 break; 4355363Seschrock 4365363Seschrock case ZPOOL_PROP_CACHEFILE: 4375363Seschrock if ((error = nvpair_value_string(elem, &strval)) != 0) 4385363Seschrock break; 4395363Seschrock 4405363Seschrock if (strval[0] == '\0') 4415363Seschrock break; 4425363Seschrock 4435363Seschrock if (strcmp(strval, "none") == 0) 4445363Seschrock break; 4455363Seschrock 4465363Seschrock if (strval[0] != '/') { 4475363Seschrock error = EINVAL; 4485363Seschrock break; 4495363Seschrock } 4505363Seschrock 4515363Seschrock slash = strrchr(strval, '/'); 4525363Seschrock ASSERT(slash != NULL); 4535363Seschrock 4545363Seschrock if (slash[1] == '\0' || strcmp(slash, "/.") == 0 || 4555363Seschrock strcmp(slash, "/..") == 0) 4565363Seschrock error = EINVAL; 4575363Seschrock break; 45810922SJeff.Bonwick@Sun.COM 45910922SJeff.Bonwick@Sun.COM case ZPOOL_PROP_DEDUPDITTO: 46010922SJeff.Bonwick@Sun.COM if (spa_version(spa) < SPA_VERSION_DEDUP) 46110922SJeff.Bonwick@Sun.COM error = ENOTSUP; 46210922SJeff.Bonwick@Sun.COM else 46310922SJeff.Bonwick@Sun.COM error = nvpair_value_uint64(elem, &intval); 46410922SJeff.Bonwick@Sun.COM if (error == 0 && 46510922SJeff.Bonwick@Sun.COM intval != 0 && intval < ZIO_DEDUPDITTO_MIN) 46610922SJeff.Bonwick@Sun.COM error = EINVAL; 46710922SJeff.Bonwick@Sun.COM break; 4685094Slling } 4695094Slling 4705094Slling if (error) 4715094Slling break; 4725094Slling } 4735094Slling 4745094Slling if (!error && 
reset_bootfs) { 4755094Slling error = nvlist_remove(props, 4765094Slling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING); 4775094Slling 4785094Slling if (!error) { 4795094Slling error = nvlist_add_uint64(props, 4805094Slling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum); 4815094Slling } 4825094Slling } 4835094Slling 4845094Slling return (error); 4855094Slling } 4865094Slling 4878525SEric.Schrock@Sun.COM void 4888525SEric.Schrock@Sun.COM spa_configfile_set(spa_t *spa, nvlist_t *nvp, boolean_t need_sync) 4898525SEric.Schrock@Sun.COM { 4908525SEric.Schrock@Sun.COM char *cachefile; 4918525SEric.Schrock@Sun.COM spa_config_dirent_t *dp; 4928525SEric.Schrock@Sun.COM 4938525SEric.Schrock@Sun.COM if (nvlist_lookup_string(nvp, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), 4948525SEric.Schrock@Sun.COM &cachefile) != 0) 4958525SEric.Schrock@Sun.COM return; 4968525SEric.Schrock@Sun.COM 4978525SEric.Schrock@Sun.COM dp = kmem_alloc(sizeof (spa_config_dirent_t), 4988525SEric.Schrock@Sun.COM KM_SLEEP); 4998525SEric.Schrock@Sun.COM 5008525SEric.Schrock@Sun.COM if (cachefile[0] == '\0') 5018525SEric.Schrock@Sun.COM dp->scd_path = spa_strdup(spa_config_path); 5028525SEric.Schrock@Sun.COM else if (strcmp(cachefile, "none") == 0) 5038525SEric.Schrock@Sun.COM dp->scd_path = NULL; 5048525SEric.Schrock@Sun.COM else 5058525SEric.Schrock@Sun.COM dp->scd_path = spa_strdup(cachefile); 5068525SEric.Schrock@Sun.COM 5078525SEric.Schrock@Sun.COM list_insert_head(&spa->spa_config_list, dp); 5088525SEric.Schrock@Sun.COM if (need_sync) 5098525SEric.Schrock@Sun.COM spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); 5108525SEric.Schrock@Sun.COM } 5118525SEric.Schrock@Sun.COM 5125094Slling int 5135094Slling spa_prop_set(spa_t *spa, nvlist_t *nvp) 5145094Slling { 5155094Slling int error; 5168525SEric.Schrock@Sun.COM nvpair_t *elem; 5178525SEric.Schrock@Sun.COM boolean_t need_sync = B_FALSE; 5188525SEric.Schrock@Sun.COM zpool_prop_t prop; 5195094Slling 5205094Slling if ((error = spa_prop_validate(spa, 
nvp)) != 0) 5215094Slling return (error); 5225094Slling 5238525SEric.Schrock@Sun.COM elem = NULL; 5248525SEric.Schrock@Sun.COM while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) { 5258525SEric.Schrock@Sun.COM if ((prop = zpool_name_to_prop( 5268525SEric.Schrock@Sun.COM nvpair_name(elem))) == ZPROP_INVAL) 5278525SEric.Schrock@Sun.COM return (EINVAL); 5288525SEric.Schrock@Sun.COM 5298525SEric.Schrock@Sun.COM if (prop == ZPOOL_PROP_CACHEFILE || prop == ZPOOL_PROP_ALTROOT) 5308525SEric.Schrock@Sun.COM continue; 5318525SEric.Schrock@Sun.COM 5328525SEric.Schrock@Sun.COM need_sync = B_TRUE; 5338525SEric.Schrock@Sun.COM break; 5348525SEric.Schrock@Sun.COM } 5358525SEric.Schrock@Sun.COM 5368525SEric.Schrock@Sun.COM if (need_sync) 5378525SEric.Schrock@Sun.COM return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props, 5388525SEric.Schrock@Sun.COM spa, nvp, 3)); 5398525SEric.Schrock@Sun.COM else 5408525SEric.Schrock@Sun.COM return (0); 5415094Slling } 5425094Slling 5435094Slling /* 5445094Slling * If the bootfs property value is dsobj, clear it. 
5455094Slling */ 5465094Slling void 5475094Slling spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx) 5485094Slling { 5495094Slling if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) { 5505094Slling VERIFY(zap_remove(spa->spa_meta_objset, 5515094Slling spa->spa_pool_props_object, 5525094Slling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0); 5535094Slling spa->spa_bootfs = 0; 5545094Slling } 5555094Slling } 5565094Slling 557789Sahrens /* 558789Sahrens * ========================================================================== 559789Sahrens * SPA state manipulation (open/create/destroy/import/export) 560789Sahrens * ========================================================================== 561789Sahrens */ 562789Sahrens 5631544Seschrock static int 5641544Seschrock spa_error_entry_compare(const void *a, const void *b) 5651544Seschrock { 5661544Seschrock spa_error_entry_t *sa = (spa_error_entry_t *)a; 5671544Seschrock spa_error_entry_t *sb = (spa_error_entry_t *)b; 5681544Seschrock int ret; 5691544Seschrock 5701544Seschrock ret = bcmp(&sa->se_bookmark, &sb->se_bookmark, 5711544Seschrock sizeof (zbookmark_t)); 5721544Seschrock 5731544Seschrock if (ret < 0) 5741544Seschrock return (-1); 5751544Seschrock else if (ret > 0) 5761544Seschrock return (1); 5771544Seschrock else 5781544Seschrock return (0); 5791544Seschrock } 5801544Seschrock 5811544Seschrock /* 5821544Seschrock * Utility function which retrieves copies of the current logs and 5831544Seschrock * re-initializes them in the process. 
5841544Seschrock */ 5851544Seschrock void 5861544Seschrock spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub) 5871544Seschrock { 5881544Seschrock ASSERT(MUTEX_HELD(&spa->spa_errlist_lock)); 5891544Seschrock 5901544Seschrock bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t)); 5911544Seschrock bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t)); 5921544Seschrock 5931544Seschrock avl_create(&spa->spa_errlist_scrub, 5941544Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 5951544Seschrock offsetof(spa_error_entry_t, se_avl)); 5961544Seschrock avl_create(&spa->spa_errlist_last, 5971544Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 5981544Seschrock offsetof(spa_error_entry_t, se_avl)); 5991544Seschrock } 6001544Seschrock 601*11173SJonathan.Adams@Sun.COM static taskq_t * 602*11173SJonathan.Adams@Sun.COM spa_taskq_create(spa_t *spa, const char *name, enum zti_modes mode, 603*11173SJonathan.Adams@Sun.COM uint_t value) 604*11173SJonathan.Adams@Sun.COM { 605*11173SJonathan.Adams@Sun.COM uint_t flags = TASKQ_PREPOPULATE; 606*11173SJonathan.Adams@Sun.COM boolean_t batch = B_FALSE; 607*11173SJonathan.Adams@Sun.COM 608*11173SJonathan.Adams@Sun.COM switch (mode) { 609*11173SJonathan.Adams@Sun.COM case zti_mode_null: 610*11173SJonathan.Adams@Sun.COM return (NULL); /* no taskq needed */ 611*11173SJonathan.Adams@Sun.COM 612*11173SJonathan.Adams@Sun.COM case zti_mode_fixed: 613*11173SJonathan.Adams@Sun.COM ASSERT3U(value, >=, 1); 614*11173SJonathan.Adams@Sun.COM value = MAX(value, 1); 615*11173SJonathan.Adams@Sun.COM break; 616*11173SJonathan.Adams@Sun.COM 617*11173SJonathan.Adams@Sun.COM case zti_mode_batch: 618*11173SJonathan.Adams@Sun.COM batch = B_TRUE; 619*11173SJonathan.Adams@Sun.COM flags |= TASKQ_THREADS_CPU_PCT; 620*11173SJonathan.Adams@Sun.COM value = zio_taskq_batch_pct; 621*11173SJonathan.Adams@Sun.COM break; 622*11173SJonathan.Adams@Sun.COM 623*11173SJonathan.Adams@Sun.COM case zti_mode_online_percent: 
624*11173SJonathan.Adams@Sun.COM flags |= TASKQ_THREADS_CPU_PCT; 625*11173SJonathan.Adams@Sun.COM break; 626*11173SJonathan.Adams@Sun.COM 627*11173SJonathan.Adams@Sun.COM default: 628*11173SJonathan.Adams@Sun.COM panic("unrecognized mode for %s taskq (%u:%u) in " 629*11173SJonathan.Adams@Sun.COM "spa_activate()", 630*11173SJonathan.Adams@Sun.COM name, mode, value); 631*11173SJonathan.Adams@Sun.COM break; 632*11173SJonathan.Adams@Sun.COM } 633*11173SJonathan.Adams@Sun.COM 634*11173SJonathan.Adams@Sun.COM if (zio_taskq_sysdc && spa->spa_proc != &p0) { 635*11173SJonathan.Adams@Sun.COM if (batch) 636*11173SJonathan.Adams@Sun.COM flags |= TASKQ_DC_BATCH; 637*11173SJonathan.Adams@Sun.COM 638*11173SJonathan.Adams@Sun.COM return (taskq_create_sysdc(name, value, 50, INT_MAX, 639*11173SJonathan.Adams@Sun.COM spa->spa_proc, zio_taskq_basedc, flags)); 640*11173SJonathan.Adams@Sun.COM } 641*11173SJonathan.Adams@Sun.COM return (taskq_create_proc(name, value, maxclsyspri, 50, INT_MAX, 642*11173SJonathan.Adams@Sun.COM spa->spa_proc, flags)); 643*11173SJonathan.Adams@Sun.COM } 644*11173SJonathan.Adams@Sun.COM 645*11173SJonathan.Adams@Sun.COM static void 646*11173SJonathan.Adams@Sun.COM spa_create_zio_taskqs(spa_t *spa) 647*11173SJonathan.Adams@Sun.COM { 648*11173SJonathan.Adams@Sun.COM for (int t = 0; t < ZIO_TYPES; t++) { 649*11173SJonathan.Adams@Sun.COM for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { 650*11173SJonathan.Adams@Sun.COM const zio_taskq_info_t *ztip = &zio_taskqs[t][q]; 651*11173SJonathan.Adams@Sun.COM enum zti_modes mode = ztip->zti_mode; 652*11173SJonathan.Adams@Sun.COM uint_t value = ztip->zti_value; 653*11173SJonathan.Adams@Sun.COM char name[32]; 654*11173SJonathan.Adams@Sun.COM 655*11173SJonathan.Adams@Sun.COM (void) snprintf(name, sizeof (name), 656*11173SJonathan.Adams@Sun.COM "%s_%s", zio_type_name[t], zio_taskq_types[q]); 657*11173SJonathan.Adams@Sun.COM 658*11173SJonathan.Adams@Sun.COM spa->spa_zio_taskq[t][q] = 659*11173SJonathan.Adams@Sun.COM 
			spa_taskq_create(spa, name, mode, value);
		}
	}
}

#ifdef _KERNEL
/*
 * Body of the per-pool "zpool-<name>" process created by spa_activate().
 * It renames the process for ps(1), optionally binds itself to the
 * configured processor set and enters the sysdc scheduling class, creates
 * the pool's zio taskqs, then parks on spa_proc_cv until spa_deactivate()
 * asks it to tear down, at which point it exits via lwp_exit().
 */
static void
spa_thread(void *arg)
{
	callb_cpr_t cprinfo;

	spa_t *spa = arg;
	user_t *pu = PTOU(curproc);

	/* Register with the CPR (suspend/resume) framework. */
	CALLB_CPR_INIT(&cprinfo, &spa->spa_proc_lock, callb_generic_cpr,
	    spa->spa_name);

	ASSERT(curproc != &p0);
	/* Make this process show up as "zpool-<poolname>" in ps(1). */
	(void) snprintf(pu->u_psargs, sizeof (pu->u_psargs),
	    "zpool-%s", spa->spa_name);
	(void) strlcpy(pu->u_comm, pu->u_psargs, sizeof (pu->u_comm));

	/* bind this thread to the requested psrset */
	if (zio_taskq_psrset_bind != PS_NONE) {
		pool_lock();
		mutex_enter(&cpu_lock);
		mutex_enter(&pidlock);
		mutex_enter(&curproc->p_lock);

		if (cpupart_bind_thread(curthread, zio_taskq_psrset_bind,
		    0, NULL, NULL) == 0)  {
			curthread->t_bind_pset = zio_taskq_psrset_bind;
		} else {
			/* Binding failure is non-fatal; just warn. */
			cmn_err(CE_WARN,
			    "Couldn't bind process for zfs pool \"%s\" to "
			    "pset %d\n", spa->spa_name, zio_taskq_psrset_bind);
		}

		mutex_exit(&curproc->p_lock);
		mutex_exit(&pidlock);
		mutex_exit(&cpu_lock);
		pool_unlock();
	}

	/* Optionally place this thread in the sysdc scheduling class. */
	if (zio_taskq_sysdc) {
		sysdc_thread_enter(curthread, 100, 0);
	}

	spa->spa_proc = curproc;
	spa->spa_did = curthread->t_did;

	spa_create_zio_taskqs(spa);

	mutex_enter(&spa->spa_proc_lock);
	ASSERT(spa->spa_proc_state == SPA_PROC_CREATED);

	/* Tell spa_activate() we are up and running. */
	spa->spa_proc_state = SPA_PROC_ACTIVE;
	cv_broadcast(&spa->spa_proc_cv);

	/* Park here until spa_deactivate() requests teardown. */
	CALLB_CPR_SAFE_BEGIN(&cprinfo);
	while (spa->spa_proc_state == SPA_PROC_ACTIVE)
		cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock);
	CALLB_CPR_SAFE_END(&cprinfo, &spa->spa_proc_lock);

	ASSERT(spa->spa_proc_state == SPA_PROC_DEACTIVATE);
	spa->spa_proc_state = SPA_PROC_GONE;
	spa->spa_proc = &p0;
	cv_broadcast(&spa->spa_proc_cv);
	CALLB_CPR_EXIT(&cprinfo);	/* drops spa_proc_lock */

	mutex_enter(&curproc->p_lock);
	lwp_exit();
}
#endif

/*
 * Activate an uninitialized pool.
 */
static void
spa_activate(spa_t *spa, int mode)
{
	ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);

	spa->spa_state = POOL_STATE_ACTIVE;
	spa->spa_mode = mode;

	spa->spa_normal_class = metaslab_class_create(spa, zfs_metaslab_ops);
	spa->spa_log_class = metaslab_class_create(spa, zfs_metaslab_ops);

	/* Try to create a covering process */
	mutex_enter(&spa->spa_proc_lock);
	ASSERT(spa->spa_proc_state == SPA_PROC_NONE);
	ASSERT(spa->spa_proc == &p0);
	spa->spa_did = 0;

	/* Only create a process if we're going to be around a while.
 */
	if (spa_create_process && strcmp(spa->spa_name, TRYIMPORT_NAME) != 0) {
		if (newproc(spa_thread, (caddr_t)spa, syscid, maxclsyspri,
		    NULL, 0) == 0) {
			/* Wait for spa_thread() to announce itself. */
			spa->spa_proc_state = SPA_PROC_CREATED;
			while (spa->spa_proc_state == SPA_PROC_CREATED) {
				cv_wait(&spa->spa_proc_cv,
				    &spa->spa_proc_lock);
			}
			ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE);
			ASSERT(spa->spa_proc != &p0);
			ASSERT(spa->spa_did != 0);
		} else {
#ifdef _KERNEL
			/* Non-fatal: we fall back to taskqs in this context. */
			cmn_err(CE_WARN,
			    "Couldn't create process for zfs pool \"%s\"\n",
			    spa->spa_name);
#endif
		}
	}
	mutex_exit(&spa->spa_proc_lock);

	/* If we didn't create a process, we need to create our taskqs. 
 */
	if (spa->spa_proc == &p0) {
		spa_create_zio_taskqs(spa);
	}

	/* Dirty-vdev lists, linked through nodes embedded in vdev_t. */
	list_create(&spa->spa_config_dirty_list, sizeof (vdev_t),
	    offsetof(vdev_t, vdev_config_dirty_node));
	list_create(&spa->spa_state_dirty_list, sizeof (vdev_t),
	    offsetof(vdev_t, vdev_state_dirty_node));

	txg_list_create(&spa->spa_vdev_txg_list,
	    offsetof(struct vdev, vdev_txg_node));

	/* AVL trees of error-log entries (scrub pass and last pass). */
	avl_create(&spa->spa_errlist_scrub,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
	avl_create(&spa->spa_errlist_last,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
}

/*
 * Opposite of spa_activate().
 */
static void
spa_deactivate(spa_t *spa)
{
	ASSERT(spa->spa_sync_on == B_FALSE);
	ASSERT(spa->spa_dsl_pool == NULL);
	ASSERT(spa->spa_root_vdev == NULL);
	ASSERT(spa->spa_async_zio_root == NULL);
	ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED);

	txg_list_destroy(&spa->spa_vdev_txg_list);

	list_destroy(&spa->spa_config_dirty_list);
	list_destroy(&spa->spa_state_dirty_list);

	/* Destroy any zio taskqs; entries may be NULL if never created. */
	for (int t = 0; t < ZIO_TYPES; t++) {
		for (int q = 0; q < ZIO_TASKQ_TYPES; q++) {
			if (spa->spa_zio_taskq[t][q] != NULL)
				taskq_destroy(spa->spa_zio_taskq[t][q]);
			spa->spa_zio_taskq[t][q] = NULL;
		}
	}

	metaslab_class_destroy(spa->spa_normal_class);
	spa->spa_normal_class = NULL;

	metaslab_class_destroy(spa->spa_log_class);
	spa->spa_log_class = NULL;

	/*
	 * If this was part of an import or the open otherwise failed, we may
	 * still have errors left in the queues.  Empty them just in case.
	 */
	spa_errlog_drain(spa);

	avl_destroy(&spa->spa_errlist_scrub);
	avl_destroy(&spa->spa_errlist_last);

	spa->spa_state = POOL_STATE_UNINITIALIZED;

	/*
	 * If spa_thread() is running, ask it to exit and wait (under
	 * spa_proc_lock) until it acknowledges by moving the state to
	 * SPA_PROC_GONE.  This is the reverse of the spa_activate()
	 * handshake.
	 */
	mutex_enter(&spa->spa_proc_lock);
	if (spa->spa_proc_state != SPA_PROC_NONE) {
		ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE);
		spa->spa_proc_state = SPA_PROC_DEACTIVATE;
		cv_broadcast(&spa->spa_proc_cv);
		while (spa->spa_proc_state == SPA_PROC_DEACTIVATE) {
			ASSERT(spa->spa_proc != &p0);
			cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock);
		}
		ASSERT(spa->spa_proc_state == SPA_PROC_GONE);
		spa->spa_proc_state = SPA_PROC_NONE;
	}
	ASSERT(spa->spa_proc == &p0);
	mutex_exit(&spa->spa_proc_lock);

	/*
	 * We want to make sure spa_thread() has actually exited the ZFS
	 * module, so that the module can't be unloaded out from underneath
	 * it.
	 */
	if (spa->spa_did != 0) {
		thread_join(spa->spa_did);
		spa->spa_did = 0;
	}
}

/*
 * Verify a pool configuration, and construct the vdev tree appropriately.
This 867789Sahrens * will create all the necessary vdevs in the appropriate layout, with each vdev 868789Sahrens * in the CLOSED state. This will prep the pool before open/creation/import. 869789Sahrens * All vdev validation is done by the vdev_alloc() routine. 870789Sahrens */ 8712082Seschrock static int 8722082Seschrock spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, 8732082Seschrock uint_t id, int atype) 874789Sahrens { 875789Sahrens nvlist_t **child; 8769816SGeorge.Wilson@Sun.COM uint_t children; 8772082Seschrock int error; 8782082Seschrock 8792082Seschrock if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0) 8802082Seschrock return (error); 8812082Seschrock 8822082Seschrock if ((*vdp)->vdev_ops->vdev_op_leaf) 8832082Seschrock return (0); 884789Sahrens 8857754SJeff.Bonwick@Sun.COM error = nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 8867754SJeff.Bonwick@Sun.COM &child, &children); 8877754SJeff.Bonwick@Sun.COM 8887754SJeff.Bonwick@Sun.COM if (error == ENOENT) 8897754SJeff.Bonwick@Sun.COM return (0); 8907754SJeff.Bonwick@Sun.COM 8917754SJeff.Bonwick@Sun.COM if (error) { 8922082Seschrock vdev_free(*vdp); 8932082Seschrock *vdp = NULL; 8942082Seschrock return (EINVAL); 895789Sahrens } 896789Sahrens 8979816SGeorge.Wilson@Sun.COM for (int c = 0; c < children; c++) { 8982082Seschrock vdev_t *vd; 8992082Seschrock if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c, 9002082Seschrock atype)) != 0) { 9012082Seschrock vdev_free(*vdp); 9022082Seschrock *vdp = NULL; 9032082Seschrock return (error); 904789Sahrens } 905789Sahrens } 906789Sahrens 9072082Seschrock ASSERT(*vdp != NULL); 9082082Seschrock 9092082Seschrock return (0); 910789Sahrens } 911789Sahrens 912789Sahrens /* 913789Sahrens * Opposite of spa_load(). 
 */
static void
spa_unload(spa_t *spa)
{
	int i;

	ASSERT(MUTEX_HELD(&spa_namespace_lock));

	/*
	 * Stop async tasks.
	 */
	spa_async_suspend(spa);

	/*
	 * Stop syncing.
	 */
	if (spa->spa_sync_on) {
		txg_sync_stop(spa->spa_dsl_pool);
		spa->spa_sync_on = B_FALSE;
	}

	/*
	 * Wait for any outstanding async I/O to complete.
	 */
	if (spa->spa_async_zio_root != NULL) {
		(void) zio_wait(spa->spa_async_zio_root);
		spa->spa_async_zio_root = NULL;
	}

	/*
	 * Close the dsl pool.
	 */
	if (spa->spa_dsl_pool) {
		dsl_pool_close(spa->spa_dsl_pool);
		spa->spa_dsl_pool = NULL;
	}

	/* Unload the dedup tables. */
	ddt_unload(spa);

	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	/*
	 * Drop and purge level 2 cache
	 */
	spa_l2cache_drop(spa);

	/*
	 * Close all vdevs.
	 */
	if (spa->spa_root_vdev)
		vdev_free(spa->spa_root_vdev);
	ASSERT(spa->spa_root_vdev == NULL);

	/* Free the spare vdevs, their array, and their stashed config. */
	for (i = 0; i < spa->spa_spares.sav_count; i++)
		vdev_free(spa->spa_spares.sav_vdevs[i]);
	if (spa->spa_spares.sav_vdevs) {
		kmem_free(spa->spa_spares.sav_vdevs,
		    spa->spa_spares.sav_count * sizeof (void *));
		spa->spa_spares.sav_vdevs = NULL;
	}
	if (spa->spa_spares.sav_config) {
		nvlist_free(spa->spa_spares.sav_config);
		spa->spa_spares.sav_config = NULL;
	}
	spa->spa_spares.sav_count = 0;

	/* Likewise for the l2cache vdevs. */
	for (i = 0; i < spa->spa_l2cache.sav_count; i++)
		vdev_free(spa->spa_l2cache.sav_vdevs[i]);
	if (spa->spa_l2cache.sav_vdevs) {
		kmem_free(spa->spa_l2cache.sav_vdevs,
		    spa->spa_l2cache.sav_count * sizeof (void *));
		spa->spa_l2cache.sav_vdevs = NULL;
	}
	if (spa->spa_l2cache.sav_config) {
		nvlist_free(spa->spa_l2cache.sav_config);
		spa->spa_l2cache.sav_config = NULL;
	}
	spa->spa_l2cache.sav_count = 0;

	spa->spa_async_suspended = 0;

	spa_config_exit(spa, SCL_ALL, FTAG);
}

/*
 * Load (or re-load) the current list of vdevs describing the active spares for
 * this pool.  When this is called, we have some form of basic information in
 * 'spa_spares.sav_config'.  We parse this into vdevs, try to open them, and
 * then re-generate a more complete list including status information.
 */
static void
spa_load_spares(spa_t *spa)
{
	nvlist_t **spares;
	uint_t nspares;
	int i;
	vdev_t *vd, *tvd;

	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);

	/*
	 * First, close and free any existing spare vdevs.
	 */
	for (i = 0; i < spa->spa_spares.sav_count; i++) {
		vd = spa->spa_spares.sav_vdevs[i];

		/* Undo the call to spa_activate() below */
		if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid,
		    B_FALSE)) != NULL && tvd->vdev_isspare)
			spa_spare_remove(tvd);
		vdev_close(vd);
		vdev_free(vd);
	}

	if (spa->spa_spares.sav_vdevs)
		kmem_free(spa->spa_spares.sav_vdevs,
		    spa->spa_spares.sav_count * sizeof (void *));

	if (spa->spa_spares.sav_config == NULL)
		nspares = 0;
	else
		VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
		    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);

	spa->spa_spares.sav_count = (int)nspares;
	spa->spa_spares.sav_vdevs = NULL;

	if (nspares == 0)
		return;

	/*
	 * Construct the array of vdevs, opening them to get status in the
	 * process. For each spare, there is potentially two different vdev_t
	 * structures associated with it: one in the list of spares (used only
	 * for basic validation purposes) and one in the active vdev
	 * configuration (if it's spared in).  During this phase we open and
	 * validate each vdev on the spare list.  If the vdev also exists in the
	 * active configuration, then we also mark this vdev as an active spare.
	 */
	spa->spa_spares.sav_vdevs = kmem_alloc(nspares * sizeof (void *),
	    KM_SLEEP);
	for (i = 0; i < spa->spa_spares.sav_count; i++) {
		VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0,
		    VDEV_ALLOC_SPARE) == 0);
		ASSERT(vd != NULL);

		spa->spa_spares.sav_vdevs[i] = vd;

		if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid,
		    B_FALSE)) != NULL) {
			if (!tvd->vdev_isspare)
				spa_spare_add(tvd);

			/*
			 * We only mark the spare active if we were successfully
			 * able to load the vdev.  Otherwise, importing a pool
			 * with a bad active spare would result in strange
			 * behavior, because multiple pool would think the spare
			 * is actively in use.
			 *
			 * There is a vulnerability here to an equally bizarre
			 * circumstance, where a dead active spare is later
			 * brought back to life (onlined or otherwise).  Given
			 * the rarity of this scenario, and the extra complexity
			 * it adds, we ignore the possibility.
			 */
			if (!vdev_is_dead(tvd))
				spa_spare_activate(tvd);
		}

		/* Spares are their own top-level vdevs in the aux tree. */
		vd->vdev_top = vd;
		vd->vdev_aux = &spa->spa_spares;

		if (vdev_open(vd) != 0)
			continue;

		if (vdev_validate_aux(vd) == 0)
			spa_spare_add(vd);
	}

	/*
	 * Recompute the stashed list of spares, with status information
	 * this time.
	 */
	VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES,
	    DATA_TYPE_NVLIST_ARRAY) == 0);

	spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *),
	    KM_SLEEP);
	for (i = 0; i < spa->spa_spares.sav_count; i++)
		spares[i] = vdev_config_generate(spa,
		    spa->spa_spares.sav_vdevs[i], B_TRUE, B_TRUE, B_FALSE);
	VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
	    ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0);
	for (i = 0; i < spa->spa_spares.sav_count; i++)
		nvlist_free(spares[i]);
	kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *));
}

/*
 * Load (or re-load) the current list of vdevs describing the active l2cache for
 * this pool.  When this is called, we have some form of basic information in
 * 'spa_l2cache.sav_config'.  We parse this into vdevs, try to open them, and
 * then re-generate a more complete list including status information.
 * Devices which are already active have their details maintained, and are
 * not re-opened.
 */
static void
spa_load_l2cache(spa_t *spa)
{
	nvlist_t **l2cache;
	uint_t nl2cache;
	int i, j, oldnvdevs;
	uint64_t guid;
	vdev_t *vd, **oldvdevs, **newvdevs;
	spa_aux_vdev_t *sav = &spa->spa_l2cache;

	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);

	if (sav->sav_config != NULL) {
		VERIFY(nvlist_lookup_nvlist_array(sav->sav_config,
		    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
		newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP);
	} else {
		nl2cache = 0;
	}

	/* Detach the old array; surviving entries are moved to newvdevs. */
	oldvdevs = sav->sav_vdevs;
	oldnvdevs = sav->sav_count;
	sav->sav_vdevs = NULL;
	sav->sav_count = 0;

	/*
	 * Process new nvlist of vdevs.
	 */
	for (i = 0; i < nl2cache; i++) {
		VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID,
		    &guid) == 0);

		newvdevs[i] = NULL;
		for (j = 0; j < oldnvdevs; j++) {
			vd = oldvdevs[j];
			if (vd != NULL && guid == vd->vdev_guid) {
				/*
				 * Retain previous vdev for add/remove ops.
				 */
				newvdevs[i] = vd;
				oldvdevs[j] = NULL;
				break;
			}
		}

		if (newvdevs[i] == NULL) {
			/*
			 * Create new vdev
			 */
			VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0,
			    VDEV_ALLOC_L2CACHE) == 0);
			ASSERT(vd != NULL);
			newvdevs[i] = vd;

			/*
			 * Commit this vdev as an l2cache device,
			 * even if it fails to open.
			 */
			spa_l2cache_add(vd);

			vd->vdev_top = vd;
			vd->vdev_aux = sav;

			spa_l2cache_activate(vd);

			if (vdev_open(vd) != 0)
				continue;

			(void) vdev_validate_aux(vd);

			/* Only hand healthy devices to the L2ARC. */
			if (!vdev_is_dead(vd))
				l2arc_add_vdev(spa, vd);
		}
	}

	/*
	 * Purge vdevs that were dropped
	 */
	for (i = 0; i < oldnvdevs; i++) {
		uint64_t pool;

		vd = oldvdevs[i];
		if (vd != NULL) {
			if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
			    pool != 0ULL && l2arc_vdev_present(vd))
				l2arc_remove_vdev(vd);
			(void) vdev_close(vd);
			spa_l2cache_remove(vd);
		}
	}

	if (oldvdevs)
		kmem_free(oldvdevs, oldnvdevs * sizeof (void *));

	/*
	 * With no config there is nothing to stash; sav_count is still 0,
	 * so the loops after the 'out' label are no-ops.
	 */
	if (sav->sav_config == NULL)
		goto out;

	sav->sav_vdevs = newvdevs;
	sav->sav_count = (int)nl2cache;

	/*
	 * Recompute the stashed list of l2cache devices, with status
	 * information this time.
	 */
	VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE,
	    DATA_TYPE_NVLIST_ARRAY) == 0);

	l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP);
	for (i = 0; i < sav->sav_count; i++)
		l2cache[i] = vdev_config_generate(spa,
		    sav->sav_vdevs[i], B_TRUE, B_FALSE, B_TRUE);
	VERIFY(nvlist_add_nvlist_array(sav->sav_config,
	    ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0);
out:
	for (i = 0; i < sav->sav_count; i++)
		nvlist_free(l2cache[i]);
	if (sav->sav_count)
		kmem_free(l2cache, sav->sav_count * sizeof (void *));
}

/*
 * Read a packed nvlist from the MOS object 'obj' (whose size is stored in
 * the object's bonus buffer) and unpack it into *value.  Returns 0 on
 * success or the error from dmu_read()/nvlist_unpack(); on failure *value
 * remains NULL.  The caller owns the returned nvlist.
 */
static int
load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value)
{
	dmu_buf_t *db;
	char *packed = NULL;
	size_t nvsize = 0;
	int error;
	*value = NULL;

	VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
	nvsize = *(uint64_t *)db->db_data;
	dmu_buf_rele(db, FTAG);

	packed = kmem_alloc(nvsize, KM_SLEEP);
	error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed,
	    DMU_READ_PREFETCH);
	if (error == 0)
		error = nvlist_unpack(packed, nvsize, value, 0);
	kmem_free(packed, nvsize);

	return (error);
}

/*
 * Checks to see if the given vdev could not be opened, in which case we post a
 * sysevent to notify the autoreplace code that the device has been removed.
 */
static void
spa_check_removed(vdev_t *vd)
{
	/* Walk the whole subtree; leaves are checked below. */
	for (int c = 0; c < vd->vdev_children; c++)
		spa_check_removed(vd->vdev_child[c]);

	if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) {
		zfs_post_autoreplace(vd->vdev_spa, vd);
		spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK);
	}
}

/*
 * Load the slog device state from the config object since it's possible
 * that the label does not contain the most up-to-date information.
 */
void
spa_load_log_state(spa_t *spa, nvlist_t *nv)
{
	vdev_t *ovd, *rvd = spa->spa_root_vdev;

	/*
	 * Load the original root vdev tree from the passed config.
129110594SGeorge.Wilson@Sun.COM */ 129210594SGeorge.Wilson@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 129310594SGeorge.Wilson@Sun.COM VERIFY(spa_config_parse(spa, &ovd, nv, NULL, 0, VDEV_ALLOC_LOAD) == 0); 129410594SGeorge.Wilson@Sun.COM 129510594SGeorge.Wilson@Sun.COM for (int c = 0; c < rvd->vdev_children; c++) { 129610594SGeorge.Wilson@Sun.COM vdev_t *cvd = rvd->vdev_child[c]; 129710594SGeorge.Wilson@Sun.COM if (cvd->vdev_islog) 129810594SGeorge.Wilson@Sun.COM vdev_load_log_state(cvd, ovd->vdev_child[c]); 12999701SGeorge.Wilson@Sun.COM } 130010594SGeorge.Wilson@Sun.COM vdev_free(ovd); 130110594SGeorge.Wilson@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 13029701SGeorge.Wilson@Sun.COM } 13039701SGeorge.Wilson@Sun.COM 13049701SGeorge.Wilson@Sun.COM /* 13057294Sperrin * Check for missing log devices 13067294Sperrin */ 13077294Sperrin int 13087294Sperrin spa_check_logs(spa_t *spa) 13097294Sperrin { 13107294Sperrin switch (spa->spa_log_state) { 13117294Sperrin case SPA_LOG_MISSING: 13127294Sperrin /* need to recheck in case slog has been restored */ 13137294Sperrin case SPA_LOG_UNKNOWN: 13147294Sperrin if (dmu_objset_find(spa->spa_name, zil_check_log_chain, NULL, 13157294Sperrin DS_FIND_CHILDREN)) { 13167294Sperrin spa->spa_log_state = SPA_LOG_MISSING; 13177294Sperrin return (1); 13187294Sperrin } 13197294Sperrin break; 13207294Sperrin } 13217294Sperrin return (0); 13227294Sperrin } 13237294Sperrin 132410672SEric.Schrock@Sun.COM static void 132510672SEric.Schrock@Sun.COM spa_aux_check_removed(spa_aux_vdev_t *sav) 132610672SEric.Schrock@Sun.COM { 132710922SJeff.Bonwick@Sun.COM for (int i = 0; i < sav->sav_count; i++) 132810672SEric.Schrock@Sun.COM spa_check_removed(sav->sav_vdevs[i]); 132910672SEric.Schrock@Sun.COM } 133010672SEric.Schrock@Sun.COM 133110922SJeff.Bonwick@Sun.COM void 133210922SJeff.Bonwick@Sun.COM spa_claim_notify(zio_t *zio) 133310922SJeff.Bonwick@Sun.COM { 133410922SJeff.Bonwick@Sun.COM spa_t *spa = zio->io_spa; 

	if (zio->io_error)
		return;

	mutex_enter(&spa->spa_props_lock);	/* any mutex will do */
	if (spa->spa_claim_max_txg < zio->io_bp->blk_birth)
		spa->spa_claim_max_txg = zio->io_bp->blk_birth;
	mutex_exit(&spa->spa_props_lock);
}

/* Error tallies accumulated while verifying a pool during load. */
typedef struct spa_load_error {
	uint64_t	sle_metadata_count;	/* failed metadata reads */
	uint64_t	sle_data_count;		/* failed data reads */
} spa_load_error_t;

/*
 * Read-completion callback for spa_load_verify(): classify any I/O error
 * as metadata or data (intent-log blocks count as data) and free the
 * temporary read buffer.  Counters are bumped atomically since many zios
 * may complete concurrently.
 */
static void
spa_load_verify_done(zio_t *zio)
{
	blkptr_t *bp = zio->io_bp;
	spa_load_error_t *sle = zio->io_private;
	dmu_object_type_t type = BP_GET_TYPE(bp);
	int error = zio->io_error;

	if (error) {
		if ((BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata) &&
		    type != DMU_OT_INTENT_LOG)
			atomic_add_64(&sle->sle_metadata_count, 1);
		else
			atomic_add_64(&sle->sle_data_count, 1);
	}
	zio_data_buf_free(zio->io_data, zio->io_size);
}

/*ARGSUSED*/
static int
spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
    const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
{
	/* Issue a speculative scrub-style read for every block visited. */
	if (bp != NULL) {
		zio_t *rio = arg;
		size_t size = BP_GET_PSIZE(bp);
		void *data = zio_data_buf_alloc(size);

		zio_nowait(zio_read(rio, spa, bp, data, size,
		    spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB,
		    ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL |
		    ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb));
	}
	return (0);
}

/*
 * Verify the pool's contents during load by traversing all blocks born
 * since spa_verify_min_txg and reading them.  The resulting error counts
 * are compared against the rewind policy limits; on success the verified
 * txg/timestamp are recorded, otherwise spa_load_max_txg is set so a
 * rewind can be attempted.  Returns 0 if verification passed, EIO (or the
 * traversal's ENXIO/EIO) otherwise.
 */
static int
spa_load_verify(spa_t *spa)
{
	zio_t *rio;
	spa_load_error_t sle = { 0 };
	zpool_rewind_policy_t policy;
	boolean_t verify_ok = B_FALSE;
	int error;

	/* Root zio that parents all the per-block reads issued below. */
	rio = zio_root(spa, NULL, &sle,
	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE);

	error = traverse_pool(spa, spa->spa_verify_min_txg,
	    TRAVERSE_PRE | TRAVERSE_PREFETCH, spa_load_verify_cb, rio);

	(void) zio_wait(rio);

	zpool_get_rewind_policy(spa->spa_config, &policy);

	spa->spa_load_meta_errors = sle.sle_metadata_count;
	spa->spa_load_data_errors = sle.sle_data_count;

	if (!error && sle.sle_metadata_count <= policy.zrp_maxmeta &&
	    sle.sle_data_count <= policy.zrp_maxdata) {
		verify_ok = B_TRUE;
		spa->spa_load_txg = spa->spa_uberblock.ub_txg;
		spa->spa_load_txg_ts = spa->spa_uberblock.ub_timestamp;
	} else {
		spa->spa_load_max_txg = spa->spa_uberblock.ub_txg;
	}

	if (error) {
		/* Normalize traversal errors other than ENXIO/EIO to EIO. */
		if (error != ENXIO && error != EIO)
			error = EIO;
		return (error);
	}

	return (verify_ok ? 0 : EIO);
}

/*
 * Load an existing storage pool, using the pool's builtin spa_config as a
 * source of configuration information.
 */
static int
spa_load(spa_t *spa, spa_load_state_t state, int mosconfig)
{
	int error = 0;
	nvlist_t *nvconfig, *nvroot = NULL;
	vdev_t *rvd;
	uberblock_t *ub = &spa->spa_uberblock;
	uint64_t config_cache_txg = spa->spa_config_txg;
	uint64_t pool_guid;
	uint64_t version;
	uint64_t autoreplace = 0;
	int orig_mode = spa->spa_mode;
	char *ereport = FM_EREPORT_ZFS_POOL;
	nvlist_t *config = spa->spa_config;

	/*
	 * If this is an untrusted config, access the pool in read-only mode.
	 * This prevents things like resilvering recently removed devices.
14488241SJeff.Bonwick@Sun.COM */ 14498241SJeff.Bonwick@Sun.COM if (!mosconfig) 14508241SJeff.Bonwick@Sun.COM spa->spa_mode = FREAD; 14518241SJeff.Bonwick@Sun.COM 14527754SJeff.Bonwick@Sun.COM ASSERT(MUTEX_HELD(&spa_namespace_lock)); 14537754SJeff.Bonwick@Sun.COM 14541544Seschrock spa->spa_load_state = state; 14551635Sbonwick 1456789Sahrens if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) || 14571733Sbonwick nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) { 14581544Seschrock error = EINVAL; 14591544Seschrock goto out; 14601544Seschrock } 1461789Sahrens 14622082Seschrock /* 14632082Seschrock * Versioning wasn't explicitly added to the label until later, so if 14642082Seschrock * it's not present treat it as the initial version. 14652082Seschrock */ 14662082Seschrock if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) != 0) 14674577Sahrens version = SPA_VERSION_INITIAL; 14682082Seschrock 14691733Sbonwick (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, 14701733Sbonwick &spa->spa_config_txg); 14711733Sbonwick 14721635Sbonwick if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) && 14731544Seschrock spa_guid_exists(pool_guid, 0)) { 14741544Seschrock error = EEXIST; 14751544Seschrock goto out; 14761544Seschrock } 1477789Sahrens 14782174Seschrock spa->spa_load_guid = pool_guid; 14792174Seschrock 1480789Sahrens /* 14819234SGeorge.Wilson@Sun.COM * Create "The Godfather" zio to hold all async IOs 14829234SGeorge.Wilson@Sun.COM */ 14839630SJeff.Bonwick@Sun.COM spa->spa_async_zio_root = zio_root(spa, NULL, NULL, 14849630SJeff.Bonwick@Sun.COM ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); 14859234SGeorge.Wilson@Sun.COM 14869234SGeorge.Wilson@Sun.COM /* 14872082Seschrock * Parse the configuration into a vdev tree. We explicitly set the 14882082Seschrock * value that will be returned by spa_version() since parsing the 14892082Seschrock * configuration requires knowing the version number. 
1490789Sahrens */ 14917754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 14922082Seschrock spa->spa_ubsync.ub_version = version; 14932082Seschrock error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_LOAD); 14947754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 1495789Sahrens 14962082Seschrock if (error != 0) 14971544Seschrock goto out; 1498789Sahrens 14991585Sbonwick ASSERT(spa->spa_root_vdev == rvd); 1500789Sahrens ASSERT(spa_guid(spa) == pool_guid); 1501789Sahrens 1502789Sahrens /* 1503789Sahrens * Try to open all vdevs, loading each label in the process. 1504789Sahrens */ 15057754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 15064070Smc142369 error = vdev_open(rvd); 15077754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 15084070Smc142369 if (error != 0) 15091544Seschrock goto out; 1510789Sahrens 1511789Sahrens /* 15129276SMark.Musante@Sun.COM * We need to validate the vdev labels against the configuration that 15139276SMark.Musante@Sun.COM * we have in hand, which is dependent on the setting of mosconfig. If 15149276SMark.Musante@Sun.COM * mosconfig is true then we're validating the vdev labels based on 15159276SMark.Musante@Sun.COM * that config. Otherwise, we're validating against the cached config 15169276SMark.Musante@Sun.COM * (zpool.cache) that was read when we loaded the zfs module, and then 15179276SMark.Musante@Sun.COM * later we will recursively call spa_load() and validate against 15189276SMark.Musante@Sun.COM * the vdev config. 
15191986Seschrock */ 15209276SMark.Musante@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 15219276SMark.Musante@Sun.COM error = vdev_validate(rvd); 15229276SMark.Musante@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 15239276SMark.Musante@Sun.COM if (error != 0) 15249276SMark.Musante@Sun.COM goto out; 15251986Seschrock 15261986Seschrock if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { 15271986Seschrock error = ENXIO; 15281986Seschrock goto out; 15291986Seschrock } 15301986Seschrock 15311986Seschrock /* 1532789Sahrens * Find the best uberblock. 1533789Sahrens */ 15347754SJeff.Bonwick@Sun.COM vdev_uberblock_load(NULL, rvd, ub); 1535789Sahrens 1536789Sahrens /* 1537789Sahrens * If we weren't able to find a single valid uberblock, return failure. 1538789Sahrens */ 1539789Sahrens if (ub->ub_txg == 0) { 15401760Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 15411760Seschrock VDEV_AUX_CORRUPT_DATA); 15421544Seschrock error = ENXIO; 15431544Seschrock goto out; 15441544Seschrock } 15451544Seschrock 15461544Seschrock /* 15471544Seschrock * If the pool is newer than the code, we can't open it. 15481544Seschrock */ 15494577Sahrens if (ub->ub_version > SPA_VERSION) { 15501760Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 15511760Seschrock VDEV_AUX_VERSION_NEWER); 15521544Seschrock error = ENOTSUP; 15531544Seschrock goto out; 1554789Sahrens } 1555789Sahrens 1556789Sahrens /* 1557789Sahrens * If the vdev guid sum doesn't match the uberblock, we have an 1558789Sahrens * incomplete configuration. 1559789Sahrens */ 15601732Sbonwick if (rvd->vdev_guid_sum != ub->ub_guid_sum && mosconfig) { 15611544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 15621544Seschrock VDEV_AUX_BAD_GUID_SUM); 15631544Seschrock error = ENXIO; 15641544Seschrock goto out; 1565789Sahrens } 1566789Sahrens 1567789Sahrens /* 1568789Sahrens * Initialize internal SPA structures. 
1569789Sahrens */ 1570789Sahrens spa->spa_state = POOL_STATE_ACTIVE; 1571789Sahrens spa->spa_ubsync = spa->spa_uberblock; 157210921STim.Haley@Sun.COM spa->spa_verify_min_txg = spa->spa_extreme_rewind ? 157310921STim.Haley@Sun.COM TXG_INITIAL : spa_last_synced_txg(spa) - TXG_DEFER_SIZE; 157410921STim.Haley@Sun.COM spa->spa_first_txg = spa->spa_last_ubsync_txg ? 157510921STim.Haley@Sun.COM spa->spa_last_ubsync_txg : spa_last_synced_txg(spa) + 1; 157610922SJeff.Bonwick@Sun.COM spa->spa_claim_max_txg = spa->spa_first_txg; 157710922SJeff.Bonwick@Sun.COM 15781544Seschrock error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool); 15791544Seschrock if (error) { 15801544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 15811544Seschrock VDEV_AUX_CORRUPT_DATA); 158210921STim.Haley@Sun.COM error = EIO; 15831544Seschrock goto out; 15841544Seschrock } 1585789Sahrens spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset; 1586789Sahrens 15871544Seschrock if (zap_lookup(spa->spa_meta_objset, 1588789Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, 15891544Seschrock sizeof (uint64_t), 1, &spa->spa_config_object) != 0) { 15901544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 15911544Seschrock VDEV_AUX_CORRUPT_DATA); 15921544Seschrock error = EIO; 15931544Seschrock goto out; 15941544Seschrock } 1595789Sahrens 159610594SGeorge.Wilson@Sun.COM if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0) { 159710594SGeorge.Wilson@Sun.COM vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 159810594SGeorge.Wilson@Sun.COM VDEV_AUX_CORRUPT_DATA); 159910594SGeorge.Wilson@Sun.COM error = EIO; 160010594SGeorge.Wilson@Sun.COM goto out; 160110594SGeorge.Wilson@Sun.COM } 160210594SGeorge.Wilson@Sun.COM 1603789Sahrens if (!mosconfig) { 16043975Sek110237 uint64_t hostid; 16052082Seschrock 160610594SGeorge.Wilson@Sun.COM if (!spa_is_root(spa) && nvlist_lookup_uint64(nvconfig, 16077706SLin.Ling@Sun.COM ZPOOL_CONFIG_HOSTID, &hostid) == 0) { 16083975Sek110237 char 
*hostname; 16093975Sek110237 unsigned long myhostid = 0; 16103975Sek110237 161110594SGeorge.Wilson@Sun.COM VERIFY(nvlist_lookup_string(nvconfig, 16123975Sek110237 ZPOOL_CONFIG_HOSTNAME, &hostname) == 0); 16133975Sek110237 16148662SJordan.Vaughan@Sun.com #ifdef _KERNEL 16158662SJordan.Vaughan@Sun.com myhostid = zone_get_hostid(NULL); 16168662SJordan.Vaughan@Sun.com #else /* _KERNEL */ 16178662SJordan.Vaughan@Sun.com /* 16188662SJordan.Vaughan@Sun.com * We're emulating the system's hostid in userland, so 16198662SJordan.Vaughan@Sun.com * we can't use zone_get_hostid(). 16208662SJordan.Vaughan@Sun.com */ 16213975Sek110237 (void) ddi_strtoul(hw_serial, NULL, 10, &myhostid); 16228662SJordan.Vaughan@Sun.com #endif /* _KERNEL */ 16234178Slling if (hostid != 0 && myhostid != 0 && 16248662SJordan.Vaughan@Sun.com hostid != myhostid) { 16253975Sek110237 cmn_err(CE_WARN, "pool '%s' could not be " 16263975Sek110237 "loaded as it was last accessed by " 16277706SLin.Ling@Sun.COM "another system (host: %s hostid: 0x%lx). 
" 16283975Sek110237 "See: http://www.sun.com/msg/ZFS-8000-EY", 16297754SJeff.Bonwick@Sun.COM spa_name(spa), hostname, 16303975Sek110237 (unsigned long)hostid); 16313975Sek110237 error = EBADF; 16323975Sek110237 goto out; 16333975Sek110237 } 16343975Sek110237 } 16353975Sek110237 163610594SGeorge.Wilson@Sun.COM spa_config_set(spa, nvconfig); 1637789Sahrens spa_unload(spa); 1638789Sahrens spa_deactivate(spa); 16398241SJeff.Bonwick@Sun.COM spa_activate(spa, orig_mode); 1640789Sahrens 164110921STim.Haley@Sun.COM return (spa_load(spa, state, B_TRUE)); 16421544Seschrock } 16431544Seschrock 16441544Seschrock if (zap_lookup(spa->spa_meta_objset, 16451544Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, 164610922SJeff.Bonwick@Sun.COM sizeof (uint64_t), 1, &spa->spa_deferred_bplist_obj) != 0) { 16471544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 16481544Seschrock VDEV_AUX_CORRUPT_DATA); 16491544Seschrock error = EIO; 16501544Seschrock goto out; 1651789Sahrens } 1652789Sahrens 16531544Seschrock /* 16542082Seschrock * Load the bit that tells us to use the new accounting function 16552082Seschrock * (raid-z deflation). If we have an older pool, this will not 16562082Seschrock * be present. 16572082Seschrock */ 16582082Seschrock error = zap_lookup(spa->spa_meta_objset, 16592082Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 16602082Seschrock sizeof (uint64_t), 1, &spa->spa_deflate); 16612082Seschrock if (error != 0 && error != ENOENT) { 16622082Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 16632082Seschrock VDEV_AUX_CORRUPT_DATA); 16642082Seschrock error = EIO; 16652082Seschrock goto out; 16662082Seschrock } 16672082Seschrock 16682082Seschrock /* 16691544Seschrock * Load the persistent error log. If we have an older pool, this will 16701544Seschrock * not be present. 
16711544Seschrock */ 16721544Seschrock error = zap_lookup(spa->spa_meta_objset, 16731544Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_LAST, 16741544Seschrock sizeof (uint64_t), 1, &spa->spa_errlog_last); 16751807Sbonwick if (error != 0 && error != ENOENT) { 16761544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 16771544Seschrock VDEV_AUX_CORRUPT_DATA); 16781544Seschrock error = EIO; 16791544Seschrock goto out; 16801544Seschrock } 16811544Seschrock 16821544Seschrock error = zap_lookup(spa->spa_meta_objset, 16831544Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_SCRUB, 16841544Seschrock sizeof (uint64_t), 1, &spa->spa_errlog_scrub); 16851544Seschrock if (error != 0 && error != ENOENT) { 16861544Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 16871544Seschrock VDEV_AUX_CORRUPT_DATA); 16881544Seschrock error = EIO; 16891544Seschrock goto out; 16901544Seschrock } 1691789Sahrens 1692789Sahrens /* 16932926Sek110237 * Load the history object. If we have an older pool, this 16942926Sek110237 * will not be present. 16952926Sek110237 */ 16962926Sek110237 error = zap_lookup(spa->spa_meta_objset, 16972926Sek110237 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_HISTORY, 16982926Sek110237 sizeof (uint64_t), 1, &spa->spa_history); 16992926Sek110237 if (error != 0 && error != ENOENT) { 17002926Sek110237 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 17012926Sek110237 VDEV_AUX_CORRUPT_DATA); 17022926Sek110237 error = EIO; 17032926Sek110237 goto out; 17042926Sek110237 } 17052926Sek110237 17062926Sek110237 /* 17072082Seschrock * Load any hot spares for this pool. 
17082082Seschrock */ 17092082Seschrock error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 17105450Sbrendan DMU_POOL_SPARES, sizeof (uint64_t), 1, &spa->spa_spares.sav_object); 17112082Seschrock if (error != 0 && error != ENOENT) { 17122082Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 17132082Seschrock VDEV_AUX_CORRUPT_DATA); 17142082Seschrock error = EIO; 17152082Seschrock goto out; 17162082Seschrock } 17172082Seschrock if (error == 0) { 17184577Sahrens ASSERT(spa_version(spa) >= SPA_VERSION_SPARES); 17195450Sbrendan if (load_nvlist(spa, spa->spa_spares.sav_object, 17205450Sbrendan &spa->spa_spares.sav_config) != 0) { 17212082Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 17222082Seschrock VDEV_AUX_CORRUPT_DATA); 17232082Seschrock error = EIO; 17242082Seschrock goto out; 17252082Seschrock } 17262082Seschrock 17277754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 17282082Seschrock spa_load_spares(spa); 17297754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 17302082Seschrock } 17312082Seschrock 17325450Sbrendan /* 17335450Sbrendan * Load any level 2 ARC devices for this pool. 
17345450Sbrendan */ 17355450Sbrendan error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 17365450Sbrendan DMU_POOL_L2CACHE, sizeof (uint64_t), 1, 17375450Sbrendan &spa->spa_l2cache.sav_object); 17385450Sbrendan if (error != 0 && error != ENOENT) { 17395450Sbrendan vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 17405450Sbrendan VDEV_AUX_CORRUPT_DATA); 17415450Sbrendan error = EIO; 17425450Sbrendan goto out; 17435450Sbrendan } 17445450Sbrendan if (error == 0) { 17455450Sbrendan ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE); 17465450Sbrendan if (load_nvlist(spa, spa->spa_l2cache.sav_object, 17475450Sbrendan &spa->spa_l2cache.sav_config) != 0) { 17485450Sbrendan vdev_set_state(rvd, B_TRUE, 17495450Sbrendan VDEV_STATE_CANT_OPEN, 17505450Sbrendan VDEV_AUX_CORRUPT_DATA); 17515450Sbrendan error = EIO; 17525450Sbrendan goto out; 17535450Sbrendan } 17545450Sbrendan 17557754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 17565450Sbrendan spa_load_l2cache(spa); 17577754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 17585450Sbrendan } 17595450Sbrendan 17605094Slling spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); 17614543Smarks 17623912Slling error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 17633912Slling DMU_POOL_PROPS, sizeof (uint64_t), 1, &spa->spa_pool_props_object); 17643912Slling 17653912Slling if (error && error != ENOENT) { 17663912Slling vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 17673912Slling VDEV_AUX_CORRUPT_DATA); 17683912Slling error = EIO; 17693912Slling goto out; 17703912Slling } 17713912Slling 17723912Slling if (error == 0) { 17733912Slling (void) zap_lookup(spa->spa_meta_objset, 17743912Slling spa->spa_pool_props_object, 17754451Seschrock zpool_prop_to_name(ZPOOL_PROP_BOOTFS), 17763912Slling sizeof (uint64_t), 1, &spa->spa_bootfs); 17774451Seschrock (void) zap_lookup(spa->spa_meta_objset, 17784451Seschrock spa->spa_pool_props_object, 17794451Seschrock 
zpool_prop_to_name(ZPOOL_PROP_AUTOREPLACE), 17804451Seschrock sizeof (uint64_t), 1, &autoreplace); 178110672SEric.Schrock@Sun.COM spa->spa_autoreplace = (autoreplace != 0); 17824543Smarks (void) zap_lookup(spa->spa_meta_objset, 17834543Smarks spa->spa_pool_props_object, 17844543Smarks zpool_prop_to_name(ZPOOL_PROP_DELEGATION), 17854543Smarks sizeof (uint64_t), 1, &spa->spa_delegation); 17865329Sgw25295 (void) zap_lookup(spa->spa_meta_objset, 17875329Sgw25295 spa->spa_pool_props_object, 17885329Sgw25295 zpool_prop_to_name(ZPOOL_PROP_FAILUREMODE), 17895329Sgw25295 sizeof (uint64_t), 1, &spa->spa_failmode); 17909816SGeorge.Wilson@Sun.COM (void) zap_lookup(spa->spa_meta_objset, 17919816SGeorge.Wilson@Sun.COM spa->spa_pool_props_object, 17929816SGeorge.Wilson@Sun.COM zpool_prop_to_name(ZPOOL_PROP_AUTOEXPAND), 17939816SGeorge.Wilson@Sun.COM sizeof (uint64_t), 1, &spa->spa_autoexpand); 179410922SJeff.Bonwick@Sun.COM (void) zap_lookup(spa->spa_meta_objset, 179510922SJeff.Bonwick@Sun.COM spa->spa_pool_props_object, 179610922SJeff.Bonwick@Sun.COM zpool_prop_to_name(ZPOOL_PROP_DEDUPDITTO), 179710922SJeff.Bonwick@Sun.COM sizeof (uint64_t), 1, &spa->spa_dedup_ditto); 17983912Slling } 17993912Slling 18002082Seschrock /* 18014451Seschrock * If the 'autoreplace' property is set, then post a resource notifying 18024451Seschrock * the ZFS DE that it should not issue any faults for unopenable 18034451Seschrock * devices. We also iterate over the vdevs, and post a sysevent for any 18044451Seschrock * unopenable vdevs so that the normal autoreplace handler can take 18054451Seschrock * over. 
18064451Seschrock */ 180710672SEric.Schrock@Sun.COM if (spa->spa_autoreplace && state != SPA_LOAD_TRYIMPORT) { 18084451Seschrock spa_check_removed(spa->spa_root_vdev); 180910672SEric.Schrock@Sun.COM /* 181010672SEric.Schrock@Sun.COM * For the import case, this is done in spa_import(), because 181110672SEric.Schrock@Sun.COM * at this point we're using the spare definitions from 181210672SEric.Schrock@Sun.COM * the MOS config, not necessarily from the userland config. 181310672SEric.Schrock@Sun.COM */ 181410672SEric.Schrock@Sun.COM if (state != SPA_LOAD_IMPORT) { 181510672SEric.Schrock@Sun.COM spa_aux_check_removed(&spa->spa_spares); 181610672SEric.Schrock@Sun.COM spa_aux_check_removed(&spa->spa_l2cache); 181710672SEric.Schrock@Sun.COM } 181810672SEric.Schrock@Sun.COM } 18194451Seschrock 18204451Seschrock /* 18211986Seschrock * Load the vdev state for all toplevel vdevs. 1822789Sahrens */ 18231986Seschrock vdev_load(rvd); 1824789Sahrens 1825789Sahrens /* 1826789Sahrens * Propagate the leaf DTLs we just loaded all the way up the tree. 1827789Sahrens */ 18287754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1829789Sahrens vdev_dtl_reassess(rvd, 0, 0, B_FALSE); 18307754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 1831789Sahrens 1832789Sahrens /* 1833789Sahrens * Check the state of the root vdev. If it can't be opened, it 1834789Sahrens * indicates one or more toplevel vdevs are faulted. 1835789Sahrens */ 18361544Seschrock if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { 18371544Seschrock error = ENXIO; 18381544Seschrock goto out; 18391544Seschrock } 1840789Sahrens 184110922SJeff.Bonwick@Sun.COM /* 184210922SJeff.Bonwick@Sun.COM * Load the DDTs (dedup tables). 
184310922SJeff.Bonwick@Sun.COM */ 184410922SJeff.Bonwick@Sun.COM error = ddt_load(spa); 184510922SJeff.Bonwick@Sun.COM if (error != 0) { 184610922SJeff.Bonwick@Sun.COM vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 184710922SJeff.Bonwick@Sun.COM VDEV_AUX_CORRUPT_DATA); 184810922SJeff.Bonwick@Sun.COM error = EIO; 184910922SJeff.Bonwick@Sun.COM goto out; 185010922SJeff.Bonwick@Sun.COM } 185110922SJeff.Bonwick@Sun.COM 185210956SGeorge.Wilson@Sun.COM spa_update_dspace(spa); 185310956SGeorge.Wilson@Sun.COM 185410921STim.Haley@Sun.COM if (state != SPA_LOAD_TRYIMPORT) { 185510921STim.Haley@Sun.COM error = spa_load_verify(spa); 185610921STim.Haley@Sun.COM if (error) { 185710921STim.Haley@Sun.COM vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 185810921STim.Haley@Sun.COM VDEV_AUX_CORRUPT_DATA); 185910921STim.Haley@Sun.COM goto out; 186010921STim.Haley@Sun.COM } 186110921STim.Haley@Sun.COM } 186210921STim.Haley@Sun.COM 186310922SJeff.Bonwick@Sun.COM /* 186410922SJeff.Bonwick@Sun.COM * Load the intent log state and check log integrity. 
186510922SJeff.Bonwick@Sun.COM */ 186610922SJeff.Bonwick@Sun.COM VERIFY(nvlist_lookup_nvlist(nvconfig, ZPOOL_CONFIG_VDEV_TREE, 186710922SJeff.Bonwick@Sun.COM &nvroot) == 0); 186810922SJeff.Bonwick@Sun.COM spa_load_log_state(spa, nvroot); 186910922SJeff.Bonwick@Sun.COM nvlist_free(nvconfig); 187010922SJeff.Bonwick@Sun.COM 187110922SJeff.Bonwick@Sun.COM if (spa_check_logs(spa)) { 187210922SJeff.Bonwick@Sun.COM vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 187310922SJeff.Bonwick@Sun.COM VDEV_AUX_BAD_LOG); 187410922SJeff.Bonwick@Sun.COM error = ENXIO; 187510922SJeff.Bonwick@Sun.COM ereport = FM_EREPORT_ZFS_LOG_REPLAY; 187610922SJeff.Bonwick@Sun.COM goto out; 187710922SJeff.Bonwick@Sun.COM } 187810922SJeff.Bonwick@Sun.COM 187910921STim.Haley@Sun.COM if (spa_writeable(spa) && (state == SPA_LOAD_RECOVER || 188010921STim.Haley@Sun.COM spa->spa_load_max_txg == UINT64_MAX)) { 18811635Sbonwick dmu_tx_t *tx; 18821635Sbonwick int need_update = B_FALSE; 18838241SJeff.Bonwick@Sun.COM 18848241SJeff.Bonwick@Sun.COM ASSERT(state != SPA_LOAD_TRYIMPORT); 18851601Sbonwick 18861635Sbonwick /* 18871635Sbonwick * Claim log blocks that haven't been committed yet. 18881635Sbonwick * This must all happen in a single txg. 188910922SJeff.Bonwick@Sun.COM * Note: spa_claim_max_txg is updated by spa_claim_notify(), 189010922SJeff.Bonwick@Sun.COM * invoked from zil_claim_log_block()'s i/o done callback. 189110921STim.Haley@Sun.COM * Price of rollback is that we abandon the log. 
18921635Sbonwick */ 189310922SJeff.Bonwick@Sun.COM spa->spa_claiming = B_TRUE; 189410922SJeff.Bonwick@Sun.COM 18951601Sbonwick tx = dmu_tx_create_assigned(spa_get_dsl(spa), 1896789Sahrens spa_first_txg(spa)); 18977754SJeff.Bonwick@Sun.COM (void) dmu_objset_find(spa_name(spa), 18982417Sahrens zil_claim, tx, DS_FIND_CHILDREN); 1899789Sahrens dmu_tx_commit(tx); 1900789Sahrens 190110922SJeff.Bonwick@Sun.COM spa->spa_claiming = B_FALSE; 190210922SJeff.Bonwick@Sun.COM 19039701SGeorge.Wilson@Sun.COM spa->spa_log_state = SPA_LOG_GOOD; 1904789Sahrens spa->spa_sync_on = B_TRUE; 1905789Sahrens txg_sync_start(spa->spa_dsl_pool); 1906789Sahrens 1907789Sahrens /* 190810922SJeff.Bonwick@Sun.COM * Wait for all claims to sync. We sync up to the highest 190910922SJeff.Bonwick@Sun.COM * claimed log block birth time so that claimed log blocks 191010922SJeff.Bonwick@Sun.COM * don't appear to be from the future. spa_claim_max_txg 191110922SJeff.Bonwick@Sun.COM * will have been set for us by either zil_check_log_chain() 191210922SJeff.Bonwick@Sun.COM * (invoked from spa_check_logs()) or zil_claim() above. 1913789Sahrens */ 191410922SJeff.Bonwick@Sun.COM txg_wait_synced(spa->spa_dsl_pool, spa->spa_claim_max_txg); 19151585Sbonwick 19161585Sbonwick /* 19171635Sbonwick * If the config cache is stale, or we have uninitialized 19181635Sbonwick * metaslabs (see spa_vdev_add()), then update the config. 191910100SLin.Ling@Sun.COM * 192010100SLin.Ling@Sun.COM * If spa_load_verbatim is true, trust the current 192110100SLin.Ling@Sun.COM * in-core spa_config and update the disk labels. 
19221585Sbonwick */ 19231635Sbonwick if (config_cache_txg != spa->spa_config_txg || 192410921STim.Haley@Sun.COM state == SPA_LOAD_IMPORT || spa->spa_load_verbatim || 192510921STim.Haley@Sun.COM state == SPA_LOAD_RECOVER) 19261635Sbonwick need_update = B_TRUE; 19271635Sbonwick 19288241SJeff.Bonwick@Sun.COM for (int c = 0; c < rvd->vdev_children; c++) 19291635Sbonwick if (rvd->vdev_child[c]->vdev_ms_array == 0) 19301635Sbonwick need_update = B_TRUE; 19311585Sbonwick 19321585Sbonwick /* 19331635Sbonwick * Update the config cache asychronously in case we're the 19341635Sbonwick * root pool, in which case the config cache isn't writable yet. 19351585Sbonwick */ 19361635Sbonwick if (need_update) 19371635Sbonwick spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); 19388241SJeff.Bonwick@Sun.COM 19398241SJeff.Bonwick@Sun.COM /* 19408241SJeff.Bonwick@Sun.COM * Check all DTLs to see if anything needs resilvering. 19418241SJeff.Bonwick@Sun.COM */ 19428241SJeff.Bonwick@Sun.COM if (vdev_resilver_needed(rvd, NULL, NULL)) 19438241SJeff.Bonwick@Sun.COM spa_async_request(spa, SPA_ASYNC_RESILVER); 194410298SMatthew.Ahrens@Sun.COM 194510298SMatthew.Ahrens@Sun.COM /* 194610298SMatthew.Ahrens@Sun.COM * Delete any inconsistent datasets. 194710298SMatthew.Ahrens@Sun.COM */ 194810298SMatthew.Ahrens@Sun.COM (void) dmu_objset_find(spa_name(spa), 194910298SMatthew.Ahrens@Sun.COM dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN); 195010342Schris.kirby@sun.com 195110342Schris.kirby@sun.com /* 195210342Schris.kirby@sun.com * Clean up any stale temporary dataset userrefs. 
195310342Schris.kirby@sun.com */ 195410342Schris.kirby@sun.com dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool); 1955789Sahrens } 1956789Sahrens 19571544Seschrock error = 0; 19581544Seschrock out: 195910921STim.Haley@Sun.COM 19607046Sahrens spa->spa_minref = refcount_count(&spa->spa_refcount); 19612082Seschrock if (error && error != EBADF) 19627294Sperrin zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0); 196311149SGeorge.Wilson@Sun.COM 196411149SGeorge.Wilson@Sun.COM spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE; 19651544Seschrock spa->spa_ena = 0; 19661544Seschrock 19671544Seschrock return (error); 1968789Sahrens } 1969789Sahrens 197010921STim.Haley@Sun.COM static int 197110921STim.Haley@Sun.COM spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig) 197210921STim.Haley@Sun.COM { 197310921STim.Haley@Sun.COM spa_unload(spa); 197410921STim.Haley@Sun.COM spa_deactivate(spa); 197510921STim.Haley@Sun.COM 197610921STim.Haley@Sun.COM spa->spa_load_max_txg--; 197710921STim.Haley@Sun.COM 197810921STim.Haley@Sun.COM spa_activate(spa, spa_mode_global); 197910921STim.Haley@Sun.COM spa_async_suspend(spa); 198010921STim.Haley@Sun.COM 198110921STim.Haley@Sun.COM return (spa_load(spa, state, mosconfig)); 198210921STim.Haley@Sun.COM } 198310921STim.Haley@Sun.COM 198410921STim.Haley@Sun.COM static int 198510921STim.Haley@Sun.COM spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig, 198610921STim.Haley@Sun.COM uint64_t max_request, boolean_t extreme) 198710921STim.Haley@Sun.COM { 198810921STim.Haley@Sun.COM nvlist_t *config = NULL; 198910921STim.Haley@Sun.COM int load_error, rewind_error; 199010921STim.Haley@Sun.COM uint64_t safe_rollback_txg; 199110921STim.Haley@Sun.COM uint64_t min_txg; 199210921STim.Haley@Sun.COM 199311026STim.Haley@Sun.COM if (spa->spa_load_txg && state == SPA_LOAD_RECOVER) { 199410921STim.Haley@Sun.COM spa->spa_load_max_txg = spa->spa_load_txg; 199511026STim.Haley@Sun.COM spa->spa_log_state = SPA_LOG_CLEAR; 
199611026STim.Haley@Sun.COM } else { 199710921STim.Haley@Sun.COM spa->spa_load_max_txg = max_request; 199811026STim.Haley@Sun.COM } 199910921STim.Haley@Sun.COM 200010921STim.Haley@Sun.COM load_error = rewind_error = spa_load(spa, state, mosconfig); 200110921STim.Haley@Sun.COM if (load_error == 0) 200210921STim.Haley@Sun.COM return (0); 200310921STim.Haley@Sun.COM 200410921STim.Haley@Sun.COM if (spa->spa_root_vdev != NULL) 200510921STim.Haley@Sun.COM config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); 200610921STim.Haley@Sun.COM 200710921STim.Haley@Sun.COM spa->spa_last_ubsync_txg = spa->spa_uberblock.ub_txg; 200810921STim.Haley@Sun.COM spa->spa_last_ubsync_txg_ts = spa->spa_uberblock.ub_timestamp; 200910921STim.Haley@Sun.COM 201010921STim.Haley@Sun.COM /* specific txg requested */ 201110921STim.Haley@Sun.COM if (spa->spa_load_max_txg != UINT64_MAX && !extreme) { 201210921STim.Haley@Sun.COM nvlist_free(config); 201310921STim.Haley@Sun.COM return (load_error); 201410921STim.Haley@Sun.COM } 201510921STim.Haley@Sun.COM 201610921STim.Haley@Sun.COM /* Price of rolling back is discarding txgs, including log */ 201710921STim.Haley@Sun.COM if (state == SPA_LOAD_RECOVER) 201810921STim.Haley@Sun.COM spa->spa_log_state = SPA_LOG_CLEAR; 201910921STim.Haley@Sun.COM 202010921STim.Haley@Sun.COM spa->spa_load_max_txg = spa->spa_uberblock.ub_txg; 202110921STim.Haley@Sun.COM safe_rollback_txg = spa->spa_uberblock.ub_txg - TXG_DEFER_SIZE; 202210921STim.Haley@Sun.COM 202310921STim.Haley@Sun.COM min_txg = extreme ? 
TXG_INITIAL : safe_rollback_txg; 202410921STim.Haley@Sun.COM while (rewind_error && (spa->spa_uberblock.ub_txg >= min_txg)) { 202510921STim.Haley@Sun.COM if (spa->spa_load_max_txg < safe_rollback_txg) 202610921STim.Haley@Sun.COM spa->spa_extreme_rewind = B_TRUE; 202710921STim.Haley@Sun.COM rewind_error = spa_load_retry(spa, state, mosconfig); 202810921STim.Haley@Sun.COM } 202910921STim.Haley@Sun.COM 203010921STim.Haley@Sun.COM if (config) 203110921STim.Haley@Sun.COM spa_rewind_data_to_nvlist(spa, config); 203210921STim.Haley@Sun.COM 203310921STim.Haley@Sun.COM spa->spa_extreme_rewind = B_FALSE; 203410921STim.Haley@Sun.COM spa->spa_load_max_txg = UINT64_MAX; 203510921STim.Haley@Sun.COM 203610921STim.Haley@Sun.COM if (config && (rewind_error || state != SPA_LOAD_RECOVER)) 203710921STim.Haley@Sun.COM spa_config_set(spa, config); 203810921STim.Haley@Sun.COM 203910921STim.Haley@Sun.COM return (state == SPA_LOAD_RECOVER ? rewind_error : load_error); 204010921STim.Haley@Sun.COM } 204110921STim.Haley@Sun.COM 2042789Sahrens /* 2043789Sahrens * Pool Open/Import 2044789Sahrens * 2045789Sahrens * The import case is identical to an open except that the configuration is sent 2046789Sahrens * down from userland, instead of grabbed from the configuration cache. For the 2047789Sahrens * case of an open, the pool configuration will exist in the 20484451Seschrock * POOL_STATE_UNINITIALIZED state. 2049789Sahrens * 2050789Sahrens * The stats information (gen/count/ustats) is used to gather vdev statistics at 2051789Sahrens * the same time open the pool, without having to keep around the spa_t in some 2052789Sahrens * ambiguous state. 
 */
/*
 * Common implementation for spa_open() and spa_open_rewind().
 *
 * 'nvpolicy' optionally carries a rewind policy (decoded with
 * zpool_get_rewind_policy()); if 'config' is non-NULL it receives the
 * pool's configuration nvlist.  On success returns 0 with *spapp set and
 * an open reference held (spa_open_ref); on failure returns an errno and
 * *spapp is NULL.
 */
static int
spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy,
    nvlist_t **config)
{
	spa_t *spa;
	boolean_t norewind;
	boolean_t extreme;
	zpool_rewind_policy_t policy;
	spa_load_state_t state = SPA_LOAD_OPEN;
	int error;
	int locked = B_FALSE;

	*spapp = NULL;

	zpool_get_rewind_policy(nvpolicy, &policy);
	if (policy.zrp_request & ZPOOL_DO_REWIND)
		state = SPA_LOAD_RECOVER;
	norewind = (policy.zrp_request == ZPOOL_NO_REWIND);
	extreme = ((policy.zrp_request & ZPOOL_EXTREME_REWIND) != 0);

	/*
	 * As disgusting as this is, we need to support recursive calls to this
	 * function because dsl_dir_open() is called during spa_load(), and ends
	 * up calling spa_open() again.  The real fix is to figure out how to
	 * avoid dsl_dir_open() calling this in the first place.
	 */
	if (mutex_owner(&spa_namespace_lock) != curthread) {
		mutex_enter(&spa_namespace_lock);
		locked = B_TRUE;
	}

	if ((spa = spa_lookup(pool)) == NULL) {
		if (locked)
			mutex_exit(&spa_namespace_lock);
		return (ENOENT);
	}

	if (spa->spa_state == POOL_STATE_UNINITIALIZED) {

		spa_activate(spa, spa_mode_global);

		/*
		 * A previous open failed and the caller does not allow
		 * rewind: hand back the cached config and the old error
		 * without attempting another load.
		 */
		if (spa->spa_last_open_failed && norewind) {
			if (config != NULL && spa->spa_config)
				VERIFY(nvlist_dup(spa->spa_config,
				    config, KM_SLEEP) == 0);
			spa_deactivate(spa);
			if (locked)
				mutex_exit(&spa_namespace_lock);
			return (spa->spa_last_open_failed);
		}

		if (state != SPA_LOAD_RECOVER)
			spa->spa_last_ubsync_txg = spa->spa_load_txg = 0;

		error = spa_load_best(spa, state, B_FALSE, policy.zrp_txg,
		    extreme);

		if (error == EBADF) {
			/*
			 * If vdev_validate() returns failure (indicated by
			 * EBADF), it indicates that one of the vdevs indicates
			 * that the pool has been exported or destroyed.  If
			 * this is the case, the config cache is out of sync and
			 * we should remove the pool from the namespace.
			 */
			spa_unload(spa);
			spa_deactivate(spa);
			spa_config_sync(spa, B_TRUE, B_TRUE);
			spa_remove(spa);
			if (locked)
				mutex_exit(&spa_namespace_lock);
			return (ENOENT);
		}

		if (error) {
			/*
			 * We can't open the pool, but we still have useful
			 * information: the state of each vdev after the
			 * attempted vdev_open().  Return this to the user.
			 */
			if (config != NULL && spa->spa_config)
				VERIFY(nvlist_dup(spa->spa_config, config,
				    KM_SLEEP) == 0);
			spa_unload(spa);
			spa_deactivate(spa);
			spa->spa_last_open_failed = error;
			if (locked)
				mutex_exit(&spa_namespace_lock);
			*spapp = NULL;
			return (error);
		}

	}

	spa_open_ref(spa, tag);

	if (config != NULL)
		*config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);

	/* On a successful outermost open, clear the stale failure state. */
	if (locked) {
		spa->spa_last_open_failed = 0;
		spa->spa_last_ubsync_txg = 0;
		spa->spa_load_txg = 0;
		mutex_exit(&spa_namespace_lock);
	}

	*spapp = spa;

	return (0);
}

/*
 * Open a pool, honoring the caller-supplied rewind policy and returning
 * the pool config in *config.
 */
int
spa_open_rewind(const char *name, spa_t **spapp, void *tag, nvlist_t *policy,
    nvlist_t **config)
{
	return (spa_open_common(name, spapp, tag, policy, config));
}

/*
 * Open a pool with the default (no-rewind) policy and no config output.
 */
int
spa_open(const char *name, spa_t **spapp, void *tag)
{
	return (spa_open_common(name, spapp, tag, NULL, NULL));
}

/*
 * Lookup the given spa_t, incrementing the inject count in the process,
 * preventing it from being exported or destroyed.
 */
spa_t *
spa_inject_addref(char *name)
{
	spa_t *spa;

	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(name)) == NULL) {
		mutex_exit(&spa_namespace_lock);
		return (NULL);
	}
	spa->spa_inject_ref++;
	mutex_exit(&spa_namespace_lock);

	return (spa);
}

/* Drop an injection reference taken by spa_inject_addref(). */
void
spa_inject_delref(spa_t *spa)
{
	mutex_enter(&spa_namespace_lock);
	spa->spa_inject_ref--;
	mutex_exit(&spa_namespace_lock);
}

/*
 * Add spares device information to the nvlist.
22095450Sbrendan */ 22102082Seschrock static void 22112082Seschrock spa_add_spares(spa_t *spa, nvlist_t *config) 22122082Seschrock { 22132082Seschrock nvlist_t **spares; 22142082Seschrock uint_t i, nspares; 22152082Seschrock nvlist_t *nvroot; 22162082Seschrock uint64_t guid; 22172082Seschrock vdev_stat_t *vs; 22182082Seschrock uint_t vsc; 22193377Seschrock uint64_t pool; 22202082Seschrock 22219425SEric.Schrock@Sun.COM ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); 22229425SEric.Schrock@Sun.COM 22235450Sbrendan if (spa->spa_spares.sav_count == 0) 22242082Seschrock return; 22252082Seschrock 22262082Seschrock VERIFY(nvlist_lookup_nvlist(config, 22272082Seschrock ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 22285450Sbrendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 22292082Seschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 22302082Seschrock if (nspares != 0) { 22312082Seschrock VERIFY(nvlist_add_nvlist_array(nvroot, 22322082Seschrock ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 22332082Seschrock VERIFY(nvlist_lookup_nvlist_array(nvroot, 22342082Seschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 22352082Seschrock 22362082Seschrock /* 22372082Seschrock * Go through and find any spares which have since been 22382082Seschrock * repurposed as an active spare. If this is the case, update 22392082Seschrock * their status appropriately. 
22402082Seschrock */ 22412082Seschrock for (i = 0; i < nspares; i++) { 22422082Seschrock VERIFY(nvlist_lookup_uint64(spares[i], 22432082Seschrock ZPOOL_CONFIG_GUID, &guid) == 0); 22447214Slling if (spa_spare_exists(guid, &pool, NULL) && 22457214Slling pool != 0ULL) { 22462082Seschrock VERIFY(nvlist_lookup_uint64_array( 22472082Seschrock spares[i], ZPOOL_CONFIG_STATS, 22482082Seschrock (uint64_t **)&vs, &vsc) == 0); 22492082Seschrock vs->vs_state = VDEV_STATE_CANT_OPEN; 22502082Seschrock vs->vs_aux = VDEV_AUX_SPARED; 22512082Seschrock } 22522082Seschrock } 22532082Seschrock } 22542082Seschrock } 22552082Seschrock 22565450Sbrendan /* 22575450Sbrendan * Add l2cache device information to the nvlist, including vdev stats. 22585450Sbrendan */ 22595450Sbrendan static void 22605450Sbrendan spa_add_l2cache(spa_t *spa, nvlist_t *config) 22615450Sbrendan { 22625450Sbrendan nvlist_t **l2cache; 22635450Sbrendan uint_t i, j, nl2cache; 22645450Sbrendan nvlist_t *nvroot; 22655450Sbrendan uint64_t guid; 22665450Sbrendan vdev_t *vd; 22675450Sbrendan vdev_stat_t *vs; 22685450Sbrendan uint_t vsc; 22695450Sbrendan 22709425SEric.Schrock@Sun.COM ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); 22719425SEric.Schrock@Sun.COM 22725450Sbrendan if (spa->spa_l2cache.sav_count == 0) 22735450Sbrendan return; 22745450Sbrendan 22755450Sbrendan VERIFY(nvlist_lookup_nvlist(config, 22765450Sbrendan ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 22775450Sbrendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, 22785450Sbrendan ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 22795450Sbrendan if (nl2cache != 0) { 22805450Sbrendan VERIFY(nvlist_add_nvlist_array(nvroot, 22815450Sbrendan ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); 22825450Sbrendan VERIFY(nvlist_lookup_nvlist_array(nvroot, 22835450Sbrendan ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 22845450Sbrendan 22855450Sbrendan /* 22865450Sbrendan * Update level 2 cache device stats. 
22875450Sbrendan */ 22885450Sbrendan 22895450Sbrendan for (i = 0; i < nl2cache; i++) { 22905450Sbrendan VERIFY(nvlist_lookup_uint64(l2cache[i], 22915450Sbrendan ZPOOL_CONFIG_GUID, &guid) == 0); 22925450Sbrendan 22935450Sbrendan vd = NULL; 22945450Sbrendan for (j = 0; j < spa->spa_l2cache.sav_count; j++) { 22955450Sbrendan if (guid == 22965450Sbrendan spa->spa_l2cache.sav_vdevs[j]->vdev_guid) { 22975450Sbrendan vd = spa->spa_l2cache.sav_vdevs[j]; 22985450Sbrendan break; 22995450Sbrendan } 23005450Sbrendan } 23015450Sbrendan ASSERT(vd != NULL); 23025450Sbrendan 23035450Sbrendan VERIFY(nvlist_lookup_uint64_array(l2cache[i], 23045450Sbrendan ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0); 23055450Sbrendan vdev_get_stats(vd, vs); 23065450Sbrendan } 23075450Sbrendan } 23085450Sbrendan } 23095450Sbrendan 2310789Sahrens int 23111544Seschrock spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen) 2312789Sahrens { 2313789Sahrens int error; 2314789Sahrens spa_t *spa; 2315789Sahrens 2316789Sahrens *config = NULL; 231710921STim.Haley@Sun.COM error = spa_open_common(name, &spa, FTAG, NULL, config); 2318789Sahrens 23199425SEric.Schrock@Sun.COM if (spa != NULL) { 23209425SEric.Schrock@Sun.COM /* 23219425SEric.Schrock@Sun.COM * This still leaves a window of inconsistency where the spares 23229425SEric.Schrock@Sun.COM * or l2cache devices could change and the config would be 23239425SEric.Schrock@Sun.COM * self-inconsistent. 
23249425SEric.Schrock@Sun.COM */ 23259425SEric.Schrock@Sun.COM spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 23269425SEric.Schrock@Sun.COM 23279425SEric.Schrock@Sun.COM if (*config != NULL) { 23287754SJeff.Bonwick@Sun.COM VERIFY(nvlist_add_uint64(*config, 23299425SEric.Schrock@Sun.COM ZPOOL_CONFIG_ERRCOUNT, 23309425SEric.Schrock@Sun.COM spa_get_errlog_size(spa)) == 0); 23319425SEric.Schrock@Sun.COM 23329425SEric.Schrock@Sun.COM if (spa_suspended(spa)) 23339425SEric.Schrock@Sun.COM VERIFY(nvlist_add_uint64(*config, 23349425SEric.Schrock@Sun.COM ZPOOL_CONFIG_SUSPENDED, 23359425SEric.Schrock@Sun.COM spa->spa_failmode) == 0); 23369425SEric.Schrock@Sun.COM 23379425SEric.Schrock@Sun.COM spa_add_spares(spa, *config); 23389425SEric.Schrock@Sun.COM spa_add_l2cache(spa, *config); 23399425SEric.Schrock@Sun.COM } 23402082Seschrock } 23412082Seschrock 23421544Seschrock /* 23431544Seschrock * We want to get the alternate root even for faulted pools, so we cheat 23441544Seschrock * and call spa_lookup() directly. 23451544Seschrock */ 23461544Seschrock if (altroot) { 23471544Seschrock if (spa == NULL) { 23481544Seschrock mutex_enter(&spa_namespace_lock); 23491544Seschrock spa = spa_lookup(name); 23501544Seschrock if (spa) 23511544Seschrock spa_altroot(spa, altroot, buflen); 23521544Seschrock else 23531544Seschrock altroot[0] = '\0'; 23541544Seschrock spa = NULL; 23551544Seschrock mutex_exit(&spa_namespace_lock); 23561544Seschrock } else { 23571544Seschrock spa_altroot(spa, altroot, buflen); 23581544Seschrock } 23591544Seschrock } 23601544Seschrock 23619425SEric.Schrock@Sun.COM if (spa != NULL) { 23629425SEric.Schrock@Sun.COM spa_config_exit(spa, SCL_CONFIG, FTAG); 2363789Sahrens spa_close(spa, FTAG); 23649425SEric.Schrock@Sun.COM } 2365789Sahrens 2366789Sahrens return (error); 2367789Sahrens } 2368789Sahrens 2369789Sahrens /* 23705450Sbrendan * Validate that the auxiliary device array is well formed. 
We must have an 23715450Sbrendan * array of nvlists, each which describes a valid leaf vdev. If this is an 23725450Sbrendan * import (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be 23735450Sbrendan * specified, as long as they are well-formed. 23742082Seschrock */ 23752082Seschrock static int 23765450Sbrendan spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode, 23775450Sbrendan spa_aux_vdev_t *sav, const char *config, uint64_t version, 23785450Sbrendan vdev_labeltype_t label) 23792082Seschrock { 23805450Sbrendan nvlist_t **dev; 23815450Sbrendan uint_t i, ndev; 23822082Seschrock vdev_t *vd; 23832082Seschrock int error; 23842082Seschrock 23857754SJeff.Bonwick@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 23867754SJeff.Bonwick@Sun.COM 23872082Seschrock /* 23885450Sbrendan * It's acceptable to have no devs specified. 23892082Seschrock */ 23905450Sbrendan if (nvlist_lookup_nvlist_array(nvroot, config, &dev, &ndev) != 0) 23912082Seschrock return (0); 23922082Seschrock 23935450Sbrendan if (ndev == 0) 23942082Seschrock return (EINVAL); 23952082Seschrock 23962082Seschrock /* 23975450Sbrendan * Make sure the pool is formatted with a version that supports this 23985450Sbrendan * device type. 23992082Seschrock */ 24005450Sbrendan if (spa_version(spa) < version) 24012082Seschrock return (ENOTSUP); 24022082Seschrock 24033377Seschrock /* 24045450Sbrendan * Set the pending device list so we correctly handle device in-use 24053377Seschrock * checking. 
24063377Seschrock */ 24075450Sbrendan sav->sav_pending = dev; 24085450Sbrendan sav->sav_npending = ndev; 24095450Sbrendan 24105450Sbrendan for (i = 0; i < ndev; i++) { 24115450Sbrendan if ((error = spa_config_parse(spa, &vd, dev[i], NULL, 0, 24122082Seschrock mode)) != 0) 24133377Seschrock goto out; 24142082Seschrock 24152082Seschrock if (!vd->vdev_ops->vdev_op_leaf) { 24162082Seschrock vdev_free(vd); 24173377Seschrock error = EINVAL; 24183377Seschrock goto out; 24192082Seschrock } 24202082Seschrock 24215450Sbrendan /* 24227754SJeff.Bonwick@Sun.COM * The L2ARC currently only supports disk devices in 24237754SJeff.Bonwick@Sun.COM * kernel context. For user-level testing, we allow it. 24245450Sbrendan */ 24257754SJeff.Bonwick@Sun.COM #ifdef _KERNEL 24265450Sbrendan if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) && 24275450Sbrendan strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) { 24285450Sbrendan error = ENOTBLK; 24295450Sbrendan goto out; 24305450Sbrendan } 24317754SJeff.Bonwick@Sun.COM #endif 24322082Seschrock vd->vdev_top = vd; 24333377Seschrock 24343377Seschrock if ((error = vdev_open(vd)) == 0 && 24355450Sbrendan (error = vdev_label_init(vd, crtxg, label)) == 0) { 24365450Sbrendan VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID, 24373377Seschrock vd->vdev_guid) == 0); 24382082Seschrock } 24392082Seschrock 24402082Seschrock vdev_free(vd); 24413377Seschrock 24425450Sbrendan if (error && 24435450Sbrendan (mode != VDEV_ALLOC_SPARE && mode != VDEV_ALLOC_L2CACHE)) 24443377Seschrock goto out; 24453377Seschrock else 24463377Seschrock error = 0; 24472082Seschrock } 24482082Seschrock 24493377Seschrock out: 24505450Sbrendan sav->sav_pending = NULL; 24515450Sbrendan sav->sav_npending = 0; 24523377Seschrock return (error); 24532082Seschrock } 24542082Seschrock 24555450Sbrendan static int 24565450Sbrendan spa_validate_aux(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode) 24575450Sbrendan { 24585450Sbrendan int error; 24595450Sbrendan 
24607754SJeff.Bonwick@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 24617754SJeff.Bonwick@Sun.COM 24625450Sbrendan if ((error = spa_validate_aux_devs(spa, nvroot, crtxg, mode, 24635450Sbrendan &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES, 24645450Sbrendan VDEV_LABEL_SPARE)) != 0) { 24655450Sbrendan return (error); 24665450Sbrendan } 24675450Sbrendan 24685450Sbrendan return (spa_validate_aux_devs(spa, nvroot, crtxg, mode, 24695450Sbrendan &spa->spa_l2cache, ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE, 24705450Sbrendan VDEV_LABEL_L2CACHE)); 24715450Sbrendan } 24725450Sbrendan 24735450Sbrendan static void 24745450Sbrendan spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs, 24755450Sbrendan const char *config) 24765450Sbrendan { 24775450Sbrendan int i; 24785450Sbrendan 24795450Sbrendan if (sav->sav_config != NULL) { 24805450Sbrendan nvlist_t **olddevs; 24815450Sbrendan uint_t oldndevs; 24825450Sbrendan nvlist_t **newdevs; 24835450Sbrendan 24845450Sbrendan /* 24855450Sbrendan * Generate new dev list by concatentating with the 24865450Sbrendan * current dev list. 
24875450Sbrendan */ 24885450Sbrendan VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config, 24895450Sbrendan &olddevs, &oldndevs) == 0); 24905450Sbrendan 24915450Sbrendan newdevs = kmem_alloc(sizeof (void *) * 24925450Sbrendan (ndevs + oldndevs), KM_SLEEP); 24935450Sbrendan for (i = 0; i < oldndevs; i++) 24945450Sbrendan VERIFY(nvlist_dup(olddevs[i], &newdevs[i], 24955450Sbrendan KM_SLEEP) == 0); 24965450Sbrendan for (i = 0; i < ndevs; i++) 24975450Sbrendan VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs], 24985450Sbrendan KM_SLEEP) == 0); 24995450Sbrendan 25005450Sbrendan VERIFY(nvlist_remove(sav->sav_config, config, 25015450Sbrendan DATA_TYPE_NVLIST_ARRAY) == 0); 25025450Sbrendan 25035450Sbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, 25045450Sbrendan config, newdevs, ndevs + oldndevs) == 0); 25055450Sbrendan for (i = 0; i < oldndevs + ndevs; i++) 25065450Sbrendan nvlist_free(newdevs[i]); 25075450Sbrendan kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *)); 25085450Sbrendan } else { 25095450Sbrendan /* 25105450Sbrendan * Generate a new dev list. 
25115450Sbrendan */ 25125450Sbrendan VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME, 25135450Sbrendan KM_SLEEP) == 0); 25145450Sbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, config, 25155450Sbrendan devs, ndevs) == 0); 25165450Sbrendan } 25175450Sbrendan } 25185450Sbrendan 25195450Sbrendan /* 25205450Sbrendan * Stop and drop level 2 ARC devices 25215450Sbrendan */ 25225450Sbrendan void 25235450Sbrendan spa_l2cache_drop(spa_t *spa) 25245450Sbrendan { 25255450Sbrendan vdev_t *vd; 25265450Sbrendan int i; 25275450Sbrendan spa_aux_vdev_t *sav = &spa->spa_l2cache; 25285450Sbrendan 25295450Sbrendan for (i = 0; i < sav->sav_count; i++) { 25305450Sbrendan uint64_t pool; 25315450Sbrendan 25325450Sbrendan vd = sav->sav_vdevs[i]; 25335450Sbrendan ASSERT(vd != NULL); 25345450Sbrendan 25358241SJeff.Bonwick@Sun.COM if (spa_l2cache_exists(vd->vdev_guid, &pool) && 25368241SJeff.Bonwick@Sun.COM pool != 0ULL && l2arc_vdev_present(vd)) 25375450Sbrendan l2arc_remove_vdev(vd); 25385450Sbrendan if (vd->vdev_isl2cache) 25395450Sbrendan spa_l2cache_remove(vd); 25405450Sbrendan vdev_clear_stats(vd); 25415450Sbrendan (void) vdev_close(vd); 25425450Sbrendan } 25435450Sbrendan } 25445450Sbrendan 25452082Seschrock /* 2546789Sahrens * Pool Creation 2547789Sahrens */ 2548789Sahrens int 25495094Slling spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, 25507184Stimh const char *history_str, nvlist_t *zplprops) 2551789Sahrens { 2552789Sahrens spa_t *spa; 25535094Slling char *altroot = NULL; 25541635Sbonwick vdev_t *rvd; 2555789Sahrens dsl_pool_t *dp; 2556789Sahrens dmu_tx_t *tx; 25579816SGeorge.Wilson@Sun.COM int error = 0; 2558789Sahrens uint64_t txg = TXG_INITIAL; 25595450Sbrendan nvlist_t **spares, **l2cache; 25605450Sbrendan uint_t nspares, nl2cache; 25615094Slling uint64_t version; 2562789Sahrens 2563789Sahrens /* 2564789Sahrens * If this pool already exists, return failure. 
2565789Sahrens */ 2566789Sahrens mutex_enter(&spa_namespace_lock); 2567789Sahrens if (spa_lookup(pool) != NULL) { 2568789Sahrens mutex_exit(&spa_namespace_lock); 2569789Sahrens return (EEXIST); 2570789Sahrens } 2571789Sahrens 2572789Sahrens /* 2573789Sahrens * Allocate a new spa_t structure. 2574789Sahrens */ 25755094Slling (void) nvlist_lookup_string(props, 25765094Slling zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); 257710921STim.Haley@Sun.COM spa = spa_add(pool, NULL, altroot); 25788241SJeff.Bonwick@Sun.COM spa_activate(spa, spa_mode_global); 2579789Sahrens 25805094Slling if (props && (error = spa_prop_validate(spa, props))) { 25815094Slling spa_deactivate(spa); 25825094Slling spa_remove(spa); 25836643Seschrock mutex_exit(&spa_namespace_lock); 25845094Slling return (error); 25855094Slling } 25865094Slling 25875094Slling if (nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), 25885094Slling &version) != 0) 25895094Slling version = SPA_VERSION; 25905094Slling ASSERT(version <= SPA_VERSION); 259110922SJeff.Bonwick@Sun.COM 259210922SJeff.Bonwick@Sun.COM spa->spa_first_txg = txg; 259310922SJeff.Bonwick@Sun.COM spa->spa_uberblock.ub_txg = txg - 1; 25945094Slling spa->spa_uberblock.ub_version = version; 2595789Sahrens spa->spa_ubsync = spa->spa_uberblock; 2596789Sahrens 25971635Sbonwick /* 25989234SGeorge.Wilson@Sun.COM * Create "The Godfather" zio to hold all async IOs 25999234SGeorge.Wilson@Sun.COM */ 26009630SJeff.Bonwick@Sun.COM spa->spa_async_zio_root = zio_root(spa, NULL, NULL, 26019630SJeff.Bonwick@Sun.COM ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); 26029234SGeorge.Wilson@Sun.COM 26039234SGeorge.Wilson@Sun.COM /* 26041635Sbonwick * Create the root vdev. 
26051635Sbonwick */ 26067754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 26071635Sbonwick 26082082Seschrock error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD); 26092082Seschrock 26102082Seschrock ASSERT(error != 0 || rvd != NULL); 26112082Seschrock ASSERT(error != 0 || spa->spa_root_vdev == rvd); 26122082Seschrock 26135913Sperrin if (error == 0 && !zfs_allocatable_devs(nvroot)) 26141635Sbonwick error = EINVAL; 26152082Seschrock 26162082Seschrock if (error == 0 && 26172082Seschrock (error = vdev_create(rvd, txg, B_FALSE)) == 0 && 26185450Sbrendan (error = spa_validate_aux(spa, nvroot, txg, 26192082Seschrock VDEV_ALLOC_ADD)) == 0) { 26209816SGeorge.Wilson@Sun.COM for (int c = 0; c < rvd->vdev_children; c++) { 26219816SGeorge.Wilson@Sun.COM vdev_metaslab_set_size(rvd->vdev_child[c]); 26229816SGeorge.Wilson@Sun.COM vdev_expand(rvd->vdev_child[c], txg); 26239816SGeorge.Wilson@Sun.COM } 26241635Sbonwick } 26251635Sbonwick 26267754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 2627789Sahrens 26282082Seschrock if (error != 0) { 2629789Sahrens spa_unload(spa); 2630789Sahrens spa_deactivate(spa); 2631789Sahrens spa_remove(spa); 2632789Sahrens mutex_exit(&spa_namespace_lock); 2633789Sahrens return (error); 2634789Sahrens } 2635789Sahrens 26362082Seschrock /* 26372082Seschrock * Get the list of spares, if specified. 
26382082Seschrock */ 26392082Seschrock if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 26402082Seschrock &spares, &nspares) == 0) { 26415450Sbrendan VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME, 26422082Seschrock KM_SLEEP) == 0); 26435450Sbrendan VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, 26442082Seschrock ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 26457754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 26462082Seschrock spa_load_spares(spa); 26477754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 26485450Sbrendan spa->spa_spares.sav_sync = B_TRUE; 26495450Sbrendan } 26505450Sbrendan 26515450Sbrendan /* 26525450Sbrendan * Get the list of level 2 cache devices, if specified. 26535450Sbrendan */ 26545450Sbrendan if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, 26555450Sbrendan &l2cache, &nl2cache) == 0) { 26565450Sbrendan VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, 26575450Sbrendan NV_UNIQUE_NAME, KM_SLEEP) == 0); 26585450Sbrendan VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, 26595450Sbrendan ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); 26607754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 26615450Sbrendan spa_load_l2cache(spa); 26627754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 26635450Sbrendan spa->spa_l2cache.sav_sync = B_TRUE; 26642082Seschrock } 26652082Seschrock 26667184Stimh spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg); 2667789Sahrens spa->spa_meta_objset = dp->dp_meta_objset; 2668789Sahrens 266910956SGeorge.Wilson@Sun.COM /* 267010956SGeorge.Wilson@Sun.COM * Create DDTs (dedup tables). 
267110956SGeorge.Wilson@Sun.COM */ 267210956SGeorge.Wilson@Sun.COM ddt_create(spa); 267310956SGeorge.Wilson@Sun.COM 267410956SGeorge.Wilson@Sun.COM spa_update_dspace(spa); 267510956SGeorge.Wilson@Sun.COM 2676789Sahrens tx = dmu_tx_create_assigned(dp, txg); 2677789Sahrens 2678789Sahrens /* 2679789Sahrens * Create the pool config object. 2680789Sahrens */ 2681789Sahrens spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset, 26827497STim.Haley@Sun.COM DMU_OT_PACKED_NVLIST, SPA_CONFIG_BLOCKSIZE, 2683789Sahrens DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx); 2684789Sahrens 26851544Seschrock if (zap_add(spa->spa_meta_objset, 2686789Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, 26871544Seschrock sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) { 26881544Seschrock cmn_err(CE_PANIC, "failed to add pool config"); 26891544Seschrock } 2690789Sahrens 26915094Slling /* Newly created pools with the right version are always deflated. */ 26925094Slling if (version >= SPA_VERSION_RAIDZ_DEFLATE) { 26935094Slling spa->spa_deflate = TRUE; 26945094Slling if (zap_add(spa->spa_meta_objset, 26955094Slling DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 26965094Slling sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) { 26975094Slling cmn_err(CE_PANIC, "failed to add deflate"); 26985094Slling } 26992082Seschrock } 27002082Seschrock 2701789Sahrens /* 2702789Sahrens * Create the deferred-free bplist object. Turn off compression 2703789Sahrens * because sync-to-convergence takes longer if the blocksize 2704789Sahrens * keeps changing. 
2705789Sahrens */ 270610922SJeff.Bonwick@Sun.COM spa->spa_deferred_bplist_obj = bplist_create(spa->spa_meta_objset, 2707789Sahrens 1 << 14, tx); 270810922SJeff.Bonwick@Sun.COM dmu_object_set_compress(spa->spa_meta_objset, 270910922SJeff.Bonwick@Sun.COM spa->spa_deferred_bplist_obj, ZIO_COMPRESS_OFF, tx); 2710789Sahrens 27111544Seschrock if (zap_add(spa->spa_meta_objset, 2712789Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, 271310922SJeff.Bonwick@Sun.COM sizeof (uint64_t), 1, &spa->spa_deferred_bplist_obj, tx) != 0) { 27141544Seschrock cmn_err(CE_PANIC, "failed to add bplist"); 27151544Seschrock } 2716789Sahrens 27172926Sek110237 /* 27182926Sek110237 * Create the pool's history object. 27192926Sek110237 */ 27205094Slling if (version >= SPA_VERSION_ZPOOL_HISTORY) 27215094Slling spa_history_create_obj(spa, tx); 27225094Slling 27235094Slling /* 27245094Slling * Set pool properties. 27255094Slling */ 27265094Slling spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS); 27275094Slling spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); 27285329Sgw25295 spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE); 27299816SGeorge.Wilson@Sun.COM spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND); 273010922SJeff.Bonwick@Sun.COM 27318525SEric.Schrock@Sun.COM if (props != NULL) { 27328525SEric.Schrock@Sun.COM spa_configfile_set(spa, props, B_FALSE); 27335094Slling spa_sync_props(spa, props, CRED(), tx); 27348525SEric.Schrock@Sun.COM } 27352926Sek110237 2736789Sahrens dmu_tx_commit(tx); 2737789Sahrens 2738789Sahrens spa->spa_sync_on = B_TRUE; 2739789Sahrens txg_sync_start(spa->spa_dsl_pool); 2740789Sahrens 2741789Sahrens /* 2742789Sahrens * We explicitly wait for the first transaction to complete so that our 2743789Sahrens * bean counters are appropriately updated. 
2744789Sahrens */ 2745789Sahrens txg_wait_synced(spa->spa_dsl_pool, txg); 2746789Sahrens 27476643Seschrock spa_config_sync(spa, B_FALSE, B_TRUE); 2748789Sahrens 27495094Slling if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL) 27504715Sek110237 (void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE); 27519946SMark.Musante@Sun.COM spa_history_log_version(spa, LOG_POOL_CREATE); 27524715Sek110237 27538667SGeorge.Wilson@Sun.COM spa->spa_minref = refcount_count(&spa->spa_refcount); 27548667SGeorge.Wilson@Sun.COM 2755789Sahrens mutex_exit(&spa_namespace_lock); 2756789Sahrens 2757789Sahrens return (0); 2758789Sahrens } 2759789Sahrens 27606423Sgw25295 #ifdef _KERNEL 27616423Sgw25295 /* 27629790SLin.Ling@Sun.COM * Get the root pool information from the root disk, then import the root pool 27639790SLin.Ling@Sun.COM * during the system boot up time. 27646423Sgw25295 */ 27659790SLin.Ling@Sun.COM extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **); 27669790SLin.Ling@Sun.COM 27679790SLin.Ling@Sun.COM static nvlist_t * 27689790SLin.Ling@Sun.COM spa_generate_rootconf(char *devpath, char *devid, uint64_t *guid) 27696423Sgw25295 { 27709790SLin.Ling@Sun.COM nvlist_t *config; 27716423Sgw25295 nvlist_t *nvtop, *nvroot; 27726423Sgw25295 uint64_t pgid; 27736423Sgw25295 27749790SLin.Ling@Sun.COM if (vdev_disk_read_rootlabel(devpath, devid, &config) != 0) 27759790SLin.Ling@Sun.COM return (NULL); 27769790SLin.Ling@Sun.COM 27776423Sgw25295 /* 27786423Sgw25295 * Add this top-level vdev to the child array. 
27796423Sgw25295 */ 27809790SLin.Ling@Sun.COM VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 27819790SLin.Ling@Sun.COM &nvtop) == 0); 27829790SLin.Ling@Sun.COM VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, 27839790SLin.Ling@Sun.COM &pgid) == 0); 27849790SLin.Ling@Sun.COM VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, guid) == 0); 27856423Sgw25295 27866423Sgw25295 /* 27876423Sgw25295 * Put this pool's top-level vdevs into a root vdev. 27886423Sgw25295 */ 27896423Sgw25295 VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); 27909790SLin.Ling@Sun.COM VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, 27919790SLin.Ling@Sun.COM VDEV_TYPE_ROOT) == 0); 27926423Sgw25295 VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0); 27936423Sgw25295 VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0); 27946423Sgw25295 VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 27956423Sgw25295 &nvtop, 1) == 0); 27966423Sgw25295 27976423Sgw25295 /* 27986423Sgw25295 * Replace the existing vdev_tree with the new root vdev in 27996423Sgw25295 * this pool's configuration (remove the old, add the new). 28006423Sgw25295 */ 28016423Sgw25295 VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); 28026423Sgw25295 nvlist_free(nvroot); 28039790SLin.Ling@Sun.COM return (config); 28046423Sgw25295 } 28056423Sgw25295 28066423Sgw25295 /* 28079790SLin.Ling@Sun.COM * Walk the vdev tree and see if we can find a device with "better" 28089790SLin.Ling@Sun.COM * configuration. A configuration is "better" if the label on that 28099790SLin.Ling@Sun.COM * device has a more recent txg. 
28106423Sgw25295 */ 28119790SLin.Ling@Sun.COM static void 28129790SLin.Ling@Sun.COM spa_alt_rootvdev(vdev_t *vd, vdev_t **avd, uint64_t *txg) 28137147Staylor { 28149816SGeorge.Wilson@Sun.COM for (int c = 0; c < vd->vdev_children; c++) 28159790SLin.Ling@Sun.COM spa_alt_rootvdev(vd->vdev_child[c], avd, txg); 28169790SLin.Ling@Sun.COM 28179790SLin.Ling@Sun.COM if (vd->vdev_ops->vdev_op_leaf) { 28189790SLin.Ling@Sun.COM nvlist_t *label; 28199790SLin.Ling@Sun.COM uint64_t label_txg; 28209790SLin.Ling@Sun.COM 28219790SLin.Ling@Sun.COM if (vdev_disk_read_rootlabel(vd->vdev_physpath, vd->vdev_devid, 28229790SLin.Ling@Sun.COM &label) != 0) 28239790SLin.Ling@Sun.COM return; 28249790SLin.Ling@Sun.COM 28259790SLin.Ling@Sun.COM VERIFY(nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_TXG, 28269790SLin.Ling@Sun.COM &label_txg) == 0); 28279790SLin.Ling@Sun.COM 28289790SLin.Ling@Sun.COM /* 28299790SLin.Ling@Sun.COM * Do we have a better boot device? 28309790SLin.Ling@Sun.COM */ 28319790SLin.Ling@Sun.COM if (label_txg > *txg) { 28329790SLin.Ling@Sun.COM *txg = label_txg; 28339790SLin.Ling@Sun.COM *avd = vd; 28347147Staylor } 28359790SLin.Ling@Sun.COM nvlist_free(label); 28367147Staylor } 28377147Staylor } 28387147Staylor 28396423Sgw25295 /* 28406423Sgw25295 * Import a root pool. 28416423Sgw25295 * 28427147Staylor * For x86. devpath_list will consist of devid and/or physpath name of 28437147Staylor * the vdev (e.g. "id1,sd@SSEAGATE..." or "/pci@1f,0/ide@d/disk@0,0:a"). 28447147Staylor * The GRUB "findroot" command will return the vdev we should boot. 28456423Sgw25295 * 28466423Sgw25295 * For Sparc, devpath_list consists the physpath name of the booting device 28476423Sgw25295 * no matter the rootpool is a single device pool or a mirrored pool. 28486423Sgw25295 * e.g. 
 *	"/pci@1f,0/ide@d/disk@0,0:a"
 *
 * Returns EIO if the label cannot be read, ENOENT if the boot vdev is
 * not in the parsed config, and EINVAL if the boot device is stale
 * (a sibling has a newer label) or is a spared-out device.
 */
int
spa_import_rootpool(char *devpath, char *devid)
{
	spa_t *spa;
	vdev_t *rvd, *bvd, *avd = NULL;
	nvlist_t *config, *nvtop;
	uint64_t guid, txg;
	char *pname;
	int error;

	/*
	 * Read the label from the boot device and generate a configuration.
	 */
	config = spa_generate_rootconf(devpath, devid, &guid);
#if defined(_OBP) && defined(_KERNEL)
	if (config == NULL) {
		if (strstr(devpath, "/iscsi/ssd") != NULL) {
			/* iscsi boot: fix up the physpath and retry */
			get_iscsi_bootpath_phy(devpath);
			config = spa_generate_rootconf(devpath, devid, &guid);
		}
	}
#endif
	if (config == NULL) {
		cmn_err(CE_NOTE, "Can not read the pool label from '%s'",
		    devpath);
		return (EIO);
	}

	VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
	    &pname) == 0);
	VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);

	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(pname)) != NULL) {
		/*
		 * Remove the existing root pool from the namespace so that we
		 * can replace it with the correct config we just read in.
		 */
		spa_remove(spa);
	}

	spa = spa_add(pname, config, NULL);
	spa->spa_is_root = B_TRUE;
	spa->spa_load_verbatim = B_TRUE;

	/*
	 * Build up a vdev tree based on the boot device's label config.
	 */
	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvtop) == 0);
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	error = spa_config_parse(spa, &rvd, nvtop, NULL, 0,
	    VDEV_ALLOC_ROOTPOOL);
	spa_config_exit(spa, SCL_ALL, FTAG);
	if (error) {
		mutex_exit(&spa_namespace_lock);
		nvlist_free(config);
		cmn_err(CE_NOTE, "Can not parse the config for pool '%s'",
		    pname);
		return (error);
	}

	/*
	 * Get the boot vdev.
	 */
	if ((bvd = vdev_lookup_by_guid(rvd, guid)) == NULL) {
		cmn_err(CE_NOTE, "Can not find the boot vdev for guid %llu",
		    (u_longlong_t)guid);
		error = ENOENT;
		goto out;
	}

	/*
	 * Determine if there is a better boot device (one whose label
	 * records a newer txg than the device we booted from).
	 */
	avd = bvd;
	spa_alt_rootvdev(rvd, &avd, &txg);
	if (avd != bvd) {
		cmn_err(CE_NOTE, "The boot device is 'degraded'. Please "
		    "try booting from '%s'", avd->vdev_path);
		error = EINVAL;
		goto out;
	}

	/*
	 * If the boot device is part of a spare vdev then ensure that
	 * we're booting off the active spare.
	 */
	if (bvd->vdev_parent->vdev_ops == &vdev_spare_ops &&
	    !bvd->vdev_isspare) {
		cmn_err(CE_NOTE, "The boot device is currently spared. Please "
		    "try booting from '%s'",
		    bvd->vdev_parent->vdev_child[1]->vdev_path);
		error = EINVAL;
		goto out;
	}

	error = 0;
	spa_history_log_version(spa, LOG_POOL_IMPORT);
out:
	/* Free the temporary vdev tree; the label config remains in 'spa'. */
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	vdev_free(rvd);
	spa_config_exit(spa, SCL_ALL, FTAG);
	mutex_exit(&spa_namespace_lock);

	nvlist_free(config);
	return (error);
}

#endif

/*
 * Take a pool and insert it into the namespace as if it had been loaded at
 * boot.
 * The supplied 'config' is trusted as-is (spa_load_verbatim); no vdev
 * validation is performed here.  Returns EEXIST if a pool of the same
 * name is already in the namespace, otherwise 0.
 */
int
spa_import_verbatim(const char *pool, nvlist_t *config, nvlist_t *props)
{
	spa_t *spa;
	zpool_rewind_policy_t policy;
	char *altroot = NULL;

	mutex_enter(&spa_namespace_lock);
	if (spa_lookup(pool) != NULL) {
		mutex_exit(&spa_namespace_lock);
		return (EEXIST);
	}

	/* An altroot property, if given, relocates all mountpoints. */
	(void) nvlist_lookup_string(props,
	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
	spa = spa_add(pool, config, altroot);

	/* Honor any rewind policy carried in the config. */
	zpool_get_rewind_policy(config, &policy);
	spa->spa_load_max_txg = policy.zrp_txg;

	spa->spa_load_verbatim = B_TRUE;

	if (props != NULL)
		spa_configfile_set(spa, props, B_FALSE);

	/* Persist the new pool in the config cache. */
	spa_config_sync(spa, B_FALSE, B_TRUE);

	mutex_exit(&spa_namespace_lock);
	spa_history_log_version(spa, LOG_POOL_IMPORT);

	return (0);
}

/*
 * Import a non-root pool into the system.
 * Unlike spa_import_verbatim(), the pool is fully loaded and validated.
 * Returns EEXIST if the name is taken, or the error from spa_load_best()/
 * spa_prop_set() on failure.
 */
int
spa_import(const char *pool, nvlist_t *config, nvlist_t *props)
{
	spa_t *spa;
	char *altroot = NULL;
	spa_load_state_t state = SPA_LOAD_IMPORT;
	zpool_rewind_policy_t policy;
	int error;
	nvlist_t *nvroot;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;

	/*
	 * If a pool with this name exists, return failure.
	 */
	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(pool)) != NULL) {
		mutex_exit(&spa_namespace_lock);
		return (EEXIST);
	}

	/* A rewind request changes the load state we ask for. */
	zpool_get_rewind_policy(config, &policy);
	if (policy.zrp_request & ZPOOL_DO_REWIND)
		state = SPA_LOAD_RECOVER;

	/*
	 * Create and initialize the spa structure.
	 */
	(void) nvlist_lookup_string(props,
	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
	spa = spa_add(pool, config, altroot);
	spa_activate(spa, spa_mode_global);

	/*
	 * Don't start async tasks until we know everything is healthy.
	 */
	spa_async_suspend(spa);

	/*
	 * Pass off the heavy lifting to spa_load().  Pass TRUE for mosconfig
	 * because the user-supplied config is actually the one to trust when
	 * doing an import.
	 */
	if (state != SPA_LOAD_RECOVER)
		spa->spa_last_ubsync_txg = spa->spa_load_txg = 0;
	error = spa_load_best(spa, state, B_TRUE, policy.zrp_txg,
	    ((policy.zrp_request & ZPOOL_EXTREME_REWIND) != 0));

	/*
	 * Propagate anything learned about failing or best txgs
	 * back to the caller via 'config'.
	 */
	spa_rewind_data_to_nvlist(spa, config);

	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	/*
	 * Toss any existing sparelist, as it doesn't have any validity
	 * anymore, and conflicts with spa_has_spare().
	 */
	if (spa->spa_spares.sav_config) {
		nvlist_free(spa->spa_spares.sav_config);
		spa->spa_spares.sav_config = NULL;
		spa_load_spares(spa);
	}
	if (spa->spa_l2cache.sav_config) {
		nvlist_free(spa->spa_l2cache.sav_config);
		spa->spa_l2cache.sav_config = NULL;
		spa_load_l2cache(spa);
	}

	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) == 0);
	if (error == 0)
		error = spa_validate_aux(spa, nvroot, -1ULL,
		    VDEV_ALLOC_SPARE);
	if (error == 0)
		error = spa_validate_aux(spa, nvroot, -1ULL,
		    VDEV_ALLOC_L2CACHE);
	spa_config_exit(spa, SCL_ALL, FTAG);

	if (props != NULL)
		spa_configfile_set(spa, props, B_FALSE);

	/* On any failure, tear the half-imported pool back down. */
	if (error != 0 || (props && spa_writeable(spa) &&
	    (error = spa_prop_set(spa, props)))) {
		spa_unload(spa);
		spa_deactivate(spa);
		spa_remove(spa);
		mutex_exit(&spa_namespace_lock);
		return (error);
	}

	spa_async_resume(spa);

	/*
	 * Override any spares and level 2 cache devices as specified by
	 * the user, as these may have correct device names/devids, etc.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) == 0) {
		if (spa->spa_spares.sav_config)
			VERIFY(nvlist_remove(spa->spa_spares.sav_config,
			    ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0);
		else
			VERIFY(nvlist_alloc(&spa->spa_spares.sav_config,
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_spares(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_spares.sav_sync = B_TRUE;
	}
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
	    &l2cache, &nl2cache) == 0) {
		if (spa->spa_l2cache.sav_config)
			VERIFY(nvlist_remove(spa->spa_l2cache.sav_config,
			    ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0);
		else
			VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_l2cache(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	/*
	 * Check for any removed devices.
	 */
	if (spa->spa_autoreplace) {
		spa_aux_check_removed(&spa->spa_spares);
		spa_aux_check_removed(&spa->spa_l2cache);
	}

	if (spa_writeable(spa)) {
		/*
		 * Update the config cache to include the newly-imported pool.
		 */
		spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
	}

	/*
	 * It's possible that the pool was expanded while it was exported.
	 * We kick off an async task to handle this for us.
	 */
	spa_async_request(spa, SPA_ASYNC_AUTOEXPAND);

	mutex_exit(&spa_namespace_lock);
	spa_history_log_version(spa, LOG_POOL_IMPORT);

	return (0);
}

/*
 * Attempt to load a pool from the given config without inserting it into
 * the namespace.  Returns the generated config (for display by the caller)
 * on success, or NULL if the config was not even parsable.  The temporary
 * pool is created under TRYIMPORT_NAME and destroyed before returning.
 */
nvlist_t *
spa_tryimport(nvlist_t *tryconfig)
{
	nvlist_t *config = NULL;
	char *poolname;
	spa_t *spa;
	uint64_t state;
	int error;

	if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname))
		return (NULL);

	if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state))
		return (NULL);

	/*
	 * Create and initialize the spa structure.
	 */
	mutex_enter(&spa_namespace_lock);
	spa = spa_add(TRYIMPORT_NAME, tryconfig, NULL);
	spa_activate(spa, FREAD);

	/*
	 * Pass off the heavy lifting to spa_load().
	 * Pass TRUE for mosconfig because the user-supplied config
	 * is actually the one to trust when doing an import.
	 */
	error = spa_load(spa, SPA_LOAD_TRYIMPORT, B_TRUE);

	/*
	 * If 'tryconfig' was at least parsable, return the current config.
	 */
	if (spa->spa_root_vdev != NULL) {
		config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
		VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
		    poolname) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
		    state) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP,
		    spa->spa_uberblock.ub_timestamp) == 0);

		/*
		 * If the bootfs property exists on this pool then we
		 * copy it out so that external consumers can tell which
		 * pools are bootable.
		 */
		if ((!error || error == EEXIST) && spa->spa_bootfs) {
			char *tmpname = kmem_alloc(MAXPATHLEN, KM_SLEEP);

			/*
			 * We have to play games with the name since the
			 * pool was opened as TRYIMPORT_NAME.
			 */
			if (dsl_dsobj_to_dsname(spa_name(spa),
			    spa->spa_bootfs, tmpname) == 0) {
				char *cp;
				char *dsname = kmem_alloc(MAXPATHLEN, KM_SLEEP);

				/* Swap TRYIMPORT_NAME for the real name. */
				cp = strchr(tmpname, '/');
				if (cp == NULL) {
					(void) strlcpy(dsname, tmpname,
					    MAXPATHLEN);
				} else {
					(void) snprintf(dsname, MAXPATHLEN,
					    "%s/%s", poolname, ++cp);
				}
				VERIFY(nvlist_add_string(config,
				    ZPOOL_CONFIG_BOOTFS, dsname) == 0);
				kmem_free(dsname, MAXPATHLEN);
			}
			kmem_free(tmpname, MAXPATHLEN);
		}

		/*
		 * Add the list of hot spares and level 2 cache devices.
		 */
		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
		spa_add_spares(spa, config);
		spa_add_l2cache(spa, config);
		spa_config_exit(spa, SCL_CONFIG, FTAG);
	}

	spa_unload(spa);
	spa_deactivate(spa);
	spa_remove(spa);
	mutex_exit(&spa_namespace_lock);

	return (config);
}

/*
 * Pool export/destroy
 *
 * The act of destroying or exporting a pool is very simple. We make sure there
 * is no more pending I/O and any references to the pool are gone. Then, we
 * update the pool state and sync all the labels to disk, removing the
 * configuration from the cache afterwards. If the 'hardforce' flag is set, then
 * we don't sync the labels or remove the configuration cache.
 */
static int
spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
    boolean_t force, boolean_t hardforce)
{
	spa_t *spa;

	if (oldconfig)
		*oldconfig = NULL;

	/* Export/destroy requires the pool subsystem be writable. */
	if (!(spa_mode_global & FWRITE))
		return (EROFS);

	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(pool)) == NULL) {
		mutex_exit(&spa_namespace_lock);
		return (ENOENT);
	}

	/*
	 * Put a hold on the pool, drop the namespace lock, stop async tasks,
	 * reacquire the namespace lock, and see if we can export.
	 */
	spa_open_ref(spa, FTAG);
	mutex_exit(&spa_namespace_lock);
	spa_async_suspend(spa);
	mutex_enter(&spa_namespace_lock);
	spa_close(spa, FTAG);

	/*
	 * The pool will be in core if it's openable,
	 * in which case we can modify its state.
	 */
	if (spa->spa_state != POOL_STATE_UNINITIALIZED && spa->spa_sync_on) {
		/*
		 * Objsets may be open only because they're dirty, so we
		 * have to force it to sync before checking spa_refcnt.
		 */
		txg_wait_synced(spa->spa_dsl_pool, 0);

		/*
		 * A pool cannot be exported or destroyed if there are active
		 * references.  If we are resetting a pool, allow references by
		 * fault injection handlers.
		 */
		if (!spa_refcount_zero(spa) ||
		    (spa->spa_inject_ref != 0 &&
		    new_state != POOL_STATE_UNINITIALIZED)) {
			spa_async_resume(spa);
			mutex_exit(&spa_namespace_lock);
			return (EBUSY);
		}

		/*
		 * A pool cannot be exported if it has an active shared spare.
		 * This is to prevent other pools stealing the active spare
		 * from an exported pool. At the user's own will, such a pool
		 * can be forcibly exported.
		 */
		if (!force && new_state == POOL_STATE_EXPORTED &&
		    spa_has_active_shared_spare(spa)) {
			spa_async_resume(spa);
			mutex_exit(&spa_namespace_lock);
			return (EXDEV);
		}

		/*
		 * We want this to be reflected on every label,
		 * so mark them all dirty.  spa_unload() will do the
		 * final sync that pushes these changes out.
		 */
		if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) {
			spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
			spa->spa_state = new_state;
			spa->spa_final_txg = spa_last_synced_txg(spa) + 1;
			vdev_config_dirty(spa->spa_root_vdev);
			spa_config_exit(spa, SCL_ALL, FTAG);
		}
	}

	spa_event_notify(spa, NULL, ESC_ZFS_POOL_DESTROY);

	if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
		spa_unload(spa);
		spa_deactivate(spa);
	}

	/* Hand the caller a copy of the final config if requested. */
	if (oldconfig && spa->spa_config)
		VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0);

	if (new_state != POOL_STATE_UNINITIALIZED) {
		if (!hardforce)
			spa_config_sync(spa, B_TRUE, B_TRUE);
		spa_remove(spa);
	}
	mutex_exit(&spa_namespace_lock);

	return (0);
}

/*
 * Destroy a storage pool.
 */
int
spa_destroy(char *pool)
{
	return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL,
	    B_FALSE, B_FALSE));
}

/*
 * Export a storage pool.
 */
int
spa_export(char *pool, nvlist_t **oldconfig, boolean_t force,
    boolean_t hardforce)
{
	return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig,
	    force, hardforce));
}

/*
 * Similar to spa_export(), this unloads the spa_t without actually removing it
 * from the namespace in any way.
 */
int
spa_reset(char *pool)
{
	return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL,
	    B_FALSE, B_FALSE));
}

/*
 * ==========================================================================
 * Device manipulation
 * ==========================================================================
 */

/*
 * Add a device to a storage pool.
 * 'nvroot' describes the new top-level vdevs, spares, and/or l2cache
 * devices to add.  Runs under the spa_vdev_enter()/spa_vdev_exit()
 * transaction protocol; returns 0 on success or an errno.
 */
int
spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
{
	uint64_t txg, id;
	int error;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *vd, *tvd;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;

	txg = spa_vdev_enter(spa);

	if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0,
	    VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, NULL, txg, error));

	spa->spa_pending_vdev = vd;	/* spa_vdev_exit() will clear this */

	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
	    &nspares) != 0)
		nspares = 0;

	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache,
	    &nl2cache) != 0)
		nl2cache = 0;

	/* Nothing to add at all is an error. */
	if (vd->vdev_children == 0 && nspares == 0 && nl2cache == 0)
		return (spa_vdev_exit(spa, vd, txg, EINVAL));

	if (vd->vdev_children != 0 &&
	    (error = vdev_create(vd, txg, B_FALSE)) != 0)
		return (spa_vdev_exit(spa, vd, txg, error));

	/*
	 * We must validate the spares and l2cache devices after checking the
	 * children.  Otherwise, vdev_inuse() will blindly overwrite the spare.
	 */
	if ((error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, vd, txg, error));

	/*
	 * Transfer each new top-level vdev from vd to rvd.
	 */
	for (int c = 0; c < vd->vdev_children; c++) {

		/*
		 * Set the vdev id to the first hole, if one exists.
		 */
		for (id = 0; id < rvd->vdev_children; id++) {
			if (rvd->vdev_child[id]->vdev_ishole) {
				vdev_free(rvd->vdev_child[id]);
				break;
			}
		}
		tvd = vd->vdev_child[c];
		vdev_remove_child(vd, tvd);
		tvd->vdev_id = id;
		vdev_add_child(rvd, tvd);
		vdev_config_dirty(tvd);
	}

	if (nspares != 0) {
		spa_set_aux_vdevs(&spa->spa_spares, spares, nspares,
		    ZPOOL_CONFIG_SPARES);
		spa_load_spares(spa);
		spa->spa_spares.sav_sync = B_TRUE;
	}

	if (nl2cache != 0) {
		spa_set_aux_vdevs(&spa->spa_l2cache, l2cache, nl2cache,
		    ZPOOL_CONFIG_L2CACHE);
		spa_load_l2cache(spa);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	/*
	 * We have to be careful when adding new vdevs to an existing pool.
	 * If other threads start allocating from these vdevs before we
	 * sync the config cache, and we lose power, then upon reboot we may
	 * fail to open the pool because there are DVAs that the config cache
	 * can't translate.  Therefore, we first add the vdevs without
	 * initializing metaslabs; sync the config cache (via spa_vdev_exit());
	 * and then let spa_config_update() initialize the new metaslabs.
	 *
	 * spa_load() checks for added-but-not-initialized vdevs, so that
	 * if we lose power at any point in this sequence, the remaining
	 * steps will be completed the next time we load the pool.
	 */
	(void) spa_vdev_exit(spa, vd, txg, 0);

	mutex_enter(&spa_namespace_lock);
	spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
	mutex_exit(&spa_namespace_lock);

	return (0);
}

/*
 * Attach a device to a mirror.  The arguments are the path to any device
 * in the mirror, and the nvroot for the new device.  If the path specifies
 * a device that is not mirrored, we automatically insert the mirror vdev.
 *
 * If 'replacing' is specified, the new device is intended to replace the
 * existing device; in this case the two devices are made into their own
 * mirror using the 'replacing' vdev, which is functionally identical to
 * the mirror vdev (it actually reuses all the same ops) but has a few
 * extra rules: you can't attach to it after it's been created, and upon
 * completion of resilvering, the first disk (the one being replaced)
 * is automatically detached.
3507789Sahrens */ 3508789Sahrens int 35091544Seschrock spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) 3510789Sahrens { 3511789Sahrens uint64_t txg, open_txg; 3512789Sahrens vdev_t *rvd = spa->spa_root_vdev; 3513789Sahrens vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd; 35142082Seschrock vdev_ops_t *pvops; 35157313SEric.Kustarz@Sun.COM char *oldvdpath, *newvdpath; 35167313SEric.Kustarz@Sun.COM int newvd_isspare; 35177313SEric.Kustarz@Sun.COM int error; 3518789Sahrens 3519789Sahrens txg = spa_vdev_enter(spa); 3520789Sahrens 35216643Seschrock oldvd = spa_lookup_by_guid(spa, guid, B_FALSE); 3522789Sahrens 3523789Sahrens if (oldvd == NULL) 3524789Sahrens return (spa_vdev_exit(spa, NULL, txg, ENODEV)); 3525789Sahrens 35261585Sbonwick if (!oldvd->vdev_ops->vdev_op_leaf) 35271585Sbonwick return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 35281585Sbonwick 3529789Sahrens pvd = oldvd->vdev_parent; 3530789Sahrens 35312082Seschrock if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0, 35324451Seschrock VDEV_ALLOC_ADD)) != 0) 35334451Seschrock return (spa_vdev_exit(spa, NULL, txg, EINVAL)); 35344451Seschrock 35354451Seschrock if (newrootvd->vdev_children != 1) 3536789Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); 3537789Sahrens 3538789Sahrens newvd = newrootvd->vdev_child[0]; 3539789Sahrens 3540789Sahrens if (!newvd->vdev_ops->vdev_op_leaf) 3541789Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); 3542789Sahrens 35432082Seschrock if ((error = vdev_create(newrootvd, txg, replacing)) != 0) 3544789Sahrens return (spa_vdev_exit(spa, newrootvd, txg, error)); 3545789Sahrens 35464527Sperrin /* 35474527Sperrin * Spares can't replace logs 35484527Sperrin */ 35497326SEric.Schrock@Sun.COM if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare) 35504527Sperrin return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 35514527Sperrin 35522082Seschrock if (!replacing) { 35532082Seschrock /* 35542082Seschrock * For attach, the only 
allowable parent is a mirror or the root 35552082Seschrock * vdev. 35562082Seschrock */ 35572082Seschrock if (pvd->vdev_ops != &vdev_mirror_ops && 35582082Seschrock pvd->vdev_ops != &vdev_root_ops) 35592082Seschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 35602082Seschrock 35612082Seschrock pvops = &vdev_mirror_ops; 35622082Seschrock } else { 35632082Seschrock /* 35642082Seschrock * Active hot spares can only be replaced by inactive hot 35652082Seschrock * spares. 35662082Seschrock */ 35672082Seschrock if (pvd->vdev_ops == &vdev_spare_ops && 35682082Seschrock pvd->vdev_child[1] == oldvd && 35692082Seschrock !spa_has_spare(spa, newvd->vdev_guid)) 35702082Seschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 35712082Seschrock 35722082Seschrock /* 35732082Seschrock * If the source is a hot spare, and the parent isn't already a 35742082Seschrock * spare, then we want to create a new hot spare. Otherwise, we 35753377Seschrock * want to create a replacing vdev. The user is not allowed to 35763377Seschrock * attach to a spared vdev child unless the 'isspare' state is 35773377Seschrock * the same (spare replaces spare, non-spare replaces 35783377Seschrock * non-spare). 35792082Seschrock */ 35802082Seschrock if (pvd->vdev_ops == &vdev_replacing_ops) 35812082Seschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 35823377Seschrock else if (pvd->vdev_ops == &vdev_spare_ops && 35833377Seschrock newvd->vdev_isspare != oldvd->vdev_isspare) 35843377Seschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 35852082Seschrock else if (pvd->vdev_ops != &vdev_spare_ops && 35862082Seschrock newvd->vdev_isspare) 35872082Seschrock pvops = &vdev_spare_ops; 35882082Seschrock else 35892082Seschrock pvops = &vdev_replacing_ops; 35902082Seschrock } 35912082Seschrock 35921175Slling /* 35939816SGeorge.Wilson@Sun.COM * Make sure the new device is big enough. 
35941175Slling */ 35959816SGeorge.Wilson@Sun.COM if (newvd->vdev_asize < vdev_get_min_asize(oldvd)) 3596789Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW)); 3597789Sahrens 35981732Sbonwick /* 35991732Sbonwick * The new device cannot have a higher alignment requirement 36001732Sbonwick * than the top-level vdev. 36011732Sbonwick */ 36021732Sbonwick if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift) 3603789Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EDOM)); 3604789Sahrens 3605789Sahrens /* 3606789Sahrens * If this is an in-place replacement, update oldvd's path and devid 3607789Sahrens * to make it distinguishable from newvd, and unopenable from now on. 3608789Sahrens */ 3609789Sahrens if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) { 3610789Sahrens spa_strfree(oldvd->vdev_path); 3611789Sahrens oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5, 3612789Sahrens KM_SLEEP); 3613789Sahrens (void) sprintf(oldvd->vdev_path, "%s/%s", 3614789Sahrens newvd->vdev_path, "old"); 3615789Sahrens if (oldvd->vdev_devid != NULL) { 3616789Sahrens spa_strfree(oldvd->vdev_devid); 3617789Sahrens oldvd->vdev_devid = NULL; 3618789Sahrens } 3619789Sahrens } 3620789Sahrens 3621789Sahrens /* 36222082Seschrock * If the parent is not a mirror, or if we're replacing, insert the new 36232082Seschrock * mirror/replacing/spare vdev above oldvd. 3624789Sahrens */ 3625789Sahrens if (pvd->vdev_ops != pvops) 3626789Sahrens pvd = vdev_add_parent(oldvd, pvops); 3627789Sahrens 3628789Sahrens ASSERT(pvd->vdev_top->vdev_parent == rvd); 3629789Sahrens ASSERT(pvd->vdev_ops == pvops); 3630789Sahrens ASSERT(oldvd->vdev_parent == pvd); 3631789Sahrens 3632789Sahrens /* 3633789Sahrens * Extract the new device from its root and add it to pvd. 
3634789Sahrens */ 3635789Sahrens vdev_remove_child(newrootvd, newvd); 3636789Sahrens newvd->vdev_id = pvd->vdev_children; 363710594SGeorge.Wilson@Sun.COM newvd->vdev_crtxg = oldvd->vdev_crtxg; 3638789Sahrens vdev_add_child(pvd, newvd); 3639789Sahrens 3640789Sahrens tvd = newvd->vdev_top; 3641789Sahrens ASSERT(pvd->vdev_top == tvd); 3642789Sahrens ASSERT(tvd->vdev_parent == rvd); 3643789Sahrens 3644789Sahrens vdev_config_dirty(tvd); 3645789Sahrens 3646789Sahrens /* 3647789Sahrens * Set newvd's DTL to [TXG_INITIAL, open_txg]. It will propagate 3648789Sahrens * upward when spa_vdev_exit() calls vdev_dtl_reassess(). 3649789Sahrens */ 3650789Sahrens open_txg = txg + TXG_CONCURRENT_STATES - 1; 3651789Sahrens 36528241SJeff.Bonwick@Sun.COM vdev_dtl_dirty(newvd, DTL_MISSING, 36538241SJeff.Bonwick@Sun.COM TXG_INITIAL, open_txg - TXG_INITIAL + 1); 3654789Sahrens 36559425SEric.Schrock@Sun.COM if (newvd->vdev_isspare) { 36563377Seschrock spa_spare_activate(newvd); 36579425SEric.Schrock@Sun.COM spa_event_notify(spa, newvd, ESC_ZFS_VDEV_SPARE); 36589425SEric.Schrock@Sun.COM } 36599425SEric.Schrock@Sun.COM 36607754SJeff.Bonwick@Sun.COM oldvdpath = spa_strdup(oldvd->vdev_path); 36617754SJeff.Bonwick@Sun.COM newvdpath = spa_strdup(newvd->vdev_path); 36627313SEric.Kustarz@Sun.COM newvd_isspare = newvd->vdev_isspare; 36631544Seschrock 3664789Sahrens /* 3665789Sahrens * Mark newvd's DTL dirty in this txg. 3666789Sahrens */ 36671732Sbonwick vdev_dirty(tvd, VDD_DTL, newvd, txg); 3668789Sahrens 3669789Sahrens (void) spa_vdev_exit(spa, newrootvd, open_txg, 0); 3670789Sahrens 36719946SMark.Musante@Sun.COM spa_history_internal_log(LOG_POOL_VDEV_ATTACH, spa, NULL, 36729946SMark.Musante@Sun.COM CRED(), "%s vdev=%s %s vdev=%s", 36739946SMark.Musante@Sun.COM replacing && newvd_isspare ? "spare in" : 36749946SMark.Musante@Sun.COM replacing ? "replace" : "attach", newvdpath, 36759946SMark.Musante@Sun.COM replacing ? 
"for" : "to", oldvdpath); 36767313SEric.Kustarz@Sun.COM 36777313SEric.Kustarz@Sun.COM spa_strfree(oldvdpath); 36787313SEric.Kustarz@Sun.COM spa_strfree(newvdpath); 36797313SEric.Kustarz@Sun.COM 3680789Sahrens /* 36817046Sahrens * Kick off a resilver to update newvd. 3682789Sahrens */ 36837046Sahrens VERIFY3U(spa_scrub(spa, POOL_SCRUB_RESILVER), ==, 0); 3684789Sahrens 3685789Sahrens return (0); 3686789Sahrens } 3687789Sahrens 3688789Sahrens /* 3689789Sahrens * Detach a device from a mirror or replacing vdev. 3690789Sahrens * If 'replace_done' is specified, only detach if the parent 3691789Sahrens * is a replacing vdev. 3692789Sahrens */ 3693789Sahrens int 36948241SJeff.Bonwick@Sun.COM spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) 3695789Sahrens { 3696789Sahrens uint64_t txg; 36978241SJeff.Bonwick@Sun.COM int error; 3698789Sahrens vdev_t *rvd = spa->spa_root_vdev; 3699789Sahrens vdev_t *vd, *pvd, *cvd, *tvd; 37002082Seschrock boolean_t unspare = B_FALSE; 37012082Seschrock uint64_t unspare_guid; 37026673Seschrock size_t len; 3703789Sahrens 3704789Sahrens txg = spa_vdev_enter(spa); 3705789Sahrens 37066643Seschrock vd = spa_lookup_by_guid(spa, guid, B_FALSE); 3707789Sahrens 3708789Sahrens if (vd == NULL) 3709789Sahrens return (spa_vdev_exit(spa, NULL, txg, ENODEV)); 3710789Sahrens 37111585Sbonwick if (!vd->vdev_ops->vdev_op_leaf) 37121585Sbonwick return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 37131585Sbonwick 3714789Sahrens pvd = vd->vdev_parent; 3715789Sahrens 3716789Sahrens /* 37178241SJeff.Bonwick@Sun.COM * If the parent/child relationship is not as expected, don't do it. 37188241SJeff.Bonwick@Sun.COM * Consider M(A,R(B,C)) -- that is, a mirror of A with a replacing 37198241SJeff.Bonwick@Sun.COM * vdev that's replacing B with C. The user's intent in replacing 37208241SJeff.Bonwick@Sun.COM * is to go from M(A,B) to M(A,C). 
If the user decides to cancel 37218241SJeff.Bonwick@Sun.COM * the replace by detaching C, the expected behavior is to end up 37228241SJeff.Bonwick@Sun.COM * M(A,B). But suppose that right after deciding to detach C, 37238241SJeff.Bonwick@Sun.COM * the replacement of B completes. We would have M(A,C), and then 37248241SJeff.Bonwick@Sun.COM * ask to detach C, which would leave us with just A -- not what 37258241SJeff.Bonwick@Sun.COM * the user wanted. To prevent this, we make sure that the 37268241SJeff.Bonwick@Sun.COM * parent/child relationship hasn't changed -- in this example, 37278241SJeff.Bonwick@Sun.COM * that C's parent is still the replacing vdev R. 37288241SJeff.Bonwick@Sun.COM */ 37298241SJeff.Bonwick@Sun.COM if (pvd->vdev_guid != pguid && pguid != 0) 37308241SJeff.Bonwick@Sun.COM return (spa_vdev_exit(spa, NULL, txg, EBUSY)); 37318241SJeff.Bonwick@Sun.COM 37328241SJeff.Bonwick@Sun.COM /* 3733789Sahrens * If replace_done is specified, only remove this device if it's 37342082Seschrock * the first child of a replacing vdev. For the 'spare' vdev, either 37352082Seschrock * disk can be removed. 3736789Sahrens */ 37372082Seschrock if (replace_done) { 37382082Seschrock if (pvd->vdev_ops == &vdev_replacing_ops) { 37392082Seschrock if (vd->vdev_id != 0) 37402082Seschrock return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 37412082Seschrock } else if (pvd->vdev_ops != &vdev_spare_ops) { 37422082Seschrock return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 37432082Seschrock } 37442082Seschrock } 37452082Seschrock 37462082Seschrock ASSERT(pvd->vdev_ops != &vdev_spare_ops || 37474577Sahrens spa_version(spa) >= SPA_VERSION_SPARES); 3748789Sahrens 3749789Sahrens /* 37502082Seschrock * Only mirror, replacing, and spare vdevs support detach. 
3751789Sahrens */ 3752789Sahrens if (pvd->vdev_ops != &vdev_replacing_ops && 37532082Seschrock pvd->vdev_ops != &vdev_mirror_ops && 37542082Seschrock pvd->vdev_ops != &vdev_spare_ops) 3755789Sahrens return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 3756789Sahrens 3757789Sahrens /* 37588241SJeff.Bonwick@Sun.COM * If this device has the only valid copy of some data, 37598241SJeff.Bonwick@Sun.COM * we cannot safely detach it. 3760789Sahrens */ 37618241SJeff.Bonwick@Sun.COM if (vdev_dtl_required(vd)) 3762789Sahrens return (spa_vdev_exit(spa, NULL, txg, EBUSY)); 3763789Sahrens 37648241SJeff.Bonwick@Sun.COM ASSERT(pvd->vdev_children >= 2); 37658241SJeff.Bonwick@Sun.COM 3766789Sahrens /* 37676673Seschrock * If we are detaching the second disk from a replacing vdev, then 37686673Seschrock * check to see if we changed the original vdev's path to have "/old" 37696673Seschrock * at the end in spa_vdev_attach(). If so, undo that change now. 37706673Seschrock */ 37716673Seschrock if (pvd->vdev_ops == &vdev_replacing_ops && vd->vdev_id == 1 && 37726673Seschrock pvd->vdev_child[0]->vdev_path != NULL && 37736673Seschrock pvd->vdev_child[1]->vdev_path != NULL) { 37746673Seschrock ASSERT(pvd->vdev_child[1] == vd); 37756673Seschrock cvd = pvd->vdev_child[0]; 37766673Seschrock len = strlen(vd->vdev_path); 37776673Seschrock if (strncmp(cvd->vdev_path, vd->vdev_path, len) == 0 && 37786673Seschrock strcmp(cvd->vdev_path + len, "/old") == 0) { 37796673Seschrock spa_strfree(cvd->vdev_path); 37806673Seschrock cvd->vdev_path = spa_strdup(vd->vdev_path); 37816673Seschrock } 37826673Seschrock } 37836673Seschrock 37846673Seschrock /* 37852082Seschrock * If we are detaching the original disk from a spare, then it implies 37862082Seschrock * that the spare should become a real disk, and be removed from the 37872082Seschrock * active spare list for the pool. 
37882082Seschrock */ 37892082Seschrock if (pvd->vdev_ops == &vdev_spare_ops && 37908241SJeff.Bonwick@Sun.COM vd->vdev_id == 0 && pvd->vdev_child[1]->vdev_isspare) 37912082Seschrock unspare = B_TRUE; 37922082Seschrock 37932082Seschrock /* 3794789Sahrens * Erase the disk labels so the disk can be used for other things. 3795789Sahrens * This must be done after all other error cases are handled, 3796789Sahrens * but before we disembowel vd (so we can still do I/O to it). 3797789Sahrens * But if we can't do it, don't treat the error as fatal -- 3798789Sahrens * it may be that the unwritability of the disk is the reason 3799789Sahrens * it's being detached! 3800789Sahrens */ 38013377Seschrock error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); 3802789Sahrens 3803789Sahrens /* 3804789Sahrens * Remove vd from its parent and compact the parent's children. 3805789Sahrens */ 3806789Sahrens vdev_remove_child(pvd, vd); 3807789Sahrens vdev_compact_children(pvd); 3808789Sahrens 3809789Sahrens /* 3810789Sahrens * Remember one of the remaining children so we can get tvd below. 3811789Sahrens */ 3812789Sahrens cvd = pvd->vdev_child[0]; 3813789Sahrens 3814789Sahrens /* 38152082Seschrock * If we need to remove the remaining child from the list of hot spares, 38168241SJeff.Bonwick@Sun.COM * do it now, marking the vdev as no longer a spare in the process. 38178241SJeff.Bonwick@Sun.COM * We must do this before vdev_remove_parent(), because that can 38188241SJeff.Bonwick@Sun.COM * change the GUID if it creates a new toplevel GUID. For a similar 38198241SJeff.Bonwick@Sun.COM * reason, we must remove the spare now, in the same txg as the detach; 38208241SJeff.Bonwick@Sun.COM * otherwise someone could attach a new sibling, change the GUID, and 38218241SJeff.Bonwick@Sun.COM * the subsequent attempt to spa_vdev_remove(unspare_guid) would fail. 
38222082Seschrock */ 38232082Seschrock if (unspare) { 38242082Seschrock ASSERT(cvd->vdev_isspare); 38253377Seschrock spa_spare_remove(cvd); 38262082Seschrock unspare_guid = cvd->vdev_guid; 38278241SJeff.Bonwick@Sun.COM (void) spa_vdev_remove(spa, unspare_guid, B_TRUE); 38282082Seschrock } 38292082Seschrock 38302082Seschrock /* 3831789Sahrens * If the parent mirror/replacing vdev only has one child, 3832789Sahrens * the parent is no longer needed. Remove it from the tree. 3833789Sahrens */ 3834789Sahrens if (pvd->vdev_children == 1) 3835789Sahrens vdev_remove_parent(cvd); 3836789Sahrens 3837789Sahrens /* 3838789Sahrens * We don't set tvd until now because the parent we just removed 3839789Sahrens * may have been the previous top-level vdev. 3840789Sahrens */ 3841789Sahrens tvd = cvd->vdev_top; 3842789Sahrens ASSERT(tvd->vdev_parent == rvd); 3843789Sahrens 3844789Sahrens /* 38453377Seschrock * Reevaluate the parent vdev state. 3846789Sahrens */ 38474451Seschrock vdev_propagate_state(cvd); 3848789Sahrens 3849789Sahrens /* 38509816SGeorge.Wilson@Sun.COM * If the 'autoexpand' property is set on the pool then automatically 38519816SGeorge.Wilson@Sun.COM * try to expand the size of the pool. For example if the device we 38529816SGeorge.Wilson@Sun.COM * just detached was smaller than the others, it may be possible to 38539816SGeorge.Wilson@Sun.COM * add metaslabs (i.e. grow the pool). We need to reopen the vdev 38549816SGeorge.Wilson@Sun.COM * first so that we can obtain the updated sizes of the leaf vdevs. 3855789Sahrens */ 38569816SGeorge.Wilson@Sun.COM if (spa->spa_autoexpand) { 38579816SGeorge.Wilson@Sun.COM vdev_reopen(tvd); 38589816SGeorge.Wilson@Sun.COM vdev_expand(tvd, txg); 38599816SGeorge.Wilson@Sun.COM } 3860789Sahrens 3861789Sahrens vdev_config_dirty(tvd); 3862789Sahrens 3863789Sahrens /* 38643377Seschrock * Mark vd's DTL as dirty in this txg. vdev_dtl_sync() will see that 38653377Seschrock * vd->vdev_detached is set and free vd's DTL object in syncing context. 
38663377Seschrock * But first make sure we're not on any *other* txg's DTL list, to 38673377Seschrock * prevent vd from being accessed after it's freed. 3868789Sahrens */ 38698241SJeff.Bonwick@Sun.COM for (int t = 0; t < TXG_SIZE; t++) 3870789Sahrens (void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t); 38711732Sbonwick vd->vdev_detached = B_TRUE; 38721732Sbonwick vdev_dirty(tvd, VDD_DTL, vd, txg); 3873789Sahrens 38744451Seschrock spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE); 38754451Seschrock 38762082Seschrock error = spa_vdev_exit(spa, vd, txg, 0); 38772082Seschrock 38782082Seschrock /* 38793377Seschrock * If this was the removal of the original device in a hot spare vdev, 38803377Seschrock * then we want to go through and remove the device from the hot spare 38813377Seschrock * list of every other pool. 38822082Seschrock */ 38832082Seschrock if (unspare) { 38848241SJeff.Bonwick@Sun.COM spa_t *myspa = spa; 38852082Seschrock spa = NULL; 38862082Seschrock mutex_enter(&spa_namespace_lock); 38872082Seschrock while ((spa = spa_next(spa)) != NULL) { 38882082Seschrock if (spa->spa_state != POOL_STATE_ACTIVE) 38892082Seschrock continue; 38908241SJeff.Bonwick@Sun.COM if (spa == myspa) 38918241SJeff.Bonwick@Sun.COM continue; 38927793SJeff.Bonwick@Sun.COM spa_open_ref(spa, FTAG); 38937793SJeff.Bonwick@Sun.COM mutex_exit(&spa_namespace_lock); 38942082Seschrock (void) spa_vdev_remove(spa, unspare_guid, B_TRUE); 38957793SJeff.Bonwick@Sun.COM mutex_enter(&spa_namespace_lock); 38967793SJeff.Bonwick@Sun.COM spa_close(spa, FTAG); 38972082Seschrock } 38982082Seschrock mutex_exit(&spa_namespace_lock); 38992082Seschrock } 39002082Seschrock 39012082Seschrock return (error); 39022082Seschrock } 39032082Seschrock 39047754SJeff.Bonwick@Sun.COM static nvlist_t * 39057754SJeff.Bonwick@Sun.COM spa_nvlist_lookup_by_guid(nvlist_t **nvpp, int count, uint64_t target_guid) 39062082Seschrock { 39077754SJeff.Bonwick@Sun.COM for (int i = 0; i < count; i++) { 39087754SJeff.Bonwick@Sun.COM 
uint64_t guid; 39097754SJeff.Bonwick@Sun.COM 39107754SJeff.Bonwick@Sun.COM VERIFY(nvlist_lookup_uint64(nvpp[i], ZPOOL_CONFIG_GUID, 39117754SJeff.Bonwick@Sun.COM &guid) == 0); 39127754SJeff.Bonwick@Sun.COM 39137754SJeff.Bonwick@Sun.COM if (guid == target_guid) 39147754SJeff.Bonwick@Sun.COM return (nvpp[i]); 39152082Seschrock } 39162082Seschrock 39177754SJeff.Bonwick@Sun.COM return (NULL); 39185450Sbrendan } 39195450Sbrendan 39207754SJeff.Bonwick@Sun.COM static void 39217754SJeff.Bonwick@Sun.COM spa_vdev_remove_aux(nvlist_t *config, char *name, nvlist_t **dev, int count, 39227754SJeff.Bonwick@Sun.COM nvlist_t *dev_to_remove) 39235450Sbrendan { 39247754SJeff.Bonwick@Sun.COM nvlist_t **newdev = NULL; 39257754SJeff.Bonwick@Sun.COM 39267754SJeff.Bonwick@Sun.COM if (count > 1) 39277754SJeff.Bonwick@Sun.COM newdev = kmem_alloc((count - 1) * sizeof (void *), KM_SLEEP); 39287754SJeff.Bonwick@Sun.COM 39297754SJeff.Bonwick@Sun.COM for (int i = 0, j = 0; i < count; i++) { 39307754SJeff.Bonwick@Sun.COM if (dev[i] == dev_to_remove) 39317754SJeff.Bonwick@Sun.COM continue; 39327754SJeff.Bonwick@Sun.COM VERIFY(nvlist_dup(dev[i], &newdev[j++], KM_SLEEP) == 0); 39335450Sbrendan } 39345450Sbrendan 39357754SJeff.Bonwick@Sun.COM VERIFY(nvlist_remove(config, name, DATA_TYPE_NVLIST_ARRAY) == 0); 39367754SJeff.Bonwick@Sun.COM VERIFY(nvlist_add_nvlist_array(config, name, newdev, count - 1) == 0); 39377754SJeff.Bonwick@Sun.COM 39387754SJeff.Bonwick@Sun.COM for (int i = 0; i < count - 1; i++) 39397754SJeff.Bonwick@Sun.COM nvlist_free(newdev[i]); 39407754SJeff.Bonwick@Sun.COM 39417754SJeff.Bonwick@Sun.COM if (count > 1) 39427754SJeff.Bonwick@Sun.COM kmem_free(newdev, (count - 1) * sizeof (void *)); 39435450Sbrendan } 39445450Sbrendan 39455450Sbrendan /* 394610594SGeorge.Wilson@Sun.COM * Removing a device from the vdev namespace requires several steps 394710594SGeorge.Wilson@Sun.COM * and can take a significant amount of time. 
As a result we use 394810594SGeorge.Wilson@Sun.COM * the spa_vdev_config_[enter/exit] functions which allow us to 394910594SGeorge.Wilson@Sun.COM * grab and release the spa_config_lock while still holding the namespace 395010594SGeorge.Wilson@Sun.COM * lock. During each step the configuration is synced out. 395110594SGeorge.Wilson@Sun.COM */ 395210594SGeorge.Wilson@Sun.COM 395310594SGeorge.Wilson@Sun.COM /* 395410594SGeorge.Wilson@Sun.COM * Evacuate the device. 395510594SGeorge.Wilson@Sun.COM */ 395610594SGeorge.Wilson@Sun.COM int 395710594SGeorge.Wilson@Sun.COM spa_vdev_remove_evacuate(spa_t *spa, vdev_t *vd) 395810594SGeorge.Wilson@Sun.COM { 395910974SJeff.Bonwick@Sun.COM int error = 0; 396010594SGeorge.Wilson@Sun.COM uint64_t txg; 396110594SGeorge.Wilson@Sun.COM 396210594SGeorge.Wilson@Sun.COM ASSERT(MUTEX_HELD(&spa_namespace_lock)); 396310594SGeorge.Wilson@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); 396410922SJeff.Bonwick@Sun.COM ASSERT(vd == vd->vdev_top); 396510594SGeorge.Wilson@Sun.COM 396610594SGeorge.Wilson@Sun.COM /* 396710594SGeorge.Wilson@Sun.COM * Evacuate the device. We don't hold the config lock as writer 396810594SGeorge.Wilson@Sun.COM * since we need to do I/O but we do keep the 396910594SGeorge.Wilson@Sun.COM * spa_namespace_lock held. Once this completes the device 397010594SGeorge.Wilson@Sun.COM * should no longer have any blocks allocated on it. 
397110594SGeorge.Wilson@Sun.COM */ 397210594SGeorge.Wilson@Sun.COM if (vd->vdev_islog) { 397310974SJeff.Bonwick@Sun.COM error = dmu_objset_find(spa_name(spa), zil_vdev_offline, 397410974SJeff.Bonwick@Sun.COM NULL, DS_FIND_CHILDREN); 397510974SJeff.Bonwick@Sun.COM } else { 397610974SJeff.Bonwick@Sun.COM error = ENOTSUP; /* until we have bp rewrite */ 397710594SGeorge.Wilson@Sun.COM } 397810594SGeorge.Wilson@Sun.COM 397910974SJeff.Bonwick@Sun.COM txg_wait_synced(spa_get_dsl(spa), 0); 398010974SJeff.Bonwick@Sun.COM 398110974SJeff.Bonwick@Sun.COM if (error) 398210974SJeff.Bonwick@Sun.COM return (error); 398310974SJeff.Bonwick@Sun.COM 398410594SGeorge.Wilson@Sun.COM /* 398510974SJeff.Bonwick@Sun.COM * The evacuation succeeded. Remove any remaining MOS metadata 398610974SJeff.Bonwick@Sun.COM * associated with this vdev, and wait for these changes to sync. 398710594SGeorge.Wilson@Sun.COM */ 398810594SGeorge.Wilson@Sun.COM txg = spa_vdev_config_enter(spa); 398910594SGeorge.Wilson@Sun.COM vd->vdev_removing = B_TRUE; 399010594SGeorge.Wilson@Sun.COM vdev_dirty(vd, 0, NULL, txg); 399110594SGeorge.Wilson@Sun.COM vdev_config_dirty(vd); 399210594SGeorge.Wilson@Sun.COM spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); 399310594SGeorge.Wilson@Sun.COM 399410594SGeorge.Wilson@Sun.COM return (0); 399510594SGeorge.Wilson@Sun.COM } 399610594SGeorge.Wilson@Sun.COM 399710594SGeorge.Wilson@Sun.COM /* 399810594SGeorge.Wilson@Sun.COM * Complete the removal by cleaning up the namespace. 
399910594SGeorge.Wilson@Sun.COM */ 400010594SGeorge.Wilson@Sun.COM void 400110974SJeff.Bonwick@Sun.COM spa_vdev_remove_from_namespace(spa_t *spa, vdev_t *vd) 400210594SGeorge.Wilson@Sun.COM { 400310594SGeorge.Wilson@Sun.COM vdev_t *rvd = spa->spa_root_vdev; 400410594SGeorge.Wilson@Sun.COM uint64_t id = vd->vdev_id; 400510594SGeorge.Wilson@Sun.COM boolean_t last_vdev = (id == (rvd->vdev_children - 1)); 400610594SGeorge.Wilson@Sun.COM 400710594SGeorge.Wilson@Sun.COM ASSERT(MUTEX_HELD(&spa_namespace_lock)); 400810594SGeorge.Wilson@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 400910922SJeff.Bonwick@Sun.COM ASSERT(vd == vd->vdev_top); 401010594SGeorge.Wilson@Sun.COM 401110594SGeorge.Wilson@Sun.COM (void) vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); 401210922SJeff.Bonwick@Sun.COM 401310922SJeff.Bonwick@Sun.COM if (list_link_active(&vd->vdev_state_dirty_node)) 401410922SJeff.Bonwick@Sun.COM vdev_state_clean(vd); 401510922SJeff.Bonwick@Sun.COM if (list_link_active(&vd->vdev_config_dirty_node)) 401610922SJeff.Bonwick@Sun.COM vdev_config_clean(vd); 401710922SJeff.Bonwick@Sun.COM 401810594SGeorge.Wilson@Sun.COM vdev_free(vd); 401910594SGeorge.Wilson@Sun.COM 402010594SGeorge.Wilson@Sun.COM if (last_vdev) { 402110594SGeorge.Wilson@Sun.COM vdev_compact_children(rvd); 402210594SGeorge.Wilson@Sun.COM } else { 402310594SGeorge.Wilson@Sun.COM vd = vdev_alloc_common(spa, id, 0, &vdev_hole_ops); 402410594SGeorge.Wilson@Sun.COM vdev_add_child(rvd, vd); 402510594SGeorge.Wilson@Sun.COM } 402610594SGeorge.Wilson@Sun.COM vdev_config_dirty(rvd); 402710594SGeorge.Wilson@Sun.COM 402810594SGeorge.Wilson@Sun.COM /* 402910594SGeorge.Wilson@Sun.COM * Reassess the health of our root vdev. 403010594SGeorge.Wilson@Sun.COM */ 403110594SGeorge.Wilson@Sun.COM vdev_reopen(rvd); 403210594SGeorge.Wilson@Sun.COM } 403310594SGeorge.Wilson@Sun.COM 403410594SGeorge.Wilson@Sun.COM /* 40355450Sbrendan * Remove a device from the pool. 
Currently, this supports removing only hot 403610594SGeorge.Wilson@Sun.COM * spares, slogs, and level 2 ARC devices. 40375450Sbrendan */ 40385450Sbrendan int 40395450Sbrendan spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) 40405450Sbrendan { 40415450Sbrendan vdev_t *vd; 404210974SJeff.Bonwick@Sun.COM metaslab_group_t *mg; 40437754SJeff.Bonwick@Sun.COM nvlist_t **spares, **l2cache, *nv; 404410594SGeorge.Wilson@Sun.COM uint64_t txg = 0; 40455450Sbrendan uint_t nspares, nl2cache; 40465450Sbrendan int error = 0; 40478241SJeff.Bonwick@Sun.COM boolean_t locked = MUTEX_HELD(&spa_namespace_lock); 40488241SJeff.Bonwick@Sun.COM 40498241SJeff.Bonwick@Sun.COM if (!locked) 40508241SJeff.Bonwick@Sun.COM txg = spa_vdev_enter(spa); 40515450Sbrendan 40526643Seschrock vd = spa_lookup_by_guid(spa, guid, B_FALSE); 40535450Sbrendan 40545450Sbrendan if (spa->spa_spares.sav_vdevs != NULL && 40555450Sbrendan nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 40567754SJeff.Bonwick@Sun.COM ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0 && 40577754SJeff.Bonwick@Sun.COM (nv = spa_nvlist_lookup_by_guid(spares, nspares, guid)) != NULL) { 40587754SJeff.Bonwick@Sun.COM /* 40597754SJeff.Bonwick@Sun.COM * Only remove the hot spare if it's not currently in use 40607754SJeff.Bonwick@Sun.COM * in this pool. 
40617754SJeff.Bonwick@Sun.COM */ 40627754SJeff.Bonwick@Sun.COM if (vd == NULL || unspare) { 40637754SJeff.Bonwick@Sun.COM spa_vdev_remove_aux(spa->spa_spares.sav_config, 40647754SJeff.Bonwick@Sun.COM ZPOOL_CONFIG_SPARES, spares, nspares, nv); 40657754SJeff.Bonwick@Sun.COM spa_load_spares(spa); 40667754SJeff.Bonwick@Sun.COM spa->spa_spares.sav_sync = B_TRUE; 40677754SJeff.Bonwick@Sun.COM } else { 40687754SJeff.Bonwick@Sun.COM error = EBUSY; 40697754SJeff.Bonwick@Sun.COM } 40707754SJeff.Bonwick@Sun.COM } else if (spa->spa_l2cache.sav_vdevs != NULL && 40715450Sbrendan nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, 40727754SJeff.Bonwick@Sun.COM ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0 && 40737754SJeff.Bonwick@Sun.COM (nv = spa_nvlist_lookup_by_guid(l2cache, nl2cache, guid)) != NULL) { 40747754SJeff.Bonwick@Sun.COM /* 40757754SJeff.Bonwick@Sun.COM * Cache devices can always be removed. 40767754SJeff.Bonwick@Sun.COM */ 40777754SJeff.Bonwick@Sun.COM spa_vdev_remove_aux(spa->spa_l2cache.sav_config, 40787754SJeff.Bonwick@Sun.COM ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache, nv); 40795450Sbrendan spa_load_l2cache(spa); 40805450Sbrendan spa->spa_l2cache.sav_sync = B_TRUE; 408110594SGeorge.Wilson@Sun.COM } else if (vd != NULL && vd->vdev_islog) { 408210594SGeorge.Wilson@Sun.COM ASSERT(!locked); 408310922SJeff.Bonwick@Sun.COM ASSERT(vd == vd->vdev_top); 408410594SGeorge.Wilson@Sun.COM 408510594SGeorge.Wilson@Sun.COM /* 408610594SGeorge.Wilson@Sun.COM * XXX - Once we have bp-rewrite this should 408710594SGeorge.Wilson@Sun.COM * become the common case. 408810594SGeorge.Wilson@Sun.COM */ 408910594SGeorge.Wilson@Sun.COM 409010974SJeff.Bonwick@Sun.COM mg = vd->vdev_mg; 409110974SJeff.Bonwick@Sun.COM 409210594SGeorge.Wilson@Sun.COM /* 409310974SJeff.Bonwick@Sun.COM * Stop allocating from this vdev. 
409410594SGeorge.Wilson@Sun.COM */ 409510974SJeff.Bonwick@Sun.COM metaslab_group_passivate(mg); 409610594SGeorge.Wilson@Sun.COM 409710922SJeff.Bonwick@Sun.COM /* 409810922SJeff.Bonwick@Sun.COM * Wait for the youngest allocations and frees to sync, 409910922SJeff.Bonwick@Sun.COM * and then wait for the deferral of those frees to finish. 410010922SJeff.Bonwick@Sun.COM */ 410110922SJeff.Bonwick@Sun.COM spa_vdev_config_exit(spa, NULL, 410210922SJeff.Bonwick@Sun.COM txg + TXG_CONCURRENT_STATES + TXG_DEFER_SIZE, 0, FTAG); 410310922SJeff.Bonwick@Sun.COM 410410974SJeff.Bonwick@Sun.COM /* 410510974SJeff.Bonwick@Sun.COM * Attempt to evacuate the vdev. 410610974SJeff.Bonwick@Sun.COM */ 410710974SJeff.Bonwick@Sun.COM error = spa_vdev_remove_evacuate(spa, vd); 410810974SJeff.Bonwick@Sun.COM 410910594SGeorge.Wilson@Sun.COM txg = spa_vdev_config_enter(spa); 411010594SGeorge.Wilson@Sun.COM 411110974SJeff.Bonwick@Sun.COM /* 411210974SJeff.Bonwick@Sun.COM * If we couldn't evacuate the vdev, unwind. 411310974SJeff.Bonwick@Sun.COM */ 411410974SJeff.Bonwick@Sun.COM if (error) { 411510974SJeff.Bonwick@Sun.COM metaslab_group_activate(mg); 411610974SJeff.Bonwick@Sun.COM return (spa_vdev_exit(spa, NULL, txg, error)); 411710974SJeff.Bonwick@Sun.COM } 411810974SJeff.Bonwick@Sun.COM 411910974SJeff.Bonwick@Sun.COM /* 412010974SJeff.Bonwick@Sun.COM * Clean up the vdev namespace. 412110974SJeff.Bonwick@Sun.COM */ 412210974SJeff.Bonwick@Sun.COM spa_vdev_remove_from_namespace(spa, vd); 412310594SGeorge.Wilson@Sun.COM 41247754SJeff.Bonwick@Sun.COM } else if (vd != NULL) { 41257754SJeff.Bonwick@Sun.COM /* 41267754SJeff.Bonwick@Sun.COM * Normal vdevs cannot be removed (yet). 41277754SJeff.Bonwick@Sun.COM */ 41287754SJeff.Bonwick@Sun.COM error = ENOTSUP; 41297754SJeff.Bonwick@Sun.COM } else { 41307754SJeff.Bonwick@Sun.COM /* 41317754SJeff.Bonwick@Sun.COM * There is no vdev of any kind with the specified guid. 
41327754SJeff.Bonwick@Sun.COM */ 41337754SJeff.Bonwick@Sun.COM error = ENOENT; 41345450Sbrendan } 41352082Seschrock 41368241SJeff.Bonwick@Sun.COM if (!locked) 41378241SJeff.Bonwick@Sun.COM return (spa_vdev_exit(spa, NULL, txg, error)); 41388241SJeff.Bonwick@Sun.COM 41398241SJeff.Bonwick@Sun.COM return (error); 4140789Sahrens } 4141789Sahrens 4142789Sahrens /* 41434451Seschrock * Find any device that's done replacing, or a vdev marked 'unspare' that's 41444451Seschrock * current spared, so we can detach it. 4145789Sahrens */ 41461544Seschrock static vdev_t * 41474451Seschrock spa_vdev_resilver_done_hunt(vdev_t *vd) 4148789Sahrens { 41491544Seschrock vdev_t *newvd, *oldvd; 41509816SGeorge.Wilson@Sun.COM 41519816SGeorge.Wilson@Sun.COM for (int c = 0; c < vd->vdev_children; c++) { 41524451Seschrock oldvd = spa_vdev_resilver_done_hunt(vd->vdev_child[c]); 41531544Seschrock if (oldvd != NULL) 41541544Seschrock return (oldvd); 41551544Seschrock } 4156789Sahrens 41574451Seschrock /* 41584451Seschrock * Check for a completed replacement. 41594451Seschrock */ 4160789Sahrens if (vd->vdev_ops == &vdev_replacing_ops && vd->vdev_children == 2) { 41611544Seschrock oldvd = vd->vdev_child[0]; 41621544Seschrock newvd = vd->vdev_child[1]; 4163789Sahrens 41648241SJeff.Bonwick@Sun.COM if (vdev_dtl_empty(newvd, DTL_MISSING) && 41658241SJeff.Bonwick@Sun.COM !vdev_dtl_required(oldvd)) 41661544Seschrock return (oldvd); 41671544Seschrock } 4168789Sahrens 41694451Seschrock /* 41704451Seschrock * Check for a completed resilver with the 'unspare' flag set. 
41714451Seschrock */ 41724451Seschrock if (vd->vdev_ops == &vdev_spare_ops && vd->vdev_children == 2) { 41734451Seschrock newvd = vd->vdev_child[0]; 41744451Seschrock oldvd = vd->vdev_child[1]; 41754451Seschrock 41764451Seschrock if (newvd->vdev_unspare && 41778241SJeff.Bonwick@Sun.COM vdev_dtl_empty(newvd, DTL_MISSING) && 41788241SJeff.Bonwick@Sun.COM !vdev_dtl_required(oldvd)) { 41794451Seschrock newvd->vdev_unspare = 0; 41804451Seschrock return (oldvd); 41814451Seschrock } 41824451Seschrock } 41834451Seschrock 41841544Seschrock return (NULL); 4185789Sahrens } 4186789Sahrens 41871544Seschrock static void 41884451Seschrock spa_vdev_resilver_done(spa_t *spa) 4189789Sahrens { 41908241SJeff.Bonwick@Sun.COM vdev_t *vd, *pvd, *ppvd; 41918241SJeff.Bonwick@Sun.COM uint64_t guid, sguid, pguid, ppguid; 41928241SJeff.Bonwick@Sun.COM 41938241SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 4194789Sahrens 41954451Seschrock while ((vd = spa_vdev_resilver_done_hunt(spa->spa_root_vdev)) != NULL) { 41968241SJeff.Bonwick@Sun.COM pvd = vd->vdev_parent; 41978241SJeff.Bonwick@Sun.COM ppvd = pvd->vdev_parent; 41981544Seschrock guid = vd->vdev_guid; 41998241SJeff.Bonwick@Sun.COM pguid = pvd->vdev_guid; 42008241SJeff.Bonwick@Sun.COM ppguid = ppvd->vdev_guid; 42018241SJeff.Bonwick@Sun.COM sguid = 0; 42022082Seschrock /* 42032082Seschrock * If we have just finished replacing a hot spared device, then 42042082Seschrock * we need to detach the parent's first child (the original hot 42052082Seschrock * spare) as well. 
42062082Seschrock */ 42078241SJeff.Bonwick@Sun.COM if (ppvd->vdev_ops == &vdev_spare_ops && pvd->vdev_id == 0) { 42082082Seschrock ASSERT(pvd->vdev_ops == &vdev_replacing_ops); 42098241SJeff.Bonwick@Sun.COM ASSERT(ppvd->vdev_children == 2); 42108241SJeff.Bonwick@Sun.COM sguid = ppvd->vdev_child[1]->vdev_guid; 42112082Seschrock } 42128241SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 42138241SJeff.Bonwick@Sun.COM if (spa_vdev_detach(spa, guid, pguid, B_TRUE) != 0) 42141544Seschrock return; 42158241SJeff.Bonwick@Sun.COM if (sguid && spa_vdev_detach(spa, sguid, ppguid, B_TRUE) != 0) 42162082Seschrock return; 42178241SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 4218789Sahrens } 4219789Sahrens 42208241SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 4221789Sahrens } 4222789Sahrens 4223789Sahrens /* 422411041SEric.Taylor@Sun.COM * Update the stored path or FRU for this vdev. 42251354Seschrock */ 42261354Seschrock int 42279425SEric.Schrock@Sun.COM spa_vdev_set_common(spa_t *spa, uint64_t guid, const char *value, 42289425SEric.Schrock@Sun.COM boolean_t ispath) 42291354Seschrock { 42306643Seschrock vdev_t *vd; 423111041SEric.Taylor@Sun.COM 423211041SEric.Taylor@Sun.COM spa_vdev_state_enter(spa, SCL_ALL); 42331354Seschrock 42349425SEric.Schrock@Sun.COM if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) 423511041SEric.Taylor@Sun.COM return (spa_vdev_state_exit(spa, NULL, ENOENT)); 42361354Seschrock 42371585Sbonwick if (!vd->vdev_ops->vdev_op_leaf) 423811041SEric.Taylor@Sun.COM return (spa_vdev_state_exit(spa, NULL, ENOTSUP)); 42391585Sbonwick 42409425SEric.Schrock@Sun.COM if (ispath) { 42419425SEric.Schrock@Sun.COM spa_strfree(vd->vdev_path); 42429425SEric.Schrock@Sun.COM vd->vdev_path = spa_strdup(value); 42439425SEric.Schrock@Sun.COM } else { 42449425SEric.Schrock@Sun.COM if (vd->vdev_fru != NULL) 42459425SEric.Schrock@Sun.COM spa_strfree(vd->vdev_fru); 42469425SEric.Schrock@Sun.COM vd->vdev_fru = spa_strdup(value); 
42479425SEric.Schrock@Sun.COM } 42481354Seschrock 424911041SEric.Taylor@Sun.COM return (spa_vdev_state_exit(spa, vd, 0)); 42501354Seschrock } 42511354Seschrock 42529425SEric.Schrock@Sun.COM int 42539425SEric.Schrock@Sun.COM spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath) 42549425SEric.Schrock@Sun.COM { 42559425SEric.Schrock@Sun.COM return (spa_vdev_set_common(spa, guid, newpath, B_TRUE)); 42569425SEric.Schrock@Sun.COM } 42579425SEric.Schrock@Sun.COM 42589425SEric.Schrock@Sun.COM int 42599425SEric.Schrock@Sun.COM spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru) 42609425SEric.Schrock@Sun.COM { 42619425SEric.Schrock@Sun.COM return (spa_vdev_set_common(spa, guid, newfru, B_FALSE)); 42629425SEric.Schrock@Sun.COM } 42639425SEric.Schrock@Sun.COM 42641354Seschrock /* 4265789Sahrens * ========================================================================== 4266789Sahrens * SPA Scrubbing 4267789Sahrens * ========================================================================== 4268789Sahrens */ 4269789Sahrens 42707046Sahrens int 42717046Sahrens spa_scrub(spa_t *spa, pool_scrub_type_t type) 4272789Sahrens { 42737754SJeff.Bonwick@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); 42744808Sek110237 4275789Sahrens if ((uint_t)type >= POOL_SCRUB_TYPES) 4276789Sahrens return (ENOTSUP); 4277789Sahrens 4278789Sahrens /* 42797046Sahrens * If a resilver was requested, but there is no DTL on a 42807046Sahrens * writeable leaf device, we have nothing to do. 
4281789Sahrens */ 42827046Sahrens if (type == POOL_SCRUB_RESILVER && 42837046Sahrens !vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) { 42847046Sahrens spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); 42851544Seschrock return (0); 42861544Seschrock } 4287789Sahrens 42887046Sahrens if (type == POOL_SCRUB_EVERYTHING && 42897046Sahrens spa->spa_dsl_pool->dp_scrub_func != SCRUB_FUNC_NONE && 42907046Sahrens spa->spa_dsl_pool->dp_scrub_isresilver) 42917046Sahrens return (EBUSY); 42927046Sahrens 42937046Sahrens if (type == POOL_SCRUB_EVERYTHING || type == POOL_SCRUB_RESILVER) { 42947046Sahrens return (dsl_pool_scrub_clean(spa->spa_dsl_pool)); 42957046Sahrens } else if (type == POOL_SCRUB_NONE) { 42967046Sahrens return (dsl_pool_scrub_cancel(spa->spa_dsl_pool)); 42971544Seschrock } else { 42987046Sahrens return (EINVAL); 42991544Seschrock } 4300789Sahrens } 4301789Sahrens 43021544Seschrock /* 43031544Seschrock * ========================================================================== 43041544Seschrock * SPA async task processing 43051544Seschrock * ========================================================================== 43061544Seschrock */ 43071544Seschrock 43081544Seschrock static void 43094451Seschrock spa_async_remove(spa_t *spa, vdev_t *vd) 4310789Sahrens { 43117361SBrendan.Gregg@Sun.COM if (vd->vdev_remove_wanted) { 43127361SBrendan.Gregg@Sun.COM vd->vdev_remove_wanted = 0; 43137361SBrendan.Gregg@Sun.COM vdev_set_state(vd, B_FALSE, VDEV_STATE_REMOVED, VDEV_AUX_NONE); 431410575SEric.Schrock@Sun.COM 431510575SEric.Schrock@Sun.COM /* 431610575SEric.Schrock@Sun.COM * We want to clear the stats, but we don't want to do a full 431710575SEric.Schrock@Sun.COM * vdev_clear() as that will cause us to throw away 431810575SEric.Schrock@Sun.COM * degraded/faulted state as well as attempt to reopen the 431910575SEric.Schrock@Sun.COM * device, all of which is a waste. 
432010575SEric.Schrock@Sun.COM */ 432110575SEric.Schrock@Sun.COM vd->vdev_stat.vs_read_errors = 0; 432210575SEric.Schrock@Sun.COM vd->vdev_stat.vs_write_errors = 0; 432310575SEric.Schrock@Sun.COM vd->vdev_stat.vs_checksum_errors = 0; 432410575SEric.Schrock@Sun.COM 43257754SJeff.Bonwick@Sun.COM vdev_state_dirty(vd->vdev_top); 43261544Seschrock } 43277361SBrendan.Gregg@Sun.COM 43287754SJeff.Bonwick@Sun.COM for (int c = 0; c < vd->vdev_children; c++) 43297361SBrendan.Gregg@Sun.COM spa_async_remove(spa, vd->vdev_child[c]); 43301544Seschrock } 43311544Seschrock 43321544Seschrock static void 43337754SJeff.Bonwick@Sun.COM spa_async_probe(spa_t *spa, vdev_t *vd) 43347754SJeff.Bonwick@Sun.COM { 43357754SJeff.Bonwick@Sun.COM if (vd->vdev_probe_wanted) { 43367754SJeff.Bonwick@Sun.COM vd->vdev_probe_wanted = 0; 43377754SJeff.Bonwick@Sun.COM vdev_reopen(vd); /* vdev_open() does the actual probe */ 43387754SJeff.Bonwick@Sun.COM } 43397754SJeff.Bonwick@Sun.COM 43407754SJeff.Bonwick@Sun.COM for (int c = 0; c < vd->vdev_children; c++) 43417754SJeff.Bonwick@Sun.COM spa_async_probe(spa, vd->vdev_child[c]); 43427754SJeff.Bonwick@Sun.COM } 43437754SJeff.Bonwick@Sun.COM 43447754SJeff.Bonwick@Sun.COM static void 43459816SGeorge.Wilson@Sun.COM spa_async_autoexpand(spa_t *spa, vdev_t *vd) 43469816SGeorge.Wilson@Sun.COM { 43479816SGeorge.Wilson@Sun.COM sysevent_id_t eid; 43489816SGeorge.Wilson@Sun.COM nvlist_t *attr; 43499816SGeorge.Wilson@Sun.COM char *physpath; 43509816SGeorge.Wilson@Sun.COM 43519816SGeorge.Wilson@Sun.COM if (!spa->spa_autoexpand) 43529816SGeorge.Wilson@Sun.COM return; 43539816SGeorge.Wilson@Sun.COM 43549816SGeorge.Wilson@Sun.COM for (int c = 0; c < vd->vdev_children; c++) { 43559816SGeorge.Wilson@Sun.COM vdev_t *cvd = vd->vdev_child[c]; 43569816SGeorge.Wilson@Sun.COM spa_async_autoexpand(spa, cvd); 43579816SGeorge.Wilson@Sun.COM } 43589816SGeorge.Wilson@Sun.COM 43599816SGeorge.Wilson@Sun.COM if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_physpath == NULL) 
43609816SGeorge.Wilson@Sun.COM return; 43619816SGeorge.Wilson@Sun.COM 43629816SGeorge.Wilson@Sun.COM physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 43639816SGeorge.Wilson@Sun.COM (void) snprintf(physpath, MAXPATHLEN, "/devices%s", vd->vdev_physpath); 43649816SGeorge.Wilson@Sun.COM 43659816SGeorge.Wilson@Sun.COM VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0); 43669816SGeorge.Wilson@Sun.COM VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0); 43679816SGeorge.Wilson@Sun.COM 43689816SGeorge.Wilson@Sun.COM (void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS, 43699816SGeorge.Wilson@Sun.COM ESC_DEV_DLE, attr, &eid, DDI_SLEEP); 43709816SGeorge.Wilson@Sun.COM 43719816SGeorge.Wilson@Sun.COM nvlist_free(attr); 43729816SGeorge.Wilson@Sun.COM kmem_free(physpath, MAXPATHLEN); 43739816SGeorge.Wilson@Sun.COM } 43749816SGeorge.Wilson@Sun.COM 43759816SGeorge.Wilson@Sun.COM static void 43761544Seschrock spa_async_thread(spa_t *spa) 43771544Seschrock { 43787754SJeff.Bonwick@Sun.COM int tasks; 43791544Seschrock 43801544Seschrock ASSERT(spa->spa_sync_on); 4381789Sahrens 43821544Seschrock mutex_enter(&spa->spa_async_lock); 43831544Seschrock tasks = spa->spa_async_tasks; 43841544Seschrock spa->spa_async_tasks = 0; 43851544Seschrock mutex_exit(&spa->spa_async_lock); 43861544Seschrock 43871544Seschrock /* 43881635Sbonwick * See if the config needs to be updated. 
43891635Sbonwick */ 43901635Sbonwick if (tasks & SPA_ASYNC_CONFIG_UPDATE) { 439110922SJeff.Bonwick@Sun.COM uint64_t old_space, new_space; 43929816SGeorge.Wilson@Sun.COM 43931635Sbonwick mutex_enter(&spa_namespace_lock); 439410922SJeff.Bonwick@Sun.COM old_space = metaslab_class_get_space(spa_normal_class(spa)); 43951635Sbonwick spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); 439610922SJeff.Bonwick@Sun.COM new_space = metaslab_class_get_space(spa_normal_class(spa)); 43971635Sbonwick mutex_exit(&spa_namespace_lock); 43989816SGeorge.Wilson@Sun.COM 43999816SGeorge.Wilson@Sun.COM /* 44009816SGeorge.Wilson@Sun.COM * If the pool grew as a result of the config update, 44019816SGeorge.Wilson@Sun.COM * then log an internal history event. 44029816SGeorge.Wilson@Sun.COM */ 440310922SJeff.Bonwick@Sun.COM if (new_space != old_space) { 44049946SMark.Musante@Sun.COM spa_history_internal_log(LOG_POOL_VDEV_ONLINE, 44059946SMark.Musante@Sun.COM spa, NULL, CRED(), 44069946SMark.Musante@Sun.COM "pool '%s' size: %llu(+%llu)", 440710922SJeff.Bonwick@Sun.COM spa_name(spa), new_space, new_space - old_space); 44089816SGeorge.Wilson@Sun.COM } 44091635Sbonwick } 44101635Sbonwick 44111635Sbonwick /* 44124451Seschrock * See if any devices need to be marked REMOVED. 
44131544Seschrock */ 44147754SJeff.Bonwick@Sun.COM if (tasks & SPA_ASYNC_REMOVE) { 441510685SGeorge.Wilson@Sun.COM spa_vdev_state_enter(spa, SCL_NONE); 44164451Seschrock spa_async_remove(spa, spa->spa_root_vdev); 44177754SJeff.Bonwick@Sun.COM for (int i = 0; i < spa->spa_l2cache.sav_count; i++) 44187361SBrendan.Gregg@Sun.COM spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i]); 44197754SJeff.Bonwick@Sun.COM for (int i = 0; i < spa->spa_spares.sav_count; i++) 44207361SBrendan.Gregg@Sun.COM spa_async_remove(spa, spa->spa_spares.sav_vdevs[i]); 44217754SJeff.Bonwick@Sun.COM (void) spa_vdev_state_exit(spa, NULL, 0); 44227754SJeff.Bonwick@Sun.COM } 44237754SJeff.Bonwick@Sun.COM 44249816SGeorge.Wilson@Sun.COM if ((tasks & SPA_ASYNC_AUTOEXPAND) && !spa_suspended(spa)) { 44259816SGeorge.Wilson@Sun.COM spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 44269816SGeorge.Wilson@Sun.COM spa_async_autoexpand(spa, spa->spa_root_vdev); 44279816SGeorge.Wilson@Sun.COM spa_config_exit(spa, SCL_CONFIG, FTAG); 44289816SGeorge.Wilson@Sun.COM } 44299816SGeorge.Wilson@Sun.COM 44307754SJeff.Bonwick@Sun.COM /* 44317754SJeff.Bonwick@Sun.COM * See if any devices need to be probed. 44327754SJeff.Bonwick@Sun.COM */ 44337754SJeff.Bonwick@Sun.COM if (tasks & SPA_ASYNC_PROBE) { 443410685SGeorge.Wilson@Sun.COM spa_vdev_state_enter(spa, SCL_NONE); 44357754SJeff.Bonwick@Sun.COM spa_async_probe(spa, spa->spa_root_vdev); 44367754SJeff.Bonwick@Sun.COM (void) spa_vdev_state_exit(spa, NULL, 0); 44374451Seschrock } 44381544Seschrock 44391544Seschrock /* 44401544Seschrock * If any devices are done replacing, detach them. 44411544Seschrock */ 44424451Seschrock if (tasks & SPA_ASYNC_RESILVER_DONE) 44434451Seschrock spa_vdev_resilver_done(spa); 4444789Sahrens 44451544Seschrock /* 44461544Seschrock * Kick off a resilver. 
44471544Seschrock */ 44487046Sahrens if (tasks & SPA_ASYNC_RESILVER) 44497046Sahrens VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER) == 0); 44501544Seschrock 44511544Seschrock /* 44521544Seschrock * Let the world know that we're done. 44531544Seschrock */ 44541544Seschrock mutex_enter(&spa->spa_async_lock); 44551544Seschrock spa->spa_async_thread = NULL; 44561544Seschrock cv_broadcast(&spa->spa_async_cv); 44571544Seschrock mutex_exit(&spa->spa_async_lock); 44581544Seschrock thread_exit(); 44591544Seschrock } 44601544Seschrock 44611544Seschrock void 44621544Seschrock spa_async_suspend(spa_t *spa) 44631544Seschrock { 44641544Seschrock mutex_enter(&spa->spa_async_lock); 44651544Seschrock spa->spa_async_suspended++; 44661544Seschrock while (spa->spa_async_thread != NULL) 44671544Seschrock cv_wait(&spa->spa_async_cv, &spa->spa_async_lock); 44681544Seschrock mutex_exit(&spa->spa_async_lock); 44691544Seschrock } 44701544Seschrock 44711544Seschrock void 44721544Seschrock spa_async_resume(spa_t *spa) 44731544Seschrock { 44741544Seschrock mutex_enter(&spa->spa_async_lock); 44751544Seschrock ASSERT(spa->spa_async_suspended != 0); 44761544Seschrock spa->spa_async_suspended--; 44771544Seschrock mutex_exit(&spa->spa_async_lock); 44781544Seschrock } 44791544Seschrock 44801544Seschrock static void 44811544Seschrock spa_async_dispatch(spa_t *spa) 44821544Seschrock { 44831544Seschrock mutex_enter(&spa->spa_async_lock); 44841544Seschrock if (spa->spa_async_tasks && !spa->spa_async_suspended && 44851635Sbonwick spa->spa_async_thread == NULL && 44861635Sbonwick rootdir != NULL && !vn_is_readonly(rootdir)) 44871544Seschrock spa->spa_async_thread = thread_create(NULL, 0, 44881544Seschrock spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri); 44891544Seschrock mutex_exit(&spa->spa_async_lock); 44901544Seschrock } 44911544Seschrock 44921544Seschrock void 44931544Seschrock spa_async_request(spa_t *spa, int task) 44941544Seschrock { 44951544Seschrock mutex_enter(&spa->spa_async_lock); 
44961544Seschrock spa->spa_async_tasks |= task; 44971544Seschrock mutex_exit(&spa->spa_async_lock); 4498789Sahrens } 4499789Sahrens 4500789Sahrens /* 4501789Sahrens * ========================================================================== 4502789Sahrens * SPA syncing routines 4503789Sahrens * ========================================================================== 4504789Sahrens */ 4505789Sahrens static void 450610922SJeff.Bonwick@Sun.COM spa_sync_deferred_bplist(spa_t *spa, bplist_t *bpl, dmu_tx_t *tx, uint64_t txg) 4507789Sahrens { 4508789Sahrens blkptr_t blk; 4509789Sahrens uint64_t itor = 0; 4510789Sahrens uint8_t c = 1; 4511789Sahrens 45127754SJeff.Bonwick@Sun.COM while (bplist_iterate(bpl, &itor, &blk) == 0) { 45137754SJeff.Bonwick@Sun.COM ASSERT(blk.blk_birth < txg); 451410922SJeff.Bonwick@Sun.COM zio_free(spa, txg, &blk); 45157754SJeff.Bonwick@Sun.COM } 4516789Sahrens 4517789Sahrens bplist_vacate(bpl, tx); 4518789Sahrens 4519789Sahrens /* 4520789Sahrens * Pre-dirty the first block so we sync to convergence faster. 4521789Sahrens * (Usually only the first block is needed.) 
4522789Sahrens */ 452310922SJeff.Bonwick@Sun.COM dmu_write(bpl->bpl_mos, spa->spa_deferred_bplist_obj, 0, 1, &c, tx); 452410922SJeff.Bonwick@Sun.COM } 452510922SJeff.Bonwick@Sun.COM 452610922SJeff.Bonwick@Sun.COM static void 452710922SJeff.Bonwick@Sun.COM spa_sync_free(void *arg, const blkptr_t *bp, dmu_tx_t *tx) 452810922SJeff.Bonwick@Sun.COM { 452910922SJeff.Bonwick@Sun.COM zio_t *zio = arg; 453010922SJeff.Bonwick@Sun.COM 453110922SJeff.Bonwick@Sun.COM zio_nowait(zio_free_sync(zio, zio->io_spa, dmu_tx_get_txg(tx), bp, 453210922SJeff.Bonwick@Sun.COM zio->io_flags)); 4533789Sahrens } 4534789Sahrens 4535789Sahrens static void 45362082Seschrock spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx) 45372082Seschrock { 45382082Seschrock char *packed = NULL; 45397497STim.Haley@Sun.COM size_t bufsize; 45402082Seschrock size_t nvsize = 0; 45412082Seschrock dmu_buf_t *db; 45422082Seschrock 45432082Seschrock VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0); 45442082Seschrock 45457497STim.Haley@Sun.COM /* 45467497STim.Haley@Sun.COM * Write full (SPA_CONFIG_BLOCKSIZE) blocks of configuration 45477497STim.Haley@Sun.COM * information. This avoids the dbuf_will_dirty() path and 45487497STim.Haley@Sun.COM * saves us a pre-read to get data we don't actually care about. 
45497497STim.Haley@Sun.COM */ 45507497STim.Haley@Sun.COM bufsize = P2ROUNDUP(nvsize, SPA_CONFIG_BLOCKSIZE); 45517497STim.Haley@Sun.COM packed = kmem_alloc(bufsize, KM_SLEEP); 45522082Seschrock 45532082Seschrock VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR, 45542082Seschrock KM_SLEEP) == 0); 45557497STim.Haley@Sun.COM bzero(packed + nvsize, bufsize - nvsize); 45567497STim.Haley@Sun.COM 45577497STim.Haley@Sun.COM dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx); 45587497STim.Haley@Sun.COM 45597497STim.Haley@Sun.COM kmem_free(packed, bufsize); 45602082Seschrock 45612082Seschrock VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); 45622082Seschrock dmu_buf_will_dirty(db, tx); 45632082Seschrock *(uint64_t *)db->db_data = nvsize; 45642082Seschrock dmu_buf_rele(db, FTAG); 45652082Seschrock } 45662082Seschrock 45672082Seschrock static void 45685450Sbrendan spa_sync_aux_dev(spa_t *spa, spa_aux_vdev_t *sav, dmu_tx_t *tx, 45695450Sbrendan const char *config, const char *entry) 45702082Seschrock { 45712082Seschrock nvlist_t *nvroot; 45725450Sbrendan nvlist_t **list; 45732082Seschrock int i; 45742082Seschrock 45755450Sbrendan if (!sav->sav_sync) 45762082Seschrock return; 45772082Seschrock 45782082Seschrock /* 45795450Sbrendan * Update the MOS nvlist describing the list of available devices. 45805450Sbrendan * spa_validate_aux() will have already made sure this nvlist is 45814451Seschrock * valid and the vdevs are labeled appropriately. 
45822082Seschrock */ 45835450Sbrendan if (sav->sav_object == 0) { 45845450Sbrendan sav->sav_object = dmu_object_alloc(spa->spa_meta_objset, 45855450Sbrendan DMU_OT_PACKED_NVLIST, 1 << 14, DMU_OT_PACKED_NVLIST_SIZE, 45865450Sbrendan sizeof (uint64_t), tx); 45872082Seschrock VERIFY(zap_update(spa->spa_meta_objset, 45885450Sbrendan DMU_POOL_DIRECTORY_OBJECT, entry, sizeof (uint64_t), 1, 45895450Sbrendan &sav->sav_object, tx) == 0); 45902082Seschrock } 45912082Seschrock 45922082Seschrock VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); 45935450Sbrendan if (sav->sav_count == 0) { 45945450Sbrendan VERIFY(nvlist_add_nvlist_array(nvroot, config, NULL, 0) == 0); 45952082Seschrock } else { 45965450Sbrendan list = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); 45975450Sbrendan for (i = 0; i < sav->sav_count; i++) 45985450Sbrendan list[i] = vdev_config_generate(spa, sav->sav_vdevs[i], 45995450Sbrendan B_FALSE, B_FALSE, B_TRUE); 46005450Sbrendan VERIFY(nvlist_add_nvlist_array(nvroot, config, list, 46015450Sbrendan sav->sav_count) == 0); 46025450Sbrendan for (i = 0; i < sav->sav_count; i++) 46035450Sbrendan nvlist_free(list[i]); 46045450Sbrendan kmem_free(list, sav->sav_count * sizeof (void *)); 46052082Seschrock } 46062082Seschrock 46075450Sbrendan spa_sync_nvlist(spa, sav->sav_object, nvroot, tx); 46082926Sek110237 nvlist_free(nvroot); 46092082Seschrock 46105450Sbrendan sav->sav_sync = B_FALSE; 46112082Seschrock } 46122082Seschrock 46132082Seschrock static void 4614789Sahrens spa_sync_config_object(spa_t *spa, dmu_tx_t *tx) 4615789Sahrens { 4616789Sahrens nvlist_t *config; 4617789Sahrens 46187754SJeff.Bonwick@Sun.COM if (list_is_empty(&spa->spa_config_dirty_list)) 4619789Sahrens return; 4620789Sahrens 46217754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 46227754SJeff.Bonwick@Sun.COM 46237754SJeff.Bonwick@Sun.COM config = spa_config_generate(spa, spa->spa_root_vdev, 46247754SJeff.Bonwick@Sun.COM dmu_tx_get_txg(tx), B_FALSE); 
46257754SJeff.Bonwick@Sun.COM 46267754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_STATE, FTAG); 4627789Sahrens 46281635Sbonwick if (spa->spa_config_syncing) 46291635Sbonwick nvlist_free(spa->spa_config_syncing); 46301635Sbonwick spa->spa_config_syncing = config; 4631789Sahrens 46322082Seschrock spa_sync_nvlist(spa, spa->spa_config_object, config, tx); 4633789Sahrens } 4634789Sahrens 46355094Slling /* 46365094Slling * Set zpool properties. 46375094Slling */ 46383912Slling static void 46394543Smarks spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 46403912Slling { 46413912Slling spa_t *spa = arg1; 46425094Slling objset_t *mos = spa->spa_meta_objset; 46433912Slling nvlist_t *nvp = arg2; 46445094Slling nvpair_t *elem; 46454451Seschrock uint64_t intval; 46466643Seschrock char *strval; 46475094Slling zpool_prop_t prop; 46485094Slling const char *propname; 46495094Slling zprop_type_t proptype; 46505094Slling 46517754SJeff.Bonwick@Sun.COM mutex_enter(&spa->spa_props_lock); 46527754SJeff.Bonwick@Sun.COM 46535094Slling elem = NULL; 46545094Slling while ((elem = nvlist_next_nvpair(nvp, elem))) { 46555094Slling switch (prop = zpool_name_to_prop(nvpair_name(elem))) { 46565094Slling case ZPOOL_PROP_VERSION: 46575094Slling /* 46585094Slling * Only set version for non-zpool-creation cases 46595094Slling * (set/import). spa_create() needs special care 46605094Slling * for version setting. 46615094Slling */ 46625094Slling if (tx->tx_txg != TXG_INITIAL) { 46635094Slling VERIFY(nvpair_value_uint64(elem, 46645094Slling &intval) == 0); 46655094Slling ASSERT(intval <= SPA_VERSION); 46665094Slling ASSERT(intval >= spa_version(spa)); 46675094Slling spa->spa_uberblock.ub_version = intval; 46685094Slling vdev_config_dirty(spa->spa_root_vdev); 46695094Slling } 46705094Slling break; 46715094Slling 46725094Slling case ZPOOL_PROP_ALTROOT: 46735094Slling /* 46745094Slling * 'altroot' is a non-persistent property. 
It should 46755094Slling * have been set temporarily at creation or import time. 46765094Slling */ 46775094Slling ASSERT(spa->spa_root != NULL); 46785094Slling break; 46795094Slling 46805363Seschrock case ZPOOL_PROP_CACHEFILE: 46815094Slling /* 46828525SEric.Schrock@Sun.COM * 'cachefile' is also a non-persisitent property. 46835094Slling */ 46844543Smarks break; 46855094Slling default: 46865094Slling /* 46875094Slling * Set pool property values in the poolprops mos object. 46885094Slling */ 46895094Slling if (spa->spa_pool_props_object == 0) { 46905094Slling VERIFY((spa->spa_pool_props_object = 46915094Slling zap_create(mos, DMU_OT_POOL_PROPS, 46925094Slling DMU_OT_NONE, 0, tx)) > 0); 46935094Slling 46945094Slling VERIFY(zap_update(mos, 46955094Slling DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS, 46965094Slling 8, 1, &spa->spa_pool_props_object, tx) 46975094Slling == 0); 46985094Slling } 46995094Slling 47005094Slling /* normalize the property name */ 47015094Slling propname = zpool_prop_to_name(prop); 47025094Slling proptype = zpool_prop_get_type(prop); 47035094Slling 47045094Slling if (nvpair_type(elem) == DATA_TYPE_STRING) { 47055094Slling ASSERT(proptype == PROP_TYPE_STRING); 47065094Slling VERIFY(nvpair_value_string(elem, &strval) == 0); 47075094Slling VERIFY(zap_update(mos, 47085094Slling spa->spa_pool_props_object, propname, 47095094Slling 1, strlen(strval) + 1, strval, tx) == 0); 47105094Slling 47115094Slling } else if (nvpair_type(elem) == DATA_TYPE_UINT64) { 47125094Slling VERIFY(nvpair_value_uint64(elem, &intval) == 0); 47135094Slling 47145094Slling if (proptype == PROP_TYPE_INDEX) { 47155094Slling const char *unused; 47165094Slling VERIFY(zpool_prop_index_to_string( 47175094Slling prop, intval, &unused) == 0); 47185094Slling } 47195094Slling VERIFY(zap_update(mos, 47205094Slling spa->spa_pool_props_object, propname, 47215094Slling 8, 1, &intval, tx) == 0); 47225094Slling } else { 47235094Slling ASSERT(0); /* not allowed */ 47245094Slling } 47255094Slling 
47265329Sgw25295 switch (prop) { 47275329Sgw25295 case ZPOOL_PROP_DELEGATION: 47285094Slling spa->spa_delegation = intval; 47295329Sgw25295 break; 47305329Sgw25295 case ZPOOL_PROP_BOOTFS: 47315094Slling spa->spa_bootfs = intval; 47325329Sgw25295 break; 47335329Sgw25295 case ZPOOL_PROP_FAILUREMODE: 47345329Sgw25295 spa->spa_failmode = intval; 47355329Sgw25295 break; 47369816SGeorge.Wilson@Sun.COM case ZPOOL_PROP_AUTOEXPAND: 47379816SGeorge.Wilson@Sun.COM spa->spa_autoexpand = intval; 47389816SGeorge.Wilson@Sun.COM spa_async_request(spa, SPA_ASYNC_AUTOEXPAND); 47399816SGeorge.Wilson@Sun.COM break; 474010922SJeff.Bonwick@Sun.COM case ZPOOL_PROP_DEDUPDITTO: 474110922SJeff.Bonwick@Sun.COM spa->spa_dedup_ditto = intval; 474210922SJeff.Bonwick@Sun.COM break; 47435329Sgw25295 default: 47445329Sgw25295 break; 47455329Sgw25295 } 47463912Slling } 47475094Slling 47485094Slling /* log internal history if this is not a zpool create */ 47495094Slling if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY && 47505094Slling tx->tx_txg != TXG_INITIAL) { 47515094Slling spa_history_internal_log(LOG_POOL_PROPSET, 47525094Slling spa, tx, cr, "%s %lld %s", 47537754SJeff.Bonwick@Sun.COM nvpair_name(elem), intval, spa_name(spa)); 47545094Slling } 47553912Slling } 47567754SJeff.Bonwick@Sun.COM 47577754SJeff.Bonwick@Sun.COM mutex_exit(&spa->spa_props_lock); 47583912Slling } 47593912Slling 4760789Sahrens /* 4761789Sahrens * Sync the specified transaction group. New blocks may be dirtied as 4762789Sahrens * part of the process, so we iterate until it converges. 
4763789Sahrens */ 4764789Sahrens void 4765789Sahrens spa_sync(spa_t *spa, uint64_t txg) 4766789Sahrens { 4767789Sahrens dsl_pool_t *dp = spa->spa_dsl_pool; 4768789Sahrens objset_t *mos = spa->spa_meta_objset; 476910922SJeff.Bonwick@Sun.COM bplist_t *defer_bpl = &spa->spa_deferred_bplist; 477010922SJeff.Bonwick@Sun.COM bplist_t *free_bpl = &spa->spa_free_bplist[txg & TXG_MASK]; 47711635Sbonwick vdev_t *rvd = spa->spa_root_vdev; 4772789Sahrens vdev_t *vd; 4773789Sahrens dmu_tx_t *tx; 47747754SJeff.Bonwick@Sun.COM int error; 4775789Sahrens 4776789Sahrens /* 4777789Sahrens * Lock out configuration changes. 4778789Sahrens */ 47797754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 4780789Sahrens 4781789Sahrens spa->spa_syncing_txg = txg; 4782789Sahrens spa->spa_sync_pass = 0; 4783789Sahrens 47847754SJeff.Bonwick@Sun.COM /* 47857754SJeff.Bonwick@Sun.COM * If there are any pending vdev state changes, convert them 47867754SJeff.Bonwick@Sun.COM * into config changes that go out with this transaction group. 47877754SJeff.Bonwick@Sun.COM */ 47887754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 47898241SJeff.Bonwick@Sun.COM while (list_head(&spa->spa_state_dirty_list) != NULL) { 47908241SJeff.Bonwick@Sun.COM /* 47918241SJeff.Bonwick@Sun.COM * We need the write lock here because, for aux vdevs, 47928241SJeff.Bonwick@Sun.COM * calling vdev_config_dirty() modifies sav_config. 47938241SJeff.Bonwick@Sun.COM * This is ugly and will become unnecessary when we 47948241SJeff.Bonwick@Sun.COM * eliminate the aux vdev wart by integrating all vdevs 47958241SJeff.Bonwick@Sun.COM * into the root vdev tree. 
47968241SJeff.Bonwick@Sun.COM */ 47978241SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); 47988241SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_WRITER); 47998241SJeff.Bonwick@Sun.COM while ((vd = list_head(&spa->spa_state_dirty_list)) != NULL) { 48008241SJeff.Bonwick@Sun.COM vdev_state_clean(vd); 48018241SJeff.Bonwick@Sun.COM vdev_config_dirty(vd); 48028241SJeff.Bonwick@Sun.COM } 48038241SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); 48048241SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER); 48057754SJeff.Bonwick@Sun.COM } 48067754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_STATE, FTAG); 48077754SJeff.Bonwick@Sun.COM 480810922SJeff.Bonwick@Sun.COM VERIFY(0 == bplist_open(defer_bpl, mos, spa->spa_deferred_bplist_obj)); 4809789Sahrens 48102082Seschrock tx = dmu_tx_create_assigned(dp, txg); 48112082Seschrock 48122082Seschrock /* 48134577Sahrens * If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg, 48142082Seschrock * set spa_deflate if we have no raid-z vdevs. 
48152082Seschrock */ 48164577Sahrens if (spa->spa_ubsync.ub_version < SPA_VERSION_RAIDZ_DEFLATE && 48174577Sahrens spa->spa_uberblock.ub_version >= SPA_VERSION_RAIDZ_DEFLATE) { 48182082Seschrock int i; 48192082Seschrock 48202082Seschrock for (i = 0; i < rvd->vdev_children; i++) { 48212082Seschrock vd = rvd->vdev_child[i]; 48222082Seschrock if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE) 48232082Seschrock break; 48242082Seschrock } 48252082Seschrock if (i == rvd->vdev_children) { 48262082Seschrock spa->spa_deflate = TRUE; 48272082Seschrock VERIFY(0 == zap_add(spa->spa_meta_objset, 48282082Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 48292082Seschrock sizeof (uint64_t), 1, &spa->spa_deflate, tx)); 48302082Seschrock } 48312082Seschrock } 48322082Seschrock 48337046Sahrens if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN && 48347046Sahrens spa->spa_uberblock.ub_version >= SPA_VERSION_ORIGIN) { 48357046Sahrens dsl_pool_create_origin(dp, tx); 48367046Sahrens 48377046Sahrens /* Keeping the origin open increases spa_minref */ 48387046Sahrens spa->spa_minref += 3; 48397046Sahrens } 48407046Sahrens 48417046Sahrens if (spa->spa_ubsync.ub_version < SPA_VERSION_NEXT_CLONES && 48427046Sahrens spa->spa_uberblock.ub_version >= SPA_VERSION_NEXT_CLONES) { 48437046Sahrens dsl_pool_upgrade_clones(dp, tx); 48447046Sahrens } 48457046Sahrens 4846789Sahrens /* 4847789Sahrens * If anything has changed in this txg, push the deferred frees 4848789Sahrens * from the previous txg. If not, leave them alone so that we 4849789Sahrens * don't generate work on an otherwise idle system. 4850789Sahrens */ 4851789Sahrens if (!txg_list_empty(&dp->dp_dirty_datasets, txg) || 48522329Sek110237 !txg_list_empty(&dp->dp_dirty_dirs, txg) || 48532329Sek110237 !txg_list_empty(&dp->dp_sync_tasks, txg)) 485410922SJeff.Bonwick@Sun.COM spa_sync_deferred_bplist(spa, defer_bpl, tx, txg); 4855789Sahrens 4856789Sahrens /* 4857789Sahrens * Iterate to convergence. 
4858789Sahrens */ 4859789Sahrens do { 486010922SJeff.Bonwick@Sun.COM int pass = ++spa->spa_sync_pass; 4861789Sahrens 4862789Sahrens spa_sync_config_object(spa, tx); 48635450Sbrendan spa_sync_aux_dev(spa, &spa->spa_spares, tx, 48645450Sbrendan ZPOOL_CONFIG_SPARES, DMU_POOL_SPARES); 48655450Sbrendan spa_sync_aux_dev(spa, &spa->spa_l2cache, tx, 48665450Sbrendan ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE); 48671544Seschrock spa_errlog_sync(spa, txg); 4868789Sahrens dsl_pool_sync(dp, txg); 4869789Sahrens 487010922SJeff.Bonwick@Sun.COM if (pass <= SYNC_PASS_DEFERRED_FREE) { 487110922SJeff.Bonwick@Sun.COM zio_t *zio = zio_root(spa, NULL, NULL, 0); 487210922SJeff.Bonwick@Sun.COM bplist_sync(free_bpl, spa_sync_free, zio, tx); 487310922SJeff.Bonwick@Sun.COM VERIFY(zio_wait(zio) == 0); 487410922SJeff.Bonwick@Sun.COM } else { 487510922SJeff.Bonwick@Sun.COM bplist_sync(free_bpl, bplist_enqueue_cb, defer_bpl, tx); 4876789Sahrens } 4877789Sahrens 487810922SJeff.Bonwick@Sun.COM ddt_sync(spa, txg); 487910922SJeff.Bonwick@Sun.COM 488010922SJeff.Bonwick@Sun.COM while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg)) 488110922SJeff.Bonwick@Sun.COM vdev_sync(vd, txg); 488210922SJeff.Bonwick@Sun.COM 488310922SJeff.Bonwick@Sun.COM } while (dmu_objset_is_dirty(mos, txg)); 488410922SJeff.Bonwick@Sun.COM 488510922SJeff.Bonwick@Sun.COM ASSERT(free_bpl->bpl_queue == NULL); 488610922SJeff.Bonwick@Sun.COM 488710922SJeff.Bonwick@Sun.COM bplist_close(defer_bpl); 4888789Sahrens 4889789Sahrens /* 4890789Sahrens * Rewrite the vdev configuration (which includes the uberblock) 4891789Sahrens * to commit the transaction group. 48921635Sbonwick * 48935688Sbonwick * If there are no dirty vdevs, we sync the uberblock to a few 48945688Sbonwick * random top-level vdevs that are known to be visible in the 48957754SJeff.Bonwick@Sun.COM * config cache (see spa_vdev_add() for a complete description). 48967754SJeff.Bonwick@Sun.COM * If there *are* dirty vdevs, sync the uberblock to all vdevs. 
4897789Sahrens */ 48987754SJeff.Bonwick@Sun.COM for (;;) { 48997754SJeff.Bonwick@Sun.COM /* 49007754SJeff.Bonwick@Sun.COM * We hold SCL_STATE to prevent vdev open/close/etc. 49017754SJeff.Bonwick@Sun.COM * while we're attempting to write the vdev labels. 49027754SJeff.Bonwick@Sun.COM */ 49037754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 49047754SJeff.Bonwick@Sun.COM 49057754SJeff.Bonwick@Sun.COM if (list_is_empty(&spa->spa_config_dirty_list)) { 49067754SJeff.Bonwick@Sun.COM vdev_t *svd[SPA_DVAS_PER_BP]; 49077754SJeff.Bonwick@Sun.COM int svdcount = 0; 49087754SJeff.Bonwick@Sun.COM int children = rvd->vdev_children; 49097754SJeff.Bonwick@Sun.COM int c0 = spa_get_random(children); 49109816SGeorge.Wilson@Sun.COM 49119816SGeorge.Wilson@Sun.COM for (int c = 0; c < children; c++) { 49127754SJeff.Bonwick@Sun.COM vd = rvd->vdev_child[(c0 + c) % children]; 49137754SJeff.Bonwick@Sun.COM if (vd->vdev_ms_array == 0 || vd->vdev_islog) 49147754SJeff.Bonwick@Sun.COM continue; 49157754SJeff.Bonwick@Sun.COM svd[svdcount++] = vd; 49167754SJeff.Bonwick@Sun.COM if (svdcount == SPA_DVAS_PER_BP) 49177754SJeff.Bonwick@Sun.COM break; 49187754SJeff.Bonwick@Sun.COM } 49199725SEric.Schrock@Sun.COM error = vdev_config_sync(svd, svdcount, txg, B_FALSE); 49209725SEric.Schrock@Sun.COM if (error != 0) 49219725SEric.Schrock@Sun.COM error = vdev_config_sync(svd, svdcount, txg, 49229725SEric.Schrock@Sun.COM B_TRUE); 49237754SJeff.Bonwick@Sun.COM } else { 49247754SJeff.Bonwick@Sun.COM error = vdev_config_sync(rvd->vdev_child, 49259725SEric.Schrock@Sun.COM rvd->vdev_children, txg, B_FALSE); 49269725SEric.Schrock@Sun.COM if (error != 0) 49279725SEric.Schrock@Sun.COM error = vdev_config_sync(rvd->vdev_child, 49289725SEric.Schrock@Sun.COM rvd->vdev_children, txg, B_TRUE); 49291635Sbonwick } 49307754SJeff.Bonwick@Sun.COM 49317754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_STATE, FTAG); 49327754SJeff.Bonwick@Sun.COM 49337754SJeff.Bonwick@Sun.COM if (error == 0) 
49347754SJeff.Bonwick@Sun.COM break; 49357754SJeff.Bonwick@Sun.COM zio_suspend(spa, NULL); 49367754SJeff.Bonwick@Sun.COM zio_resume_wait(spa); 49371635Sbonwick } 49382082Seschrock dmu_tx_commit(tx); 49392082Seschrock 49401635Sbonwick /* 49411635Sbonwick * Clear the dirty config list. 49421635Sbonwick */ 49437754SJeff.Bonwick@Sun.COM while ((vd = list_head(&spa->spa_config_dirty_list)) != NULL) 49441635Sbonwick vdev_config_clean(vd); 49451635Sbonwick 49461635Sbonwick /* 49471635Sbonwick * Now that the new config has synced transactionally, 49481635Sbonwick * let it become visible to the config cache. 49491635Sbonwick */ 49501635Sbonwick if (spa->spa_config_syncing != NULL) { 49511635Sbonwick spa_config_set(spa, spa->spa_config_syncing); 49521635Sbonwick spa->spa_config_txg = txg; 49531635Sbonwick spa->spa_config_syncing = NULL; 49541635Sbonwick } 4955789Sahrens 4956789Sahrens spa->spa_ubsync = spa->spa_uberblock; 4957789Sahrens 495810922SJeff.Bonwick@Sun.COM dsl_pool_sync_done(dp, txg); 4959789Sahrens 4960789Sahrens /* 4961789Sahrens * Update usable space statistics. 4962789Sahrens */ 4963789Sahrens while (vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg))) 4964789Sahrens vdev_sync_done(vd, txg); 4965789Sahrens 496610956SGeorge.Wilson@Sun.COM spa_update_dspace(spa); 496710956SGeorge.Wilson@Sun.COM 4968789Sahrens /* 4969789Sahrens * It had better be the case that we didn't dirty anything 49702082Seschrock * since vdev_config_sync(). 
4971789Sahrens */ 4972789Sahrens ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg)); 4973789Sahrens ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg)); 4974789Sahrens ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg)); 497510922SJeff.Bonwick@Sun.COM ASSERT(defer_bpl->bpl_queue == NULL); 497610922SJeff.Bonwick@Sun.COM ASSERT(free_bpl->bpl_queue == NULL); 497710922SJeff.Bonwick@Sun.COM 497810922SJeff.Bonwick@Sun.COM spa->spa_sync_pass = 0; 4979789Sahrens 49807754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_CONFIG, FTAG); 49811544Seschrock 498210921STim.Haley@Sun.COM spa_handle_ignored_writes(spa); 498310921STim.Haley@Sun.COM 49841544Seschrock /* 49851544Seschrock * If any async tasks have been requested, kick them off. 49861544Seschrock */ 49871544Seschrock spa_async_dispatch(spa); 4988789Sahrens } 4989789Sahrens 4990789Sahrens /* 4991789Sahrens * Sync all pools. We don't want to hold the namespace lock across these 4992789Sahrens * operations, so we take a reference on the spa_t and drop the lock during the 4993789Sahrens * sync. 
 */
void
spa_sync_allpools(void)
{
	spa_t *spa = NULL;
	mutex_enter(&spa_namespace_lock);
	while ((spa = spa_next(spa)) != NULL) {
		/* Skip pools that aren't active or whose I/O is suspended. */
		if (spa_state(spa) != POOL_STATE_ACTIVE || spa_suspended(spa))
			continue;
		/*
		 * Hold a reference so the spa can't go away while we've
		 * dropped spa_namespace_lock for the synchronous txg wait.
		 */
		spa_open_ref(spa, FTAG);
		mutex_exit(&spa_namespace_lock);
		txg_wait_synced(spa_get_dsl(spa), 0);
		mutex_enter(&spa_namespace_lock);
		spa_close(spa, FTAG);
	}
	mutex_exit(&spa_namespace_lock);
}

/*
 * ==========================================================================
 * Miscellaneous routines
 * ==========================================================================
 */

/*
 * Remove all pools in the system.
 */
void
spa_evict_all(void)
{
	spa_t *spa;

	/*
	 * Remove all cached state.  All pools should be closed now,
	 * so every spa in the AVL tree should be unreferenced.
	 */
	mutex_enter(&spa_namespace_lock);
	while ((spa = spa_next(NULL)) != NULL) {
		/*
		 * Stop async tasks.  The async thread may need to detach
		 * a device that's been replaced, which requires grabbing
		 * spa_namespace_lock, so we must drop it here.
		 */
		spa_open_ref(spa, FTAG);
		mutex_exit(&spa_namespace_lock);
		spa_async_suspend(spa);
		mutex_enter(&spa_namespace_lock);
		spa_close(spa, FTAG);

		if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
			spa_unload(spa);
			spa_deactivate(spa);
		}
		spa_remove(spa);
	}
	mutex_exit(&spa_namespace_lock);
}

/*
 * Look up a vdev by guid.  The search starts at the pool's root vdev tree;
 * if 'aux' is set, the L2ARC cache and hot spare auxiliary vdev lists are
 * searched as well.  Returns NULL if no vdev with that guid is found.
 */
vdev_t *
spa_lookup_by_guid(spa_t *spa, uint64_t guid, boolean_t aux)
{
	vdev_t *vd;
	int i;

	if ((vd = vdev_lookup_by_guid(spa->spa_root_vdev, guid)) != NULL)
		return (vd);

	if (aux) {
		for (i = 0; i < spa->spa_l2cache.sav_count; i++) {
			vd = spa->spa_l2cache.sav_vdevs[i];
			if (vd->vdev_guid == guid)
				return (vd);
		}

		for (i = 0; i < spa->spa_spares.sav_count; i++) {
			vd = spa->spa_spares.sav_vdevs[i];
			if (vd->vdev_guid == guid)
				return (vd);
		}
	}

	return (NULL);
}

/*
 * Upgrade the pool's on-disk format to 'version': bump the uberblock
 * version, dirty the config so it gets rewritten, then wait for the
 * change to reach stable storage.
 */
void
spa_upgrade(spa_t *spa, uint64_t version)
{
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	/*
	 * This should only be called for a non-faulted pool, and since a
	 * future version would result in an unopenable pool, this shouldn't be
	 * possible.
	 */
	ASSERT(spa->spa_uberblock.ub_version <= SPA_VERSION);
	ASSERT(version >= spa->spa_uberblock.ub_version);

	spa->spa_uberblock.ub_version = version;
	vdev_config_dirty(spa->spa_root_vdev);

	spa_config_exit(spa, SCL_ALL, FTAG);

	/* Don't return until the version bump is on disk. */
	txg_wait_synced(spa_get_dsl(spa), 0);
}

/*
 * Returns B_TRUE if 'guid' matches one of this pool's configured hot
 * spares, or one of the spares pending addition in sav_pending.
 */
boolean_t
spa_has_spare(spa_t *spa, uint64_t guid)
{
	int i;
	uint64_t spareguid;
	spa_aux_vdev_t *sav = &spa->spa_spares;

	for (i = 0; i < sav->sav_count; i++)
		if (sav->sav_vdevs[i]->vdev_guid == guid)
			return (B_TRUE);

	for (i = 0; i < sav->sav_npending; i++) {
		if (nvlist_lookup_uint64(sav->sav_pending[i], ZPOOL_CONFIG_GUID,
		    &spareguid) == 0 && spareguid == guid)
			return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * Check if a pool has an active shared spare device.
 * Note: reference count of an active spare is 2, as a spare and as a replace
 */
static boolean_t
spa_has_active_shared_spare(spa_t *spa)
{
	int i, refcnt;
	uint64_t pool;
	spa_aux_vdev_t *sav = &spa->spa_spares;

	for (i = 0; i < sav->sav_count; i++) {
		/*
		 * A spare is "shared" when spa_spare_exists() reports it
		 * belongs to this pool (pool == spa_guid(spa)).
		 *
		 * NOTE(review): the comment above says an active spare's
		 * refcount is 2, yet the test requires refcnt > 2 —
		 * presumably a spare shared with another pool is held more
		 * than twice; confirm against spa_spare_exists().
		 */
		if (spa_spare_exists(sav->sav_vdevs[i]->vdev_guid, &pool,
		    &refcnt) && pool != 0ULL && pool == spa_guid(spa) &&
		    refcnt > 2)
			return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * Post a sysevent corresponding to the given event.  The 'name' must be one of
 * the event definitions in sys/sysevent/eventdefs.h.  The payload will be
 * filled in from the spa and (optionally) the vdev.  This doesn't do anything
 * in the userland libzpool, as we don't want consumers to misinterpret ztest
 * or zdb as real changes.
 */
void
spa_event_notify(spa_t *spa, vdev_t *vd, const char *name)
{
#ifdef _KERNEL
	sysevent_t *ev;
	sysevent_attr_list_t *attr = NULL;
	sysevent_value_t value;
	sysevent_id_t eid;

	ev = sysevent_alloc(EC_ZFS, (char *)name, SUNW_KERN_PUB "zfs",
	    SE_SLEEP);

	/* Pool name attribute. */
	value.value_type = SE_DATA_TYPE_STRING;
	value.value.sv_string = spa_name(spa);
	if (sysevent_add_attr(&attr, ZFS_EV_POOL_NAME, &value, SE_SLEEP) != 0)
		goto done;

	/* Pool guid attribute. */
	value.value_type = SE_DATA_TYPE_UINT64;
	value.value.sv_uint64 = spa_guid(spa);
	if (sysevent_add_attr(&attr, ZFS_EV_POOL_GUID, &value, SE_SLEEP) != 0)
		goto done;

	/* Optional vdev guid and path attributes. */
	if (vd) {
		value.value_type = SE_DATA_TYPE_UINT64;
		value.value.sv_uint64 = vd->vdev_guid;
		if (sysevent_add_attr(&attr, ZFS_EV_VDEV_GUID, &value,
		    SE_SLEEP) != 0)
			goto done;

		if (vd->vdev_path) {
			value.value_type = SE_DATA_TYPE_STRING;
			value.value.sv_string = vd->vdev_path;
			if (sysevent_add_attr(&attr, ZFS_EV_VDEV_PATH,
			    &value, SE_SLEEP) != 0)
				goto done;
		}
	}

	/*
	 * On success the attribute list is owned by the event; clear 'attr'
	 * so the cleanup path below doesn't free it a second time.
	 */
	if (sysevent_attach_attributes(ev, attr) != 0)
		goto done;
	attr = NULL;

	(void) log_sysevent(ev, SE_SLEEP, &eid);

done:
	if (attr)
		sysevent_free_attr(attr);
	sysevent_free(ev);
#endif
}