/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * This file contains all the routines used when modifying on-disk SPA state.
 * This includes opening, importing, destroying, exporting a pool, and syncing a
 * pool.
30789Sahrens */ 31789Sahrens 32789Sahrens #include <sys/zfs_context.h> 331544Seschrock #include <sys/fm/fs/zfs.h> 34789Sahrens #include <sys/spa_impl.h> 35789Sahrens #include <sys/zio.h> 36789Sahrens #include <sys/zio_checksum.h> 37789Sahrens #include <sys/dmu.h> 38789Sahrens #include <sys/dmu_tx.h> 39789Sahrens #include <sys/zap.h> 40789Sahrens #include <sys/zil.h> 4110922SJeff.Bonwick@Sun.COM #include <sys/ddt.h> 42789Sahrens #include <sys/vdev_impl.h> 43789Sahrens #include <sys/metaslab.h> 4410594SGeorge.Wilson@Sun.COM #include <sys/metaslab_impl.h> 45789Sahrens #include <sys/uberblock_impl.h> 46789Sahrens #include <sys/txg.h> 47789Sahrens #include <sys/avl.h> 48789Sahrens #include <sys/dmu_traverse.h> 493912Slling #include <sys/dmu_objset.h> 50789Sahrens #include <sys/unique.h> 51789Sahrens #include <sys/dsl_pool.h> 523912Slling #include <sys/dsl_dataset.h> 53789Sahrens #include <sys/dsl_dir.h> 54789Sahrens #include <sys/dsl_prop.h> 553912Slling #include <sys/dsl_synctask.h> 56789Sahrens #include <sys/fs/zfs.h> 575450Sbrendan #include <sys/arc.h> 58789Sahrens #include <sys/callb.h> 593975Sek110237 #include <sys/systeminfo.h> 606423Sgw25295 #include <sys/spa_boot.h> 619816SGeorge.Wilson@Sun.COM #include <sys/zfs_ioctl.h> 6212296SLin.Ling@Sun.COM #include <sys/dsl_scan.h> 63789Sahrens 648662SJordan.Vaughan@Sun.com #ifdef _KERNEL 6511173SJonathan.Adams@Sun.COM #include <sys/bootprops.h> 6611173SJonathan.Adams@Sun.COM #include <sys/callb.h> 6711173SJonathan.Adams@Sun.COM #include <sys/cpupart.h> 6811173SJonathan.Adams@Sun.COM #include <sys/pool.h> 6911173SJonathan.Adams@Sun.COM #include <sys/sysdc.h> 708662SJordan.Vaughan@Sun.com #include <sys/zone.h> 718662SJordan.Vaughan@Sun.com #endif /* _KERNEL */ 728662SJordan.Vaughan@Sun.com 735094Slling #include "zfs_prop.h" 745913Sperrin #include "zfs_comutil.h" 755094Slling 7611173SJonathan.Adams@Sun.COM typedef enum zti_modes { 779515SJonathan.Adams@Sun.COM zti_mode_fixed, /* value is # of threads (min 1) */ 
789515SJonathan.Adams@Sun.COM zti_mode_online_percent, /* value is % of online CPUs */ 7911173SJonathan.Adams@Sun.COM zti_mode_batch, /* cpu-intensive; value is ignored */ 8011146SGeorge.Wilson@Sun.COM zti_mode_null, /* don't create a taskq */ 819515SJonathan.Adams@Sun.COM zti_nmodes 8211173SJonathan.Adams@Sun.COM } zti_modes_t; 832986Sek110237 8411146SGeorge.Wilson@Sun.COM #define ZTI_FIX(n) { zti_mode_fixed, (n) } 8511146SGeorge.Wilson@Sun.COM #define ZTI_PCT(n) { zti_mode_online_percent, (n) } 8611173SJonathan.Adams@Sun.COM #define ZTI_BATCH { zti_mode_batch, 0 } 8711146SGeorge.Wilson@Sun.COM #define ZTI_NULL { zti_mode_null, 0 } 8811146SGeorge.Wilson@Sun.COM 8911146SGeorge.Wilson@Sun.COM #define ZTI_ONE ZTI_FIX(1) 909515SJonathan.Adams@Sun.COM 919515SJonathan.Adams@Sun.COM typedef struct zio_taskq_info { 9211146SGeorge.Wilson@Sun.COM enum zti_modes zti_mode; 9311146SGeorge.Wilson@Sun.COM uint_t zti_value; 949515SJonathan.Adams@Sun.COM } zio_taskq_info_t; 959515SJonathan.Adams@Sun.COM 969515SJonathan.Adams@Sun.COM static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = { 9711173SJonathan.Adams@Sun.COM "issue", "issue_high", "intr", "intr_high" 989515SJonathan.Adams@Sun.COM }; 999515SJonathan.Adams@Sun.COM 10011146SGeorge.Wilson@Sun.COM /* 10111146SGeorge.Wilson@Sun.COM * Define the taskq threads for the following I/O types: 10211146SGeorge.Wilson@Sun.COM * NULL, READ, WRITE, FREE, CLAIM, and IOCTL 10311146SGeorge.Wilson@Sun.COM */ 10411146SGeorge.Wilson@Sun.COM const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = { 10511146SGeorge.Wilson@Sun.COM /* ISSUE ISSUE_HIGH INTR INTR_HIGH */ 10611146SGeorge.Wilson@Sun.COM { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, 10711173SJonathan.Adams@Sun.COM { ZTI_FIX(8), ZTI_NULL, ZTI_BATCH, ZTI_NULL }, 10811173SJonathan.Adams@Sun.COM { ZTI_BATCH, ZTI_FIX(5), ZTI_FIX(8), ZTI_FIX(5) }, 10912450SGeorge.Wilson@Sun.COM { ZTI_FIX(100), ZTI_NULL, ZTI_ONE, ZTI_NULL }, 11011146SGeorge.Wilson@Sun.COM { ZTI_ONE, ZTI_NULL, 
ZTI_ONE, ZTI_NULL }, 11111146SGeorge.Wilson@Sun.COM { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, 1129515SJonathan.Adams@Sun.COM }; 1139515SJonathan.Adams@Sun.COM 11412296SLin.Ling@Sun.COM static dsl_syncfunc_t spa_sync_props; 1157214Slling static boolean_t spa_has_active_shared_spare(spa_t *spa); 11611422SMark.Musante@Sun.COM static int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config, 11711422SMark.Musante@Sun.COM spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig, 11811422SMark.Musante@Sun.COM char **ereport); 11913037SMark.Musante@Sun.COM static void spa_vdev_resilver_done(spa_t *spa); 1205094Slling 12111173SJonathan.Adams@Sun.COM uint_t zio_taskq_batch_pct = 100; /* 1 thread per cpu in pset */ 12211173SJonathan.Adams@Sun.COM id_t zio_taskq_psrset_bind = PS_NONE; 12311173SJonathan.Adams@Sun.COM boolean_t zio_taskq_sysdc = B_TRUE; /* use SDC scheduling class */ 12411173SJonathan.Adams@Sun.COM uint_t zio_taskq_basedc = 80; /* base duty cycle */ 12511173SJonathan.Adams@Sun.COM 12611173SJonathan.Adams@Sun.COM boolean_t spa_create_process = B_TRUE; /* no process ==> no sysdc */ 12711173SJonathan.Adams@Sun.COM 12811173SJonathan.Adams@Sun.COM /* 12911173SJonathan.Adams@Sun.COM * This (illegal) pool name is used when temporarily importing a spa_t in order 13011173SJonathan.Adams@Sun.COM * to get the vdev stats associated with the imported devices. 13111173SJonathan.Adams@Sun.COM */ 13211173SJonathan.Adams@Sun.COM #define TRYIMPORT_NAME "$import" 13311173SJonathan.Adams@Sun.COM 1345094Slling /* 1355094Slling * ========================================================================== 1365094Slling * SPA properties routines 1375094Slling * ========================================================================== 1385094Slling */ 1395094Slling 1405094Slling /* 1415094Slling * Add a (source=src, propname=propval) list to an nvlist. 
1425094Slling */ 1435949Slling static void 1445094Slling spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval, 1455094Slling uint64_t intval, zprop_source_t src) 1465094Slling { 1475094Slling const char *propname = zpool_prop_to_name(prop); 1485094Slling nvlist_t *propval; 1495949Slling 1505949Slling VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); 1515949Slling VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0); 1525949Slling 1535949Slling if (strval != NULL) 1545949Slling VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0); 1555949Slling else 1565949Slling VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0); 1575949Slling 1585949Slling VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0); 1595094Slling nvlist_free(propval); 1605094Slling } 1615094Slling 1625094Slling /* 1635094Slling * Get property values from the spa configuration. 1645094Slling */ 1655949Slling static void 1665094Slling spa_prop_get_config(spa_t *spa, nvlist_t **nvp) 1675094Slling { 1688525SEric.Schrock@Sun.COM uint64_t size; 16910956SGeorge.Wilson@Sun.COM uint64_t alloc; 1705094Slling uint64_t cap, version; 1715094Slling zprop_source_t src = ZPROP_SRC_NONE; 1726643Seschrock spa_config_dirent_t *dp; 1735094Slling 1747754SJeff.Bonwick@Sun.COM ASSERT(MUTEX_HELD(&spa->spa_props_lock)); 1757754SJeff.Bonwick@Sun.COM 1768525SEric.Schrock@Sun.COM if (spa->spa_root_vdev != NULL) { 17710956SGeorge.Wilson@Sun.COM alloc = metaslab_class_get_alloc(spa_normal_class(spa)); 17810922SJeff.Bonwick@Sun.COM size = metaslab_class_get_space(spa_normal_class(spa)); 1798525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src); 1808525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src); 18110956SGeorge.Wilson@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_ALLOCATED, NULL, alloc, src); 18210956SGeorge.Wilson@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_FREE, NULL, 18310956SGeorge.Wilson@Sun.COM size - alloc, 
src); 184*13049SGeorge.Wilson@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_READONLY, NULL, 185*13049SGeorge.Wilson@Sun.COM (spa_mode(spa) == FREAD), src); 18610956SGeorge.Wilson@Sun.COM 18710956SGeorge.Wilson@Sun.COM cap = (size == 0) ? 0 : (alloc * 100 / size); 1888525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src); 1898525SEric.Schrock@Sun.COM 19010922SJeff.Bonwick@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_DEDUPRATIO, NULL, 19110922SJeff.Bonwick@Sun.COM ddt_get_pool_dedup_ratio(spa), src); 19210922SJeff.Bonwick@Sun.COM 1938525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL, 1948525SEric.Schrock@Sun.COM spa->spa_root_vdev->vdev_state, src); 1958525SEric.Schrock@Sun.COM 1968525SEric.Schrock@Sun.COM version = spa_version(spa); 1978525SEric.Schrock@Sun.COM if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) 1988525SEric.Schrock@Sun.COM src = ZPROP_SRC_DEFAULT; 1998525SEric.Schrock@Sun.COM else 2008525SEric.Schrock@Sun.COM src = ZPROP_SRC_LOCAL; 2018525SEric.Schrock@Sun.COM spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src); 2028525SEric.Schrock@Sun.COM } 2035949Slling 2045949Slling spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src); 2055949Slling 2065949Slling if (spa->spa_root != NULL) 2075949Slling spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root, 2085949Slling 0, ZPROP_SRC_LOCAL); 2095094Slling 2106643Seschrock if ((dp = list_head(&spa->spa_config_list)) != NULL) { 2116643Seschrock if (dp->scd_path == NULL) { 2125949Slling spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 2136643Seschrock "none", 0, ZPROP_SRC_LOCAL); 2146643Seschrock } else if (strcmp(dp->scd_path, spa_config_path) != 0) { 2155949Slling spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 2166643Seschrock dp->scd_path, 0, ZPROP_SRC_LOCAL); 2175363Seschrock } 2185363Seschrock } 2195094Slling } 2205094Slling 2215094Slling /* 2225094Slling * Get zpool property values. 
2235094Slling */ 2245094Slling int 2255094Slling spa_prop_get(spa_t *spa, nvlist_t **nvp) 2265094Slling { 22710922SJeff.Bonwick@Sun.COM objset_t *mos = spa->spa_meta_objset; 2285094Slling zap_cursor_t zc; 2295094Slling zap_attribute_t za; 2305094Slling int err; 2315094Slling 2325949Slling VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); 2335094Slling 2347754SJeff.Bonwick@Sun.COM mutex_enter(&spa->spa_props_lock); 2357754SJeff.Bonwick@Sun.COM 2365094Slling /* 2375094Slling * Get properties from the spa config. 2385094Slling */ 2395949Slling spa_prop_get_config(spa, nvp); 2405094Slling 2415094Slling /* If no pool property object, no more prop to get. */ 24211619SGeorge.Wilson@Sun.COM if (mos == NULL || spa->spa_pool_props_object == 0) { 2435094Slling mutex_exit(&spa->spa_props_lock); 2445094Slling return (0); 2455094Slling } 2465094Slling 2475094Slling /* 2485094Slling * Get properties from the MOS pool property object. 2495094Slling */ 2505094Slling for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object); 2515094Slling (err = zap_cursor_retrieve(&zc, &za)) == 0; 2525094Slling zap_cursor_advance(&zc)) { 2535094Slling uint64_t intval = 0; 2545094Slling char *strval = NULL; 2555094Slling zprop_source_t src = ZPROP_SRC_DEFAULT; 2565094Slling zpool_prop_t prop; 2575094Slling 2585094Slling if ((prop = zpool_name_to_prop(za.za_name)) == ZPROP_INVAL) 2595094Slling continue; 2605094Slling 2615094Slling switch (za.za_integer_length) { 2625094Slling case 8: 2635094Slling /* integer property */ 2645094Slling if (za.za_first_integer != 2655094Slling zpool_prop_default_numeric(prop)) 2665094Slling src = ZPROP_SRC_LOCAL; 2675094Slling 2685094Slling if (prop == ZPOOL_PROP_BOOTFS) { 2695094Slling dsl_pool_t *dp; 2705094Slling dsl_dataset_t *ds = NULL; 2715094Slling 2725094Slling dp = spa_get_dsl(spa); 2735094Slling rw_enter(&dp->dp_config_rwlock, RW_READER); 2746689Smaybee if (err = dsl_dataset_hold_obj(dp, 2756689Smaybee za.za_first_integer, FTAG, &ds)) { 2765094Slling 
rw_exit(&dp->dp_config_rwlock); 2775094Slling break; 2785094Slling } 2795094Slling 2805094Slling strval = kmem_alloc( 2815094Slling MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, 2825094Slling KM_SLEEP); 2835094Slling dsl_dataset_name(ds, strval); 2846689Smaybee dsl_dataset_rele(ds, FTAG); 2855094Slling rw_exit(&dp->dp_config_rwlock); 2865094Slling } else { 2875094Slling strval = NULL; 2885094Slling intval = za.za_first_integer; 2895094Slling } 2905094Slling 2915949Slling spa_prop_add_list(*nvp, prop, strval, intval, src); 2925094Slling 2935094Slling if (strval != NULL) 2945094Slling kmem_free(strval, 2955094Slling MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); 2965094Slling 2975094Slling break; 2985094Slling 2995094Slling case 1: 3005094Slling /* string property */ 3015094Slling strval = kmem_alloc(za.za_num_integers, KM_SLEEP); 3025094Slling err = zap_lookup(mos, spa->spa_pool_props_object, 3035094Slling za.za_name, 1, za.za_num_integers, strval); 3045094Slling if (err) { 3055094Slling kmem_free(strval, za.za_num_integers); 3065094Slling break; 3075094Slling } 3085949Slling spa_prop_add_list(*nvp, prop, strval, 0, src); 3095094Slling kmem_free(strval, za.za_num_integers); 3105094Slling break; 3115094Slling 3125094Slling default: 3135094Slling break; 3145094Slling } 3155094Slling } 3165094Slling zap_cursor_fini(&zc); 3175094Slling mutex_exit(&spa->spa_props_lock); 3185094Slling out: 3195094Slling if (err && err != ENOENT) { 3205094Slling nvlist_free(*nvp); 3215949Slling *nvp = NULL; 3225094Slling return (err); 3235094Slling } 3245094Slling 3255094Slling return (0); 3265094Slling } 3275094Slling 3285094Slling /* 3295094Slling * Validate the given pool properties nvlist and modify the list 3305094Slling * for the property values to be set. 
3315094Slling */ 3325094Slling static int 3335094Slling spa_prop_validate(spa_t *spa, nvlist_t *props) 3345094Slling { 3355094Slling nvpair_t *elem; 3365094Slling int error = 0, reset_bootfs = 0; 3375094Slling uint64_t objnum; 3385094Slling 3395094Slling elem = NULL; 3405094Slling while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { 3415094Slling zpool_prop_t prop; 3425094Slling char *propname, *strval; 3435094Slling uint64_t intval; 3445094Slling objset_t *os; 3455363Seschrock char *slash; 3465094Slling 3475094Slling propname = nvpair_name(elem); 3485094Slling 3495094Slling if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) 3505094Slling return (EINVAL); 3515094Slling 3525094Slling switch (prop) { 3535094Slling case ZPOOL_PROP_VERSION: 3545094Slling error = nvpair_value_uint64(elem, &intval); 3555094Slling if (!error && 3565094Slling (intval < spa_version(spa) || intval > SPA_VERSION)) 3575094Slling error = EINVAL; 3585094Slling break; 3595094Slling 3605094Slling case ZPOOL_PROP_DELEGATION: 3615094Slling case ZPOOL_PROP_AUTOREPLACE: 3627538SRichard.Morris@Sun.COM case ZPOOL_PROP_LISTSNAPS: 3639816SGeorge.Wilson@Sun.COM case ZPOOL_PROP_AUTOEXPAND: 3645094Slling error = nvpair_value_uint64(elem, &intval); 3655094Slling if (!error && intval > 1) 3665094Slling error = EINVAL; 3675094Slling break; 3685094Slling 3695094Slling case ZPOOL_PROP_BOOTFS: 3709630SJeff.Bonwick@Sun.COM /* 3719630SJeff.Bonwick@Sun.COM * If the pool version is less than SPA_VERSION_BOOTFS, 3729630SJeff.Bonwick@Sun.COM * or the pool is still being created (version == 0), 3739630SJeff.Bonwick@Sun.COM * the bootfs property cannot be set. 
3749630SJeff.Bonwick@Sun.COM */ 3755094Slling if (spa_version(spa) < SPA_VERSION_BOOTFS) { 3765094Slling error = ENOTSUP; 3775094Slling break; 3785094Slling } 3795094Slling 3805094Slling /* 3817042Sgw25295 * Make sure the vdev config is bootable 3825094Slling */ 3837042Sgw25295 if (!vdev_is_bootable(spa->spa_root_vdev)) { 3845094Slling error = ENOTSUP; 3855094Slling break; 3865094Slling } 3875094Slling 3885094Slling reset_bootfs = 1; 3895094Slling 3905094Slling error = nvpair_value_string(elem, &strval); 3915094Slling 3925094Slling if (!error) { 3937042Sgw25295 uint64_t compress; 3947042Sgw25295 3955094Slling if (strval == NULL || strval[0] == '\0') { 3965094Slling objnum = zpool_prop_default_numeric( 3975094Slling ZPOOL_PROP_BOOTFS); 3985094Slling break; 3995094Slling } 4005094Slling 40110298SMatthew.Ahrens@Sun.COM if (error = dmu_objset_hold(strval, FTAG, &os)) 4025094Slling break; 4037042Sgw25295 40410298SMatthew.Ahrens@Sun.COM /* Must be ZPL and not gzip compressed. */ 40510298SMatthew.Ahrens@Sun.COM 40610298SMatthew.Ahrens@Sun.COM if (dmu_objset_type(os) != DMU_OST_ZFS) { 40710298SMatthew.Ahrens@Sun.COM error = ENOTSUP; 40810298SMatthew.Ahrens@Sun.COM } else if ((error = dsl_prop_get_integer(strval, 4097042Sgw25295 zfs_prop_to_name(ZFS_PROP_COMPRESSION), 4107042Sgw25295 &compress, NULL)) == 0 && 4117042Sgw25295 !BOOTFS_COMPRESS_VALID(compress)) { 4127042Sgw25295 error = ENOTSUP; 4137042Sgw25295 } else { 4147042Sgw25295 objnum = dmu_objset_id(os); 4157042Sgw25295 } 41610298SMatthew.Ahrens@Sun.COM dmu_objset_rele(os, FTAG); 4175094Slling } 4185094Slling break; 4197754SJeff.Bonwick@Sun.COM 4205329Sgw25295 case ZPOOL_PROP_FAILUREMODE: 4215329Sgw25295 error = nvpair_value_uint64(elem, &intval); 4225329Sgw25295 if (!error && (intval < ZIO_FAILURE_MODE_WAIT || 4235329Sgw25295 intval > ZIO_FAILURE_MODE_PANIC)) 4245329Sgw25295 error = EINVAL; 4255329Sgw25295 4265329Sgw25295 /* 4275329Sgw25295 * This is a special case which only occurs when 4285329Sgw25295 * the pool 
has completely failed. This allows 4295329Sgw25295 * the user to change the in-core failmode property 4305329Sgw25295 * without syncing it out to disk (I/Os might 4315329Sgw25295 * currently be blocked). We do this by returning 4325329Sgw25295 * EIO to the caller (spa_prop_set) to trick it 4335329Sgw25295 * into thinking we encountered a property validation 4345329Sgw25295 * error. 4355329Sgw25295 */ 4367754SJeff.Bonwick@Sun.COM if (!error && spa_suspended(spa)) { 4375329Sgw25295 spa->spa_failmode = intval; 4385329Sgw25295 error = EIO; 4395329Sgw25295 } 4405329Sgw25295 break; 4415363Seschrock 4425363Seschrock case ZPOOL_PROP_CACHEFILE: 4435363Seschrock if ((error = nvpair_value_string(elem, &strval)) != 0) 4445363Seschrock break; 4455363Seschrock 4465363Seschrock if (strval[0] == '\0') 4475363Seschrock break; 4485363Seschrock 4495363Seschrock if (strcmp(strval, "none") == 0) 4505363Seschrock break; 4515363Seschrock 4525363Seschrock if (strval[0] != '/') { 4535363Seschrock error = EINVAL; 4545363Seschrock break; 4555363Seschrock } 4565363Seschrock 4575363Seschrock slash = strrchr(strval, '/'); 4585363Seschrock ASSERT(slash != NULL); 4595363Seschrock 4605363Seschrock if (slash[1] == '\0' || strcmp(slash, "/.") == 0 || 4615363Seschrock strcmp(slash, "/..") == 0) 4625363Seschrock error = EINVAL; 4635363Seschrock break; 46410922SJeff.Bonwick@Sun.COM 46510922SJeff.Bonwick@Sun.COM case ZPOOL_PROP_DEDUPDITTO: 46610922SJeff.Bonwick@Sun.COM if (spa_version(spa) < SPA_VERSION_DEDUP) 46710922SJeff.Bonwick@Sun.COM error = ENOTSUP; 46810922SJeff.Bonwick@Sun.COM else 46910922SJeff.Bonwick@Sun.COM error = nvpair_value_uint64(elem, &intval); 47010922SJeff.Bonwick@Sun.COM if (error == 0 && 47110922SJeff.Bonwick@Sun.COM intval != 0 && intval < ZIO_DEDUPDITTO_MIN) 47210922SJeff.Bonwick@Sun.COM error = EINVAL; 47310922SJeff.Bonwick@Sun.COM break; 4745094Slling } 4755094Slling 4765094Slling if (error) 4775094Slling break; 4785094Slling } 4795094Slling 4805094Slling if (!error && 
reset_bootfs) { 4815094Slling error = nvlist_remove(props, 4825094Slling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING); 4835094Slling 4845094Slling if (!error) { 4855094Slling error = nvlist_add_uint64(props, 4865094Slling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum); 4875094Slling } 4885094Slling } 4895094Slling 4905094Slling return (error); 4915094Slling } 4925094Slling 4938525SEric.Schrock@Sun.COM void 4948525SEric.Schrock@Sun.COM spa_configfile_set(spa_t *spa, nvlist_t *nvp, boolean_t need_sync) 4958525SEric.Schrock@Sun.COM { 4968525SEric.Schrock@Sun.COM char *cachefile; 4978525SEric.Schrock@Sun.COM spa_config_dirent_t *dp; 4988525SEric.Schrock@Sun.COM 4998525SEric.Schrock@Sun.COM if (nvlist_lookup_string(nvp, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), 5008525SEric.Schrock@Sun.COM &cachefile) != 0) 5018525SEric.Schrock@Sun.COM return; 5028525SEric.Schrock@Sun.COM 5038525SEric.Schrock@Sun.COM dp = kmem_alloc(sizeof (spa_config_dirent_t), 5048525SEric.Schrock@Sun.COM KM_SLEEP); 5058525SEric.Schrock@Sun.COM 5068525SEric.Schrock@Sun.COM if (cachefile[0] == '\0') 5078525SEric.Schrock@Sun.COM dp->scd_path = spa_strdup(spa_config_path); 5088525SEric.Schrock@Sun.COM else if (strcmp(cachefile, "none") == 0) 5098525SEric.Schrock@Sun.COM dp->scd_path = NULL; 5108525SEric.Schrock@Sun.COM else 5118525SEric.Schrock@Sun.COM dp->scd_path = spa_strdup(cachefile); 5128525SEric.Schrock@Sun.COM 5138525SEric.Schrock@Sun.COM list_insert_head(&spa->spa_config_list, dp); 5148525SEric.Schrock@Sun.COM if (need_sync) 5158525SEric.Schrock@Sun.COM spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); 5168525SEric.Schrock@Sun.COM } 5178525SEric.Schrock@Sun.COM 5185094Slling int 5195094Slling spa_prop_set(spa_t *spa, nvlist_t *nvp) 5205094Slling { 5215094Slling int error; 5228525SEric.Schrock@Sun.COM nvpair_t *elem; 5238525SEric.Schrock@Sun.COM boolean_t need_sync = B_FALSE; 5248525SEric.Schrock@Sun.COM zpool_prop_t prop; 5255094Slling 5265094Slling if ((error = spa_prop_validate(spa, 
nvp)) != 0) 5275094Slling return (error); 5285094Slling 5298525SEric.Schrock@Sun.COM elem = NULL; 5308525SEric.Schrock@Sun.COM while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) { 5318525SEric.Schrock@Sun.COM if ((prop = zpool_name_to_prop( 5328525SEric.Schrock@Sun.COM nvpair_name(elem))) == ZPROP_INVAL) 5338525SEric.Schrock@Sun.COM return (EINVAL); 5348525SEric.Schrock@Sun.COM 535*13049SGeorge.Wilson@Sun.COM if (prop == ZPOOL_PROP_CACHEFILE || 536*13049SGeorge.Wilson@Sun.COM prop == ZPOOL_PROP_ALTROOT || 537*13049SGeorge.Wilson@Sun.COM prop == ZPOOL_PROP_READONLY) 5388525SEric.Schrock@Sun.COM continue; 5398525SEric.Schrock@Sun.COM 5408525SEric.Schrock@Sun.COM need_sync = B_TRUE; 5418525SEric.Schrock@Sun.COM break; 5428525SEric.Schrock@Sun.COM } 5438525SEric.Schrock@Sun.COM 5448525SEric.Schrock@Sun.COM if (need_sync) 5458525SEric.Schrock@Sun.COM return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props, 5468525SEric.Schrock@Sun.COM spa, nvp, 3)); 5478525SEric.Schrock@Sun.COM else 5488525SEric.Schrock@Sun.COM return (0); 5495094Slling } 5505094Slling 5515094Slling /* 5525094Slling * If the bootfs property value is dsobj, clear it. 
5535094Slling */ 5545094Slling void 5555094Slling spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx) 5565094Slling { 5575094Slling if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) { 5585094Slling VERIFY(zap_remove(spa->spa_meta_objset, 5595094Slling spa->spa_pool_props_object, 5605094Slling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0); 5615094Slling spa->spa_bootfs = 0; 5625094Slling } 5635094Slling } 5645094Slling 565789Sahrens /* 566789Sahrens * ========================================================================== 567789Sahrens * SPA state manipulation (open/create/destroy/import/export) 568789Sahrens * ========================================================================== 569789Sahrens */ 570789Sahrens 5711544Seschrock static int 5721544Seschrock spa_error_entry_compare(const void *a, const void *b) 5731544Seschrock { 5741544Seschrock spa_error_entry_t *sa = (spa_error_entry_t *)a; 5751544Seschrock spa_error_entry_t *sb = (spa_error_entry_t *)b; 5761544Seschrock int ret; 5771544Seschrock 5781544Seschrock ret = bcmp(&sa->se_bookmark, &sb->se_bookmark, 5791544Seschrock sizeof (zbookmark_t)); 5801544Seschrock 5811544Seschrock if (ret < 0) 5821544Seschrock return (-1); 5831544Seschrock else if (ret > 0) 5841544Seschrock return (1); 5851544Seschrock else 5861544Seschrock return (0); 5871544Seschrock } 5881544Seschrock 5891544Seschrock /* 5901544Seschrock * Utility function which retrieves copies of the current logs and 5911544Seschrock * re-initializes them in the process. 
5921544Seschrock */ 5931544Seschrock void 5941544Seschrock spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub) 5951544Seschrock { 5961544Seschrock ASSERT(MUTEX_HELD(&spa->spa_errlist_lock)); 5971544Seschrock 5981544Seschrock bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t)); 5991544Seschrock bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t)); 6001544Seschrock 6011544Seschrock avl_create(&spa->spa_errlist_scrub, 6021544Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 6031544Seschrock offsetof(spa_error_entry_t, se_avl)); 6041544Seschrock avl_create(&spa->spa_errlist_last, 6051544Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 6061544Seschrock offsetof(spa_error_entry_t, se_avl)); 6071544Seschrock } 6081544Seschrock 60911173SJonathan.Adams@Sun.COM static taskq_t * 61011173SJonathan.Adams@Sun.COM spa_taskq_create(spa_t *spa, const char *name, enum zti_modes mode, 61111173SJonathan.Adams@Sun.COM uint_t value) 61211173SJonathan.Adams@Sun.COM { 61311173SJonathan.Adams@Sun.COM uint_t flags = TASKQ_PREPOPULATE; 61411173SJonathan.Adams@Sun.COM boolean_t batch = B_FALSE; 61511173SJonathan.Adams@Sun.COM 61611173SJonathan.Adams@Sun.COM switch (mode) { 61711173SJonathan.Adams@Sun.COM case zti_mode_null: 61811173SJonathan.Adams@Sun.COM return (NULL); /* no taskq needed */ 61911173SJonathan.Adams@Sun.COM 62011173SJonathan.Adams@Sun.COM case zti_mode_fixed: 62111173SJonathan.Adams@Sun.COM ASSERT3U(value, >=, 1); 62211173SJonathan.Adams@Sun.COM value = MAX(value, 1); 62311173SJonathan.Adams@Sun.COM break; 62411173SJonathan.Adams@Sun.COM 62511173SJonathan.Adams@Sun.COM case zti_mode_batch: 62611173SJonathan.Adams@Sun.COM batch = B_TRUE; 62711173SJonathan.Adams@Sun.COM flags |= TASKQ_THREADS_CPU_PCT; 62811173SJonathan.Adams@Sun.COM value = zio_taskq_batch_pct; 62911173SJonathan.Adams@Sun.COM break; 63011173SJonathan.Adams@Sun.COM 63111173SJonathan.Adams@Sun.COM case zti_mode_online_percent: 63211173SJonathan.Adams@Sun.COM 
flags |= TASKQ_THREADS_CPU_PCT; 63311173SJonathan.Adams@Sun.COM break; 63411173SJonathan.Adams@Sun.COM 63511173SJonathan.Adams@Sun.COM default: 63611173SJonathan.Adams@Sun.COM panic("unrecognized mode for %s taskq (%u:%u) in " 63711173SJonathan.Adams@Sun.COM "spa_activate()", 63811173SJonathan.Adams@Sun.COM name, mode, value); 63911173SJonathan.Adams@Sun.COM break; 64011173SJonathan.Adams@Sun.COM } 64111173SJonathan.Adams@Sun.COM 64211173SJonathan.Adams@Sun.COM if (zio_taskq_sysdc && spa->spa_proc != &p0) { 64311173SJonathan.Adams@Sun.COM if (batch) 64411173SJonathan.Adams@Sun.COM flags |= TASKQ_DC_BATCH; 64511173SJonathan.Adams@Sun.COM 64611173SJonathan.Adams@Sun.COM return (taskq_create_sysdc(name, value, 50, INT_MAX, 64711173SJonathan.Adams@Sun.COM spa->spa_proc, zio_taskq_basedc, flags)); 64811173SJonathan.Adams@Sun.COM } 64911173SJonathan.Adams@Sun.COM return (taskq_create_proc(name, value, maxclsyspri, 50, INT_MAX, 65011173SJonathan.Adams@Sun.COM spa->spa_proc, flags)); 65111173SJonathan.Adams@Sun.COM } 65211173SJonathan.Adams@Sun.COM 65311173SJonathan.Adams@Sun.COM static void 65411173SJonathan.Adams@Sun.COM spa_create_zio_taskqs(spa_t *spa) 65511173SJonathan.Adams@Sun.COM { 65611173SJonathan.Adams@Sun.COM for (int t = 0; t < ZIO_TYPES; t++) { 65711173SJonathan.Adams@Sun.COM for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { 65811173SJonathan.Adams@Sun.COM const zio_taskq_info_t *ztip = &zio_taskqs[t][q]; 65911173SJonathan.Adams@Sun.COM enum zti_modes mode = ztip->zti_mode; 66011173SJonathan.Adams@Sun.COM uint_t value = ztip->zti_value; 66111173SJonathan.Adams@Sun.COM char name[32]; 66211173SJonathan.Adams@Sun.COM 66311173SJonathan.Adams@Sun.COM (void) snprintf(name, sizeof (name), 66411173SJonathan.Adams@Sun.COM "%s_%s", zio_type_name[t], zio_taskq_types[q]); 66511173SJonathan.Adams@Sun.COM 66611173SJonathan.Adams@Sun.COM spa->spa_zio_taskq[t][q] = 66711173SJonathan.Adams@Sun.COM spa_taskq_create(spa, name, mode, value); 66811173SJonathan.Adams@Sun.COM } 
66911173SJonathan.Adams@Sun.COM } 67011173SJonathan.Adams@Sun.COM } 67111173SJonathan.Adams@Sun.COM 67211173SJonathan.Adams@Sun.COM #ifdef _KERNEL 67311173SJonathan.Adams@Sun.COM static void 67411173SJonathan.Adams@Sun.COM spa_thread(void *arg) 67511173SJonathan.Adams@Sun.COM { 67611173SJonathan.Adams@Sun.COM callb_cpr_t cprinfo; 67711173SJonathan.Adams@Sun.COM 67811173SJonathan.Adams@Sun.COM spa_t *spa = arg; 67911173SJonathan.Adams@Sun.COM user_t *pu = PTOU(curproc); 68011173SJonathan.Adams@Sun.COM 68111173SJonathan.Adams@Sun.COM CALLB_CPR_INIT(&cprinfo, &spa->spa_proc_lock, callb_generic_cpr, 68211173SJonathan.Adams@Sun.COM spa->spa_name); 68311173SJonathan.Adams@Sun.COM 68411173SJonathan.Adams@Sun.COM ASSERT(curproc != &p0); 68511173SJonathan.Adams@Sun.COM (void) snprintf(pu->u_psargs, sizeof (pu->u_psargs), 68611173SJonathan.Adams@Sun.COM "zpool-%s", spa->spa_name); 68711173SJonathan.Adams@Sun.COM (void) strlcpy(pu->u_comm, pu->u_psargs, sizeof (pu->u_comm)); 68811173SJonathan.Adams@Sun.COM 68911173SJonathan.Adams@Sun.COM /* bind this thread to the requested psrset */ 69011173SJonathan.Adams@Sun.COM if (zio_taskq_psrset_bind != PS_NONE) { 69111173SJonathan.Adams@Sun.COM pool_lock(); 69211173SJonathan.Adams@Sun.COM mutex_enter(&cpu_lock); 69311173SJonathan.Adams@Sun.COM mutex_enter(&pidlock); 69411173SJonathan.Adams@Sun.COM mutex_enter(&curproc->p_lock); 69511173SJonathan.Adams@Sun.COM 69611173SJonathan.Adams@Sun.COM if (cpupart_bind_thread(curthread, zio_taskq_psrset_bind, 69711173SJonathan.Adams@Sun.COM 0, NULL, NULL) == 0) { 69811173SJonathan.Adams@Sun.COM curthread->t_bind_pset = zio_taskq_psrset_bind; 69911173SJonathan.Adams@Sun.COM } else { 70011173SJonathan.Adams@Sun.COM cmn_err(CE_WARN, 70111173SJonathan.Adams@Sun.COM "Couldn't bind process for zfs pool \"%s\" to " 70211173SJonathan.Adams@Sun.COM "pset %d\n", spa->spa_name, zio_taskq_psrset_bind); 70311173SJonathan.Adams@Sun.COM } 70411173SJonathan.Adams@Sun.COM 70511173SJonathan.Adams@Sun.COM 
mutex_exit(&curproc->p_lock); 70611173SJonathan.Adams@Sun.COM mutex_exit(&pidlock); 70711173SJonathan.Adams@Sun.COM mutex_exit(&cpu_lock); 70811173SJonathan.Adams@Sun.COM pool_unlock(); 70911173SJonathan.Adams@Sun.COM } 71011173SJonathan.Adams@Sun.COM 71111173SJonathan.Adams@Sun.COM if (zio_taskq_sysdc) { 71211173SJonathan.Adams@Sun.COM sysdc_thread_enter(curthread, 100, 0); 71311173SJonathan.Adams@Sun.COM } 71411173SJonathan.Adams@Sun.COM 71511173SJonathan.Adams@Sun.COM spa->spa_proc = curproc; 71611173SJonathan.Adams@Sun.COM spa->spa_did = curthread->t_did; 71711173SJonathan.Adams@Sun.COM 71811173SJonathan.Adams@Sun.COM spa_create_zio_taskqs(spa); 71911173SJonathan.Adams@Sun.COM 72011173SJonathan.Adams@Sun.COM mutex_enter(&spa->spa_proc_lock); 72111173SJonathan.Adams@Sun.COM ASSERT(spa->spa_proc_state == SPA_PROC_CREATED); 72211173SJonathan.Adams@Sun.COM 72311173SJonathan.Adams@Sun.COM spa->spa_proc_state = SPA_PROC_ACTIVE; 72411173SJonathan.Adams@Sun.COM cv_broadcast(&spa->spa_proc_cv); 72511173SJonathan.Adams@Sun.COM 72611173SJonathan.Adams@Sun.COM CALLB_CPR_SAFE_BEGIN(&cprinfo); 72711173SJonathan.Adams@Sun.COM while (spa->spa_proc_state == SPA_PROC_ACTIVE) 72811173SJonathan.Adams@Sun.COM cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock); 72911173SJonathan.Adams@Sun.COM CALLB_CPR_SAFE_END(&cprinfo, &spa->spa_proc_lock); 73011173SJonathan.Adams@Sun.COM 73111173SJonathan.Adams@Sun.COM ASSERT(spa->spa_proc_state == SPA_PROC_DEACTIVATE); 73211173SJonathan.Adams@Sun.COM spa->spa_proc_state = SPA_PROC_GONE; 73311173SJonathan.Adams@Sun.COM spa->spa_proc = &p0; 73411173SJonathan.Adams@Sun.COM cv_broadcast(&spa->spa_proc_cv); 73511173SJonathan.Adams@Sun.COM CALLB_CPR_EXIT(&cprinfo); /* drops spa_proc_lock */ 73611173SJonathan.Adams@Sun.COM 73711173SJonathan.Adams@Sun.COM mutex_enter(&curproc->p_lock); 73811173SJonathan.Adams@Sun.COM lwp_exit(); 73911173SJonathan.Adams@Sun.COM } 74011173SJonathan.Adams@Sun.COM #endif 74111173SJonathan.Adams@Sun.COM 742789Sahrens /* 
743789Sahrens * Activate an uninitialized pool. 744789Sahrens */ 745789Sahrens static void 7468241SJeff.Bonwick@Sun.COM spa_activate(spa_t *spa, int mode) 747789Sahrens { 748789Sahrens ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); 749789Sahrens 750789Sahrens spa->spa_state = POOL_STATE_ACTIVE; 7518241SJeff.Bonwick@Sun.COM spa->spa_mode = mode; 752789Sahrens 75310594SGeorge.Wilson@Sun.COM spa->spa_normal_class = metaslab_class_create(spa, zfs_metaslab_ops); 75410594SGeorge.Wilson@Sun.COM spa->spa_log_class = metaslab_class_create(spa, zfs_metaslab_ops); 755789Sahrens 75611173SJonathan.Adams@Sun.COM /* Try to create a covering process */ 75711173SJonathan.Adams@Sun.COM mutex_enter(&spa->spa_proc_lock); 75811173SJonathan.Adams@Sun.COM ASSERT(spa->spa_proc_state == SPA_PROC_NONE); 75911173SJonathan.Adams@Sun.COM ASSERT(spa->spa_proc == &p0); 76011173SJonathan.Adams@Sun.COM spa->spa_did = 0; 76111173SJonathan.Adams@Sun.COM 76211173SJonathan.Adams@Sun.COM /* Only create a process if we're going to be around a while. 
*/ 76311173SJonathan.Adams@Sun.COM if (spa_create_process && strcmp(spa->spa_name, TRYIMPORT_NAME) != 0) { 76411173SJonathan.Adams@Sun.COM if (newproc(spa_thread, (caddr_t)spa, syscid, maxclsyspri, 76511173SJonathan.Adams@Sun.COM NULL, 0) == 0) { 76611173SJonathan.Adams@Sun.COM spa->spa_proc_state = SPA_PROC_CREATED; 76711173SJonathan.Adams@Sun.COM while (spa->spa_proc_state == SPA_PROC_CREATED) { 76811173SJonathan.Adams@Sun.COM cv_wait(&spa->spa_proc_cv, 76911173SJonathan.Adams@Sun.COM &spa->spa_proc_lock); 7709515SJonathan.Adams@Sun.COM } 77111173SJonathan.Adams@Sun.COM ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE); 77211173SJonathan.Adams@Sun.COM ASSERT(spa->spa_proc != &p0); 77311173SJonathan.Adams@Sun.COM ASSERT(spa->spa_did != 0); 77411173SJonathan.Adams@Sun.COM } else { 77511173SJonathan.Adams@Sun.COM #ifdef _KERNEL 77611173SJonathan.Adams@Sun.COM cmn_err(CE_WARN, 77711173SJonathan.Adams@Sun.COM "Couldn't create process for zfs pool \"%s\"\n", 77811173SJonathan.Adams@Sun.COM spa->spa_name); 77911173SJonathan.Adams@Sun.COM #endif 7807754SJeff.Bonwick@Sun.COM } 781789Sahrens } 78211173SJonathan.Adams@Sun.COM mutex_exit(&spa->spa_proc_lock); 78311173SJonathan.Adams@Sun.COM 78411173SJonathan.Adams@Sun.COM /* If we didn't create a process, we need to create our taskqs. 
*/ 78511173SJonathan.Adams@Sun.COM if (spa->spa_proc == &p0) { 78611173SJonathan.Adams@Sun.COM spa_create_zio_taskqs(spa); 78711173SJonathan.Adams@Sun.COM } 788789Sahrens 7897754SJeff.Bonwick@Sun.COM list_create(&spa->spa_config_dirty_list, sizeof (vdev_t), 7907754SJeff.Bonwick@Sun.COM offsetof(vdev_t, vdev_config_dirty_node)); 7917754SJeff.Bonwick@Sun.COM list_create(&spa->spa_state_dirty_list, sizeof (vdev_t), 7927754SJeff.Bonwick@Sun.COM offsetof(vdev_t, vdev_state_dirty_node)); 793789Sahrens 794789Sahrens txg_list_create(&spa->spa_vdev_txg_list, 795789Sahrens offsetof(struct vdev, vdev_txg_node)); 7961544Seschrock 7971544Seschrock avl_create(&spa->spa_errlist_scrub, 7981544Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 7991544Seschrock offsetof(spa_error_entry_t, se_avl)); 8001544Seschrock avl_create(&spa->spa_errlist_last, 8011544Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 8021544Seschrock offsetof(spa_error_entry_t, se_avl)); 803789Sahrens } 804789Sahrens 805789Sahrens /* 806789Sahrens * Opposite of spa_activate(). 
807789Sahrens */ 808789Sahrens static void 809789Sahrens spa_deactivate(spa_t *spa) 810789Sahrens { 811789Sahrens ASSERT(spa->spa_sync_on == B_FALSE); 812789Sahrens ASSERT(spa->spa_dsl_pool == NULL); 813789Sahrens ASSERT(spa->spa_root_vdev == NULL); 8149630SJeff.Bonwick@Sun.COM ASSERT(spa->spa_async_zio_root == NULL); 815789Sahrens ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED); 816789Sahrens 817789Sahrens txg_list_destroy(&spa->spa_vdev_txg_list); 818789Sahrens 8197754SJeff.Bonwick@Sun.COM list_destroy(&spa->spa_config_dirty_list); 8207754SJeff.Bonwick@Sun.COM list_destroy(&spa->spa_state_dirty_list); 8217754SJeff.Bonwick@Sun.COM 8227754SJeff.Bonwick@Sun.COM for (int t = 0; t < ZIO_TYPES; t++) { 8237754SJeff.Bonwick@Sun.COM for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { 82411146SGeorge.Wilson@Sun.COM if (spa->spa_zio_taskq[t][q] != NULL) 82511146SGeorge.Wilson@Sun.COM taskq_destroy(spa->spa_zio_taskq[t][q]); 8267754SJeff.Bonwick@Sun.COM spa->spa_zio_taskq[t][q] = NULL; 8277754SJeff.Bonwick@Sun.COM } 828789Sahrens } 829789Sahrens 830789Sahrens metaslab_class_destroy(spa->spa_normal_class); 831789Sahrens spa->spa_normal_class = NULL; 832789Sahrens 8334527Sperrin metaslab_class_destroy(spa->spa_log_class); 8344527Sperrin spa->spa_log_class = NULL; 8354527Sperrin 8361544Seschrock /* 8371544Seschrock * If this was part of an import or the open otherwise failed, we may 8381544Seschrock * still have errors left in the queues. Empty them just in case. 
8391544Seschrock */ 8401544Seschrock spa_errlog_drain(spa); 8411544Seschrock 8421544Seschrock avl_destroy(&spa->spa_errlist_scrub); 8431544Seschrock avl_destroy(&spa->spa_errlist_last); 8441544Seschrock 845789Sahrens spa->spa_state = POOL_STATE_UNINITIALIZED; 84611173SJonathan.Adams@Sun.COM 84711173SJonathan.Adams@Sun.COM mutex_enter(&spa->spa_proc_lock); 84811173SJonathan.Adams@Sun.COM if (spa->spa_proc_state != SPA_PROC_NONE) { 84911173SJonathan.Adams@Sun.COM ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE); 85011173SJonathan.Adams@Sun.COM spa->spa_proc_state = SPA_PROC_DEACTIVATE; 85111173SJonathan.Adams@Sun.COM cv_broadcast(&spa->spa_proc_cv); 85211173SJonathan.Adams@Sun.COM while (spa->spa_proc_state == SPA_PROC_DEACTIVATE) { 85311173SJonathan.Adams@Sun.COM ASSERT(spa->spa_proc != &p0); 85411173SJonathan.Adams@Sun.COM cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock); 85511173SJonathan.Adams@Sun.COM } 85611173SJonathan.Adams@Sun.COM ASSERT(spa->spa_proc_state == SPA_PROC_GONE); 85711173SJonathan.Adams@Sun.COM spa->spa_proc_state = SPA_PROC_NONE; 85811173SJonathan.Adams@Sun.COM } 85911173SJonathan.Adams@Sun.COM ASSERT(spa->spa_proc == &p0); 86011173SJonathan.Adams@Sun.COM mutex_exit(&spa->spa_proc_lock); 86111173SJonathan.Adams@Sun.COM 86211173SJonathan.Adams@Sun.COM /* 86311173SJonathan.Adams@Sun.COM * We want to make sure spa_thread() has actually exited the ZFS 86411173SJonathan.Adams@Sun.COM * module, so that the module can't be unloaded out from underneath 86511173SJonathan.Adams@Sun.COM * it. 86611173SJonathan.Adams@Sun.COM */ 86711173SJonathan.Adams@Sun.COM if (spa->spa_did != 0) { 86811173SJonathan.Adams@Sun.COM thread_join(spa->spa_did); 86911173SJonathan.Adams@Sun.COM spa->spa_did = 0; 87011173SJonathan.Adams@Sun.COM } 871789Sahrens } 872789Sahrens 873789Sahrens /* 874789Sahrens * Verify a pool configuration, and construct the vdev tree appropriately. 
This 875789Sahrens * will create all the necessary vdevs in the appropriate layout, with each vdev 876789Sahrens * in the CLOSED state. This will prep the pool before open/creation/import. 877789Sahrens * All vdev validation is done by the vdev_alloc() routine. 878789Sahrens */ 8792082Seschrock static int 8802082Seschrock spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, 8812082Seschrock uint_t id, int atype) 882789Sahrens { 883789Sahrens nvlist_t **child; 8849816SGeorge.Wilson@Sun.COM uint_t children; 8852082Seschrock int error; 8862082Seschrock 8872082Seschrock if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0) 8882082Seschrock return (error); 8892082Seschrock 8902082Seschrock if ((*vdp)->vdev_ops->vdev_op_leaf) 8912082Seschrock return (0); 892789Sahrens 8937754SJeff.Bonwick@Sun.COM error = nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 8947754SJeff.Bonwick@Sun.COM &child, &children); 8957754SJeff.Bonwick@Sun.COM 8967754SJeff.Bonwick@Sun.COM if (error == ENOENT) 8977754SJeff.Bonwick@Sun.COM return (0); 8987754SJeff.Bonwick@Sun.COM 8997754SJeff.Bonwick@Sun.COM if (error) { 9002082Seschrock vdev_free(*vdp); 9012082Seschrock *vdp = NULL; 9022082Seschrock return (EINVAL); 903789Sahrens } 904789Sahrens 9059816SGeorge.Wilson@Sun.COM for (int c = 0; c < children; c++) { 9062082Seschrock vdev_t *vd; 9072082Seschrock if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c, 9082082Seschrock atype)) != 0) { 9092082Seschrock vdev_free(*vdp); 9102082Seschrock *vdp = NULL; 9112082Seschrock return (error); 912789Sahrens } 913789Sahrens } 914789Sahrens 9152082Seschrock ASSERT(*vdp != NULL); 9162082Seschrock 9172082Seschrock return (0); 918789Sahrens } 919789Sahrens 920789Sahrens /* 921789Sahrens * Opposite of spa_load(). 
922789Sahrens */ 923789Sahrens static void 924789Sahrens spa_unload(spa_t *spa) 925789Sahrens { 9262082Seschrock int i; 9272082Seschrock 9287754SJeff.Bonwick@Sun.COM ASSERT(MUTEX_HELD(&spa_namespace_lock)); 9297754SJeff.Bonwick@Sun.COM 930789Sahrens /* 9311544Seschrock * Stop async tasks. 9321544Seschrock */ 9331544Seschrock spa_async_suspend(spa); 9341544Seschrock 9351544Seschrock /* 936789Sahrens * Stop syncing. 937789Sahrens */ 938789Sahrens if (spa->spa_sync_on) { 939789Sahrens txg_sync_stop(spa->spa_dsl_pool); 940789Sahrens spa->spa_sync_on = B_FALSE; 941789Sahrens } 942789Sahrens 943789Sahrens /* 9447754SJeff.Bonwick@Sun.COM * Wait for any outstanding async I/O to complete. 945789Sahrens */ 9469234SGeorge.Wilson@Sun.COM if (spa->spa_async_zio_root != NULL) { 9479234SGeorge.Wilson@Sun.COM (void) zio_wait(spa->spa_async_zio_root); 9489234SGeorge.Wilson@Sun.COM spa->spa_async_zio_root = NULL; 9499234SGeorge.Wilson@Sun.COM } 950789Sahrens 95112470SMatthew.Ahrens@Sun.COM bpobj_close(&spa->spa_deferred_bpobj); 95212470SMatthew.Ahrens@Sun.COM 953789Sahrens /* 954789Sahrens * Close the dsl pool. 955789Sahrens */ 956789Sahrens if (spa->spa_dsl_pool) { 957789Sahrens dsl_pool_close(spa->spa_dsl_pool); 958789Sahrens spa->spa_dsl_pool = NULL; 95911619SGeorge.Wilson@Sun.COM spa->spa_meta_objset = NULL; 960789Sahrens } 961789Sahrens 96210922SJeff.Bonwick@Sun.COM ddt_unload(spa); 96310922SJeff.Bonwick@Sun.COM 9648241SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 9658241SJeff.Bonwick@Sun.COM 9668241SJeff.Bonwick@Sun.COM /* 9678241SJeff.Bonwick@Sun.COM * Drop and purge level 2 cache 9688241SJeff.Bonwick@Sun.COM */ 9698241SJeff.Bonwick@Sun.COM spa_l2cache_drop(spa); 9708241SJeff.Bonwick@Sun.COM 971789Sahrens /* 972789Sahrens * Close all vdevs. 
973789Sahrens */ 9741585Sbonwick if (spa->spa_root_vdev) 975789Sahrens vdev_free(spa->spa_root_vdev); 9761585Sbonwick ASSERT(spa->spa_root_vdev == NULL); 9771544Seschrock 9785450Sbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) 9795450Sbrendan vdev_free(spa->spa_spares.sav_vdevs[i]); 9805450Sbrendan if (spa->spa_spares.sav_vdevs) { 9815450Sbrendan kmem_free(spa->spa_spares.sav_vdevs, 9825450Sbrendan spa->spa_spares.sav_count * sizeof (void *)); 9835450Sbrendan spa->spa_spares.sav_vdevs = NULL; 9845450Sbrendan } 9855450Sbrendan if (spa->spa_spares.sav_config) { 9865450Sbrendan nvlist_free(spa->spa_spares.sav_config); 9875450Sbrendan spa->spa_spares.sav_config = NULL; 9882082Seschrock } 9897377SEric.Schrock@Sun.COM spa->spa_spares.sav_count = 0; 9905450Sbrendan 9915450Sbrendan for (i = 0; i < spa->spa_l2cache.sav_count; i++) 9925450Sbrendan vdev_free(spa->spa_l2cache.sav_vdevs[i]); 9935450Sbrendan if (spa->spa_l2cache.sav_vdevs) { 9945450Sbrendan kmem_free(spa->spa_l2cache.sav_vdevs, 9955450Sbrendan spa->spa_l2cache.sav_count * sizeof (void *)); 9965450Sbrendan spa->spa_l2cache.sav_vdevs = NULL; 9975450Sbrendan } 9985450Sbrendan if (spa->spa_l2cache.sav_config) { 9995450Sbrendan nvlist_free(spa->spa_l2cache.sav_config); 10005450Sbrendan spa->spa_l2cache.sav_config = NULL; 10012082Seschrock } 10027377SEric.Schrock@Sun.COM spa->spa_l2cache.sav_count = 0; 10032082Seschrock 10041544Seschrock spa->spa_async_suspended = 0; 10058241SJeff.Bonwick@Sun.COM 10068241SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 1007789Sahrens } 1008789Sahrens 1009789Sahrens /* 10102082Seschrock * Load (or re-load) the current list of vdevs describing the active spares for 10112082Seschrock * this pool. When this is called, we have some form of basic information in 10125450Sbrendan * 'spa_spares.sav_config'. We parse this into vdevs, try to open them, and 10135450Sbrendan * then re-generate a more complete list including status information. 
10142082Seschrock */ 10152082Seschrock static void 10162082Seschrock spa_load_spares(spa_t *spa) 10172082Seschrock { 10182082Seschrock nvlist_t **spares; 10192082Seschrock uint_t nspares; 10202082Seschrock int i; 10213377Seschrock vdev_t *vd, *tvd; 10222082Seschrock 10237754SJeff.Bonwick@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 10247754SJeff.Bonwick@Sun.COM 10252082Seschrock /* 10262082Seschrock * First, close and free any existing spare vdevs. 10272082Seschrock */ 10285450Sbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) { 10295450Sbrendan vd = spa->spa_spares.sav_vdevs[i]; 10303377Seschrock 10313377Seschrock /* Undo the call to spa_activate() below */ 10326643Seschrock if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, 10336643Seschrock B_FALSE)) != NULL && tvd->vdev_isspare) 10343377Seschrock spa_spare_remove(tvd); 10353377Seschrock vdev_close(vd); 10363377Seschrock vdev_free(vd); 10372082Seschrock } 10383377Seschrock 10395450Sbrendan if (spa->spa_spares.sav_vdevs) 10405450Sbrendan kmem_free(spa->spa_spares.sav_vdevs, 10415450Sbrendan spa->spa_spares.sav_count * sizeof (void *)); 10425450Sbrendan 10435450Sbrendan if (spa->spa_spares.sav_config == NULL) 10442082Seschrock nspares = 0; 10452082Seschrock else 10465450Sbrendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 10472082Seschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 10482082Seschrock 10495450Sbrendan spa->spa_spares.sav_count = (int)nspares; 10505450Sbrendan spa->spa_spares.sav_vdevs = NULL; 10512082Seschrock 10522082Seschrock if (nspares == 0) 10532082Seschrock return; 10542082Seschrock 10552082Seschrock /* 10562082Seschrock * Construct the array of vdevs, opening them to get status in the 10573377Seschrock * process. 
For each spare, there is potentially two different vdev_t 10583377Seschrock * structures associated with it: one in the list of spares (used only 10593377Seschrock * for basic validation purposes) and one in the active vdev 10603377Seschrock * configuration (if it's spared in). During this phase we open and 10613377Seschrock * validate each vdev on the spare list. If the vdev also exists in the 10623377Seschrock * active configuration, then we also mark this vdev as an active spare. 10632082Seschrock */ 10645450Sbrendan spa->spa_spares.sav_vdevs = kmem_alloc(nspares * sizeof (void *), 10655450Sbrendan KM_SLEEP); 10665450Sbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) { 10672082Seschrock VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0, 10682082Seschrock VDEV_ALLOC_SPARE) == 0); 10692082Seschrock ASSERT(vd != NULL); 10702082Seschrock 10715450Sbrendan spa->spa_spares.sav_vdevs[i] = vd; 10722082Seschrock 10736643Seschrock if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, 10746643Seschrock B_FALSE)) != NULL) { 10753377Seschrock if (!tvd->vdev_isspare) 10763377Seschrock spa_spare_add(tvd); 10773377Seschrock 10783377Seschrock /* 10793377Seschrock * We only mark the spare active if we were successfully 10803377Seschrock * able to load the vdev. Otherwise, importing a pool 10813377Seschrock * with a bad active spare would result in strange 10823377Seschrock * behavior, because multiple pool would think the spare 10833377Seschrock * is actively in use. 10843377Seschrock * 10853377Seschrock * There is a vulnerability here to an equally bizarre 10863377Seschrock * circumstance, where a dead active spare is later 10873377Seschrock * brought back to life (onlined or otherwise). Given 10883377Seschrock * the rarity of this scenario, and the extra complexity 10893377Seschrock * it adds, we ignore the possibility. 
10903377Seschrock */ 10913377Seschrock if (!vdev_is_dead(tvd)) 10923377Seschrock spa_spare_activate(tvd); 10933377Seschrock } 10943377Seschrock 10957754SJeff.Bonwick@Sun.COM vd->vdev_top = vd; 10969425SEric.Schrock@Sun.COM vd->vdev_aux = &spa->spa_spares; 10977754SJeff.Bonwick@Sun.COM 10982082Seschrock if (vdev_open(vd) != 0) 10992082Seschrock continue; 11002082Seschrock 11015450Sbrendan if (vdev_validate_aux(vd) == 0) 11025450Sbrendan spa_spare_add(vd); 11032082Seschrock } 11042082Seschrock 11052082Seschrock /* 11062082Seschrock * Recompute the stashed list of spares, with status information 11072082Seschrock * this time. 11082082Seschrock */ 11095450Sbrendan VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, 11102082Seschrock DATA_TYPE_NVLIST_ARRAY) == 0); 11112082Seschrock 11125450Sbrendan spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *), 11135450Sbrendan KM_SLEEP); 11145450Sbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) 11155450Sbrendan spares[i] = vdev_config_generate(spa, 111612296SLin.Ling@Sun.COM spa->spa_spares.sav_vdevs[i], B_TRUE, VDEV_CONFIG_SPARE); 11175450Sbrendan VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, 11185450Sbrendan ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0); 11195450Sbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) 11202082Seschrock nvlist_free(spares[i]); 11215450Sbrendan kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *)); 11225450Sbrendan } 11235450Sbrendan 11245450Sbrendan /* 11255450Sbrendan * Load (or re-load) the current list of vdevs describing the active l2cache for 11265450Sbrendan * this pool. When this is called, we have some form of basic information in 11275450Sbrendan * 'spa_l2cache.sav_config'. We parse this into vdevs, try to open them, and 11285450Sbrendan * then re-generate a more complete list including status information. 
11295450Sbrendan * Devices which are already active have their details maintained, and are 11305450Sbrendan * not re-opened. 11315450Sbrendan */ 11325450Sbrendan static void 11335450Sbrendan spa_load_l2cache(spa_t *spa) 11345450Sbrendan { 11355450Sbrendan nvlist_t **l2cache; 11365450Sbrendan uint_t nl2cache; 11375450Sbrendan int i, j, oldnvdevs; 11389816SGeorge.Wilson@Sun.COM uint64_t guid; 11395450Sbrendan vdev_t *vd, **oldvdevs, **newvdevs; 11405450Sbrendan spa_aux_vdev_t *sav = &spa->spa_l2cache; 11415450Sbrendan 11427754SJeff.Bonwick@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 11437754SJeff.Bonwick@Sun.COM 11445450Sbrendan if (sav->sav_config != NULL) { 11455450Sbrendan VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, 11465450Sbrendan ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 11475450Sbrendan newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP); 11485450Sbrendan } else { 11495450Sbrendan nl2cache = 0; 11505450Sbrendan } 11515450Sbrendan 11525450Sbrendan oldvdevs = sav->sav_vdevs; 11535450Sbrendan oldnvdevs = sav->sav_count; 11545450Sbrendan sav->sav_vdevs = NULL; 11555450Sbrendan sav->sav_count = 0; 11565450Sbrendan 11575450Sbrendan /* 11585450Sbrendan * Process new nvlist of vdevs. 11595450Sbrendan */ 11605450Sbrendan for (i = 0; i < nl2cache; i++) { 11615450Sbrendan VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID, 11625450Sbrendan &guid) == 0); 11635450Sbrendan 11645450Sbrendan newvdevs[i] = NULL; 11655450Sbrendan for (j = 0; j < oldnvdevs; j++) { 11665450Sbrendan vd = oldvdevs[j]; 11675450Sbrendan if (vd != NULL && guid == vd->vdev_guid) { 11685450Sbrendan /* 11695450Sbrendan * Retain previous vdev for add/remove ops. 
11705450Sbrendan */ 11715450Sbrendan newvdevs[i] = vd; 11725450Sbrendan oldvdevs[j] = NULL; 11735450Sbrendan break; 11745450Sbrendan } 11755450Sbrendan } 11765450Sbrendan 11775450Sbrendan if (newvdevs[i] == NULL) { 11785450Sbrendan /* 11795450Sbrendan * Create new vdev 11805450Sbrendan */ 11815450Sbrendan VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0, 11825450Sbrendan VDEV_ALLOC_L2CACHE) == 0); 11835450Sbrendan ASSERT(vd != NULL); 11845450Sbrendan newvdevs[i] = vd; 11855450Sbrendan 11865450Sbrendan /* 11875450Sbrendan * Commit this vdev as an l2cache device, 11885450Sbrendan * even if it fails to open. 11895450Sbrendan */ 11905450Sbrendan spa_l2cache_add(vd); 11915450Sbrendan 11926643Seschrock vd->vdev_top = vd; 11936643Seschrock vd->vdev_aux = sav; 11946643Seschrock 11956643Seschrock spa_l2cache_activate(vd); 11966643Seschrock 11975450Sbrendan if (vdev_open(vd) != 0) 11985450Sbrendan continue; 11995450Sbrendan 12005450Sbrendan (void) vdev_validate_aux(vd); 12015450Sbrendan 12029816SGeorge.Wilson@Sun.COM if (!vdev_is_dead(vd)) 12039816SGeorge.Wilson@Sun.COM l2arc_add_vdev(spa, vd); 12045450Sbrendan } 12055450Sbrendan } 12065450Sbrendan 12075450Sbrendan /* 12085450Sbrendan * Purge vdevs that were dropped 12095450Sbrendan */ 12105450Sbrendan for (i = 0; i < oldnvdevs; i++) { 12115450Sbrendan uint64_t pool; 12125450Sbrendan 12135450Sbrendan vd = oldvdevs[i]; 12145450Sbrendan if (vd != NULL) { 12158241SJeff.Bonwick@Sun.COM if (spa_l2cache_exists(vd->vdev_guid, &pool) && 12168241SJeff.Bonwick@Sun.COM pool != 0ULL && l2arc_vdev_present(vd)) 12175450Sbrendan l2arc_remove_vdev(vd); 12185450Sbrendan (void) vdev_close(vd); 12195450Sbrendan spa_l2cache_remove(vd); 12205450Sbrendan } 12215450Sbrendan } 12225450Sbrendan 12235450Sbrendan if (oldvdevs) 12245450Sbrendan kmem_free(oldvdevs, oldnvdevs * sizeof (void *)); 12255450Sbrendan 12265450Sbrendan if (sav->sav_config == NULL) 12275450Sbrendan goto out; 12285450Sbrendan 12295450Sbrendan sav->sav_vdevs = newvdevs; 
12305450Sbrendan sav->sav_count = (int)nl2cache; 12315450Sbrendan 12325450Sbrendan /* 12335450Sbrendan * Recompute the stashed list of l2cache devices, with status 12345450Sbrendan * information this time. 12355450Sbrendan */ 12365450Sbrendan VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE, 12375450Sbrendan DATA_TYPE_NVLIST_ARRAY) == 0); 12385450Sbrendan 12395450Sbrendan l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); 12405450Sbrendan for (i = 0; i < sav->sav_count; i++) 12415450Sbrendan l2cache[i] = vdev_config_generate(spa, 124212296SLin.Ling@Sun.COM sav->sav_vdevs[i], B_TRUE, VDEV_CONFIG_L2CACHE); 12435450Sbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, 12445450Sbrendan ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0); 12455450Sbrendan out: 12465450Sbrendan for (i = 0; i < sav->sav_count; i++) 12475450Sbrendan nvlist_free(l2cache[i]); 12485450Sbrendan if (sav->sav_count) 12495450Sbrendan kmem_free(l2cache, sav->sav_count * sizeof (void *)); 12502082Seschrock } 12512082Seschrock 12522082Seschrock static int 12532082Seschrock load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value) 12542082Seschrock { 12552082Seschrock dmu_buf_t *db; 12562082Seschrock char *packed = NULL; 12572082Seschrock size_t nvsize = 0; 12582082Seschrock int error; 12592082Seschrock *value = NULL; 12602082Seschrock 12612082Seschrock VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); 12622082Seschrock nvsize = *(uint64_t *)db->db_data; 12632082Seschrock dmu_buf_rele(db, FTAG); 12642082Seschrock 12652082Seschrock packed = kmem_alloc(nvsize, KM_SLEEP); 12669512SNeil.Perrin@Sun.COM error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed, 12679512SNeil.Perrin@Sun.COM DMU_READ_PREFETCH); 12682082Seschrock if (error == 0) 12692082Seschrock error = nvlist_unpack(packed, nvsize, value, 0); 12702082Seschrock kmem_free(packed, nvsize); 12712082Seschrock 12722082Seschrock return (error); 12732082Seschrock } 12742082Seschrock 12752082Seschrock 
/* 12764451Seschrock * Checks to see if the given vdev could not be opened, in which case we post a 12774451Seschrock * sysevent to notify the autoreplace code that the device has been removed. 12784451Seschrock */ 12794451Seschrock static void 12804451Seschrock spa_check_removed(vdev_t *vd) 12814451Seschrock { 12829816SGeorge.Wilson@Sun.COM for (int c = 0; c < vd->vdev_children; c++) 12834451Seschrock spa_check_removed(vd->vdev_child[c]); 12844451Seschrock 12854451Seschrock if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) { 12864451Seschrock zfs_post_autoreplace(vd->vdev_spa, vd); 12874451Seschrock spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK); 12884451Seschrock } 12894451Seschrock } 12904451Seschrock 12914451Seschrock /* 129212949SGeorge.Wilson@Sun.COM * Validate the current config against the MOS config 12939701SGeorge.Wilson@Sun.COM */ 129412949SGeorge.Wilson@Sun.COM static boolean_t 129512949SGeorge.Wilson@Sun.COM spa_config_valid(spa_t *spa, nvlist_t *config) 12969701SGeorge.Wilson@Sun.COM { 129712949SGeorge.Wilson@Sun.COM vdev_t *mrvd, *rvd = spa->spa_root_vdev; 129812949SGeorge.Wilson@Sun.COM nvlist_t *nv; 129912949SGeorge.Wilson@Sun.COM 130012949SGeorge.Wilson@Sun.COM VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nv) == 0); 130112949SGeorge.Wilson@Sun.COM 130212949SGeorge.Wilson@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 130312949SGeorge.Wilson@Sun.COM VERIFY(spa_config_parse(spa, &mrvd, nv, NULL, 0, VDEV_ALLOC_LOAD) == 0); 130412949SGeorge.Wilson@Sun.COM 130512949SGeorge.Wilson@Sun.COM ASSERT3U(rvd->vdev_children, ==, mrvd->vdev_children); 130612949SGeorge.Wilson@Sun.COM 130712949SGeorge.Wilson@Sun.COM /* 130812949SGeorge.Wilson@Sun.COM * If we're doing a normal import, then build up any additional 130912949SGeorge.Wilson@Sun.COM * diagnostic information about missing devices in this config. 131012949SGeorge.Wilson@Sun.COM * We'll pass this up to the user for further processing. 
131112949SGeorge.Wilson@Sun.COM */ 131212949SGeorge.Wilson@Sun.COM if (!(spa->spa_import_flags & ZFS_IMPORT_MISSING_LOG)) { 131312949SGeorge.Wilson@Sun.COM nvlist_t **child, *nv; 131412949SGeorge.Wilson@Sun.COM uint64_t idx = 0; 131512949SGeorge.Wilson@Sun.COM 131612949SGeorge.Wilson@Sun.COM child = kmem_alloc(rvd->vdev_children * sizeof (nvlist_t **), 131712949SGeorge.Wilson@Sun.COM KM_SLEEP); 131812949SGeorge.Wilson@Sun.COM VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); 131912949SGeorge.Wilson@Sun.COM 132012949SGeorge.Wilson@Sun.COM for (int c = 0; c < rvd->vdev_children; c++) { 132112949SGeorge.Wilson@Sun.COM vdev_t *tvd = rvd->vdev_child[c]; 132212949SGeorge.Wilson@Sun.COM vdev_t *mtvd = mrvd->vdev_child[c]; 132312949SGeorge.Wilson@Sun.COM 132412949SGeorge.Wilson@Sun.COM if (tvd->vdev_ops == &vdev_missing_ops && 132512949SGeorge.Wilson@Sun.COM mtvd->vdev_ops != &vdev_missing_ops && 132612949SGeorge.Wilson@Sun.COM mtvd->vdev_islog) 132712949SGeorge.Wilson@Sun.COM child[idx++] = vdev_config_generate(spa, mtvd, 132812949SGeorge.Wilson@Sun.COM B_FALSE, 0); 132912949SGeorge.Wilson@Sun.COM } 133012949SGeorge.Wilson@Sun.COM 133112949SGeorge.Wilson@Sun.COM if (idx) { 133212949SGeorge.Wilson@Sun.COM VERIFY(nvlist_add_nvlist_array(nv, 133312949SGeorge.Wilson@Sun.COM ZPOOL_CONFIG_CHILDREN, child, idx) == 0); 133412949SGeorge.Wilson@Sun.COM VERIFY(nvlist_add_nvlist(spa->spa_load_info, 133512949SGeorge.Wilson@Sun.COM ZPOOL_CONFIG_MISSING_DEVICES, nv) == 0); 133612949SGeorge.Wilson@Sun.COM 133712949SGeorge.Wilson@Sun.COM for (int i = 0; i < idx; i++) 133812949SGeorge.Wilson@Sun.COM nvlist_free(child[i]); 133912949SGeorge.Wilson@Sun.COM } 134012949SGeorge.Wilson@Sun.COM nvlist_free(nv); 134112949SGeorge.Wilson@Sun.COM kmem_free(child, rvd->vdev_children * sizeof (char **)); 134212949SGeorge.Wilson@Sun.COM } 134310594SGeorge.Wilson@Sun.COM 134410594SGeorge.Wilson@Sun.COM /* 134512949SGeorge.Wilson@Sun.COM * Compare the root vdev tree with the information we have 
134612949SGeorge.Wilson@Sun.COM * from the MOS config (mrvd). Check each top-level vdev 134712949SGeorge.Wilson@Sun.COM * with the corresponding MOS config top-level (mtvd). 134810594SGeorge.Wilson@Sun.COM */ 134910594SGeorge.Wilson@Sun.COM for (int c = 0; c < rvd->vdev_children; c++) { 135012949SGeorge.Wilson@Sun.COM vdev_t *tvd = rvd->vdev_child[c]; 135112949SGeorge.Wilson@Sun.COM vdev_t *mtvd = mrvd->vdev_child[c]; 135212949SGeorge.Wilson@Sun.COM 135312949SGeorge.Wilson@Sun.COM /* 135412949SGeorge.Wilson@Sun.COM * Resolve any "missing" vdevs in the current configuration. 135512949SGeorge.Wilson@Sun.COM * If we find that the MOS config has more accurate information 135612949SGeorge.Wilson@Sun.COM * about the top-level vdev then use that vdev instead. 135712949SGeorge.Wilson@Sun.COM */ 135812949SGeorge.Wilson@Sun.COM if (tvd->vdev_ops == &vdev_missing_ops && 135912949SGeorge.Wilson@Sun.COM mtvd->vdev_ops != &vdev_missing_ops) { 136012949SGeorge.Wilson@Sun.COM 136112949SGeorge.Wilson@Sun.COM if (!(spa->spa_import_flags & ZFS_IMPORT_MISSING_LOG)) 136212949SGeorge.Wilson@Sun.COM continue; 136312949SGeorge.Wilson@Sun.COM 136412949SGeorge.Wilson@Sun.COM /* 136512949SGeorge.Wilson@Sun.COM * Device specific actions. 136612949SGeorge.Wilson@Sun.COM */ 136712949SGeorge.Wilson@Sun.COM if (mtvd->vdev_islog) { 136812949SGeorge.Wilson@Sun.COM spa_set_log_state(spa, SPA_LOG_CLEAR); 136912949SGeorge.Wilson@Sun.COM } else { 137012949SGeorge.Wilson@Sun.COM /* 137112949SGeorge.Wilson@Sun.COM * XXX - once we have 'readonly' pool 137212949SGeorge.Wilson@Sun.COM * support we should be able to handle 137312949SGeorge.Wilson@Sun.COM * missing data devices by transitioning 137412949SGeorge.Wilson@Sun.COM * the pool to readonly. 
137512949SGeorge.Wilson@Sun.COM */ 137612949SGeorge.Wilson@Sun.COM continue; 137712949SGeorge.Wilson@Sun.COM } 137812949SGeorge.Wilson@Sun.COM 137912949SGeorge.Wilson@Sun.COM /* 138012949SGeorge.Wilson@Sun.COM * Swap the missing vdev with the data we were 138112949SGeorge.Wilson@Sun.COM * able to obtain from the MOS config. 138212949SGeorge.Wilson@Sun.COM */ 138312949SGeorge.Wilson@Sun.COM vdev_remove_child(rvd, tvd); 138412949SGeorge.Wilson@Sun.COM vdev_remove_child(mrvd, mtvd); 138512949SGeorge.Wilson@Sun.COM 138612949SGeorge.Wilson@Sun.COM vdev_add_child(rvd, mtvd); 138712949SGeorge.Wilson@Sun.COM vdev_add_child(mrvd, tvd); 138812949SGeorge.Wilson@Sun.COM 138912949SGeorge.Wilson@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 139012949SGeorge.Wilson@Sun.COM vdev_load(mtvd); 139112949SGeorge.Wilson@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 139212949SGeorge.Wilson@Sun.COM 139312949SGeorge.Wilson@Sun.COM vdev_reopen(rvd); 139412949SGeorge.Wilson@Sun.COM } else if (mtvd->vdev_islog) { 139512949SGeorge.Wilson@Sun.COM /* 139612949SGeorge.Wilson@Sun.COM * Load the slog device's state from the MOS config 139712949SGeorge.Wilson@Sun.COM * since it's possible that the label does not 139812949SGeorge.Wilson@Sun.COM * contain the most up-to-date information. 139912949SGeorge.Wilson@Sun.COM */ 140012949SGeorge.Wilson@Sun.COM vdev_load_log_state(tvd, mtvd); 140112949SGeorge.Wilson@Sun.COM vdev_reopen(tvd); 140212949SGeorge.Wilson@Sun.COM } 14039701SGeorge.Wilson@Sun.COM } 140412949SGeorge.Wilson@Sun.COM vdev_free(mrvd); 140510594SGeorge.Wilson@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 140612949SGeorge.Wilson@Sun.COM 140712949SGeorge.Wilson@Sun.COM /* 140812949SGeorge.Wilson@Sun.COM * Ensure we were able to validate the config. 
140912949SGeorge.Wilson@Sun.COM */ 141012949SGeorge.Wilson@Sun.COM return (rvd->vdev_guid_sum == spa->spa_uberblock.ub_guid_sum); 14119701SGeorge.Wilson@Sun.COM } 14129701SGeorge.Wilson@Sun.COM 14139701SGeorge.Wilson@Sun.COM /* 14147294Sperrin * Check for missing log devices 14157294Sperrin */ 141612949SGeorge.Wilson@Sun.COM static int 14177294Sperrin spa_check_logs(spa_t *spa) 14187294Sperrin { 14197294Sperrin switch (spa->spa_log_state) { 14207294Sperrin case SPA_LOG_MISSING: 14217294Sperrin /* need to recheck in case slog has been restored */ 14227294Sperrin case SPA_LOG_UNKNOWN: 14237294Sperrin if (dmu_objset_find(spa->spa_name, zil_check_log_chain, NULL, 14247294Sperrin DS_FIND_CHILDREN)) { 142511422SMark.Musante@Sun.COM spa_set_log_state(spa, SPA_LOG_MISSING); 14267294Sperrin return (1); 14277294Sperrin } 14287294Sperrin break; 14297294Sperrin } 14307294Sperrin return (0); 14317294Sperrin } 14327294Sperrin 143311422SMark.Musante@Sun.COM static boolean_t 143411422SMark.Musante@Sun.COM spa_passivate_log(spa_t *spa) 143511422SMark.Musante@Sun.COM { 143611422SMark.Musante@Sun.COM vdev_t *rvd = spa->spa_root_vdev; 143711422SMark.Musante@Sun.COM boolean_t slog_found = B_FALSE; 143811422SMark.Musante@Sun.COM 143911422SMark.Musante@Sun.COM ASSERT(spa_config_held(spa, SCL_ALLOC, RW_WRITER)); 144011422SMark.Musante@Sun.COM 144111422SMark.Musante@Sun.COM if (!spa_has_slogs(spa)) 144211422SMark.Musante@Sun.COM return (B_FALSE); 144311422SMark.Musante@Sun.COM 144411422SMark.Musante@Sun.COM for (int c = 0; c < rvd->vdev_children; c++) { 144511422SMark.Musante@Sun.COM vdev_t *tvd = rvd->vdev_child[c]; 144611422SMark.Musante@Sun.COM metaslab_group_t *mg = tvd->vdev_mg; 144711422SMark.Musante@Sun.COM 144811422SMark.Musante@Sun.COM if (tvd->vdev_islog) { 144911422SMark.Musante@Sun.COM metaslab_group_passivate(mg); 145011422SMark.Musante@Sun.COM slog_found = B_TRUE; 145111422SMark.Musante@Sun.COM } 145211422SMark.Musante@Sun.COM } 145311422SMark.Musante@Sun.COM 
145411422SMark.Musante@Sun.COM return (slog_found); 145511422SMark.Musante@Sun.COM } 145611422SMark.Musante@Sun.COM 145711422SMark.Musante@Sun.COM static void 145811422SMark.Musante@Sun.COM spa_activate_log(spa_t *spa) 145911422SMark.Musante@Sun.COM { 146011422SMark.Musante@Sun.COM vdev_t *rvd = spa->spa_root_vdev; 146111422SMark.Musante@Sun.COM 146211422SMark.Musante@Sun.COM ASSERT(spa_config_held(spa, SCL_ALLOC, RW_WRITER)); 146311422SMark.Musante@Sun.COM 146411422SMark.Musante@Sun.COM for (int c = 0; c < rvd->vdev_children; c++) { 146511422SMark.Musante@Sun.COM vdev_t *tvd = rvd->vdev_child[c]; 146611422SMark.Musante@Sun.COM metaslab_group_t *mg = tvd->vdev_mg; 146711422SMark.Musante@Sun.COM 146811422SMark.Musante@Sun.COM if (tvd->vdev_islog) 146911422SMark.Musante@Sun.COM metaslab_group_activate(mg); 147011422SMark.Musante@Sun.COM } 147111422SMark.Musante@Sun.COM } 147211422SMark.Musante@Sun.COM 147311422SMark.Musante@Sun.COM int 147411422SMark.Musante@Sun.COM spa_offline_log(spa_t *spa) 147511422SMark.Musante@Sun.COM { 147611422SMark.Musante@Sun.COM int error = 0; 147711422SMark.Musante@Sun.COM 147811422SMark.Musante@Sun.COM if ((error = dmu_objset_find(spa_name(spa), zil_vdev_offline, 147911422SMark.Musante@Sun.COM NULL, DS_FIND_CHILDREN)) == 0) { 148011422SMark.Musante@Sun.COM 148111422SMark.Musante@Sun.COM /* 148211422SMark.Musante@Sun.COM * We successfully offlined the log device, sync out the 148311422SMark.Musante@Sun.COM * current txg so that the "stubby" block can be removed 148411422SMark.Musante@Sun.COM * by zil_sync(). 
148511422SMark.Musante@Sun.COM */ 148611422SMark.Musante@Sun.COM txg_wait_synced(spa->spa_dsl_pool, 0); 148711422SMark.Musante@Sun.COM } 148811422SMark.Musante@Sun.COM return (error); 148911422SMark.Musante@Sun.COM } 149011422SMark.Musante@Sun.COM 149110672SEric.Schrock@Sun.COM static void 149210672SEric.Schrock@Sun.COM spa_aux_check_removed(spa_aux_vdev_t *sav) 149310672SEric.Schrock@Sun.COM { 149410922SJeff.Bonwick@Sun.COM for (int i = 0; i < sav->sav_count; i++) 149510672SEric.Schrock@Sun.COM spa_check_removed(sav->sav_vdevs[i]); 149610672SEric.Schrock@Sun.COM } 149710672SEric.Schrock@Sun.COM 149810922SJeff.Bonwick@Sun.COM void 149910922SJeff.Bonwick@Sun.COM spa_claim_notify(zio_t *zio) 150010922SJeff.Bonwick@Sun.COM { 150110922SJeff.Bonwick@Sun.COM spa_t *spa = zio->io_spa; 150210922SJeff.Bonwick@Sun.COM 150310922SJeff.Bonwick@Sun.COM if (zio->io_error) 150410922SJeff.Bonwick@Sun.COM return; 150510922SJeff.Bonwick@Sun.COM 150610922SJeff.Bonwick@Sun.COM mutex_enter(&spa->spa_props_lock); /* any mutex will do */ 150710922SJeff.Bonwick@Sun.COM if (spa->spa_claim_max_txg < zio->io_bp->blk_birth) 150810922SJeff.Bonwick@Sun.COM spa->spa_claim_max_txg = zio->io_bp->blk_birth; 150910922SJeff.Bonwick@Sun.COM mutex_exit(&spa->spa_props_lock); 151010922SJeff.Bonwick@Sun.COM } 151110922SJeff.Bonwick@Sun.COM 151210921STim.Haley@Sun.COM typedef struct spa_load_error { 151311727SVictor.Latushkin@Sun.COM uint64_t sle_meta_count; 151410921STim.Haley@Sun.COM uint64_t sle_data_count; 151510921STim.Haley@Sun.COM } spa_load_error_t; 151610921STim.Haley@Sun.COM 151710921STim.Haley@Sun.COM static void 151810921STim.Haley@Sun.COM spa_load_verify_done(zio_t *zio) 151910921STim.Haley@Sun.COM { 152010921STim.Haley@Sun.COM blkptr_t *bp = zio->io_bp; 152110921STim.Haley@Sun.COM spa_load_error_t *sle = zio->io_private; 152210921STim.Haley@Sun.COM dmu_object_type_t type = BP_GET_TYPE(bp); 152310921STim.Haley@Sun.COM int error = zio->io_error; 152410921STim.Haley@Sun.COM 
152510921STim.Haley@Sun.COM if (error) { 152610921STim.Haley@Sun.COM if ((BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata) && 152710921STim.Haley@Sun.COM type != DMU_OT_INTENT_LOG) 152811727SVictor.Latushkin@Sun.COM atomic_add_64(&sle->sle_meta_count, 1); 152910921STim.Haley@Sun.COM else 153010921STim.Haley@Sun.COM atomic_add_64(&sle->sle_data_count, 1); 153110921STim.Haley@Sun.COM } 153210921STim.Haley@Sun.COM zio_data_buf_free(zio->io_data, zio->io_size); 153310921STim.Haley@Sun.COM } 153410921STim.Haley@Sun.COM 153510921STim.Haley@Sun.COM /*ARGSUSED*/ 153610921STim.Haley@Sun.COM static int 153710922SJeff.Bonwick@Sun.COM spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, 153812296SLin.Ling@Sun.COM arc_buf_t *pbuf, const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) 153910921STim.Haley@Sun.COM { 154010921STim.Haley@Sun.COM if (bp != NULL) { 154110921STim.Haley@Sun.COM zio_t *rio = arg; 154210921STim.Haley@Sun.COM size_t size = BP_GET_PSIZE(bp); 154310921STim.Haley@Sun.COM void *data = zio_data_buf_alloc(size); 154410921STim.Haley@Sun.COM 154510921STim.Haley@Sun.COM zio_nowait(zio_read(rio, spa, bp, data, size, 154610921STim.Haley@Sun.COM spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB, 154710921STim.Haley@Sun.COM ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL | 154810921STim.Haley@Sun.COM ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb)); 154910921STim.Haley@Sun.COM } 155010921STim.Haley@Sun.COM return (0); 155110921STim.Haley@Sun.COM } 155210921STim.Haley@Sun.COM 155310921STim.Haley@Sun.COM static int 155410921STim.Haley@Sun.COM spa_load_verify(spa_t *spa) 155510921STim.Haley@Sun.COM { 155610921STim.Haley@Sun.COM zio_t *rio; 155710921STim.Haley@Sun.COM spa_load_error_t sle = { 0 }; 155810921STim.Haley@Sun.COM zpool_rewind_policy_t policy; 155910921STim.Haley@Sun.COM boolean_t verify_ok = B_FALSE; 156010921STim.Haley@Sun.COM int error; 156110921STim.Haley@Sun.COM 156211727SVictor.Latushkin@Sun.COM zpool_get_rewind_policy(spa->spa_config, &policy); 
156311727SVictor.Latushkin@Sun.COM 156411727SVictor.Latushkin@Sun.COM if (policy.zrp_request & ZPOOL_NEVER_REWIND) 156511727SVictor.Latushkin@Sun.COM return (0); 156611727SVictor.Latushkin@Sun.COM 156710921STim.Haley@Sun.COM rio = zio_root(spa, NULL, &sle, 156810921STim.Haley@Sun.COM ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE); 156910921STim.Haley@Sun.COM 157011125SJeff.Bonwick@Sun.COM error = traverse_pool(spa, spa->spa_verify_min_txg, 157111125SJeff.Bonwick@Sun.COM TRAVERSE_PRE | TRAVERSE_PREFETCH, spa_load_verify_cb, rio); 157210921STim.Haley@Sun.COM 157310921STim.Haley@Sun.COM (void) zio_wait(rio); 157410921STim.Haley@Sun.COM 157511727SVictor.Latushkin@Sun.COM spa->spa_load_meta_errors = sle.sle_meta_count; 157610921STim.Haley@Sun.COM spa->spa_load_data_errors = sle.sle_data_count; 157710921STim.Haley@Sun.COM 157811727SVictor.Latushkin@Sun.COM if (!error && sle.sle_meta_count <= policy.zrp_maxmeta && 157910921STim.Haley@Sun.COM sle.sle_data_count <= policy.zrp_maxdata) { 158012949SGeorge.Wilson@Sun.COM int64_t loss = 0; 158112949SGeorge.Wilson@Sun.COM 158210921STim.Haley@Sun.COM verify_ok = B_TRUE; 158310921STim.Haley@Sun.COM spa->spa_load_txg = spa->spa_uberblock.ub_txg; 158410921STim.Haley@Sun.COM spa->spa_load_txg_ts = spa->spa_uberblock.ub_timestamp; 158512949SGeorge.Wilson@Sun.COM 158612949SGeorge.Wilson@Sun.COM loss = spa->spa_last_ubsync_txg_ts - spa->spa_load_txg_ts; 158712949SGeorge.Wilson@Sun.COM VERIFY(nvlist_add_uint64(spa->spa_load_info, 158812949SGeorge.Wilson@Sun.COM ZPOOL_CONFIG_LOAD_TIME, spa->spa_load_txg_ts) == 0); 158912949SGeorge.Wilson@Sun.COM VERIFY(nvlist_add_int64(spa->spa_load_info, 159012949SGeorge.Wilson@Sun.COM ZPOOL_CONFIG_REWIND_TIME, loss) == 0); 159112949SGeorge.Wilson@Sun.COM VERIFY(nvlist_add_uint64(spa->spa_load_info, 159212949SGeorge.Wilson@Sun.COM ZPOOL_CONFIG_LOAD_DATA_ERRORS, sle.sle_data_count) == 0); 159311026STim.Haley@Sun.COM } else { 159411026STim.Haley@Sun.COM spa->spa_load_max_txg = spa->spa_uberblock.ub_txg; 
159510921STim.Haley@Sun.COM } 159610921STim.Haley@Sun.COM 159710921STim.Haley@Sun.COM if (error) { 159810921STim.Haley@Sun.COM if (error != ENXIO && error != EIO) 159910921STim.Haley@Sun.COM error = EIO; 160010921STim.Haley@Sun.COM return (error); 160110921STim.Haley@Sun.COM } 160210921STim.Haley@Sun.COM 160310921STim.Haley@Sun.COM return (verify_ok ? 0 : EIO); 160410921STim.Haley@Sun.COM } 160510921STim.Haley@Sun.COM 16067294Sperrin /* 160711422SMark.Musante@Sun.COM * Find a value in the pool props object. 160811422SMark.Musante@Sun.COM */ 160911422SMark.Musante@Sun.COM static void 161011422SMark.Musante@Sun.COM spa_prop_find(spa_t *spa, zpool_prop_t prop, uint64_t *val) 161111422SMark.Musante@Sun.COM { 161211422SMark.Musante@Sun.COM (void) zap_lookup(spa->spa_meta_objset, spa->spa_pool_props_object, 161311422SMark.Musante@Sun.COM zpool_prop_to_name(prop), sizeof (uint64_t), 1, val); 161411422SMark.Musante@Sun.COM } 161511422SMark.Musante@Sun.COM 161611422SMark.Musante@Sun.COM /* 161711422SMark.Musante@Sun.COM * Find a value in the pool directory object. 161811422SMark.Musante@Sun.COM */ 161911422SMark.Musante@Sun.COM static int 162011422SMark.Musante@Sun.COM spa_dir_prop(spa_t *spa, const char *name, uint64_t *val) 162111422SMark.Musante@Sun.COM { 162211422SMark.Musante@Sun.COM return (zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 162311422SMark.Musante@Sun.COM name, sizeof (uint64_t), 1, val)); 162411422SMark.Musante@Sun.COM } 162511422SMark.Musante@Sun.COM 162611422SMark.Musante@Sun.COM static int 162711422SMark.Musante@Sun.COM spa_vdev_err(vdev_t *vdev, vdev_aux_t aux, int err) 162811422SMark.Musante@Sun.COM { 162911422SMark.Musante@Sun.COM vdev_set_state(vdev, B_TRUE, VDEV_STATE_CANT_OPEN, aux); 163011422SMark.Musante@Sun.COM return (err); 163111422SMark.Musante@Sun.COM } 163211422SMark.Musante@Sun.COM 163311422SMark.Musante@Sun.COM /* 163411422SMark.Musante@Sun.COM * Fix up config after a partly-completed split. 
This is done with the 163511422SMark.Musante@Sun.COM * ZPOOL_CONFIG_SPLIT nvlist. Both the splitting pool and the split-off 163611422SMark.Musante@Sun.COM * pool have that entry in their config, but only the splitting one contains 163711422SMark.Musante@Sun.COM * a list of all the guids of the vdevs that are being split off. 163811422SMark.Musante@Sun.COM * 163911422SMark.Musante@Sun.COM * This function determines what to do with that list: either rejoin 164011422SMark.Musante@Sun.COM * all the disks to the pool, or complete the splitting process. To attempt 164111422SMark.Musante@Sun.COM * the rejoin, each disk that is offlined is marked online again, and 164211422SMark.Musante@Sun.COM * we do a reopen() call. If the vdev label for every disk that was 164311422SMark.Musante@Sun.COM * marked online indicates it was successfully split off (VDEV_AUX_SPLIT_POOL) 164411422SMark.Musante@Sun.COM * then we call vdev_split() on each disk, and complete the split. 164511422SMark.Musante@Sun.COM * 164611497SMark.Musante@Sun.COM * Otherwise we leave the config alone, with all the vdevs in place in 164711497SMark.Musante@Sun.COM * the original pool. 
164811422SMark.Musante@Sun.COM */ 164911422SMark.Musante@Sun.COM static void 165011422SMark.Musante@Sun.COM spa_try_repair(spa_t *spa, nvlist_t *config) 165111422SMark.Musante@Sun.COM { 165211422SMark.Musante@Sun.COM uint_t extracted; 165311422SMark.Musante@Sun.COM uint64_t *glist; 165411422SMark.Musante@Sun.COM uint_t i, gcount; 165511422SMark.Musante@Sun.COM nvlist_t *nvl; 165611422SMark.Musante@Sun.COM vdev_t **vd; 165711422SMark.Musante@Sun.COM boolean_t attempt_reopen; 165811422SMark.Musante@Sun.COM 165911422SMark.Musante@Sun.COM if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT, &nvl) != 0) 166011422SMark.Musante@Sun.COM return; 166111422SMark.Musante@Sun.COM 166211422SMark.Musante@Sun.COM /* check that the config is complete */ 166311422SMark.Musante@Sun.COM if (nvlist_lookup_uint64_array(nvl, ZPOOL_CONFIG_SPLIT_LIST, 166411422SMark.Musante@Sun.COM &glist, &gcount) != 0) 166511422SMark.Musante@Sun.COM return; 166611422SMark.Musante@Sun.COM 166711422SMark.Musante@Sun.COM vd = kmem_zalloc(gcount * sizeof (vdev_t *), KM_SLEEP); 166811422SMark.Musante@Sun.COM 166911422SMark.Musante@Sun.COM /* attempt to online all the vdevs & validate */ 167011422SMark.Musante@Sun.COM attempt_reopen = B_TRUE; 167111422SMark.Musante@Sun.COM for (i = 0; i < gcount; i++) { 167211422SMark.Musante@Sun.COM if (glist[i] == 0) /* vdev is hole */ 167311422SMark.Musante@Sun.COM continue; 167411422SMark.Musante@Sun.COM 167511422SMark.Musante@Sun.COM vd[i] = spa_lookup_by_guid(spa, glist[i], B_FALSE); 167611422SMark.Musante@Sun.COM if (vd[i] == NULL) { 167711422SMark.Musante@Sun.COM /* 167811422SMark.Musante@Sun.COM * Don't bother attempting to reopen the disks; 167911422SMark.Musante@Sun.COM * just do the split. 
168011422SMark.Musante@Sun.COM */ 168111422SMark.Musante@Sun.COM attempt_reopen = B_FALSE; 168211422SMark.Musante@Sun.COM } else { 168311422SMark.Musante@Sun.COM /* attempt to re-online it */ 168411422SMark.Musante@Sun.COM vd[i]->vdev_offline = B_FALSE; 168511422SMark.Musante@Sun.COM } 168611422SMark.Musante@Sun.COM } 168711422SMark.Musante@Sun.COM 168811422SMark.Musante@Sun.COM if (attempt_reopen) { 168911422SMark.Musante@Sun.COM vdev_reopen(spa->spa_root_vdev); 169011422SMark.Musante@Sun.COM 169111422SMark.Musante@Sun.COM /* check each device to see what state it's in */ 169211422SMark.Musante@Sun.COM for (extracted = 0, i = 0; i < gcount; i++) { 169311422SMark.Musante@Sun.COM if (vd[i] != NULL && 169411422SMark.Musante@Sun.COM vd[i]->vdev_stat.vs_aux != VDEV_AUX_SPLIT_POOL) 169511422SMark.Musante@Sun.COM break; 169611422SMark.Musante@Sun.COM ++extracted; 169711422SMark.Musante@Sun.COM } 169811422SMark.Musante@Sun.COM } 169911422SMark.Musante@Sun.COM 170011422SMark.Musante@Sun.COM /* 170111422SMark.Musante@Sun.COM * If every disk has been moved to the new pool, or if we never 170211422SMark.Musante@Sun.COM * even attempted to look at them, then we split them off for 170311422SMark.Musante@Sun.COM * good. 
170411422SMark.Musante@Sun.COM */ 170511422SMark.Musante@Sun.COM if (!attempt_reopen || gcount == extracted) { 170611422SMark.Musante@Sun.COM for (i = 0; i < gcount; i++) 170711422SMark.Musante@Sun.COM if (vd[i] != NULL) 170811422SMark.Musante@Sun.COM vdev_split(vd[i]); 170911422SMark.Musante@Sun.COM vdev_reopen(spa->spa_root_vdev); 171011422SMark.Musante@Sun.COM } 171111422SMark.Musante@Sun.COM 171211422SMark.Musante@Sun.COM kmem_free(vd, gcount * sizeof (vdev_t *)); 171311422SMark.Musante@Sun.COM } 171411422SMark.Musante@Sun.COM 171511422SMark.Musante@Sun.COM static int 171611422SMark.Musante@Sun.COM spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type, 171711422SMark.Musante@Sun.COM boolean_t mosconfig) 171811422SMark.Musante@Sun.COM { 171911422SMark.Musante@Sun.COM nvlist_t *config = spa->spa_config; 172011422SMark.Musante@Sun.COM char *ereport = FM_EREPORT_ZFS_POOL; 172111422SMark.Musante@Sun.COM int error; 172211422SMark.Musante@Sun.COM uint64_t pool_guid; 172311422SMark.Musante@Sun.COM nvlist_t *nvl; 172411422SMark.Musante@Sun.COM 172511422SMark.Musante@Sun.COM if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) 172611422SMark.Musante@Sun.COM return (EINVAL); 172711422SMark.Musante@Sun.COM 172811422SMark.Musante@Sun.COM /* 172911422SMark.Musante@Sun.COM * Versioning wasn't explicitly added to the label until later, so if 173011422SMark.Musante@Sun.COM * it's not present treat it as the initial version. 
173111422SMark.Musante@Sun.COM */ 173211422SMark.Musante@Sun.COM if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, 173311422SMark.Musante@Sun.COM &spa->spa_ubsync.ub_version) != 0) 173411422SMark.Musante@Sun.COM spa->spa_ubsync.ub_version = SPA_VERSION_INITIAL; 173511422SMark.Musante@Sun.COM 173611422SMark.Musante@Sun.COM (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, 173711422SMark.Musante@Sun.COM &spa->spa_config_txg); 173811422SMark.Musante@Sun.COM 173911422SMark.Musante@Sun.COM if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) && 174011422SMark.Musante@Sun.COM spa_guid_exists(pool_guid, 0)) { 174111422SMark.Musante@Sun.COM error = EEXIST; 174211422SMark.Musante@Sun.COM } else { 174311422SMark.Musante@Sun.COM spa->spa_load_guid = pool_guid; 174411422SMark.Musante@Sun.COM 174511422SMark.Musante@Sun.COM if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT, 174611422SMark.Musante@Sun.COM &nvl) == 0) { 174711422SMark.Musante@Sun.COM VERIFY(nvlist_dup(nvl, &spa->spa_config_splitting, 174811422SMark.Musante@Sun.COM KM_SLEEP) == 0); 174911422SMark.Musante@Sun.COM } 175011422SMark.Musante@Sun.COM 175112817STim.Haley@Sun.COM gethrestime(&spa->spa_loaded_ts); 175211422SMark.Musante@Sun.COM error = spa_load_impl(spa, pool_guid, config, state, type, 175311422SMark.Musante@Sun.COM mosconfig, &ereport); 175411422SMark.Musante@Sun.COM } 175511422SMark.Musante@Sun.COM 175611422SMark.Musante@Sun.COM spa->spa_minref = refcount_count(&spa->spa_refcount); 175712817STim.Haley@Sun.COM if (error) { 175812817STim.Haley@Sun.COM if (error != EEXIST) { 175912817STim.Haley@Sun.COM spa->spa_loaded_ts.tv_sec = 0; 176012817STim.Haley@Sun.COM spa->spa_loaded_ts.tv_nsec = 0; 176112817STim.Haley@Sun.COM } 176212817STim.Haley@Sun.COM if (error != EBADF) { 176312817STim.Haley@Sun.COM zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0); 176412817STim.Haley@Sun.COM } 176512817STim.Haley@Sun.COM } 176611422SMark.Musante@Sun.COM spa->spa_load_state = error ? 
SPA_LOAD_ERROR : SPA_LOAD_NONE; 176711422SMark.Musante@Sun.COM spa->spa_ena = 0; 176811422SMark.Musante@Sun.COM 176911422SMark.Musante@Sun.COM return (error); 177011422SMark.Musante@Sun.COM } 177111422SMark.Musante@Sun.COM 177211422SMark.Musante@Sun.COM /* 1773789Sahrens * Load an existing storage pool, using the pool's builtin spa_config as a 17741544Seschrock * source of configuration information. 1775789Sahrens */ 1776789Sahrens static int 177711422SMark.Musante@Sun.COM spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config, 177811422SMark.Musante@Sun.COM spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig, 177911422SMark.Musante@Sun.COM char **ereport) 1780789Sahrens { 1781789Sahrens int error = 0; 178211810SMark.Musante@Sun.COM nvlist_t *nvroot = NULL; 1783789Sahrens vdev_t *rvd; 1784789Sahrens uberblock_t *ub = &spa->spa_uberblock; 178512949SGeorge.Wilson@Sun.COM uint64_t children, config_cache_txg = spa->spa_config_txg; 17868241SJeff.Bonwick@Sun.COM int orig_mode = spa->spa_mode; 178711422SMark.Musante@Sun.COM int parse; 178812470SMatthew.Ahrens@Sun.COM uint64_t obj; 1789789Sahrens 17908241SJeff.Bonwick@Sun.COM /* 17918241SJeff.Bonwick@Sun.COM * If this is an untrusted config, access the pool in read-only mode. 17928241SJeff.Bonwick@Sun.COM * This prevents things like resilvering recently removed devices. 17938241SJeff.Bonwick@Sun.COM */ 17948241SJeff.Bonwick@Sun.COM if (!mosconfig) 17958241SJeff.Bonwick@Sun.COM spa->spa_mode = FREAD; 17968241SJeff.Bonwick@Sun.COM 17977754SJeff.Bonwick@Sun.COM ASSERT(MUTEX_HELD(&spa_namespace_lock)); 17987754SJeff.Bonwick@Sun.COM 17991544Seschrock spa->spa_load_state = state; 18001635Sbonwick 180111422SMark.Musante@Sun.COM if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot)) 180211422SMark.Musante@Sun.COM return (EINVAL); 180311422SMark.Musante@Sun.COM 180411422SMark.Musante@Sun.COM parse = (type == SPA_IMPORT_EXISTING ? 
180511422SMark.Musante@Sun.COM VDEV_ALLOC_LOAD : VDEV_ALLOC_SPLIT); 18062174Seschrock 1807789Sahrens /* 18089234SGeorge.Wilson@Sun.COM * Create "The Godfather" zio to hold all async IOs 18099234SGeorge.Wilson@Sun.COM */ 18109630SJeff.Bonwick@Sun.COM spa->spa_async_zio_root = zio_root(spa, NULL, NULL, 18119630SJeff.Bonwick@Sun.COM ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); 18129234SGeorge.Wilson@Sun.COM 18139234SGeorge.Wilson@Sun.COM /* 18142082Seschrock * Parse the configuration into a vdev tree. We explicitly set the 18152082Seschrock * value that will be returned by spa_version() since parsing the 18162082Seschrock * configuration requires knowing the version number. 1817789Sahrens */ 18187754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 181911422SMark.Musante@Sun.COM error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, parse); 18207754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 1821789Sahrens 18222082Seschrock if (error != 0) 182311422SMark.Musante@Sun.COM return (error); 1824789Sahrens 18251585Sbonwick ASSERT(spa->spa_root_vdev == rvd); 182611422SMark.Musante@Sun.COM 182711422SMark.Musante@Sun.COM if (type != SPA_IMPORT_ASSEMBLE) { 182811422SMark.Musante@Sun.COM ASSERT(spa_guid(spa) == pool_guid); 182911422SMark.Musante@Sun.COM } 1830789Sahrens 1831789Sahrens /* 1832789Sahrens * Try to open all vdevs, loading each label in the process. 1833789Sahrens */ 18347754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 18354070Smc142369 error = vdev_open(rvd); 18367754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 18374070Smc142369 if (error != 0) 183811422SMark.Musante@Sun.COM return (error); 1839789Sahrens 1840789Sahrens /* 18419276SMark.Musante@Sun.COM * We need to validate the vdev labels against the configuration that 18429276SMark.Musante@Sun.COM * we have in hand, which is dependent on the setting of mosconfig. 
If 18439276SMark.Musante@Sun.COM * mosconfig is true then we're validating the vdev labels based on 184411422SMark.Musante@Sun.COM * that config. Otherwise, we're validating against the cached config 18459276SMark.Musante@Sun.COM * (zpool.cache) that was read when we loaded the zfs module, and then 18469276SMark.Musante@Sun.COM * later we will recursively call spa_load() and validate against 18479276SMark.Musante@Sun.COM * the vdev config. 184811422SMark.Musante@Sun.COM * 184911422SMark.Musante@Sun.COM * If we're assembling a new pool that's been split off from an 185011422SMark.Musante@Sun.COM * existing pool, the labels haven't yet been updated so we skip 185111422SMark.Musante@Sun.COM * validation for now. 18521986Seschrock */ 185311422SMark.Musante@Sun.COM if (type != SPA_IMPORT_ASSEMBLE) { 185411422SMark.Musante@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 185511422SMark.Musante@Sun.COM error = vdev_validate(rvd); 185611422SMark.Musante@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 185711422SMark.Musante@Sun.COM 185811422SMark.Musante@Sun.COM if (error != 0) 185911422SMark.Musante@Sun.COM return (error); 186011422SMark.Musante@Sun.COM 186111422SMark.Musante@Sun.COM if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) 186211422SMark.Musante@Sun.COM return (ENXIO); 18631986Seschrock } 18641986Seschrock 18651986Seschrock /* 1866789Sahrens * Find the best uberblock. 1867789Sahrens */ 18687754SJeff.Bonwick@Sun.COM vdev_uberblock_load(NULL, rvd, ub); 1869789Sahrens 1870789Sahrens /* 1871789Sahrens * If we weren't able to find a single valid uberblock, return failure. 1872789Sahrens */ 187311422SMark.Musante@Sun.COM if (ub->ub_txg == 0) 187411422SMark.Musante@Sun.COM return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, ENXIO)); 18751544Seschrock 18761544Seschrock /* 18771544Seschrock * If the pool is newer than the code, we can't open it. 
18781544Seschrock */ 187911422SMark.Musante@Sun.COM if (ub->ub_version > SPA_VERSION) 188011422SMark.Musante@Sun.COM return (spa_vdev_err(rvd, VDEV_AUX_VERSION_NEWER, ENOTSUP)); 1881789Sahrens 1882789Sahrens /* 1883789Sahrens * If the vdev guid sum doesn't match the uberblock, we have an 188412949SGeorge.Wilson@Sun.COM * incomplete configuration. We first check to see if the pool 188512949SGeorge.Wilson@Sun.COM * is aware of the complete config (i.e ZPOOL_CONFIG_VDEV_CHILDREN). 188612949SGeorge.Wilson@Sun.COM * If it is, defer the vdev_guid_sum check till later so we 188712949SGeorge.Wilson@Sun.COM * can handle missing vdevs. 1888789Sahrens */ 188912949SGeorge.Wilson@Sun.COM if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VDEV_CHILDREN, 189012949SGeorge.Wilson@Sun.COM &children) != 0 && mosconfig && type != SPA_IMPORT_ASSEMBLE && 189111422SMark.Musante@Sun.COM rvd->vdev_guid_sum != ub->ub_guid_sum) 189211422SMark.Musante@Sun.COM return (spa_vdev_err(rvd, VDEV_AUX_BAD_GUID_SUM, ENXIO)); 189311422SMark.Musante@Sun.COM 189411422SMark.Musante@Sun.COM if (type != SPA_IMPORT_ASSEMBLE && spa->spa_config_splitting) { 189511422SMark.Musante@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 189611422SMark.Musante@Sun.COM spa_try_repair(spa, config); 189711422SMark.Musante@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 189811422SMark.Musante@Sun.COM nvlist_free(spa->spa_config_splitting); 189911422SMark.Musante@Sun.COM spa->spa_config_splitting = NULL; 1900789Sahrens } 1901789Sahrens 1902789Sahrens /* 1903789Sahrens * Initialize internal SPA structures. 1904789Sahrens */ 1905789Sahrens spa->spa_state = POOL_STATE_ACTIVE; 1906789Sahrens spa->spa_ubsync = spa->spa_uberblock; 190710921STim.Haley@Sun.COM spa->spa_verify_min_txg = spa->spa_extreme_rewind ? 190811727SVictor.Latushkin@Sun.COM TXG_INITIAL - 1 : spa_last_synced_txg(spa) - TXG_DEFER_SIZE - 1; 190910921STim.Haley@Sun.COM spa->spa_first_txg = spa->spa_last_ubsync_txg ? 
191010921STim.Haley@Sun.COM spa->spa_last_ubsync_txg : spa_last_synced_txg(spa) + 1; 191110922SJeff.Bonwick@Sun.COM spa->spa_claim_max_txg = spa->spa_first_txg; 191212296SLin.Ling@Sun.COM spa->spa_prev_software_version = ub->ub_software_version; 191310922SJeff.Bonwick@Sun.COM 19141544Seschrock error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool); 191511422SMark.Musante@Sun.COM if (error) 191611422SMark.Musante@Sun.COM return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 1917789Sahrens spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset; 1918789Sahrens 191911422SMark.Musante@Sun.COM if (spa_dir_prop(spa, DMU_POOL_CONFIG, &spa->spa_config_object) != 0) 192011422SMark.Musante@Sun.COM return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 192111422SMark.Musante@Sun.COM 1922789Sahrens if (!mosconfig) { 19233975Sek110237 uint64_t hostid; 192411810SMark.Musante@Sun.COM nvlist_t *policy = NULL, *nvconfig; 192511810SMark.Musante@Sun.COM 192611810SMark.Musante@Sun.COM if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0) 192711810SMark.Musante@Sun.COM return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 19282082Seschrock 192910594SGeorge.Wilson@Sun.COM if (!spa_is_root(spa) && nvlist_lookup_uint64(nvconfig, 19307706SLin.Ling@Sun.COM ZPOOL_CONFIG_HOSTID, &hostid) == 0) { 19313975Sek110237 char *hostname; 19323975Sek110237 unsigned long myhostid = 0; 19333975Sek110237 193410594SGeorge.Wilson@Sun.COM VERIFY(nvlist_lookup_string(nvconfig, 19353975Sek110237 ZPOOL_CONFIG_HOSTNAME, &hostname) == 0); 19363975Sek110237 19378662SJordan.Vaughan@Sun.com #ifdef _KERNEL 19388662SJordan.Vaughan@Sun.com myhostid = zone_get_hostid(NULL); 19398662SJordan.Vaughan@Sun.com #else /* _KERNEL */ 19408662SJordan.Vaughan@Sun.com /* 19418662SJordan.Vaughan@Sun.com * We're emulating the system's hostid in userland, so 19428662SJordan.Vaughan@Sun.com * we can't use zone_get_hostid(). 
19438662SJordan.Vaughan@Sun.com */ 19443975Sek110237 (void) ddi_strtoul(hw_serial, NULL, 10, &myhostid); 19458662SJordan.Vaughan@Sun.com #endif /* _KERNEL */ 19464178Slling if (hostid != 0 && myhostid != 0 && 19478662SJordan.Vaughan@Sun.com hostid != myhostid) { 194811810SMark.Musante@Sun.COM nvlist_free(nvconfig); 19493975Sek110237 cmn_err(CE_WARN, "pool '%s' could not be " 19503975Sek110237 "loaded as it was last accessed by " 19517706SLin.Ling@Sun.COM "another system (host: %s hostid: 0x%lx). " 19523975Sek110237 "See: http://www.sun.com/msg/ZFS-8000-EY", 19537754SJeff.Bonwick@Sun.COM spa_name(spa), hostname, 19543975Sek110237 (unsigned long)hostid); 195511422SMark.Musante@Sun.COM return (EBADF); 19563975Sek110237 } 19573975Sek110237 } 195811727SVictor.Latushkin@Sun.COM if (nvlist_lookup_nvlist(spa->spa_config, 195911727SVictor.Latushkin@Sun.COM ZPOOL_REWIND_POLICY, &policy) == 0) 196011727SVictor.Latushkin@Sun.COM VERIFY(nvlist_add_nvlist(nvconfig, 196111727SVictor.Latushkin@Sun.COM ZPOOL_REWIND_POLICY, policy) == 0); 19623975Sek110237 196310594SGeorge.Wilson@Sun.COM spa_config_set(spa, nvconfig); 1964789Sahrens spa_unload(spa); 1965789Sahrens spa_deactivate(spa); 19668241SJeff.Bonwick@Sun.COM spa_activate(spa, orig_mode); 1967789Sahrens 196811422SMark.Musante@Sun.COM return (spa_load(spa, state, SPA_IMPORT_EXISTING, B_TRUE)); 19691544Seschrock } 19701544Seschrock 197112470SMatthew.Ahrens@Sun.COM if (spa_dir_prop(spa, DMU_POOL_SYNC_BPOBJ, &obj) != 0) 197212470SMatthew.Ahrens@Sun.COM return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 197312470SMatthew.Ahrens@Sun.COM error = bpobj_open(&spa->spa_deferred_bpobj, spa->spa_meta_objset, obj); 197412470SMatthew.Ahrens@Sun.COM if (error != 0) 197511422SMark.Musante@Sun.COM return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 1976789Sahrens 19771544Seschrock /* 19782082Seschrock * Load the bit that tells us to use the new accounting function 19792082Seschrock * (raid-z deflation). 
If we have an older pool, this will not 19802082Seschrock * be present. 19812082Seschrock */ 198211422SMark.Musante@Sun.COM error = spa_dir_prop(spa, DMU_POOL_DEFLATE, &spa->spa_deflate); 198311422SMark.Musante@Sun.COM if (error != 0 && error != ENOENT) 198411422SMark.Musante@Sun.COM return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 19852082Seschrock 198612296SLin.Ling@Sun.COM error = spa_dir_prop(spa, DMU_POOL_CREATION_VERSION, 198712296SLin.Ling@Sun.COM &spa->spa_creation_version); 198812296SLin.Ling@Sun.COM if (error != 0 && error != ENOENT) 198912296SLin.Ling@Sun.COM return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 199012296SLin.Ling@Sun.COM 19912082Seschrock /* 19921544Seschrock * Load the persistent error log. If we have an older pool, this will 19931544Seschrock * not be present. 19941544Seschrock */ 199511422SMark.Musante@Sun.COM error = spa_dir_prop(spa, DMU_POOL_ERRLOG_LAST, &spa->spa_errlog_last); 199611422SMark.Musante@Sun.COM if (error != 0 && error != ENOENT) 199711422SMark.Musante@Sun.COM return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 199811422SMark.Musante@Sun.COM 199911422SMark.Musante@Sun.COM error = spa_dir_prop(spa, DMU_POOL_ERRLOG_SCRUB, 200011422SMark.Musante@Sun.COM &spa->spa_errlog_scrub); 200111422SMark.Musante@Sun.COM if (error != 0 && error != ENOENT) 200211422SMark.Musante@Sun.COM return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2003789Sahrens 2004789Sahrens /* 20052926Sek110237 * Load the history object. If we have an older pool, this 20062926Sek110237 * will not be present. 
20072926Sek110237 */ 200811422SMark.Musante@Sun.COM error = spa_dir_prop(spa, DMU_POOL_HISTORY, &spa->spa_history); 200911422SMark.Musante@Sun.COM if (error != 0 && error != ENOENT) 201011422SMark.Musante@Sun.COM return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 201111422SMark.Musante@Sun.COM 201211422SMark.Musante@Sun.COM /* 201311422SMark.Musante@Sun.COM * If we're assembling the pool from the split-off vdevs of 201411422SMark.Musante@Sun.COM * an existing pool, we don't want to attach the spares & cache 201511422SMark.Musante@Sun.COM * devices. 201611422SMark.Musante@Sun.COM */ 20172926Sek110237 20182926Sek110237 /* 20192082Seschrock * Load any hot spares for this pool. 20202082Seschrock */ 202111422SMark.Musante@Sun.COM error = spa_dir_prop(spa, DMU_POOL_SPARES, &spa->spa_spares.sav_object); 202211422SMark.Musante@Sun.COM if (error != 0 && error != ENOENT) 202311422SMark.Musante@Sun.COM return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 202411422SMark.Musante@Sun.COM if (error == 0 && type != SPA_IMPORT_ASSEMBLE) { 20254577Sahrens ASSERT(spa_version(spa) >= SPA_VERSION_SPARES); 20265450Sbrendan if (load_nvlist(spa, spa->spa_spares.sav_object, 202711422SMark.Musante@Sun.COM &spa->spa_spares.sav_config) != 0) 202811422SMark.Musante@Sun.COM return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 20292082Seschrock 20307754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 20312082Seschrock spa_load_spares(spa); 20327754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 203311422SMark.Musante@Sun.COM } else if (error == 0) { 203411422SMark.Musante@Sun.COM spa->spa_spares.sav_sync = B_TRUE; 20352082Seschrock } 20362082Seschrock 20375450Sbrendan /* 20385450Sbrendan * Load any level 2 ARC devices for this pool. 
20395450Sbrendan */ 204011422SMark.Musante@Sun.COM error = spa_dir_prop(spa, DMU_POOL_L2CACHE, 20415450Sbrendan &spa->spa_l2cache.sav_object); 204211422SMark.Musante@Sun.COM if (error != 0 && error != ENOENT) 204311422SMark.Musante@Sun.COM return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 204411422SMark.Musante@Sun.COM if (error == 0 && type != SPA_IMPORT_ASSEMBLE) { 20455450Sbrendan ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE); 20465450Sbrendan if (load_nvlist(spa, spa->spa_l2cache.sav_object, 204711422SMark.Musante@Sun.COM &spa->spa_l2cache.sav_config) != 0) 204811422SMark.Musante@Sun.COM return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 20495450Sbrendan 20507754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 20515450Sbrendan spa_load_l2cache(spa); 20527754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 205311422SMark.Musante@Sun.COM } else if (error == 0) { 205411422SMark.Musante@Sun.COM spa->spa_l2cache.sav_sync = B_TRUE; 20555450Sbrendan } 20565450Sbrendan 20575094Slling spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); 20584543Smarks 205911422SMark.Musante@Sun.COM error = spa_dir_prop(spa, DMU_POOL_PROPS, &spa->spa_pool_props_object); 206011422SMark.Musante@Sun.COM if (error && error != ENOENT) 206111422SMark.Musante@Sun.COM return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 20623912Slling 20633912Slling if (error == 0) { 206411422SMark.Musante@Sun.COM uint64_t autoreplace; 206511422SMark.Musante@Sun.COM 206611422SMark.Musante@Sun.COM spa_prop_find(spa, ZPOOL_PROP_BOOTFS, &spa->spa_bootfs); 206711422SMark.Musante@Sun.COM spa_prop_find(spa, ZPOOL_PROP_AUTOREPLACE, &autoreplace); 206811422SMark.Musante@Sun.COM spa_prop_find(spa, ZPOOL_PROP_DELEGATION, &spa->spa_delegation); 206911422SMark.Musante@Sun.COM spa_prop_find(spa, ZPOOL_PROP_FAILUREMODE, &spa->spa_failmode); 207011422SMark.Musante@Sun.COM spa_prop_find(spa, ZPOOL_PROP_AUTOEXPAND, &spa->spa_autoexpand); 
207111422SMark.Musante@Sun.COM spa_prop_find(spa, ZPOOL_PROP_DEDUPDITTO, 207211422SMark.Musante@Sun.COM &spa->spa_dedup_ditto); 207311422SMark.Musante@Sun.COM 207410672SEric.Schrock@Sun.COM spa->spa_autoreplace = (autoreplace != 0); 20753912Slling } 20763912Slling 20772082Seschrock /* 20784451Seschrock * If the 'autoreplace' property is set, then post a resource notifying 20794451Seschrock * the ZFS DE that it should not issue any faults for unopenable 20804451Seschrock * devices. We also iterate over the vdevs, and post a sysevent for any 20814451Seschrock * unopenable vdevs so that the normal autoreplace handler can take 20824451Seschrock * over. 20834451Seschrock */ 208410672SEric.Schrock@Sun.COM if (spa->spa_autoreplace && state != SPA_LOAD_TRYIMPORT) { 20854451Seschrock spa_check_removed(spa->spa_root_vdev); 208610672SEric.Schrock@Sun.COM /* 208710672SEric.Schrock@Sun.COM * For the import case, this is done in spa_import(), because 208810672SEric.Schrock@Sun.COM * at this point we're using the spare definitions from 208910672SEric.Schrock@Sun.COM * the MOS config, not necessarily from the userland config. 209010672SEric.Schrock@Sun.COM */ 209110672SEric.Schrock@Sun.COM if (state != SPA_LOAD_IMPORT) { 209210672SEric.Schrock@Sun.COM spa_aux_check_removed(&spa->spa_spares); 209310672SEric.Schrock@Sun.COM spa_aux_check_removed(&spa->spa_l2cache); 209410672SEric.Schrock@Sun.COM } 209510672SEric.Schrock@Sun.COM } 20964451Seschrock 20974451Seschrock /* 20981986Seschrock * Load the vdev state for all toplevel vdevs. 2099789Sahrens */ 21001986Seschrock vdev_load(rvd); 2101789Sahrens 2102789Sahrens /* 2103789Sahrens * Propagate the leaf DTLs we just loaded all the way up the tree. 
2104789Sahrens */ 21057754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 2106789Sahrens vdev_dtl_reassess(rvd, 0, 0, B_FALSE); 21077754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 2108789Sahrens 2109789Sahrens /* 211010922SJeff.Bonwick@Sun.COM * Load the DDTs (dedup tables). 211110922SJeff.Bonwick@Sun.COM */ 211210922SJeff.Bonwick@Sun.COM error = ddt_load(spa); 211311422SMark.Musante@Sun.COM if (error != 0) 211411422SMark.Musante@Sun.COM return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 211510922SJeff.Bonwick@Sun.COM 211610956SGeorge.Wilson@Sun.COM spa_update_dspace(spa); 211710956SGeorge.Wilson@Sun.COM 211810922SJeff.Bonwick@Sun.COM /* 211912949SGeorge.Wilson@Sun.COM * Validate the config, using the MOS config to fill in any 212012949SGeorge.Wilson@Sun.COM * information which might be missing. If we fail to validate 212112949SGeorge.Wilson@Sun.COM * the config then declare the pool unfit for use. If we're 212212949SGeorge.Wilson@Sun.COM * assembling a pool from a split, the log is not transferred 212312949SGeorge.Wilson@Sun.COM * over. 
212410922SJeff.Bonwick@Sun.COM */ 212511422SMark.Musante@Sun.COM if (type != SPA_IMPORT_ASSEMBLE) { 212611810SMark.Musante@Sun.COM nvlist_t *nvconfig; 212711810SMark.Musante@Sun.COM 212811810SMark.Musante@Sun.COM if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0) 212911810SMark.Musante@Sun.COM return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 213011810SMark.Musante@Sun.COM 213112949SGeorge.Wilson@Sun.COM if (!spa_config_valid(spa, nvconfig)) { 213212949SGeorge.Wilson@Sun.COM nvlist_free(nvconfig); 213312949SGeorge.Wilson@Sun.COM return (spa_vdev_err(rvd, VDEV_AUX_BAD_GUID_SUM, 213412949SGeorge.Wilson@Sun.COM ENXIO)); 213512949SGeorge.Wilson@Sun.COM } 213611422SMark.Musante@Sun.COM nvlist_free(nvconfig); 213711422SMark.Musante@Sun.COM 213812949SGeorge.Wilson@Sun.COM /* 213912949SGeorge.Wilson@Sun.COM * Now that we've validate the config, check the state of the 214012949SGeorge.Wilson@Sun.COM * root vdev. If it can't be opened, it indicates one or 214112949SGeorge.Wilson@Sun.COM * more toplevel vdevs are faulted. 214212949SGeorge.Wilson@Sun.COM */ 214312949SGeorge.Wilson@Sun.COM if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) 214412949SGeorge.Wilson@Sun.COM return (ENXIO); 214512949SGeorge.Wilson@Sun.COM 214611422SMark.Musante@Sun.COM if (spa_check_logs(spa)) { 214711422SMark.Musante@Sun.COM *ereport = FM_EREPORT_ZFS_LOG_REPLAY; 214811422SMark.Musante@Sun.COM return (spa_vdev_err(rvd, VDEV_AUX_BAD_LOG, ENXIO)); 214911422SMark.Musante@Sun.COM } 215010922SJeff.Bonwick@Sun.COM } 215110922SJeff.Bonwick@Sun.COM 215212949SGeorge.Wilson@Sun.COM /* 215312949SGeorge.Wilson@Sun.COM * We've successfully opened the pool, verify that we're ready 215412949SGeorge.Wilson@Sun.COM * to start pushing transactions. 
215512949SGeorge.Wilson@Sun.COM */ 215612949SGeorge.Wilson@Sun.COM if (state != SPA_LOAD_TRYIMPORT) { 215712949SGeorge.Wilson@Sun.COM if (error = spa_load_verify(spa)) 215812949SGeorge.Wilson@Sun.COM return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, 215912949SGeorge.Wilson@Sun.COM error)); 216012949SGeorge.Wilson@Sun.COM } 216112949SGeorge.Wilson@Sun.COM 216210921STim.Haley@Sun.COM if (spa_writeable(spa) && (state == SPA_LOAD_RECOVER || 216310921STim.Haley@Sun.COM spa->spa_load_max_txg == UINT64_MAX)) { 21641635Sbonwick dmu_tx_t *tx; 21651635Sbonwick int need_update = B_FALSE; 21668241SJeff.Bonwick@Sun.COM 21678241SJeff.Bonwick@Sun.COM ASSERT(state != SPA_LOAD_TRYIMPORT); 21681601Sbonwick 21691635Sbonwick /* 21701635Sbonwick * Claim log blocks that haven't been committed yet. 21711635Sbonwick * This must all happen in a single txg. 217210922SJeff.Bonwick@Sun.COM * Note: spa_claim_max_txg is updated by spa_claim_notify(), 217310922SJeff.Bonwick@Sun.COM * invoked from zil_claim_log_block()'s i/o done callback. 217410921STim.Haley@Sun.COM * Price of rollback is that we abandon the log. 21751635Sbonwick */ 217610922SJeff.Bonwick@Sun.COM spa->spa_claiming = B_TRUE; 217710922SJeff.Bonwick@Sun.COM 21781601Sbonwick tx = dmu_tx_create_assigned(spa_get_dsl(spa), 2179789Sahrens spa_first_txg(spa)); 21807754SJeff.Bonwick@Sun.COM (void) dmu_objset_find(spa_name(spa), 21812417Sahrens zil_claim, tx, DS_FIND_CHILDREN); 2182789Sahrens dmu_tx_commit(tx); 2183789Sahrens 218410922SJeff.Bonwick@Sun.COM spa->spa_claiming = B_FALSE; 218510922SJeff.Bonwick@Sun.COM 218611422SMark.Musante@Sun.COM spa_set_log_state(spa, SPA_LOG_GOOD); 2187789Sahrens spa->spa_sync_on = B_TRUE; 2188789Sahrens txg_sync_start(spa->spa_dsl_pool); 2189789Sahrens 2190789Sahrens /* 219110922SJeff.Bonwick@Sun.COM * Wait for all claims to sync. 
We sync up to the highest 219210922SJeff.Bonwick@Sun.COM * claimed log block birth time so that claimed log blocks 219310922SJeff.Bonwick@Sun.COM * don't appear to be from the future. spa_claim_max_txg 219410922SJeff.Bonwick@Sun.COM * will have been set for us by either zil_check_log_chain() 219510922SJeff.Bonwick@Sun.COM * (invoked from spa_check_logs()) or zil_claim() above. 2196789Sahrens */ 219710922SJeff.Bonwick@Sun.COM txg_wait_synced(spa->spa_dsl_pool, spa->spa_claim_max_txg); 21981585Sbonwick 21991585Sbonwick /* 22001635Sbonwick * If the config cache is stale, or we have uninitialized 22011635Sbonwick * metaslabs (see spa_vdev_add()), then update the config. 220210100SLin.Ling@Sun.COM * 220312949SGeorge.Wilson@Sun.COM * If this is a verbatim import, trust the current 220410100SLin.Ling@Sun.COM * in-core spa_config and update the disk labels. 22051585Sbonwick */ 22061635Sbonwick if (config_cache_txg != spa->spa_config_txg || 220712949SGeorge.Wilson@Sun.COM state == SPA_LOAD_IMPORT || 220812949SGeorge.Wilson@Sun.COM state == SPA_LOAD_RECOVER || 220912949SGeorge.Wilson@Sun.COM (spa->spa_import_flags & ZFS_IMPORT_VERBATIM)) 22101635Sbonwick need_update = B_TRUE; 22111635Sbonwick 22128241SJeff.Bonwick@Sun.COM for (int c = 0; c < rvd->vdev_children; c++) 22131635Sbonwick if (rvd->vdev_child[c]->vdev_ms_array == 0) 22141635Sbonwick need_update = B_TRUE; 22151585Sbonwick 22161585Sbonwick /* 22171635Sbonwick * Update the config cache asychronously in case we're the 22181635Sbonwick * root pool, in which case the config cache isn't writable yet. 22191585Sbonwick */ 22201635Sbonwick if (need_update) 22211635Sbonwick spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); 22228241SJeff.Bonwick@Sun.COM 22238241SJeff.Bonwick@Sun.COM /* 22248241SJeff.Bonwick@Sun.COM * Check all DTLs to see if anything needs resilvering. 
22258241SJeff.Bonwick@Sun.COM */ 222612296SLin.Ling@Sun.COM if (!dsl_scan_resilvering(spa->spa_dsl_pool) && 222712296SLin.Ling@Sun.COM vdev_resilver_needed(rvd, NULL, NULL)) 22288241SJeff.Bonwick@Sun.COM spa_async_request(spa, SPA_ASYNC_RESILVER); 222910298SMatthew.Ahrens@Sun.COM 223010298SMatthew.Ahrens@Sun.COM /* 223110298SMatthew.Ahrens@Sun.COM * Delete any inconsistent datasets. 223210298SMatthew.Ahrens@Sun.COM */ 223310298SMatthew.Ahrens@Sun.COM (void) dmu_objset_find(spa_name(spa), 223410298SMatthew.Ahrens@Sun.COM dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN); 223510342Schris.kirby@sun.com 223610342Schris.kirby@sun.com /* 223710342Schris.kirby@sun.com * Clean up any stale temporary dataset userrefs. 223810342Schris.kirby@sun.com */ 223910342Schris.kirby@sun.com dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool); 2240789Sahrens } 2241789Sahrens 224211422SMark.Musante@Sun.COM return (0); 2243789Sahrens } 2244789Sahrens 224510921STim.Haley@Sun.COM static int 224610921STim.Haley@Sun.COM spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig) 224710921STim.Haley@Sun.COM { 2248*13049SGeorge.Wilson@Sun.COM int mode = spa->spa_mode; 2249*13049SGeorge.Wilson@Sun.COM 225010921STim.Haley@Sun.COM spa_unload(spa); 225110921STim.Haley@Sun.COM spa_deactivate(spa); 225210921STim.Haley@Sun.COM 225310921STim.Haley@Sun.COM spa->spa_load_max_txg--; 225410921STim.Haley@Sun.COM 2255*13049SGeorge.Wilson@Sun.COM spa_activate(spa, mode); 225610921STim.Haley@Sun.COM spa_async_suspend(spa); 225710921STim.Haley@Sun.COM 225811422SMark.Musante@Sun.COM return (spa_load(spa, state, SPA_IMPORT_EXISTING, mosconfig)); 225910921STim.Haley@Sun.COM } 226010921STim.Haley@Sun.COM 226110921STim.Haley@Sun.COM static int 226210921STim.Haley@Sun.COM spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig, 226311727SVictor.Latushkin@Sun.COM uint64_t max_request, int rewind_flags) 226410921STim.Haley@Sun.COM { 226510921STim.Haley@Sun.COM nvlist_t *config = NULL; 
226610921STim.Haley@Sun.COM int load_error, rewind_error; 226711727SVictor.Latushkin@Sun.COM uint64_t safe_rewind_txg; 226810921STim.Haley@Sun.COM uint64_t min_txg; 226910921STim.Haley@Sun.COM 227011026STim.Haley@Sun.COM if (spa->spa_load_txg && state == SPA_LOAD_RECOVER) { 227110921STim.Haley@Sun.COM spa->spa_load_max_txg = spa->spa_load_txg; 227211422SMark.Musante@Sun.COM spa_set_log_state(spa, SPA_LOG_CLEAR); 227311026STim.Haley@Sun.COM } else { 227410921STim.Haley@Sun.COM spa->spa_load_max_txg = max_request; 227511026STim.Haley@Sun.COM } 227610921STim.Haley@Sun.COM 227711422SMark.Musante@Sun.COM load_error = rewind_error = spa_load(spa, state, SPA_IMPORT_EXISTING, 227811422SMark.Musante@Sun.COM mosconfig); 227910921STim.Haley@Sun.COM if (load_error == 0) 228010921STim.Haley@Sun.COM return (0); 228110921STim.Haley@Sun.COM 228210921STim.Haley@Sun.COM if (spa->spa_root_vdev != NULL) 228310921STim.Haley@Sun.COM config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); 228410921STim.Haley@Sun.COM 228510921STim.Haley@Sun.COM spa->spa_last_ubsync_txg = spa->spa_uberblock.ub_txg; 228610921STim.Haley@Sun.COM spa->spa_last_ubsync_txg_ts = spa->spa_uberblock.ub_timestamp; 228710921STim.Haley@Sun.COM 228811727SVictor.Latushkin@Sun.COM if (rewind_flags & ZPOOL_NEVER_REWIND) { 228910921STim.Haley@Sun.COM nvlist_free(config); 229010921STim.Haley@Sun.COM return (load_error); 229110921STim.Haley@Sun.COM } 229210921STim.Haley@Sun.COM 229310921STim.Haley@Sun.COM /* Price of rolling back is discarding txgs, including log */ 229410921STim.Haley@Sun.COM if (state == SPA_LOAD_RECOVER) 229511422SMark.Musante@Sun.COM spa_set_log_state(spa, SPA_LOG_CLEAR); 229610921STim.Haley@Sun.COM 229711727SVictor.Latushkin@Sun.COM spa->spa_load_max_txg = spa->spa_last_ubsync_txg; 229811727SVictor.Latushkin@Sun.COM safe_rewind_txg = spa->spa_last_ubsync_txg - TXG_DEFER_SIZE; 229911727SVictor.Latushkin@Sun.COM min_txg = (rewind_flags & ZPOOL_EXTREME_REWIND) ? 
230011727SVictor.Latushkin@Sun.COM TXG_INITIAL : safe_rewind_txg; 230111727SVictor.Latushkin@Sun.COM 230211727SVictor.Latushkin@Sun.COM /* 230311727SVictor.Latushkin@Sun.COM * Continue as long as we're finding errors, we're still within 230411727SVictor.Latushkin@Sun.COM * the acceptable rewind range, and we're still finding uberblocks 230511727SVictor.Latushkin@Sun.COM */ 230611727SVictor.Latushkin@Sun.COM while (rewind_error && spa->spa_uberblock.ub_txg >= min_txg && 230711727SVictor.Latushkin@Sun.COM spa->spa_uberblock.ub_txg <= spa->spa_load_max_txg) { 230811727SVictor.Latushkin@Sun.COM if (spa->spa_load_max_txg < safe_rewind_txg) 230910921STim.Haley@Sun.COM spa->spa_extreme_rewind = B_TRUE; 231010921STim.Haley@Sun.COM rewind_error = spa_load_retry(spa, state, mosconfig); 231110921STim.Haley@Sun.COM } 231210921STim.Haley@Sun.COM 231310921STim.Haley@Sun.COM spa->spa_extreme_rewind = B_FALSE; 231410921STim.Haley@Sun.COM spa->spa_load_max_txg = UINT64_MAX; 231510921STim.Haley@Sun.COM 231610921STim.Haley@Sun.COM if (config && (rewind_error || state != SPA_LOAD_RECOVER)) 231710921STim.Haley@Sun.COM spa_config_set(spa, config); 231810921STim.Haley@Sun.COM 231910921STim.Haley@Sun.COM return (state == SPA_LOAD_RECOVER ? rewind_error : load_error); 232010921STim.Haley@Sun.COM } 232110921STim.Haley@Sun.COM 2322789Sahrens /* 2323789Sahrens * Pool Open/Import 2324789Sahrens * 2325789Sahrens * The import case is identical to an open except that the configuration is sent 2326789Sahrens * down from userland, instead of grabbed from the configuration cache. For the 2327789Sahrens * case of an open, the pool configuration will exist in the 23284451Seschrock * POOL_STATE_UNINITIALIZED state. 2329789Sahrens * 2330789Sahrens * The stats information (gen/count/ustats) is used to gather vdev statistics at 2331789Sahrens * the same time open the pool, without having to keep around the spa_t in some 2332789Sahrens * ambiguous state. 
2333789Sahrens */ 2334789Sahrens static int 233510921STim.Haley@Sun.COM spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy, 233610921STim.Haley@Sun.COM nvlist_t **config) 2337789Sahrens { 2338789Sahrens spa_t *spa; 233912949SGeorge.Wilson@Sun.COM spa_load_state_t state = SPA_LOAD_OPEN; 2340789Sahrens int error; 2341789Sahrens int locked = B_FALSE; 2342789Sahrens 2343789Sahrens *spapp = NULL; 2344789Sahrens 2345789Sahrens /* 2346789Sahrens * As disgusting as this is, we need to support recursive calls to this 2347789Sahrens * function because dsl_dir_open() is called during spa_load(), and ends 2348789Sahrens * up calling spa_open() again. The real fix is to figure out how to 2349789Sahrens * avoid dsl_dir_open() calling this in the first place. 2350789Sahrens */ 2351789Sahrens if (mutex_owner(&spa_namespace_lock) != curthread) { 2352789Sahrens mutex_enter(&spa_namespace_lock); 2353789Sahrens locked = B_TRUE; 2354789Sahrens } 2355789Sahrens 2356789Sahrens if ((spa = spa_lookup(pool)) == NULL) { 2357789Sahrens if (locked) 2358789Sahrens mutex_exit(&spa_namespace_lock); 2359789Sahrens return (ENOENT); 2360789Sahrens } 236110921STim.Haley@Sun.COM 2362789Sahrens if (spa->spa_state == POOL_STATE_UNINITIALIZED) { 236311819STim.Haley@Sun.COM zpool_rewind_policy_t policy; 236411819STim.Haley@Sun.COM 236511819STim.Haley@Sun.COM zpool_get_rewind_policy(nvpolicy ? 
nvpolicy : spa->spa_config, 236611819STim.Haley@Sun.COM &policy); 236711819STim.Haley@Sun.COM if (policy.zrp_request & ZPOOL_DO_REWIND) 236811819STim.Haley@Sun.COM state = SPA_LOAD_RECOVER; 2369789Sahrens 23708241SJeff.Bonwick@Sun.COM spa_activate(spa, spa_mode_global); 2371789Sahrens 237210921STim.Haley@Sun.COM if (state != SPA_LOAD_RECOVER) 237310921STim.Haley@Sun.COM spa->spa_last_ubsync_txg = spa->spa_load_txg = 0; 237410921STim.Haley@Sun.COM 237510921STim.Haley@Sun.COM error = spa_load_best(spa, state, B_FALSE, policy.zrp_txg, 237611727SVictor.Latushkin@Sun.COM policy.zrp_request); 2377789Sahrens 2378789Sahrens if (error == EBADF) { 2379789Sahrens /* 23801986Seschrock * If vdev_validate() returns failure (indicated by 23811986Seschrock * EBADF), it indicates that one of the vdevs indicates 23821986Seschrock * that the pool has been exported or destroyed. If 23831986Seschrock * this is the case, the config cache is out of sync and 23841986Seschrock * we should remove the pool from the namespace. 2385789Sahrens */ 2386789Sahrens spa_unload(spa); 2387789Sahrens spa_deactivate(spa); 23886643Seschrock spa_config_sync(spa, B_TRUE, B_TRUE); 2389789Sahrens spa_remove(spa); 2390789Sahrens if (locked) 2391789Sahrens mutex_exit(&spa_namespace_lock); 2392789Sahrens return (ENOENT); 23931544Seschrock } 23941544Seschrock 23951544Seschrock if (error) { 2396789Sahrens /* 2397789Sahrens * We can't open the pool, but we still have useful 2398789Sahrens * information: the state of each vdev after the 2399789Sahrens * attempted vdev_open(). Return this to the user. 
2400789Sahrens */ 240112949SGeorge.Wilson@Sun.COM if (config != NULL && spa->spa_config) { 240210921STim.Haley@Sun.COM VERIFY(nvlist_dup(spa->spa_config, config, 240310921STim.Haley@Sun.COM KM_SLEEP) == 0); 240412949SGeorge.Wilson@Sun.COM VERIFY(nvlist_add_nvlist(*config, 240512949SGeorge.Wilson@Sun.COM ZPOOL_CONFIG_LOAD_INFO, 240612949SGeorge.Wilson@Sun.COM spa->spa_load_info) == 0); 240712949SGeorge.Wilson@Sun.COM } 2408789Sahrens spa_unload(spa); 2409789Sahrens spa_deactivate(spa); 241010921STim.Haley@Sun.COM spa->spa_last_open_failed = error; 2411789Sahrens if (locked) 2412789Sahrens mutex_exit(&spa_namespace_lock); 2413789Sahrens *spapp = NULL; 2414789Sahrens return (error); 2415789Sahrens } 2416789Sahrens } 2417789Sahrens 2418789Sahrens spa_open_ref(spa, tag); 24194451Seschrock 242010921STim.Haley@Sun.COM if (config != NULL) 242110921STim.Haley@Sun.COM *config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); 242210921STim.Haley@Sun.COM 242312949SGeorge.Wilson@Sun.COM /* 242412949SGeorge.Wilson@Sun.COM * If we've recovered the pool, pass back any information we 242512949SGeorge.Wilson@Sun.COM * gathered while doing the load. 
242612949SGeorge.Wilson@Sun.COM */ 242712949SGeorge.Wilson@Sun.COM if (state == SPA_LOAD_RECOVER) { 242812949SGeorge.Wilson@Sun.COM VERIFY(nvlist_add_nvlist(*config, ZPOOL_CONFIG_LOAD_INFO, 242912949SGeorge.Wilson@Sun.COM spa->spa_load_info) == 0); 243012949SGeorge.Wilson@Sun.COM } 243112949SGeorge.Wilson@Sun.COM 243211026STim.Haley@Sun.COM if (locked) { 243311026STim.Haley@Sun.COM spa->spa_last_open_failed = 0; 243411026STim.Haley@Sun.COM spa->spa_last_ubsync_txg = 0; 243511026STim.Haley@Sun.COM spa->spa_load_txg = 0; 2436789Sahrens mutex_exit(&spa_namespace_lock); 243711026STim.Haley@Sun.COM } 2438789Sahrens 2439789Sahrens *spapp = spa; 2440789Sahrens 2441789Sahrens return (0); 2442789Sahrens } 2443789Sahrens 2444789Sahrens int 244510921STim.Haley@Sun.COM spa_open_rewind(const char *name, spa_t **spapp, void *tag, nvlist_t *policy, 244610921STim.Haley@Sun.COM nvlist_t **config) 244710921STim.Haley@Sun.COM { 244810921STim.Haley@Sun.COM return (spa_open_common(name, spapp, tag, policy, config)); 244910921STim.Haley@Sun.COM } 245010921STim.Haley@Sun.COM 245110921STim.Haley@Sun.COM int 2452789Sahrens spa_open(const char *name, spa_t **spapp, void *tag) 2453789Sahrens { 245410921STim.Haley@Sun.COM return (spa_open_common(name, spapp, tag, NULL, NULL)); 2455789Sahrens } 2456789Sahrens 24571544Seschrock /* 24581544Seschrock * Lookup the given spa_t, incrementing the inject count in the process, 24591544Seschrock * preventing it from being exported or destroyed. 
24601544Seschrock */ 24611544Seschrock spa_t * 24621544Seschrock spa_inject_addref(char *name) 24631544Seschrock { 24641544Seschrock spa_t *spa; 24651544Seschrock 24661544Seschrock mutex_enter(&spa_namespace_lock); 24671544Seschrock if ((spa = spa_lookup(name)) == NULL) { 24681544Seschrock mutex_exit(&spa_namespace_lock); 24691544Seschrock return (NULL); 24701544Seschrock } 24711544Seschrock spa->spa_inject_ref++; 24721544Seschrock mutex_exit(&spa_namespace_lock); 24731544Seschrock 24741544Seschrock return (spa); 24751544Seschrock } 24761544Seschrock 24771544Seschrock void 24781544Seschrock spa_inject_delref(spa_t *spa) 24791544Seschrock { 24801544Seschrock mutex_enter(&spa_namespace_lock); 24811544Seschrock spa->spa_inject_ref--; 24821544Seschrock mutex_exit(&spa_namespace_lock); 24831544Seschrock } 24841544Seschrock 24855450Sbrendan /* 24865450Sbrendan * Add spares device information to the nvlist. 24875450Sbrendan */ 24882082Seschrock static void 24892082Seschrock spa_add_spares(spa_t *spa, nvlist_t *config) 24902082Seschrock { 24912082Seschrock nvlist_t **spares; 24922082Seschrock uint_t i, nspares; 24932082Seschrock nvlist_t *nvroot; 24942082Seschrock uint64_t guid; 24952082Seschrock vdev_stat_t *vs; 24962082Seschrock uint_t vsc; 24973377Seschrock uint64_t pool; 24982082Seschrock 24999425SEric.Schrock@Sun.COM ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); 25009425SEric.Schrock@Sun.COM 25015450Sbrendan if (spa->spa_spares.sav_count == 0) 25022082Seschrock return; 25032082Seschrock 25042082Seschrock VERIFY(nvlist_lookup_nvlist(config, 25052082Seschrock ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 25065450Sbrendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 25072082Seschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 25082082Seschrock if (nspares != 0) { 25092082Seschrock VERIFY(nvlist_add_nvlist_array(nvroot, 25102082Seschrock ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 25112082Seschrock VERIFY(nvlist_lookup_nvlist_array(nvroot, 
25122082Seschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 25132082Seschrock 25142082Seschrock /* 25152082Seschrock * Go through and find any spares which have since been 25162082Seschrock * repurposed as an active spare. If this is the case, update 25172082Seschrock * their status appropriately. 25182082Seschrock */ 25192082Seschrock for (i = 0; i < nspares; i++) { 25202082Seschrock VERIFY(nvlist_lookup_uint64(spares[i], 25212082Seschrock ZPOOL_CONFIG_GUID, &guid) == 0); 25227214Slling if (spa_spare_exists(guid, &pool, NULL) && 25237214Slling pool != 0ULL) { 25242082Seschrock VERIFY(nvlist_lookup_uint64_array( 252512296SLin.Ling@Sun.COM spares[i], ZPOOL_CONFIG_VDEV_STATS, 25262082Seschrock (uint64_t **)&vs, &vsc) == 0); 25272082Seschrock vs->vs_state = VDEV_STATE_CANT_OPEN; 25282082Seschrock vs->vs_aux = VDEV_AUX_SPARED; 25292082Seschrock } 25302082Seschrock } 25312082Seschrock } 25322082Seschrock } 25332082Seschrock 25345450Sbrendan /* 25355450Sbrendan * Add l2cache device information to the nvlist, including vdev stats. 
25365450Sbrendan */ 25375450Sbrendan static void 25385450Sbrendan spa_add_l2cache(spa_t *spa, nvlist_t *config) 25395450Sbrendan { 25405450Sbrendan nvlist_t **l2cache; 25415450Sbrendan uint_t i, j, nl2cache; 25425450Sbrendan nvlist_t *nvroot; 25435450Sbrendan uint64_t guid; 25445450Sbrendan vdev_t *vd; 25455450Sbrendan vdev_stat_t *vs; 25465450Sbrendan uint_t vsc; 25475450Sbrendan 25489425SEric.Schrock@Sun.COM ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); 25499425SEric.Schrock@Sun.COM 25505450Sbrendan if (spa->spa_l2cache.sav_count == 0) 25515450Sbrendan return; 25525450Sbrendan 25535450Sbrendan VERIFY(nvlist_lookup_nvlist(config, 25545450Sbrendan ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 25555450Sbrendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, 25565450Sbrendan ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 25575450Sbrendan if (nl2cache != 0) { 25585450Sbrendan VERIFY(nvlist_add_nvlist_array(nvroot, 25595450Sbrendan ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); 25605450Sbrendan VERIFY(nvlist_lookup_nvlist_array(nvroot, 25615450Sbrendan ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 25625450Sbrendan 25635450Sbrendan /* 25645450Sbrendan * Update level 2 cache device stats. 
25655450Sbrendan */ 25665450Sbrendan 25675450Sbrendan for (i = 0; i < nl2cache; i++) { 25685450Sbrendan VERIFY(nvlist_lookup_uint64(l2cache[i], 25695450Sbrendan ZPOOL_CONFIG_GUID, &guid) == 0); 25705450Sbrendan 25715450Sbrendan vd = NULL; 25725450Sbrendan for (j = 0; j < spa->spa_l2cache.sav_count; j++) { 25735450Sbrendan if (guid == 25745450Sbrendan spa->spa_l2cache.sav_vdevs[j]->vdev_guid) { 25755450Sbrendan vd = spa->spa_l2cache.sav_vdevs[j]; 25765450Sbrendan break; 25775450Sbrendan } 25785450Sbrendan } 25795450Sbrendan ASSERT(vd != NULL); 25805450Sbrendan 25815450Sbrendan VERIFY(nvlist_lookup_uint64_array(l2cache[i], 258212296SLin.Ling@Sun.COM ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc) 258312296SLin.Ling@Sun.COM == 0); 25845450Sbrendan vdev_get_stats(vd, vs); 25855450Sbrendan } 25865450Sbrendan } 25875450Sbrendan } 25885450Sbrendan 2589789Sahrens int 25901544Seschrock spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen) 2591789Sahrens { 2592789Sahrens int error; 2593789Sahrens spa_t *spa; 2594789Sahrens 2595789Sahrens *config = NULL; 259610921STim.Haley@Sun.COM error = spa_open_common(name, &spa, FTAG, NULL, config); 2597789Sahrens 25989425SEric.Schrock@Sun.COM if (spa != NULL) { 25999425SEric.Schrock@Sun.COM /* 26009425SEric.Schrock@Sun.COM * This still leaves a window of inconsistency where the spares 26019425SEric.Schrock@Sun.COM * or l2cache devices could change and the config would be 26029425SEric.Schrock@Sun.COM * self-inconsistent. 
26039425SEric.Schrock@Sun.COM */ 26049425SEric.Schrock@Sun.COM spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 26059425SEric.Schrock@Sun.COM 26069425SEric.Schrock@Sun.COM if (*config != NULL) { 260712817STim.Haley@Sun.COM uint64_t loadtimes[2]; 260812817STim.Haley@Sun.COM 260912817STim.Haley@Sun.COM loadtimes[0] = spa->spa_loaded_ts.tv_sec; 261012817STim.Haley@Sun.COM loadtimes[1] = spa->spa_loaded_ts.tv_nsec; 261112817STim.Haley@Sun.COM VERIFY(nvlist_add_uint64_array(*config, 261212817STim.Haley@Sun.COM ZPOOL_CONFIG_LOADED_TIME, loadtimes, 2) == 0); 261312817STim.Haley@Sun.COM 26147754SJeff.Bonwick@Sun.COM VERIFY(nvlist_add_uint64(*config, 26159425SEric.Schrock@Sun.COM ZPOOL_CONFIG_ERRCOUNT, 26169425SEric.Schrock@Sun.COM spa_get_errlog_size(spa)) == 0); 26179425SEric.Schrock@Sun.COM 26189425SEric.Schrock@Sun.COM if (spa_suspended(spa)) 26199425SEric.Schrock@Sun.COM VERIFY(nvlist_add_uint64(*config, 26209425SEric.Schrock@Sun.COM ZPOOL_CONFIG_SUSPENDED, 26219425SEric.Schrock@Sun.COM spa->spa_failmode) == 0); 26229425SEric.Schrock@Sun.COM 26239425SEric.Schrock@Sun.COM spa_add_spares(spa, *config); 26249425SEric.Schrock@Sun.COM spa_add_l2cache(spa, *config); 26259425SEric.Schrock@Sun.COM } 26262082Seschrock } 26272082Seschrock 26281544Seschrock /* 26291544Seschrock * We want to get the alternate root even for faulted pools, so we cheat 26301544Seschrock * and call spa_lookup() directly. 
26311544Seschrock */ 26321544Seschrock if (altroot) { 26331544Seschrock if (spa == NULL) { 26341544Seschrock mutex_enter(&spa_namespace_lock); 26351544Seschrock spa = spa_lookup(name); 26361544Seschrock if (spa) 26371544Seschrock spa_altroot(spa, altroot, buflen); 26381544Seschrock else 26391544Seschrock altroot[0] = '\0'; 26401544Seschrock spa = NULL; 26411544Seschrock mutex_exit(&spa_namespace_lock); 26421544Seschrock } else { 26431544Seschrock spa_altroot(spa, altroot, buflen); 26441544Seschrock } 26451544Seschrock } 26461544Seschrock 26479425SEric.Schrock@Sun.COM if (spa != NULL) { 26489425SEric.Schrock@Sun.COM spa_config_exit(spa, SCL_CONFIG, FTAG); 2649789Sahrens spa_close(spa, FTAG); 26509425SEric.Schrock@Sun.COM } 2651789Sahrens 2652789Sahrens return (error); 2653789Sahrens } 2654789Sahrens 2655789Sahrens /* 26565450Sbrendan * Validate that the auxiliary device array is well formed. We must have an 26575450Sbrendan * array of nvlists, each which describes a valid leaf vdev. If this is an 26585450Sbrendan * import (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be 26595450Sbrendan * specified, as long as they are well-formed. 26602082Seschrock */ 26612082Seschrock static int 26625450Sbrendan spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode, 26635450Sbrendan spa_aux_vdev_t *sav, const char *config, uint64_t version, 26645450Sbrendan vdev_labeltype_t label) 26652082Seschrock { 26665450Sbrendan nvlist_t **dev; 26675450Sbrendan uint_t i, ndev; 26682082Seschrock vdev_t *vd; 26692082Seschrock int error; 26702082Seschrock 26717754SJeff.Bonwick@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 26727754SJeff.Bonwick@Sun.COM 26732082Seschrock /* 26745450Sbrendan * It's acceptable to have no devs specified. 
26752082Seschrock */ 26765450Sbrendan if (nvlist_lookup_nvlist_array(nvroot, config, &dev, &ndev) != 0) 26772082Seschrock return (0); 26782082Seschrock 26795450Sbrendan if (ndev == 0) 26802082Seschrock return (EINVAL); 26812082Seschrock 26822082Seschrock /* 26835450Sbrendan * Make sure the pool is formatted with a version that supports this 26845450Sbrendan * device type. 26852082Seschrock */ 26865450Sbrendan if (spa_version(spa) < version) 26872082Seschrock return (ENOTSUP); 26882082Seschrock 26893377Seschrock /* 26905450Sbrendan * Set the pending device list so we correctly handle device in-use 26913377Seschrock * checking. 26923377Seschrock */ 26935450Sbrendan sav->sav_pending = dev; 26945450Sbrendan sav->sav_npending = ndev; 26955450Sbrendan 26965450Sbrendan for (i = 0; i < ndev; i++) { 26975450Sbrendan if ((error = spa_config_parse(spa, &vd, dev[i], NULL, 0, 26982082Seschrock mode)) != 0) 26993377Seschrock goto out; 27002082Seschrock 27012082Seschrock if (!vd->vdev_ops->vdev_op_leaf) { 27022082Seschrock vdev_free(vd); 27033377Seschrock error = EINVAL; 27043377Seschrock goto out; 27052082Seschrock } 27062082Seschrock 27075450Sbrendan /* 27087754SJeff.Bonwick@Sun.COM * The L2ARC currently only supports disk devices in 27097754SJeff.Bonwick@Sun.COM * kernel context. For user-level testing, we allow it. 
27105450Sbrendan */ 27117754SJeff.Bonwick@Sun.COM #ifdef _KERNEL 27125450Sbrendan if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) && 27135450Sbrendan strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) { 27145450Sbrendan error = ENOTBLK; 27155450Sbrendan goto out; 27165450Sbrendan } 27177754SJeff.Bonwick@Sun.COM #endif 27182082Seschrock vd->vdev_top = vd; 27193377Seschrock 27203377Seschrock if ((error = vdev_open(vd)) == 0 && 27215450Sbrendan (error = vdev_label_init(vd, crtxg, label)) == 0) { 27225450Sbrendan VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID, 27233377Seschrock vd->vdev_guid) == 0); 27242082Seschrock } 27252082Seschrock 27262082Seschrock vdev_free(vd); 27273377Seschrock 27285450Sbrendan if (error && 27295450Sbrendan (mode != VDEV_ALLOC_SPARE && mode != VDEV_ALLOC_L2CACHE)) 27303377Seschrock goto out; 27313377Seschrock else 27323377Seschrock error = 0; 27332082Seschrock } 27342082Seschrock 27353377Seschrock out: 27365450Sbrendan sav->sav_pending = NULL; 27375450Sbrendan sav->sav_npending = 0; 27383377Seschrock return (error); 27392082Seschrock } 27402082Seschrock 27415450Sbrendan static int 27425450Sbrendan spa_validate_aux(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode) 27435450Sbrendan { 27445450Sbrendan int error; 27455450Sbrendan 27467754SJeff.Bonwick@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 27477754SJeff.Bonwick@Sun.COM 27485450Sbrendan if ((error = spa_validate_aux_devs(spa, nvroot, crtxg, mode, 27495450Sbrendan &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES, 27505450Sbrendan VDEV_LABEL_SPARE)) != 0) { 27515450Sbrendan return (error); 27525450Sbrendan } 27535450Sbrendan 27545450Sbrendan return (spa_validate_aux_devs(spa, nvroot, crtxg, mode, 27555450Sbrendan &spa->spa_l2cache, ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE, 27565450Sbrendan VDEV_LABEL_L2CACHE)); 27575450Sbrendan } 27585450Sbrendan 27595450Sbrendan static void 27605450Sbrendan spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t 
**devs, int ndevs, 27615450Sbrendan const char *config) 27625450Sbrendan { 27635450Sbrendan int i; 27645450Sbrendan 27655450Sbrendan if (sav->sav_config != NULL) { 27665450Sbrendan nvlist_t **olddevs; 27675450Sbrendan uint_t oldndevs; 27685450Sbrendan nvlist_t **newdevs; 27695450Sbrendan 27705450Sbrendan /* 27715450Sbrendan * Generate new dev list by concatentating with the 27725450Sbrendan * current dev list. 27735450Sbrendan */ 27745450Sbrendan VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config, 27755450Sbrendan &olddevs, &oldndevs) == 0); 27765450Sbrendan 27775450Sbrendan newdevs = kmem_alloc(sizeof (void *) * 27785450Sbrendan (ndevs + oldndevs), KM_SLEEP); 27795450Sbrendan for (i = 0; i < oldndevs; i++) 27805450Sbrendan VERIFY(nvlist_dup(olddevs[i], &newdevs[i], 27815450Sbrendan KM_SLEEP) == 0); 27825450Sbrendan for (i = 0; i < ndevs; i++) 27835450Sbrendan VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs], 27845450Sbrendan KM_SLEEP) == 0); 27855450Sbrendan 27865450Sbrendan VERIFY(nvlist_remove(sav->sav_config, config, 27875450Sbrendan DATA_TYPE_NVLIST_ARRAY) == 0); 27885450Sbrendan 27895450Sbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, 27905450Sbrendan config, newdevs, ndevs + oldndevs) == 0); 27915450Sbrendan for (i = 0; i < oldndevs + ndevs; i++) 27925450Sbrendan nvlist_free(newdevs[i]); 27935450Sbrendan kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *)); 27945450Sbrendan } else { 27955450Sbrendan /* 27965450Sbrendan * Generate a new dev list. 
27975450Sbrendan */ 27985450Sbrendan VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME, 27995450Sbrendan KM_SLEEP) == 0); 28005450Sbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, config, 28015450Sbrendan devs, ndevs) == 0); 28025450Sbrendan } 28035450Sbrendan } 28045450Sbrendan 28055450Sbrendan /* 28065450Sbrendan * Stop and drop level 2 ARC devices 28075450Sbrendan */ 28085450Sbrendan void 28095450Sbrendan spa_l2cache_drop(spa_t *spa) 28105450Sbrendan { 28115450Sbrendan vdev_t *vd; 28125450Sbrendan int i; 28135450Sbrendan spa_aux_vdev_t *sav = &spa->spa_l2cache; 28145450Sbrendan 28155450Sbrendan for (i = 0; i < sav->sav_count; i++) { 28165450Sbrendan uint64_t pool; 28175450Sbrendan 28185450Sbrendan vd = sav->sav_vdevs[i]; 28195450Sbrendan ASSERT(vd != NULL); 28205450Sbrendan 28218241SJeff.Bonwick@Sun.COM if (spa_l2cache_exists(vd->vdev_guid, &pool) && 28228241SJeff.Bonwick@Sun.COM pool != 0ULL && l2arc_vdev_present(vd)) 28235450Sbrendan l2arc_remove_vdev(vd); 28245450Sbrendan if (vd->vdev_isl2cache) 28255450Sbrendan spa_l2cache_remove(vd); 28265450Sbrendan vdev_clear_stats(vd); 28275450Sbrendan (void) vdev_close(vd); 28285450Sbrendan } 28295450Sbrendan } 28305450Sbrendan 28312082Seschrock /* 2832789Sahrens * Pool Creation 2833789Sahrens */ 2834789Sahrens int 28355094Slling spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, 28367184Stimh const char *history_str, nvlist_t *zplprops) 2837789Sahrens { 2838789Sahrens spa_t *spa; 28395094Slling char *altroot = NULL; 28401635Sbonwick vdev_t *rvd; 2841789Sahrens dsl_pool_t *dp; 2842789Sahrens dmu_tx_t *tx; 28439816SGeorge.Wilson@Sun.COM int error = 0; 2844789Sahrens uint64_t txg = TXG_INITIAL; 28455450Sbrendan nvlist_t **spares, **l2cache; 28465450Sbrendan uint_t nspares, nl2cache; 284712470SMatthew.Ahrens@Sun.COM uint64_t version, obj; 2848789Sahrens 2849789Sahrens /* 2850789Sahrens * If this pool already exists, return failure. 
/*
 * Pool Creation
 *
 * Create a new pool named 'pool' from the vdev tree described by 'nvroot'.
 *
 *	pool		name of the pool to create
 *	nvroot		vdev tree; may also carry ZPOOL_CONFIG_SPARES and
 *			ZPOOL_CONFIG_L2CACHE arrays
 *	props		pool properties to validate and apply (tested against
 *			NULL before use in several places below)
 *	history_str	command string logged to the pool history when the
 *			pool version supports it (may be NULL)
 *	zplprops	passed through to dsl_pool_create()
 *
 * Returns 0 on success.  Returns EEXIST if a pool of that name is already
 * in the namespace, EINVAL if zfs_allocatable_devs() rejects 'nvroot', or
 * the error from spa_prop_validate(), spa_config_parse(), vdev_create() or
 * spa_validate_aux().  On every failure after the spa_t has been added it
 * is torn down and removed again before returning.
 *
 * spa_namespace_lock is held from the name lookup until return.
 */
int
spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
    const char *history_str, nvlist_t *zplprops)
{
	spa_t *spa;
	char *altroot = NULL;
	vdev_t *rvd;
	dsl_pool_t *dp;
	dmu_tx_t *tx;
	int error = 0;
	uint64_t txg = TXG_INITIAL;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;
	uint64_t version, obj;

	/*
	 * If this pool already exists, return failure.
	 */
	mutex_enter(&spa_namespace_lock);
	if (spa_lookup(pool) != NULL) {
		mutex_exit(&spa_namespace_lock);
		return (EEXIST);
	}

	/*
	 * Allocate a new spa_t structure.
	 *
	 * The lookup result is deliberately ignored: 'altroot' keeps its
	 * NULL initializer when the property is absent.
	 * NOTE(review): 'props' is passed here without a NULL check;
	 * presumably nvlist_lookup_string() rejects a NULL list -- confirm.
	 */
	(void) nvlist_lookup_string(props,
	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
	spa = spa_add(pool, NULL, altroot);
	spa_activate(spa, spa_mode_global);

	if (props && (error = spa_prop_validate(spa, props))) {
		spa_deactivate(spa);
		spa_remove(spa);
		mutex_exit(&spa_namespace_lock);
		return (error);
	}

	/* Fall back to the current SPA_VERSION when none was requested. */
	if (nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION),
	    &version) != 0)
		version = SPA_VERSION;
	ASSERT(version <= SPA_VERSION);

	/* Seed the in-core uberblock one txg behind the first txg. */
	spa->spa_first_txg = txg;
	spa->spa_uberblock.ub_txg = txg - 1;
	spa->spa_uberblock.ub_version = version;
	spa->spa_ubsync = spa->spa_uberblock;

	/*
	 * Create "The Godfather" zio to hold all async IOs
	 */
	spa->spa_async_zio_root = zio_root(spa, NULL, NULL,
	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER);

	/*
	 * Create the root vdev.
	 */
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD);

	ASSERT(error != 0 || rvd != NULL);
	ASSERT(error != 0 || spa->spa_root_vdev == rvd);

	if (error == 0 && !zfs_allocatable_devs(nvroot))
		error = EINVAL;

	/*
	 * Create the vdevs and validate the aux devices; only when both
	 * succeed are the top-level children sized and expanded.
	 */
	if (error == 0 &&
	    (error = vdev_create(rvd, txg, B_FALSE)) == 0 &&
	    (error = spa_validate_aux(spa, nvroot, txg,
	    VDEV_ALLOC_ADD)) == 0) {
		for (int c = 0; c < rvd->vdev_children; c++) {
			vdev_metaslab_set_size(rvd->vdev_child[c]);
			vdev_expand(rvd->vdev_child[c], txg);
		}
	}

	spa_config_exit(spa, SCL_ALL, FTAG);

	if (error != 0) {
		spa_unload(spa);
		spa_deactivate(spa);
		spa_remove(spa);
		mutex_exit(&spa_namespace_lock);
		return (error);
	}

	/*
	 * Get the list of spares, if specified.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) == 0) {
		VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME,
		    KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_spares(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_spares.sav_sync = B_TRUE;
	}

	/*
	 * Get the list of level 2 cache devices, if specified.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
	    &l2cache, &nl2cache) == 0) {
		VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
		    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_l2cache(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg);
	spa->spa_meta_objset = dp->dp_meta_objset;

	/*
	 * Create DDTs (dedup tables).
	 */
	ddt_create(spa);

	spa_update_dspace(spa);

	/* Everything below up to dmu_tx_commit() is done in this tx. */
	tx = dmu_tx_create_assigned(dp, txg);

	/*
	 * Create the pool config object.
	 */
	spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset,
	    DMU_OT_PACKED_NVLIST, SPA_CONFIG_BLOCKSIZE,
	    DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx);

	if (zap_add(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG,
	    sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) {
		cmn_err(CE_PANIC, "failed to add pool config");
	}

	/* Record the version the pool was created with. */
	if (zap_add(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CREATION_VERSION,
	    sizeof (uint64_t), 1, &version, tx) != 0) {
		cmn_err(CE_PANIC, "failed to add pool version");
	}

	/* Newly created pools with the right version are always deflated. */
	if (version >= SPA_VERSION_RAIDZ_DEFLATE) {
		spa->spa_deflate = TRUE;
		if (zap_add(spa->spa_meta_objset,
		    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
		    sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) {
			cmn_err(CE_PANIC, "failed to add deflate");
		}
	}

	/*
	 * Create the deferred-free bpobj.  Turn off compression
	 * because sync-to-convergence takes longer if the blocksize
	 * keeps changing.
	 */
	obj = bpobj_alloc(spa->spa_meta_objset, 1 << 14, tx);
	dmu_object_set_compress(spa->spa_meta_objset, obj,
	    ZIO_COMPRESS_OFF, tx);
	if (zap_add(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPOBJ,
	    sizeof (uint64_t), 1, &obj, tx) != 0) {
		cmn_err(CE_PANIC, "failed to add bpobj");
	}
	VERIFY3U(0, ==, bpobj_open(&spa->spa_deferred_bpobj,
	    spa->spa_meta_objset, obj));

	/*
	 * Create the pool's history object.
	 */
	if (version >= SPA_VERSION_ZPOOL_HISTORY)
		spa_history_create_obj(spa, tx);

	/*
	 * Set pool properties.
	 *
	 * Start from the property defaults; any values supplied in 'props'
	 * are applied by spa_sync_props() below.
	 */
	spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS);
	spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION);
	spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE);
	spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND);

	if (props != NULL) {
		spa_configfile_set(spa, props, B_FALSE);
		spa_sync_props(spa, props, tx);
	}

	dmu_tx_commit(tx);

	spa->spa_sync_on = B_TRUE;
	txg_sync_start(spa->spa_dsl_pool);

	/*
	 * We explicitly wait for the first transaction to complete so that our
	 * bean counters are appropriately updated.
	 */
	txg_wait_synced(spa->spa_dsl_pool, txg);

	spa_config_sync(spa, B_FALSE, B_TRUE);

	/* The command string is logged only when one was supplied. */
	if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL)
		(void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE);
	spa_history_log_version(spa, LOG_POOL_CREATE);

	/* Remember the reference count the pool has once fully created. */
	spa->spa_minref = refcount_count(&spa->spa_refcount);

	mutex_exit(&spa_namespace_lock);

	return (0);
}
30716423Sgw25295 */ 30729790SLin.Ling@Sun.COM VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 30739790SLin.Ling@Sun.COM &nvtop) == 0); 30749790SLin.Ling@Sun.COM VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, 30759790SLin.Ling@Sun.COM &pgid) == 0); 30769790SLin.Ling@Sun.COM VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, guid) == 0); 30776423Sgw25295 30786423Sgw25295 /* 30796423Sgw25295 * Put this pool's top-level vdevs into a root vdev. 30806423Sgw25295 */ 30816423Sgw25295 VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); 30829790SLin.Ling@Sun.COM VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, 30839790SLin.Ling@Sun.COM VDEV_TYPE_ROOT) == 0); 30846423Sgw25295 VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0); 30856423Sgw25295 VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0); 30866423Sgw25295 VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 30876423Sgw25295 &nvtop, 1) == 0); 30886423Sgw25295 30896423Sgw25295 /* 30906423Sgw25295 * Replace the existing vdev_tree with the new root vdev in 30916423Sgw25295 * this pool's configuration (remove the old, add the new). 30926423Sgw25295 */ 30936423Sgw25295 VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); 30946423Sgw25295 nvlist_free(nvroot); 30959790SLin.Ling@Sun.COM return (config); 30966423Sgw25295 } 30976423Sgw25295 30986423Sgw25295 /* 30999790SLin.Ling@Sun.COM * Walk the vdev tree and see if we can find a device with "better" 31009790SLin.Ling@Sun.COM * configuration. A configuration is "better" if the label on that 31019790SLin.Ling@Sun.COM * device has a more recent txg. 
31026423Sgw25295 */ 31039790SLin.Ling@Sun.COM static void 31049790SLin.Ling@Sun.COM spa_alt_rootvdev(vdev_t *vd, vdev_t **avd, uint64_t *txg) 31057147Staylor { 31069816SGeorge.Wilson@Sun.COM for (int c = 0; c < vd->vdev_children; c++) 31079790SLin.Ling@Sun.COM spa_alt_rootvdev(vd->vdev_child[c], avd, txg); 31089790SLin.Ling@Sun.COM 31099790SLin.Ling@Sun.COM if (vd->vdev_ops->vdev_op_leaf) { 31109790SLin.Ling@Sun.COM nvlist_t *label; 31119790SLin.Ling@Sun.COM uint64_t label_txg; 31129790SLin.Ling@Sun.COM 31139790SLin.Ling@Sun.COM if (vdev_disk_read_rootlabel(vd->vdev_physpath, vd->vdev_devid, 31149790SLin.Ling@Sun.COM &label) != 0) 31159790SLin.Ling@Sun.COM return; 31169790SLin.Ling@Sun.COM 31179790SLin.Ling@Sun.COM VERIFY(nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_TXG, 31189790SLin.Ling@Sun.COM &label_txg) == 0); 31199790SLin.Ling@Sun.COM 31209790SLin.Ling@Sun.COM /* 31219790SLin.Ling@Sun.COM * Do we have a better boot device? 31229790SLin.Ling@Sun.COM */ 31239790SLin.Ling@Sun.COM if (label_txg > *txg) { 31249790SLin.Ling@Sun.COM *txg = label_txg; 31259790SLin.Ling@Sun.COM *avd = vd; 31267147Staylor } 31279790SLin.Ling@Sun.COM nvlist_free(label); 31287147Staylor } 31297147Staylor } 31307147Staylor 31316423Sgw25295 /* 31326423Sgw25295 * Import a root pool. 31336423Sgw25295 * 31347147Staylor * For x86. devpath_list will consist of devid and/or physpath name of 31357147Staylor * the vdev (e.g. "id1,sd@SSEAGATE..." or "/pci@1f,0/ide@d/disk@0,0:a"). 31367147Staylor * The GRUB "findroot" command will return the vdev we should boot. 31376423Sgw25295 * 31386423Sgw25295 * For Sparc, devpath_list consists the physpath name of the booting device 31396423Sgw25295 * no matter the rootpool is a single device pool or a mirrored pool. 31406423Sgw25295 * e.g. 
31416423Sgw25295 * "/pci@1f,0/ide@d/disk@0,0:a" 31426423Sgw25295 */ 31436423Sgw25295 int 31447147Staylor spa_import_rootpool(char *devpath, char *devid) 31456423Sgw25295 { 31469790SLin.Ling@Sun.COM spa_t *spa; 31479790SLin.Ling@Sun.COM vdev_t *rvd, *bvd, *avd = NULL; 31489790SLin.Ling@Sun.COM nvlist_t *config, *nvtop; 31499790SLin.Ling@Sun.COM uint64_t guid, txg; 31506423Sgw25295 char *pname; 31516423Sgw25295 int error; 31526423Sgw25295 31536423Sgw25295 /* 31549790SLin.Ling@Sun.COM * Read the label from the boot device and generate a configuration. 31556423Sgw25295 */ 315610822SJack.Meng@Sun.COM config = spa_generate_rootconf(devpath, devid, &guid); 315710822SJack.Meng@Sun.COM #if defined(_OBP) && defined(_KERNEL) 315810822SJack.Meng@Sun.COM if (config == NULL) { 315910822SJack.Meng@Sun.COM if (strstr(devpath, "/iscsi/ssd") != NULL) { 316010822SJack.Meng@Sun.COM /* iscsi boot */ 316110822SJack.Meng@Sun.COM get_iscsi_bootpath_phy(devpath); 316210822SJack.Meng@Sun.COM config = spa_generate_rootconf(devpath, devid, &guid); 316310822SJack.Meng@Sun.COM } 316410822SJack.Meng@Sun.COM } 316510822SJack.Meng@Sun.COM #endif 316610822SJack.Meng@Sun.COM if (config == NULL) { 31679790SLin.Ling@Sun.COM cmn_err(CE_NOTE, "Can not read the pool label from '%s'", 31689790SLin.Ling@Sun.COM devpath); 31699790SLin.Ling@Sun.COM return (EIO); 31709790SLin.Ling@Sun.COM } 31719790SLin.Ling@Sun.COM 31729790SLin.Ling@Sun.COM VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, 31739790SLin.Ling@Sun.COM &pname) == 0); 31749790SLin.Ling@Sun.COM VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) == 0); 31756423Sgw25295 31769425SEric.Schrock@Sun.COM mutex_enter(&spa_namespace_lock); 31779425SEric.Schrock@Sun.COM if ((spa = spa_lookup(pname)) != NULL) { 31789425SEric.Schrock@Sun.COM /* 31799425SEric.Schrock@Sun.COM * Remove the existing root pool from the namespace so that we 31809425SEric.Schrock@Sun.COM * can replace it with the correct config we just read in. 
31819425SEric.Schrock@Sun.COM */ 31829425SEric.Schrock@Sun.COM spa_remove(spa); 31839425SEric.Schrock@Sun.COM } 31849425SEric.Schrock@Sun.COM 318510921STim.Haley@Sun.COM spa = spa_add(pname, config, NULL); 31869425SEric.Schrock@Sun.COM spa->spa_is_root = B_TRUE; 318712949SGeorge.Wilson@Sun.COM spa->spa_import_flags = ZFS_IMPORT_VERBATIM; 31889790SLin.Ling@Sun.COM 31899790SLin.Ling@Sun.COM /* 31909790SLin.Ling@Sun.COM * Build up a vdev tree based on the boot device's label config. 31919790SLin.Ling@Sun.COM */ 31929790SLin.Ling@Sun.COM VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 31939790SLin.Ling@Sun.COM &nvtop) == 0); 31949790SLin.Ling@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 31959790SLin.Ling@Sun.COM error = spa_config_parse(spa, &rvd, nvtop, NULL, 0, 31969790SLin.Ling@Sun.COM VDEV_ALLOC_ROOTPOOL); 31979790SLin.Ling@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 31989790SLin.Ling@Sun.COM if (error) { 31999790SLin.Ling@Sun.COM mutex_exit(&spa_namespace_lock); 32009790SLin.Ling@Sun.COM nvlist_free(config); 32019790SLin.Ling@Sun.COM cmn_err(CE_NOTE, "Can not parse the config for pool '%s'", 32029790SLin.Ling@Sun.COM pname); 32039790SLin.Ling@Sun.COM return (error); 32049790SLin.Ling@Sun.COM } 32059790SLin.Ling@Sun.COM 32069790SLin.Ling@Sun.COM /* 32079790SLin.Ling@Sun.COM * Get the boot vdev. 32089790SLin.Ling@Sun.COM */ 32099790SLin.Ling@Sun.COM if ((bvd = vdev_lookup_by_guid(rvd, guid)) == NULL) { 32109790SLin.Ling@Sun.COM cmn_err(CE_NOTE, "Can not find the boot vdev for guid %llu", 32119790SLin.Ling@Sun.COM (u_longlong_t)guid); 32129790SLin.Ling@Sun.COM error = ENOENT; 32139790SLin.Ling@Sun.COM goto out; 32149790SLin.Ling@Sun.COM } 32159790SLin.Ling@Sun.COM 32169790SLin.Ling@Sun.COM /* 32179790SLin.Ling@Sun.COM * Determine if there is a better boot device. 
32189790SLin.Ling@Sun.COM */ 32199790SLin.Ling@Sun.COM avd = bvd; 32209790SLin.Ling@Sun.COM spa_alt_rootvdev(rvd, &avd, &txg); 32219790SLin.Ling@Sun.COM if (avd != bvd) { 32229790SLin.Ling@Sun.COM cmn_err(CE_NOTE, "The boot device is 'degraded'. Please " 32239790SLin.Ling@Sun.COM "try booting from '%s'", avd->vdev_path); 32249790SLin.Ling@Sun.COM error = EINVAL; 32259790SLin.Ling@Sun.COM goto out; 32269790SLin.Ling@Sun.COM } 32279790SLin.Ling@Sun.COM 32289790SLin.Ling@Sun.COM /* 32299790SLin.Ling@Sun.COM * If the boot device is part of a spare vdev then ensure that 32309790SLin.Ling@Sun.COM * we're booting off the active spare. 32319790SLin.Ling@Sun.COM */ 32329790SLin.Ling@Sun.COM if (bvd->vdev_parent->vdev_ops == &vdev_spare_ops && 32339790SLin.Ling@Sun.COM !bvd->vdev_isspare) { 32349790SLin.Ling@Sun.COM cmn_err(CE_NOTE, "The boot device is currently spared. Please " 32359790SLin.Ling@Sun.COM "try booting from '%s'", 323613037SMark.Musante@Sun.COM bvd->vdev_parent-> 323713037SMark.Musante@Sun.COM vdev_child[bvd->vdev_parent->vdev_children - 1]->vdev_path); 32389790SLin.Ling@Sun.COM error = EINVAL; 32399790SLin.Ling@Sun.COM goto out; 32409790SLin.Ling@Sun.COM } 32419790SLin.Ling@Sun.COM 32429790SLin.Ling@Sun.COM error = 0; 32439946SMark.Musante@Sun.COM spa_history_log_version(spa, LOG_POOL_IMPORT); 32449790SLin.Ling@Sun.COM out: 32459790SLin.Ling@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 32469790SLin.Ling@Sun.COM vdev_free(rvd); 32479790SLin.Ling@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 32489425SEric.Schrock@Sun.COM mutex_exit(&spa_namespace_lock); 32496423Sgw25295 32509790SLin.Ling@Sun.COM nvlist_free(config); 32516423Sgw25295 return (error); 32526423Sgw25295 } 32539790SLin.Ling@Sun.COM 32546423Sgw25295 #endif 32556423Sgw25295 32566423Sgw25295 /* 32576423Sgw25295 * Import a non-root pool into the system. 
/*
 * Import a non-root pool into the system.
 *
 *	pool	name under which the pool is added to the namespace
 *	config	user-supplied pool configuration; for a non-verbatim import
 *		it is also updated in place with ZPOOL_CONFIG_LOAD_INFO,
 *		whether or not the load succeeded
 *	props	pool properties (tested against NULL before use in several
 *		places below); ZPOOL_PROP_ALTROOT and ZPOOL_PROP_READONLY
 *		are consulted before the pool is activated
 *	flags	stored in spa_import_flags; ZFS_IMPORT_VERBATIM selects the
 *		short path that only inserts the pool into the namespace
 *
 * Returns 0 on success, EEXIST if a pool of that name already exists, or
 * the error from spa_load_best(), spa_validate_aux() or spa_prop_set().
 * On those failures the pool is unloaded, deactivated and removed from the
 * namespace again.
 */
int
spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
{
	spa_t *spa;
	char *altroot = NULL;
	spa_load_state_t state = SPA_LOAD_IMPORT;
	zpool_rewind_policy_t policy;
	uint64_t mode = spa_mode_global;
	uint64_t readonly = B_FALSE;
	int error;
	nvlist_t *nvroot;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;

	/*
	 * If a pool with this name exists, return failure.
	 */
	mutex_enter(&spa_namespace_lock);
	if (spa_lookup(pool) != NULL) {
		mutex_exit(&spa_namespace_lock);
		return (EEXIST);
	}

	/*
	 * Create and initialize the spa structure.
	 *
	 * Both lookups are best-effort: when a property is absent the
	 * initializers above (no altroot, not read-only) stay in effect.
	 */
	(void) nvlist_lookup_string(props,
	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
	(void) nvlist_lookup_uint64(props,
	    zpool_prop_to_name(ZPOOL_PROP_READONLY), &readonly);
	if (readonly)
		mode = FREAD;
	spa = spa_add(pool, config, altroot);
	spa->spa_import_flags = flags;

	/*
	 * Verbatim import - Take a pool and insert it into the namespace
	 * as if it had been loaded at boot.
	 */
	if (spa->spa_import_flags & ZFS_IMPORT_VERBATIM) {
		if (props != NULL)
			spa_configfile_set(spa, props, B_FALSE);

		spa_config_sync(spa, B_FALSE, B_TRUE);

		mutex_exit(&spa_namespace_lock);
		spa_history_log_version(spa, LOG_POOL_IMPORT);

		return (0);
	}

	spa_activate(spa, mode);

	/*
	 * Don't start async tasks until we know everything is healthy.
	 */
	spa_async_suspend(spa);

	/* A rewind request in the config switches the load to recovery. */
	zpool_get_rewind_policy(config, &policy);
	if (policy.zrp_request & ZPOOL_DO_REWIND)
		state = SPA_LOAD_RECOVER;

	/*
	 * Pass off the heavy lifting to spa_load().  Pass TRUE for mosconfig
	 * because the user-supplied config is actually the one to trust when
	 * doing an import.
	 */
	if (state != SPA_LOAD_RECOVER)
		spa->spa_last_ubsync_txg = spa->spa_load_txg = 0;

	error = spa_load_best(spa, state, B_TRUE, policy.zrp_txg,
	    policy.zrp_request);

	/*
	 * Propagate anything learned while loading the pool and pass it
	 * back to caller (i.e. rewind info, missing devices, etc).
	 */
	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO,
	    spa->spa_load_info) == 0);

	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	/*
	 * Toss any existing sparelist, as it doesn't have any validity
	 * anymore, and conflicts with spa_has_spare().
	 */
	if (spa->spa_spares.sav_config) {
		nvlist_free(spa->spa_spares.sav_config);
		spa->spa_spares.sav_config = NULL;
		spa_load_spares(spa);
	}
	if (spa->spa_l2cache.sav_config) {
		nvlist_free(spa->spa_l2cache.sav_config);
		spa->spa_l2cache.sav_config = NULL;
		spa_load_l2cache(spa);
	}

	/*
	 * Validate the user-supplied aux devices, but only if the load
	 * itself succeeded; the first error is kept.
	 */
	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) == 0);
	if (error == 0)
		error = spa_validate_aux(spa, nvroot, -1ULL,
		    VDEV_ALLOC_SPARE);
	if (error == 0)
		error = spa_validate_aux(spa, nvroot, -1ULL,
		    VDEV_ALLOC_L2CACHE);
	spa_config_exit(spa, SCL_ALL, FTAG);

	if (props != NULL)
		spa_configfile_set(spa, props, B_FALSE);

	/*
	 * Properties are only set on a writeable pool.  Any failure so far
	 * backs the import out completely.
	 */
	if (error != 0 || (props && spa_writeable(spa) &&
	    (error = spa_prop_set(spa, props)))) {
		spa_unload(spa);
		spa_deactivate(spa);
		spa_remove(spa);
		mutex_exit(&spa_namespace_lock);
		return (error);
	}

	spa_async_resume(spa);

	/*
	 * Override any spares and level 2 cache devices as specified by
	 * the user, as these may have correct device names/devids, etc.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) == 0) {
		if (spa->spa_spares.sav_config)
			VERIFY(nvlist_remove(spa->spa_spares.sav_config,
			    ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0);
		else
			VERIFY(nvlist_alloc(&spa->spa_spares.sav_config,
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_spares(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_spares.sav_sync = B_TRUE;
	}
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
	    &l2cache, &nl2cache) == 0) {
		if (spa->spa_l2cache.sav_config)
			VERIFY(nvlist_remove(spa->spa_l2cache.sav_config,
			    ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0);
		else
			VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_l2cache(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	/*
	 * Check for any removed devices.
	 */
	if (spa->spa_autoreplace) {
		spa_aux_check_removed(&spa->spa_spares);
		spa_aux_check_removed(&spa->spa_l2cache);
	}

	if (spa_writeable(spa)) {
		/*
		 * Update the config cache to include the newly-imported pool.
		 */
		spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
	}

	/*
	 * It's possible that the pool was expanded while it was exported.
	 * We kick off an async task to handle this for us.
	 */
	spa_async_request(spa, SPA_ASYNC_AUTOEXPAND);

	mutex_exit(&spa_namespace_lock);
	spa_history_log_version(spa, LOG_POOL_IMPORT);

	return (0);
}
3472789Sahrens */ 3473789Sahrens if (spa->spa_root_vdev != NULL) { 3474789Sahrens config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); 3475789Sahrens VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, 3476789Sahrens poolname) == 0); 3477789Sahrens VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, 3478789Sahrens state) == 0); 34793975Sek110237 VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP, 34803975Sek110237 spa->spa_uberblock.ub_timestamp) == 0); 34812082Seschrock 34822082Seschrock /* 34836423Sgw25295 * If the bootfs property exists on this pool then we 34846423Sgw25295 * copy it out so that external consumers can tell which 34856423Sgw25295 * pools are bootable. 34866423Sgw25295 */ 34878680SLin.Ling@Sun.COM if ((!error || error == EEXIST) && spa->spa_bootfs) { 34886423Sgw25295 char *tmpname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 34896423Sgw25295 34906423Sgw25295 /* 34916423Sgw25295 * We have to play games with the name since the 34926423Sgw25295 * pool was opened as TRYIMPORT_NAME. 34936423Sgw25295 */ 34947754SJeff.Bonwick@Sun.COM if (dsl_dsobj_to_dsname(spa_name(spa), 34956423Sgw25295 spa->spa_bootfs, tmpname) == 0) { 34966423Sgw25295 char *cp; 34976423Sgw25295 char *dsname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 34986423Sgw25295 34996423Sgw25295 cp = strchr(tmpname, '/'); 35006423Sgw25295 if (cp == NULL) { 35016423Sgw25295 (void) strlcpy(dsname, tmpname, 35026423Sgw25295 MAXPATHLEN); 35036423Sgw25295 } else { 35046423Sgw25295 (void) snprintf(dsname, MAXPATHLEN, 35056423Sgw25295 "%s/%s", poolname, ++cp); 35066423Sgw25295 } 35076423Sgw25295 VERIFY(nvlist_add_string(config, 35086423Sgw25295 ZPOOL_CONFIG_BOOTFS, dsname) == 0); 35096423Sgw25295 kmem_free(dsname, MAXPATHLEN); 35106423Sgw25295 } 35116423Sgw25295 kmem_free(tmpname, MAXPATHLEN); 35126423Sgw25295 } 35136423Sgw25295 35146423Sgw25295 /* 35155450Sbrendan * Add the list of hot spares and level 2 cache devices. 
35162082Seschrock */ 35179425SEric.Schrock@Sun.COM spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 35182082Seschrock spa_add_spares(spa, config); 35195450Sbrendan spa_add_l2cache(spa, config); 35209425SEric.Schrock@Sun.COM spa_config_exit(spa, SCL_CONFIG, FTAG); 3521789Sahrens } 3522789Sahrens 3523789Sahrens spa_unload(spa); 3524789Sahrens spa_deactivate(spa); 3525789Sahrens spa_remove(spa); 3526789Sahrens mutex_exit(&spa_namespace_lock); 3527789Sahrens 3528789Sahrens return (config); 3529789Sahrens } 3530789Sahrens 3531789Sahrens /* 3532789Sahrens * Pool export/destroy 3533789Sahrens * 3534789Sahrens * The act of destroying or exporting a pool is very simple. We make sure there 3535789Sahrens * is no more pending I/O and any references to the pool are gone. Then, we 3536789Sahrens * update the pool state and sync all the labels to disk, removing the 35378211SGeorge.Wilson@Sun.COM * configuration from the cache afterwards. If the 'hardforce' flag is set, then 35388211SGeorge.Wilson@Sun.COM * we don't sync the labels or remove the configuration cache. 3539789Sahrens */ /* spa_export_common(): shared implementation behind spa_destroy(), spa_export() and spa_reset(). 'pool' is looked up by name with spa_lookup(). 'new_state' is the POOL_STATE_* value to record; POOL_STATE_UNINITIALIZED means unload only (no state change, no config sync, no spa_remove()). When 'oldconfig' is non-NULL, *oldconfig is cleared on entry and later receives a duplicate of spa_config if one exists. 'force' bypasses the active-shared-spare check; 'hardforce' skips dirtying the labels and syncing the config cache. Returns 0 on success, EROFS if spa_mode_global lacks FWRITE, ENOENT if the pool is not found, EBUSY if references remain, or EXDEV if an export is blocked by an active shared spare. */ 3540789Sahrens static int 35417214Slling spa_export_common(char *pool, int new_state, nvlist_t **oldconfig, 35428211SGeorge.Wilson@Sun.COM boolean_t force, boolean_t hardforce) 3543789Sahrens { 3544789Sahrens spa_t *spa; 3545789Sahrens 35461775Sbillm if (oldconfig) 35471775Sbillm *oldconfig = NULL; 35481775Sbillm 35498241SJeff.Bonwick@Sun.COM if (!(spa_mode_global & FWRITE)) 3550789Sahrens return (EROFS); 3551789Sahrens 3552789Sahrens mutex_enter(&spa_namespace_lock); 3553789Sahrens if ((spa = spa_lookup(pool)) == NULL) { 3554789Sahrens mutex_exit(&spa_namespace_lock); 3555789Sahrens return (ENOENT); 3556789Sahrens } 3557789Sahrens 3558789Sahrens /* 35591544Seschrock * Put a hold on the pool, drop the namespace lock, stop async tasks, 35601544Seschrock * reacquire the namespace lock, and see if we can export. 
35611544Seschrock */ 35621544Seschrock spa_open_ref(spa, FTAG); 35631544Seschrock mutex_exit(&spa_namespace_lock); 35641544Seschrock spa_async_suspend(spa); 35651544Seschrock mutex_enter(&spa_namespace_lock); 35661544Seschrock spa_close(spa, FTAG); 35671544Seschrock 35681544Seschrock /* 3569789Sahrens * The pool will be in core if it's openable, 3570789Sahrens * in which case we can modify its state. 3571789Sahrens */ 3572789Sahrens if (spa->spa_state != POOL_STATE_UNINITIALIZED && spa->spa_sync_on) { 3573789Sahrens /* 3574789Sahrens * Objsets may be open only because they're dirty, so we 3575789Sahrens * have to force it to sync before checking spa_refcnt. 3576789Sahrens */ 3577789Sahrens txg_wait_synced(spa->spa_dsl_pool, 0); 3578789Sahrens 35791544Seschrock /* 35801544Seschrock * A pool cannot be exported or destroyed if there are active 35811544Seschrock * references. If we are resetting a pool, allow references by 35821544Seschrock * fault injection handlers. 35831544Seschrock */ 35841544Seschrock if (!spa_refcount_zero(spa) || 35851544Seschrock (spa->spa_inject_ref != 0 && 35861544Seschrock new_state != POOL_STATE_UNINITIALIZED)) { /* async tasks were suspended above; resume them before bailing out */ 35871544Seschrock spa_async_resume(spa); 3588789Sahrens mutex_exit(&spa_namespace_lock); 3589789Sahrens return (EBUSY); 3590789Sahrens } 3591789Sahrens 3592789Sahrens /* 35937214Slling * A pool cannot be exported if it has an active shared spare. 35947214Slling * This is to prevent other pools stealing the active spare 35957214Slling * from an exported pool. The user can override this check 35967214Slling * by forcing the export ('force'). 35977214Slling */ 35987214Slling if (!force && new_state == POOL_STATE_EXPORTED && 35997214Slling spa_has_active_shared_spare(spa)) { 36007214Slling spa_async_resume(spa); 36017214Slling mutex_exit(&spa_namespace_lock); 36027214Slling return (EXDEV); 36037214Slling } 36047214Slling 36057214Slling /* 3606789Sahrens * We want this to be reflected on every label, 3607789Sahrens * so mark them all dirty. 
spa_unload() will do the 3608789Sahrens * final sync that pushes these changes out. 3609789Sahrens */ 36108211SGeorge.Wilson@Sun.COM if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) { 36117754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 36121544Seschrock spa->spa_state = new_state; 361312296SLin.Ling@Sun.COM spa->spa_final_txg = spa_last_synced_txg(spa) + 361412296SLin.Ling@Sun.COM TXG_DEFER_SIZE + 1; 36151544Seschrock vdev_config_dirty(spa->spa_root_vdev); 36167754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 36171544Seschrock } 3618789Sahrens } 3619789Sahrens /* NOTE(review): ESC_ZFS_POOL_DESTROY is posted for every new_state, including export and reset -- confirm this is intended. */ 36204451Seschrock spa_event_notify(spa, NULL, ESC_ZFS_POOL_DESTROY); 36214451Seschrock 3622789Sahrens if (spa->spa_state != POOL_STATE_UNINITIALIZED) { 3623789Sahrens spa_unload(spa); 3624789Sahrens spa_deactivate(spa); 3625789Sahrens } 3626789Sahrens /* give the caller its own copy of the config before the spa_t may be removed below */ 36271775Sbillm if (oldconfig && spa->spa_config) 36281775Sbillm VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0); 36291775Sbillm 36301544Seschrock if (new_state != POOL_STATE_UNINITIALIZED) { 36318211SGeorge.Wilson@Sun.COM if (!hardforce) 36328211SGeorge.Wilson@Sun.COM spa_config_sync(spa, B_TRUE, B_TRUE); 36331544Seschrock spa_remove(spa); 36341544Seschrock } 3635789Sahrens mutex_exit(&spa_namespace_lock); 3636789Sahrens 3637789Sahrens return (0); 3638789Sahrens } 3639789Sahrens 3640789Sahrens /* 3641789Sahrens * Destroy a storage pool: calls spa_export_common() with POOL_STATE_DESTROYED, no 'oldconfig', and neither 'force' nor 'hardforce', and returns its result. 3642789Sahrens */ 3643789Sahrens int 3644789Sahrens spa_destroy(char *pool) 3645789Sahrens { 36468211SGeorge.Wilson@Sun.COM return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL, 36478211SGeorge.Wilson@Sun.COM B_FALSE, B_FALSE)); 3648789Sahrens } 3649789Sahrens 3650789Sahrens /* 3651789Sahrens * Export a storage pool. 
3652789Sahrens */ /* spa_export(): forwards 'pool', 'oldconfig', 'force' and 'hardforce' unchanged to spa_export_common() with new_state POOL_STATE_EXPORTED and returns its result. */ 3653789Sahrens int 36548211SGeorge.Wilson@Sun.COM spa_export(char *pool, nvlist_t **oldconfig, boolean_t force, 36558211SGeorge.Wilson@Sun.COM boolean_t hardforce) 3656789Sahrens { 36578211SGeorge.Wilson@Sun.COM return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig, 36588211SGeorge.Wilson@Sun.COM force, hardforce)); 3659789Sahrens } 3660789Sahrens 3661789Sahrens /* 36621544Seschrock * Similar to spa_export(), this unloads the spa_t without actually removing it 36631544Seschrock * from the namespace in any way. Implemented as spa_export_common() with POOL_STATE_UNINITIALIZED, no 'oldconfig', and both force flags B_FALSE. 36641544Seschrock */ 36651544Seschrock int 36661544Seschrock spa_reset(char *pool) 36671544Seschrock { 36687214Slling return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL, 36698211SGeorge.Wilson@Sun.COM B_FALSE, B_FALSE)); 36701544Seschrock } 36711544Seschrock 36721544Seschrock /* 3673789Sahrens * ========================================================================== 3674789Sahrens * Device manipulation 3675789Sahrens * ========================================================================== 3676789Sahrens */ 3677789Sahrens 3678789Sahrens /* 36794527Sperrin * Add a device to a storage pool. 
3680789Sahrens */ /* spa_vdev_add(): 'nvroot' is parsed with VDEV_ALLOC_ADD; its children are moved under the pool's root vdev as new top-level vdevs, and its ZPOOL_CONFIG_SPARES / ZPOOL_CONFIG_L2CACHE arrays, when present, are handed to spa_set_aux_vdevs(). Returns 0 on success. On failure it returns whatever spa_vdev_exit() yields for EINVAL (nvroot supplies no children, spares or l2cache devices) or for the error from spa_config_parse(), vdev_create() or spa_validate_aux(). The pool must be writeable (asserted). */ 3681789Sahrens int 3682789Sahrens spa_vdev_add(spa_t *spa, nvlist_t *nvroot) 3683789Sahrens { 368410594SGeorge.Wilson@Sun.COM uint64_t txg, id; 36858241SJeff.Bonwick@Sun.COM int error; 3686789Sahrens vdev_t *rvd = spa->spa_root_vdev; 36871585Sbonwick vdev_t *vd, *tvd; 36885450Sbrendan nvlist_t **spares, **l2cache; 36895450Sbrendan uint_t nspares, nl2cache; 3690789Sahrens 3691*13049SGeorge.Wilson@Sun.COM ASSERT(spa_writeable(spa)); 3692*13049SGeorge.Wilson@Sun.COM 3693789Sahrens txg = spa_vdev_enter(spa); 3694789Sahrens 36952082Seschrock if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0, 36962082Seschrock VDEV_ALLOC_ADD)) != 0) 36972082Seschrock return (spa_vdev_exit(spa, NULL, txg, error)); 36982082Seschrock 36997754SJeff.Bonwick@Sun.COM spa->spa_pending_vdev = vd; /* spa_vdev_exit() will clear this */ 3700789Sahrens /* a failed lookup just means the array is absent: treat it as zero entries */ 37015450Sbrendan if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, 37025450Sbrendan &nspares) != 0) 37032082Seschrock nspares = 0; 37042082Seschrock 37055450Sbrendan if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, 37065450Sbrendan &nl2cache) != 0) 37075450Sbrendan nl2cache = 0; 37085450Sbrendan 37097754SJeff.Bonwick@Sun.COM if (vd->vdev_children == 0 && nspares == 0 && nl2cache == 0) 37102082Seschrock return (spa_vdev_exit(spa, vd, txg, EINVAL)); 37117754SJeff.Bonwick@Sun.COM 37127754SJeff.Bonwick@Sun.COM if (vd->vdev_children != 0 && 37137754SJeff.Bonwick@Sun.COM (error = vdev_create(vd, txg, B_FALSE)) != 0) 37147754SJeff.Bonwick@Sun.COM return (spa_vdev_exit(spa, vd, txg, error)); 37152082Seschrock 37163377Seschrock /* 37175450Sbrendan * We must validate the spares and l2cache devices after checking the 37185450Sbrendan * children. Otherwise, vdev_inuse() will blindly overwrite the spare. 
37193377Seschrock */ 37207754SJeff.Bonwick@Sun.COM if ((error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) != 0) 37213377Seschrock return (spa_vdev_exit(spa, vd, txg, error)); 37223377Seschrock 37233377Seschrock /* 37243377Seschrock * Transfer each new top-level vdev from vd to rvd. 37253377Seschrock */ 37268241SJeff.Bonwick@Sun.COM for (int c = 0; c < vd->vdev_children; c++) { 372710594SGeorge.Wilson@Sun.COM 372810594SGeorge.Wilson@Sun.COM /* 372910594SGeorge.Wilson@Sun.COM * Set the vdev id to the first hole, if one exists. The hole vdev found is freed so its slot can be reused; if there is no hole the loop ends with id == rvd->vdev_children. 373010594SGeorge.Wilson@Sun.COM */ 373110594SGeorge.Wilson@Sun.COM for (id = 0; id < rvd->vdev_children; id++) { 373210594SGeorge.Wilson@Sun.COM if (rvd->vdev_child[id]->vdev_ishole) { 373310594SGeorge.Wilson@Sun.COM vdev_free(rvd->vdev_child[id]); 373410594SGeorge.Wilson@Sun.COM break; 373510594SGeorge.Wilson@Sun.COM } 373610594SGeorge.Wilson@Sun.COM } 37373377Seschrock tvd = vd->vdev_child[c]; 37383377Seschrock vdev_remove_child(vd, tvd); 373910594SGeorge.Wilson@Sun.COM tvd->vdev_id = id; 37403377Seschrock vdev_add_child(rvd, tvd); 37413377Seschrock vdev_config_dirty(tvd); 37423377Seschrock } 37433377Seschrock 37442082Seschrock if (nspares != 0) { 37455450Sbrendan spa_set_aux_vdevs(&spa->spa_spares, spares, nspares, 37465450Sbrendan ZPOOL_CONFIG_SPARES); 37472082Seschrock spa_load_spares(spa); 37485450Sbrendan spa->spa_spares.sav_sync = B_TRUE; 37495450Sbrendan } 37505450Sbrendan 37515450Sbrendan if (nl2cache != 0) { 37525450Sbrendan spa_set_aux_vdevs(&spa->spa_l2cache, l2cache, nl2cache, 37535450Sbrendan ZPOOL_CONFIG_L2CACHE); 37545450Sbrendan spa_load_l2cache(spa); 37555450Sbrendan spa->spa_l2cache.sav_sync = B_TRUE; 3756789Sahrens } 3757789Sahrens 3758789Sahrens /* 37591585Sbonwick * We have to be careful when adding new vdevs to an existing pool. 
37601585Sbonwick * If other threads start allocating from these vdevs before we 37611585Sbonwick * sync the config cache, and we lose power, then upon reboot we may 37621585Sbonwick * fail to open the pool because there are DVAs that the config cache 37631585Sbonwick * can't translate. Therefore, we first add the vdevs without 37641585Sbonwick * initializing metaslabs; sync the config cache (via spa_vdev_exit()); 37651635Sbonwick * and then let spa_config_update() initialize the new metaslabs. 37661585Sbonwick * 37671585Sbonwick * spa_load() checks for added-but-not-initialized vdevs, so that 37681585Sbonwick * if we lose power at any point in this sequence, the remaining 37691585Sbonwick * steps will be completed the next time we load the pool. 3770789Sahrens */ 37711635Sbonwick (void) spa_vdev_exit(spa, vd, txg, 0); 37721585Sbonwick 37731635Sbonwick mutex_enter(&spa_namespace_lock); 37741635Sbonwick spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); 37751635Sbonwick mutex_exit(&spa_namespace_lock); 3776789Sahrens 37771635Sbonwick return (0); 3778789Sahrens } 3779789Sahrens 3780789Sahrens /* 3781789Sahrens * Attach a device to a mirror. The arguments are the path to any device 3782789Sahrens * in the mirror, and the nvroot for the new device. If the path specifies 3783789Sahrens * a device that is not mirrored, we automatically insert the mirror vdev. 3784789Sahrens * 3785789Sahrens * If 'replacing' is specified, the new device is intended to replace the 3786789Sahrens * existing device; in this case the two devices are made into their own 37874451Seschrock * mirror using the 'replacing' vdev, which is functionally identical to 3788789Sahrens * the mirror vdev (it actually reuses all the same ops) but has a few 3789789Sahrens * extra rules: you can't attach to it after it's been created, and upon 3790789Sahrens * completion of resilvering, the first disk (the one being replaced) 3791789Sahrens * is automatically detached. 
3792789Sahrens */ /* spa_vdev_attach(): note that the existing device is identified by 'guid' (looked up with spa_lookup_by_guid()), not by a path as the comment above says, and it must be a leaf. 'nvroot' must parse to a root with exactly one leaf child. Error codes handed to spa_vdev_exit(): ENODEV (guid not found), ENOTSUP (non-leaf target, spare replacing a log device, disallowed parent type, or spare/non-spare mismatch), EINVAL (unusable nvroot), EOVERFLOW (new device smaller than vdev_get_min_asize(oldvd)), EDOM (new device's ashift exceeds the top-level vdev's), or the vdev_create() error. Returns 0 on success. The pool must be writeable (asserted). */ 3793789Sahrens int 37941544Seschrock spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) 3795789Sahrens { 379612296SLin.Ling@Sun.COM uint64_t txg, dtl_max_txg; 3797789Sahrens vdev_t *rvd = spa->spa_root_vdev; 3798789Sahrens vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd; 37992082Seschrock vdev_ops_t *pvops; 38007313SEric.Kustarz@Sun.COM char *oldvdpath, *newvdpath; 38017313SEric.Kustarz@Sun.COM int newvd_isspare; 38027313SEric.Kustarz@Sun.COM int error; 3803789Sahrens 3804*13049SGeorge.Wilson@Sun.COM ASSERT(spa_writeable(spa)); 3805*13049SGeorge.Wilson@Sun.COM 3806789Sahrens txg = spa_vdev_enter(spa); 3807789Sahrens 38086643Seschrock oldvd = spa_lookup_by_guid(spa, guid, B_FALSE); 3809789Sahrens 3810789Sahrens if (oldvd == NULL) 3811789Sahrens return (spa_vdev_exit(spa, NULL, txg, ENODEV)); 3812789Sahrens 38131585Sbonwick if (!oldvd->vdev_ops->vdev_op_leaf) 38141585Sbonwick return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 38151585Sbonwick 3816789Sahrens pvd = oldvd->vdev_parent; 3817789Sahrens /* NOTE(review): a spa_config_parse() failure is reported as EINVAL; the code stored in 'error' is not propagated -- confirm this is intended. */ 38182082Seschrock if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0, 38194451Seschrock VDEV_ALLOC_ADD)) != 0) 38204451Seschrock return (spa_vdev_exit(spa, NULL, txg, EINVAL)); 38214451Seschrock 38224451Seschrock if (newrootvd->vdev_children != 1) 3823789Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); 3824789Sahrens 3825789Sahrens newvd = newrootvd->vdev_child[0]; 3826789Sahrens 3827789Sahrens if (!newvd->vdev_ops->vdev_op_leaf) 3828789Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); 3829789Sahrens 38302082Seschrock if ((error = vdev_create(newrootvd, txg, replacing)) != 0) 3831789Sahrens return (spa_vdev_exit(spa, newrootvd, txg, error)); 3832789Sahrens 38334527Sperrin /* 38344527Sperrin * Spares can't replace logs 38354527Sperrin */ 38367326SEric.Schrock@Sun.COM if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare) 38374527Sperrin return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 
38384527Sperrin 38392082Seschrock if (!replacing) { 38402082Seschrock /* 38412082Seschrock * For attach, the only allowable parent is a mirror or the root 38422082Seschrock * vdev. 38432082Seschrock */ 38442082Seschrock if (pvd->vdev_ops != &vdev_mirror_ops && 38452082Seschrock pvd->vdev_ops != &vdev_root_ops) 38462082Seschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 38472082Seschrock 38482082Seschrock pvops = &vdev_mirror_ops; 38492082Seschrock } else { 38502082Seschrock /* 38512082Seschrock * Active hot spares can only be replaced by inactive hot 38522082Seschrock * spares. 38532082Seschrock */ 38542082Seschrock if (pvd->vdev_ops == &vdev_spare_ops && 385513037SMark.Musante@Sun.COM oldvd->vdev_isspare && 38562082Seschrock !spa_has_spare(spa, newvd->vdev_guid)) 38572082Seschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 38582082Seschrock 38592082Seschrock /* 38602082Seschrock * If the source is a hot spare, and the parent isn't already a 38612082Seschrock * spare, then we want to create a new hot spare. Otherwise, we 38623377Seschrock * want to create a replacing vdev. The user is not allowed to 38633377Seschrock * attach to a spared vdev child unless the 'isspare' state is 38643377Seschrock * the same (spare replaces spare, non-spare replaces 38653377Seschrock * non-spare). 
38662082Seschrock */ /* a device already under a replacing vdev may only be replaced again on pools at SPA_VERSION_MULTI_REPLACE or later */ 386713037SMark.Musante@Sun.COM if (pvd->vdev_ops == &vdev_replacing_ops && 386813037SMark.Musante@Sun.COM spa_version(spa) < SPA_VERSION_MULTI_REPLACE) { 38692082Seschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 387013037SMark.Musante@Sun.COM } else if (pvd->vdev_ops == &vdev_spare_ops && 387113037SMark.Musante@Sun.COM newvd->vdev_isspare != oldvd->vdev_isspare) { 38723377Seschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 387313037SMark.Musante@Sun.COM } 387413037SMark.Musante@Sun.COM 387513037SMark.Musante@Sun.COM if (newvd->vdev_isspare) 38762082Seschrock pvops = &vdev_spare_ops; 38772082Seschrock else 38782082Seschrock pvops = &vdev_replacing_ops; 38792082Seschrock } 38802082Seschrock 38811175Slling /* 38829816SGeorge.Wilson@Sun.COM * Make sure the new device is big enough. 38831175Slling */ 38849816SGeorge.Wilson@Sun.COM if (newvd->vdev_asize < vdev_get_min_asize(oldvd)) 3885789Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW)); 3886789Sahrens 38871732Sbonwick /* 38881732Sbonwick * The new device cannot have a higher alignment requirement 38891732Sbonwick * than the top-level vdev. 38901732Sbonwick */ 38911732Sbonwick if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift) 3892789Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EDOM)); 3893789Sahrens 3894789Sahrens /* 3895789Sahrens * If this is an in-place replacement, update oldvd's path and devid 3896789Sahrens * to make it distinguishable from newvd, and unopenable from now on. 
3897789Sahrens */ /* NOTE(review): strcmp() is applied to both vdev_path pointers without a NULL check -- confirm neither can be NULL here. The "+ 5" below covers the '/' separator, the three characters of "old", and the terminating NUL. */ 3898789Sahrens if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) { 3899789Sahrens spa_strfree(oldvd->vdev_path); 3900789Sahrens oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5, 3901789Sahrens KM_SLEEP); 3902789Sahrens (void) sprintf(oldvd->vdev_path, "%s/%s", 3903789Sahrens newvd->vdev_path, "old"); 3904789Sahrens if (oldvd->vdev_devid != NULL) { 3905789Sahrens spa_strfree(oldvd->vdev_devid); 3906789Sahrens oldvd->vdev_devid = NULL; 3907789Sahrens } 3908789Sahrens } 3909789Sahrens 391013037SMark.Musante@Sun.COM /* mark the device being resilvered */ 391113037SMark.Musante@Sun.COM newvd->vdev_resilvering = B_TRUE; 391213037SMark.Musante@Sun.COM 3913789Sahrens /* 39142082Seschrock * If the parent is not a mirror, or if we're replacing, insert the new 39152082Seschrock * mirror/replacing/spare vdev above oldvd. 3916789Sahrens */ 3917789Sahrens if (pvd->vdev_ops != pvops) 3918789Sahrens pvd = vdev_add_parent(oldvd, pvops); 3919789Sahrens 3920789Sahrens ASSERT(pvd->vdev_top->vdev_parent == rvd); 3921789Sahrens ASSERT(pvd->vdev_ops == pvops); 3922789Sahrens ASSERT(oldvd->vdev_parent == pvd); 3923789Sahrens 3924789Sahrens /* 3925789Sahrens * Extract the new device from its root and add it to pvd. 3926789Sahrens */ 3927789Sahrens vdev_remove_child(newrootvd, newvd); 3928789Sahrens newvd->vdev_id = pvd->vdev_children; 392910594SGeorge.Wilson@Sun.COM newvd->vdev_crtxg = oldvd->vdev_crtxg; 3930789Sahrens vdev_add_child(pvd, newvd); 3931789Sahrens 3932789Sahrens tvd = newvd->vdev_top; 3933789Sahrens ASSERT(pvd->vdev_top == tvd); 3934789Sahrens ASSERT(tvd->vdev_parent == rvd); 3935789Sahrens 3936789Sahrens vdev_config_dirty(tvd); 3937789Sahrens 3938789Sahrens /* 393912296SLin.Ling@Sun.COM * Set newvd's DTL to [TXG_INITIAL, dtl_max_txg) so that we account 394012296SLin.Ling@Sun.COM * for any dmu_sync-ed blocks. It will propagate upward when 394112296SLin.Ling@Sun.COM * spa_vdev_exit() calls vdev_dtl_reassess(). 
3942789Sahrens */ 394312296SLin.Ling@Sun.COM dtl_max_txg = txg + TXG_CONCURRENT_STATES; 394412296SLin.Ling@Sun.COM 394512296SLin.Ling@Sun.COM vdev_dtl_dirty(newvd, DTL_MISSING, TXG_INITIAL, 394612296SLin.Ling@Sun.COM dtl_max_txg - TXG_INITIAL); 3947789Sahrens 39489425SEric.Schrock@Sun.COM if (newvd->vdev_isspare) { 39493377Seschrock spa_spare_activate(newvd); 39509425SEric.Schrock@Sun.COM spa_event_notify(spa, newvd, ESC_ZFS_VDEV_SPARE); 39519425SEric.Schrock@Sun.COM } 39529425SEric.Schrock@Sun.COM /* Copy the paths and the isspare flag now; the history record is written from these copies after spa_vdev_exit(). */ 39537754SJeff.Bonwick@Sun.COM oldvdpath = spa_strdup(oldvd->vdev_path); 39547754SJeff.Bonwick@Sun.COM newvdpath = spa_strdup(newvd->vdev_path); 39557313SEric.Kustarz@Sun.COM newvd_isspare = newvd->vdev_isspare; 39561544Seschrock 3957789Sahrens /* 3958789Sahrens * Mark newvd's DTL dirty in this txg. 3959789Sahrens */ 39601732Sbonwick vdev_dirty(tvd, VDD_DTL, newvd, txg); 3961789Sahrens 396212296SLin.Ling@Sun.COM /* 396312296SLin.Ling@Sun.COM * Restart the resilver 396412296SLin.Ling@Sun.COM */ 396512296SLin.Ling@Sun.COM dsl_resilver_restart(spa->spa_dsl_pool, dtl_max_txg); 396612296SLin.Ling@Sun.COM 396712296SLin.Ling@Sun.COM /* 396812296SLin.Ling@Sun.COM * Commit the config 396912296SLin.Ling@Sun.COM */ 397012296SLin.Ling@Sun.COM (void) spa_vdev_exit(spa, newrootvd, dtl_max_txg, 0); 397112296SLin.Ling@Sun.COM 397212296SLin.Ling@Sun.COM spa_history_log_internal(LOG_POOL_VDEV_ATTACH, spa, NULL, 397312296SLin.Ling@Sun.COM "%s vdev=%s %s vdev=%s", 39749946SMark.Musante@Sun.COM replacing && newvd_isspare ? "spare in" : 39759946SMark.Musante@Sun.COM replacing ? "replace" : "attach", newvdpath, 39769946SMark.Musante@Sun.COM replacing ? 
"for" : "to", oldvdpath); 39777313SEric.Kustarz@Sun.COM 39787313SEric.Kustarz@Sun.COM spa_strfree(oldvdpath); 39797313SEric.Kustarz@Sun.COM spa_strfree(newvdpath); 39807313SEric.Kustarz@Sun.COM 398112865Slori.alt@oracle.com if (spa->spa_bootfs) 398212865Slori.alt@oracle.com spa_event_notify(spa, newvd, ESC_ZFS_BOOTFS_VDEV_ATTACH); 398312865Slori.alt@oracle.com 3984789Sahrens return (0); 3985789Sahrens } 3986789Sahrens 3987789Sahrens /* 3988789Sahrens * Detach a device from a mirror, replacing, or spare vdev. 3989789Sahrens * If 'replace_done' is specified, only detach if the parent 3990789Sahrens * is a replacing or spare vdev. 'guid' must name a leaf vdev. 'pguid', when non-zero, must equal the guid of that leaf's parent. Error codes handed to spa_vdev_exit(): ENODEV (guid not found), ENOTSUP (non-leaf target or unsupported parent type), EBUSY (parent guid mismatch, or vdev_dtl_required() reports the device is still needed). Otherwise returns the result of the final spa_vdev_exit(). 3991789Sahrens */ 3992789Sahrens int 39938241SJeff.Bonwick@Sun.COM spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) 3994789Sahrens { 3995789Sahrens uint64_t txg; 39968241SJeff.Bonwick@Sun.COM int error; 3997789Sahrens vdev_t *rvd = spa->spa_root_vdev; 3998789Sahrens vdev_t *vd, *pvd, *cvd, *tvd; 39992082Seschrock boolean_t unspare = B_FALSE; 40002082Seschrock uint64_t unspare_guid; 400111422SMark.Musante@Sun.COM char *vdpath; 4002789Sahrens 4003*13049SGeorge.Wilson@Sun.COM ASSERT(spa_writeable(spa)); 4004*13049SGeorge.Wilson@Sun.COM 4005789Sahrens txg = spa_vdev_enter(spa); 4006789Sahrens 40076643Seschrock vd = spa_lookup_by_guid(spa, guid, B_FALSE); 4008789Sahrens 4009789Sahrens if (vd == NULL) 4010789Sahrens return (spa_vdev_exit(spa, NULL, txg, ENODEV)); 4011789Sahrens 40121585Sbonwick if (!vd->vdev_ops->vdev_op_leaf) 40131585Sbonwick return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 40141585Sbonwick 4015789Sahrens pvd = vd->vdev_parent; 4016789Sahrens 4017789Sahrens /* 40188241SJeff.Bonwick@Sun.COM * If the parent/child relationship is not as expected, don't do it. 40198241SJeff.Bonwick@Sun.COM * Consider M(A,R(B,C)) -- that is, a mirror of A with a replacing 40208241SJeff.Bonwick@Sun.COM * vdev that's replacing B with C. The user's intent in replacing 40218241SJeff.Bonwick@Sun.COM * is to go from M(A,B) to M(A,C). 
If the user decides to cancel 40228241SJeff.Bonwick@Sun.COM * the replace by detaching C, the expected behavior is to end up 40238241SJeff.Bonwick@Sun.COM * M(A,B). But suppose that right after deciding to detach C, 40248241SJeff.Bonwick@Sun.COM * the replacement of B completes. We would have M(A,C), and then 40258241SJeff.Bonwick@Sun.COM * ask to detach C, which would leave us with just A -- not what 40268241SJeff.Bonwick@Sun.COM * the user wanted. To prevent this, we make sure that the 40278241SJeff.Bonwick@Sun.COM * parent/child relationship hasn't changed -- in this example, 40288241SJeff.Bonwick@Sun.COM * that C's parent is still the replacing vdev R. 40298241SJeff.Bonwick@Sun.COM */ /* a pguid of 0 disables this parent check */ 40308241SJeff.Bonwick@Sun.COM if (pvd->vdev_guid != pguid && pguid != 0) 40318241SJeff.Bonwick@Sun.COM return (spa_vdev_exit(spa, NULL, txg, EBUSY)); 40328241SJeff.Bonwick@Sun.COM 40338241SJeff.Bonwick@Sun.COM /* 403413037SMark.Musante@Sun.COM * Only 'replacing' or 'spare' vdevs can be replaced. 4035789Sahrens */ 403613037SMark.Musante@Sun.COM if (replace_done && pvd->vdev_ops != &vdev_replacing_ops && 403713037SMark.Musante@Sun.COM pvd->vdev_ops != &vdev_spare_ops) 403813037SMark.Musante@Sun.COM return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 40392082Seschrock 40402082Seschrock ASSERT(pvd->vdev_ops != &vdev_spare_ops || 40414577Sahrens spa_version(spa) >= SPA_VERSION_SPARES); 4042789Sahrens 4043789Sahrens /* 40442082Seschrock * Only mirror, replacing, and spare vdevs support detach. 4045789Sahrens */ 4046789Sahrens if (pvd->vdev_ops != &vdev_replacing_ops && 40472082Seschrock pvd->vdev_ops != &vdev_mirror_ops && 40482082Seschrock pvd->vdev_ops != &vdev_spare_ops) 4049789Sahrens return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 4050789Sahrens 4051789Sahrens /* 40528241SJeff.Bonwick@Sun.COM * If this device has the only valid copy of some data, 40538241SJeff.Bonwick@Sun.COM * we cannot safely detach it. 
4054789Sahrens */ 40558241SJeff.Bonwick@Sun.COM if (vdev_dtl_required(vd)) 4056789Sahrens return (spa_vdev_exit(spa, NULL, txg, EBUSY)); 4057789Sahrens 40588241SJeff.Bonwick@Sun.COM ASSERT(pvd->vdev_children >= 2); 40598241SJeff.Bonwick@Sun.COM 4060789Sahrens /* 40616673Seschrock * If we are detaching the second disk from a replacing vdev, then 40626673Seschrock * check to see if we changed the original vdev's path to have "/old" 40636673Seschrock * at the end in spa_vdev_attach(). If so, undo that change now. 40646673Seschrock */ 406513037SMark.Musante@Sun.COM if (pvd->vdev_ops == &vdev_replacing_ops && vd->vdev_id > 0 && 406613037SMark.Musante@Sun.COM vd->vdev_path != NULL) { 406713037SMark.Musante@Sun.COM size_t len = strlen(vd->vdev_path); 406813037SMark.Musante@Sun.COM 406913037SMark.Musante@Sun.COM for (int c = 0; c < pvd->vdev_children; c++) { 407013037SMark.Musante@Sun.COM cvd = pvd->vdev_child[c]; 407113037SMark.Musante@Sun.COM 407213037SMark.Musante@Sun.COM if (cvd == vd || cvd->vdev_path == NULL) 407313037SMark.Musante@Sun.COM continue; 407413037SMark.Musante@Sun.COM /* match a sibling whose path is exactly vd's path followed by "/old", and give it vd's path */ 407513037SMark.Musante@Sun.COM if (strncmp(cvd->vdev_path, vd->vdev_path, len) == 0 && 407613037SMark.Musante@Sun.COM strcmp(cvd->vdev_path + len, "/old") == 0) { 407713037SMark.Musante@Sun.COM spa_strfree(cvd->vdev_path); 407813037SMark.Musante@Sun.COM cvd->vdev_path = spa_strdup(vd->vdev_path); 407913037SMark.Musante@Sun.COM break; 408013037SMark.Musante@Sun.COM } 40816673Seschrock } 40826673Seschrock } 40836673Seschrock 40846673Seschrock /* 40852082Seschrock * If we are detaching the original disk from a spare, then it implies 40862082Seschrock * that the spare should become a real disk, and be removed from the 40872082Seschrock * active spare list for the pool. 
40882082Seschrock */ 40892082Seschrock if (pvd->vdev_ops == &vdev_spare_ops && 409013037SMark.Musante@Sun.COM vd->vdev_id == 0 && 409113037SMark.Musante@Sun.COM pvd->vdev_child[pvd->vdev_children - 1]->vdev_isspare) 40922082Seschrock unspare = B_TRUE; 40932082Seschrock 40942082Seschrock /* 4095789Sahrens * Erase the disk labels so the disk can be used for other things. 4096789Sahrens * This must be done after all other error cases are handled, 4097789Sahrens * but before we disembowel vd (so we can still do I/O to it). 4098789Sahrens * But if we can't do it, don't treat the error as fatal -- 4099789Sahrens * it may be that the unwritability of the disk is the reason 4100789Sahrens * it's being detached! The value stored in 'error' here is never examined; it is overwritten by the spa_vdev_exit() result further down. 4101789Sahrens */ 41023377Seschrock error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); 4103789Sahrens 4104789Sahrens /* 4105789Sahrens * Remove vd from its parent and compact the parent's children. 4106789Sahrens */ 4107789Sahrens vdev_remove_child(pvd, vd); 4108789Sahrens vdev_compact_children(pvd); 4109789Sahrens 4110789Sahrens /* 4111789Sahrens * Remember one of the remaining children so we can get tvd below. 4112789Sahrens */ 411313037SMark.Musante@Sun.COM cvd = pvd->vdev_child[pvd->vdev_children - 1]; 4114789Sahrens 4115789Sahrens /* 41162082Seschrock * If we need to remove the remaining child from the list of hot spares, 41178241SJeff.Bonwick@Sun.COM * do it now, marking the vdev as no longer a spare in the process. 41188241SJeff.Bonwick@Sun.COM * We must do this before vdev_remove_parent(), because that can 41198241SJeff.Bonwick@Sun.COM * change the GUID if it creates a new toplevel GUID. For a similar 41208241SJeff.Bonwick@Sun.COM * reason, we must remove the spare now, in the same txg as the detach; 41218241SJeff.Bonwick@Sun.COM * otherwise someone could attach a new sibling, change the GUID, and 41228241SJeff.Bonwick@Sun.COM * the subsequent attempt to spa_vdev_remove(unspare_guid) would fail. 
41232082Seschrock */ 41242082Seschrock if (unspare) { 41252082Seschrock ASSERT(cvd->vdev_isspare); 41263377Seschrock spa_spare_remove(cvd); 41272082Seschrock unspare_guid = cvd->vdev_guid; 41288241SJeff.Bonwick@Sun.COM (void) spa_vdev_remove(spa, unspare_guid, B_TRUE); 412913037SMark.Musante@Sun.COM cvd->vdev_unspare = B_TRUE; 41302082Seschrock } 41312082Seschrock 41322082Seschrock /* 4133789Sahrens * If the parent mirror/replacing/spare vdev only has one child, 4134789Sahrens * the parent is no longer needed. Remove it from the tree. 4135789Sahrens */ 413613037SMark.Musante@Sun.COM if (pvd->vdev_children == 1) { 413713037SMark.Musante@Sun.COM if (pvd->vdev_ops == &vdev_spare_ops) 413813037SMark.Musante@Sun.COM cvd->vdev_unspare = B_FALSE; 4139789Sahrens vdev_remove_parent(cvd); 414013037SMark.Musante@Sun.COM cvd->vdev_resilvering = B_FALSE; 414113037SMark.Musante@Sun.COM } 414213037SMark.Musante@Sun.COM 4143789Sahrens 4144789Sahrens /* 4145789Sahrens * We don't set tvd until now because the parent we just removed 4146789Sahrens * may have been the previous top-level vdev. 4147789Sahrens */ 4148789Sahrens tvd = cvd->vdev_top; 4149789Sahrens ASSERT(tvd->vdev_parent == rvd); 4150789Sahrens 4151789Sahrens /* 41523377Seschrock * Reevaluate the parent vdev state. 4153789Sahrens */ 41544451Seschrock vdev_propagate_state(cvd); 4155789Sahrens 4156789Sahrens /* 41579816SGeorge.Wilson@Sun.COM * If the 'autoexpand' property is set on the pool then automatically 41589816SGeorge.Wilson@Sun.COM * try to expand the size of the pool. For example if the device we 41599816SGeorge.Wilson@Sun.COM * just detached was smaller than the others, it may be possible to 41609816SGeorge.Wilson@Sun.COM * add metaslabs (i.e. grow the pool). We need to reopen the vdev 41619816SGeorge.Wilson@Sun.COM * first so that we can obtain the updated sizes of the leaf vdevs. 
4162789Sahrens */ 41639816SGeorge.Wilson@Sun.COM if (spa->spa_autoexpand) { 41649816SGeorge.Wilson@Sun.COM vdev_reopen(tvd); 41659816SGeorge.Wilson@Sun.COM vdev_expand(tvd, txg); 41669816SGeorge.Wilson@Sun.COM } 4167789Sahrens 4168789Sahrens vdev_config_dirty(tvd); 4169789Sahrens 4170789Sahrens /* 41713377Seschrock * Mark vd's DTL as dirty in this txg. vdev_dtl_sync() will see that 41723377Seschrock * vd->vdev_detached is set and free vd's DTL object in syncing context. 41733377Seschrock * But first make sure we're not on any *other* txg's DTL list, to 41743377Seschrock * prevent vd from being accessed after it's freed. 4175789Sahrens */ /* NOTE(review): vd->vdev_path is duplicated here without a NULL check, although the "/old" handling earlier in this function guards against vd->vdev_path == NULL -- confirm spa_strdup() tolerates NULL or that the path is always set at this point. The copy is used for the history record written after spa_vdev_exit(). */ 417611422SMark.Musante@Sun.COM vdpath = spa_strdup(vd->vdev_path); 41778241SJeff.Bonwick@Sun.COM for (int t = 0; t < TXG_SIZE; t++) 4178789Sahrens (void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t); 41791732Sbonwick vd->vdev_detached = B_TRUE; 41801732Sbonwick vdev_dirty(tvd, VDD_DTL, vd, txg); 4181789Sahrens 41824451Seschrock spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE); 41834451Seschrock 418413037SMark.Musante@Sun.COM /* hang on to the spa before we release the lock */ 418513037SMark.Musante@Sun.COM spa_open_ref(spa, FTAG); 418613037SMark.Musante@Sun.COM 41872082Seschrock error = spa_vdev_exit(spa, vd, txg, 0); 41882082Seschrock 418912296SLin.Ling@Sun.COM spa_history_log_internal(LOG_POOL_VDEV_DETACH, spa, NULL, 419011422SMark.Musante@Sun.COM "vdev=%s", vdpath); 419111422SMark.Musante@Sun.COM spa_strfree(vdpath); 419211422SMark.Musante@Sun.COM 41932082Seschrock /* 41943377Seschrock * If this was the removal of the original device in a hot spare vdev, 41953377Seschrock * then we want to go through and remove the device from the hot spare 41963377Seschrock * list of every other pool. 
41972082Seschrock */ 41982082Seschrock if (unspare) { 419913037SMark.Musante@Sun.COM spa_t *altspa = NULL; 420013037SMark.Musante@Sun.COM 42012082Seschrock mutex_enter(&spa_namespace_lock); 420213037SMark.Musante@Sun.COM while ((altspa = spa_next(altspa)) != NULL) { 420313037SMark.Musante@Sun.COM if (altspa->spa_state != POOL_STATE_ACTIVE || 420413037SMark.Musante@Sun.COM altspa == spa) 42052082Seschrock continue; 420613037SMark.Musante@Sun.COM /* take a reference on altspa so spa_namespace_lock can be dropped around spa_vdev_remove() */ 420713037SMark.Musante@Sun.COM spa_open_ref(altspa, FTAG); 42087793SJeff.Bonwick@Sun.COM mutex_exit(&spa_namespace_lock); 420913037SMark.Musante@Sun.COM (void) spa_vdev_remove(altspa, unspare_guid, B_TRUE); 42107793SJeff.Bonwick@Sun.COM mutex_enter(&spa_namespace_lock); 421113037SMark.Musante@Sun.COM spa_close(altspa, FTAG); 42122082Seschrock } 42132082Seschrock mutex_exit(&spa_namespace_lock); 421413037SMark.Musante@Sun.COM 421513037SMark.Musante@Sun.COM /* search the rest of the vdevs for spares to remove */ 421613037SMark.Musante@Sun.COM spa_vdev_resilver_done(spa); 42172082Seschrock } 42182082Seschrock 421913037SMark.Musante@Sun.COM /* all done with the spa; OK to release */ 422013037SMark.Musante@Sun.COM mutex_enter(&spa_namespace_lock); 422113037SMark.Musante@Sun.COM spa_close(spa, FTAG); 422213037SMark.Musante@Sun.COM mutex_exit(&spa_namespace_lock); 422313037SMark.Musante@Sun.COM 42242082Seschrock return (error); 42252082Seschrock } 42262082Seschrock 422711422SMark.Musante@Sun.COM /* 422811422SMark.Musante@Sun.COM * Split a set of devices from their mirrors, and create a new pool from them. 
422911422SMark.Musante@Sun.COM */ 423011422SMark.Musante@Sun.COM int 423111422SMark.Musante@Sun.COM spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, 423211422SMark.Musante@Sun.COM nvlist_t *props, boolean_t exp) 423311422SMark.Musante@Sun.COM { 423411422SMark.Musante@Sun.COM int error = 0; 423511422SMark.Musante@Sun.COM uint64_t txg, *glist; 423611422SMark.Musante@Sun.COM spa_t *newspa; 423711422SMark.Musante@Sun.COM uint_t c, children, lastlog; 423811422SMark.Musante@Sun.COM nvlist_t **child, *nvl, *tmp; 423911422SMark.Musante@Sun.COM dmu_tx_t *tx; 424011422SMark.Musante@Sun.COM char *altroot = NULL; 424111422SMark.Musante@Sun.COM vdev_t *rvd, **vml = NULL; /* vdev modify list */ 424211422SMark.Musante@Sun.COM boolean_t activate_slog; 424311422SMark.Musante@Sun.COM 4244*13049SGeorge.Wilson@Sun.COM ASSERT(spa_writeable(spa)); 424511422SMark.Musante@Sun.COM 424611422SMark.Musante@Sun.COM txg = spa_vdev_enter(spa); 424711422SMark.Musante@Sun.COM 424811422SMark.Musante@Sun.COM /* clear the log and flush everything up to now */ 424911422SMark.Musante@Sun.COM activate_slog = spa_passivate_log(spa); 425011422SMark.Musante@Sun.COM (void) spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); 425111422SMark.Musante@Sun.COM error = spa_offline_log(spa); 425211422SMark.Musante@Sun.COM txg = spa_vdev_config_enter(spa); 425311422SMark.Musante@Sun.COM 425411422SMark.Musante@Sun.COM if (activate_slog) 425511422SMark.Musante@Sun.COM spa_activate_log(spa); 425611422SMark.Musante@Sun.COM 425711422SMark.Musante@Sun.COM if (error != 0) 425811422SMark.Musante@Sun.COM return (spa_vdev_exit(spa, NULL, txg, error)); 425911422SMark.Musante@Sun.COM 426011422SMark.Musante@Sun.COM /* check new spa name before going any further */ 426111422SMark.Musante@Sun.COM if (spa_lookup(newname) != NULL) 426211422SMark.Musante@Sun.COM return (spa_vdev_exit(spa, NULL, txg, EEXIST)); 426311422SMark.Musante@Sun.COM 426411422SMark.Musante@Sun.COM /* 426511422SMark.Musante@Sun.COM * scan through 
all the children to ensure they're all mirrors 426611422SMark.Musante@Sun.COM */ 426711422SMark.Musante@Sun.COM if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvl) != 0 || 426811422SMark.Musante@Sun.COM nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN, &child, 426911422SMark.Musante@Sun.COM &children) != 0) 427011422SMark.Musante@Sun.COM return (spa_vdev_exit(spa, NULL, txg, EINVAL)); 427111422SMark.Musante@Sun.COM 427211422SMark.Musante@Sun.COM /* first, check to ensure we've got the right child count */ 427311422SMark.Musante@Sun.COM rvd = spa->spa_root_vdev; 427411422SMark.Musante@Sun.COM lastlog = 0; 427511422SMark.Musante@Sun.COM for (c = 0; c < rvd->vdev_children; c++) { 427611422SMark.Musante@Sun.COM vdev_t *vd = rvd->vdev_child[c]; 427711422SMark.Musante@Sun.COM 427811422SMark.Musante@Sun.COM /* don't count the holes & logs as children */ 427911422SMark.Musante@Sun.COM if (vd->vdev_islog || vd->vdev_ishole) { 428011422SMark.Musante@Sun.COM if (lastlog == 0) 428111422SMark.Musante@Sun.COM lastlog = c; 428211422SMark.Musante@Sun.COM continue; 428311422SMark.Musante@Sun.COM } 428411422SMark.Musante@Sun.COM 428511422SMark.Musante@Sun.COM lastlog = 0; 428611422SMark.Musante@Sun.COM } 428711422SMark.Musante@Sun.COM if (children != (lastlog != 0 ? 
lastlog : rvd->vdev_children)) 428811422SMark.Musante@Sun.COM return (spa_vdev_exit(spa, NULL, txg, EINVAL)); 428911422SMark.Musante@Sun.COM 429011422SMark.Musante@Sun.COM /* next, ensure no spare or cache devices are part of the split */ 429111422SMark.Musante@Sun.COM if (nvlist_lookup_nvlist(nvl, ZPOOL_CONFIG_SPARES, &tmp) == 0 || 429211422SMark.Musante@Sun.COM nvlist_lookup_nvlist(nvl, ZPOOL_CONFIG_L2CACHE, &tmp) == 0) 429311422SMark.Musante@Sun.COM return (spa_vdev_exit(spa, NULL, txg, EINVAL)); 429411422SMark.Musante@Sun.COM 429511422SMark.Musante@Sun.COM vml = kmem_zalloc(children * sizeof (vdev_t *), KM_SLEEP); 429611422SMark.Musante@Sun.COM glist = kmem_zalloc(children * sizeof (uint64_t), KM_SLEEP); 429711422SMark.Musante@Sun.COM 429811422SMark.Musante@Sun.COM /* then, loop over each vdev and validate it */ 429911422SMark.Musante@Sun.COM for (c = 0; c < children; c++) { 430011422SMark.Musante@Sun.COM uint64_t is_hole = 0; 430111422SMark.Musante@Sun.COM 430211422SMark.Musante@Sun.COM (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE, 430311422SMark.Musante@Sun.COM &is_hole); 430411422SMark.Musante@Sun.COM 430511422SMark.Musante@Sun.COM if (is_hole != 0) { 430611422SMark.Musante@Sun.COM if (spa->spa_root_vdev->vdev_child[c]->vdev_ishole || 430711422SMark.Musante@Sun.COM spa->spa_root_vdev->vdev_child[c]->vdev_islog) { 430811422SMark.Musante@Sun.COM continue; 430911422SMark.Musante@Sun.COM } else { 431011422SMark.Musante@Sun.COM error = EINVAL; 431111422SMark.Musante@Sun.COM break; 431211422SMark.Musante@Sun.COM } 431311422SMark.Musante@Sun.COM } 431411422SMark.Musante@Sun.COM 431511422SMark.Musante@Sun.COM /* which disk is going to be split? 
*/ 431611422SMark.Musante@Sun.COM if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_GUID, 431711422SMark.Musante@Sun.COM &glist[c]) != 0) { 431811422SMark.Musante@Sun.COM error = EINVAL; 431911422SMark.Musante@Sun.COM break; 432011422SMark.Musante@Sun.COM } 432111422SMark.Musante@Sun.COM 432211422SMark.Musante@Sun.COM /* look it up in the spa */ 432311422SMark.Musante@Sun.COM vml[c] = spa_lookup_by_guid(spa, glist[c], B_FALSE); 432411422SMark.Musante@Sun.COM if (vml[c] == NULL) { 432511422SMark.Musante@Sun.COM error = ENODEV; 432611422SMark.Musante@Sun.COM break; 432711422SMark.Musante@Sun.COM } 432811422SMark.Musante@Sun.COM 432911422SMark.Musante@Sun.COM /* make sure there's nothing stopping the split */ 433011422SMark.Musante@Sun.COM if (vml[c]->vdev_parent->vdev_ops != &vdev_mirror_ops || 433111422SMark.Musante@Sun.COM vml[c]->vdev_islog || 433211422SMark.Musante@Sun.COM vml[c]->vdev_ishole || 433311422SMark.Musante@Sun.COM vml[c]->vdev_isspare || 433411422SMark.Musante@Sun.COM vml[c]->vdev_isl2cache || 433511422SMark.Musante@Sun.COM !vdev_writeable(vml[c]) || 433611497SMark.Musante@Sun.COM vml[c]->vdev_children != 0 || 433711422SMark.Musante@Sun.COM vml[c]->vdev_state != VDEV_STATE_HEALTHY || 433811422SMark.Musante@Sun.COM c != spa->spa_root_vdev->vdev_child[c]->vdev_id) { 433911422SMark.Musante@Sun.COM error = EINVAL; 434011422SMark.Musante@Sun.COM break; 434111422SMark.Musante@Sun.COM } 434211422SMark.Musante@Sun.COM 434311422SMark.Musante@Sun.COM if (vdev_dtl_required(vml[c])) { 434411422SMark.Musante@Sun.COM error = EBUSY; 434511422SMark.Musante@Sun.COM break; 434611422SMark.Musante@Sun.COM } 434711422SMark.Musante@Sun.COM 434811422SMark.Musante@Sun.COM /* we need certain info from the top level */ 434911422SMark.Musante@Sun.COM VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_ARRAY, 435011422SMark.Musante@Sun.COM vml[c]->vdev_top->vdev_ms_array) == 0); 435111422SMark.Musante@Sun.COM VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_SHIFT, 
435211422SMark.Musante@Sun.COM vml[c]->vdev_top->vdev_ms_shift) == 0); 435311422SMark.Musante@Sun.COM VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_ASIZE, 435411422SMark.Musante@Sun.COM vml[c]->vdev_top->vdev_asize) == 0); 435511422SMark.Musante@Sun.COM VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_ASHIFT, 435611422SMark.Musante@Sun.COM vml[c]->vdev_top->vdev_ashift) == 0); 435711422SMark.Musante@Sun.COM } 435811422SMark.Musante@Sun.COM 435911422SMark.Musante@Sun.COM if (error != 0) { 436011422SMark.Musante@Sun.COM kmem_free(vml, children * sizeof (vdev_t *)); 436111422SMark.Musante@Sun.COM kmem_free(glist, children * sizeof (uint64_t)); 436211422SMark.Musante@Sun.COM return (spa_vdev_exit(spa, NULL, txg, error)); 436311422SMark.Musante@Sun.COM } 436411422SMark.Musante@Sun.COM 436511422SMark.Musante@Sun.COM /* stop writers from using the disks */ 436611422SMark.Musante@Sun.COM for (c = 0; c < children; c++) { 436711422SMark.Musante@Sun.COM if (vml[c] != NULL) 436811422SMark.Musante@Sun.COM vml[c]->vdev_offline = B_TRUE; 436911422SMark.Musante@Sun.COM } 437011422SMark.Musante@Sun.COM vdev_reopen(spa->spa_root_vdev); 437111422SMark.Musante@Sun.COM 437211422SMark.Musante@Sun.COM /* 437311422SMark.Musante@Sun.COM * Temporarily record the splitting vdevs in the spa config. This 437411422SMark.Musante@Sun.COM * will disappear once the config is regenerated. 
437511422SMark.Musante@Sun.COM */ 437611422SMark.Musante@Sun.COM VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); 437711422SMark.Musante@Sun.COM VERIFY(nvlist_add_uint64_array(nvl, ZPOOL_CONFIG_SPLIT_LIST, 437811422SMark.Musante@Sun.COM glist, children) == 0); 437911422SMark.Musante@Sun.COM kmem_free(glist, children * sizeof (uint64_t)); 438011422SMark.Musante@Sun.COM 438111864SMark.Musante@Sun.COM mutex_enter(&spa->spa_props_lock); 438211422SMark.Musante@Sun.COM VERIFY(nvlist_add_nvlist(spa->spa_config, ZPOOL_CONFIG_SPLIT, 438311422SMark.Musante@Sun.COM nvl) == 0); 438411864SMark.Musante@Sun.COM mutex_exit(&spa->spa_props_lock); 438511422SMark.Musante@Sun.COM spa->spa_config_splitting = nvl; 438611422SMark.Musante@Sun.COM vdev_config_dirty(spa->spa_root_vdev); 438711422SMark.Musante@Sun.COM 438811422SMark.Musante@Sun.COM /* configure and create the new pool */ 438911422SMark.Musante@Sun.COM VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, newname) == 0); 439011422SMark.Musante@Sun.COM VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, 439111422SMark.Musante@Sun.COM exp ? 
POOL_STATE_EXPORTED : POOL_STATE_ACTIVE) == 0); 439211422SMark.Musante@Sun.COM VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION, 439311422SMark.Musante@Sun.COM spa_version(spa)) == 0); 439411422SMark.Musante@Sun.COM VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG, 439511422SMark.Musante@Sun.COM spa->spa_config_txg) == 0); 439611422SMark.Musante@Sun.COM VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID, 439711422SMark.Musante@Sun.COM spa_generate_guid(NULL)) == 0); 439811422SMark.Musante@Sun.COM (void) nvlist_lookup_string(props, 439911422SMark.Musante@Sun.COM zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); 440011422SMark.Musante@Sun.COM 440111497SMark.Musante@Sun.COM /* add the new pool to the namespace */ 440211422SMark.Musante@Sun.COM newspa = spa_add(newname, config, altroot); 440311422SMark.Musante@Sun.COM newspa->spa_config_txg = spa->spa_config_txg; 440411422SMark.Musante@Sun.COM spa_set_log_state(newspa, SPA_LOG_CLEAR); 440511422SMark.Musante@Sun.COM 440611422SMark.Musante@Sun.COM /* release the spa config lock, retaining the namespace lock */ 440711422SMark.Musante@Sun.COM spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); 440811422SMark.Musante@Sun.COM 440911422SMark.Musante@Sun.COM if (zio_injection_enabled) 441011422SMark.Musante@Sun.COM zio_handle_panic_injection(spa, FTAG, 1); 441111422SMark.Musante@Sun.COM 441211422SMark.Musante@Sun.COM spa_activate(newspa, spa_mode_global); 441311422SMark.Musante@Sun.COM spa_async_suspend(newspa); 441411422SMark.Musante@Sun.COM 441511422SMark.Musante@Sun.COM /* create the new pool from the disks of the original pool */ 441611422SMark.Musante@Sun.COM error = spa_load(newspa, SPA_LOAD_IMPORT, SPA_IMPORT_ASSEMBLE, B_TRUE); 441711422SMark.Musante@Sun.COM if (error) 441811422SMark.Musante@Sun.COM goto out; 441911422SMark.Musante@Sun.COM 442011422SMark.Musante@Sun.COM /* if that worked, generate a real config for the new pool */ 442111422SMark.Musante@Sun.COM if (newspa->spa_root_vdev != NULL) { 
442211422SMark.Musante@Sun.COM VERIFY(nvlist_alloc(&newspa->spa_config_splitting, 442311422SMark.Musante@Sun.COM NV_UNIQUE_NAME, KM_SLEEP) == 0); 442411422SMark.Musante@Sun.COM VERIFY(nvlist_add_uint64(newspa->spa_config_splitting, 442511422SMark.Musante@Sun.COM ZPOOL_CONFIG_SPLIT_GUID, spa_guid(spa)) == 0); 442611422SMark.Musante@Sun.COM spa_config_set(newspa, spa_config_generate(newspa, NULL, -1ULL, 442711422SMark.Musante@Sun.COM B_TRUE)); 442811422SMark.Musante@Sun.COM } 442911422SMark.Musante@Sun.COM 443011422SMark.Musante@Sun.COM /* set the props */ 443111422SMark.Musante@Sun.COM if (props != NULL) { 443211422SMark.Musante@Sun.COM spa_configfile_set(newspa, props, B_FALSE); 443311422SMark.Musante@Sun.COM error = spa_prop_set(newspa, props); 443411422SMark.Musante@Sun.COM if (error) 443511422SMark.Musante@Sun.COM goto out; 443611422SMark.Musante@Sun.COM } 443711422SMark.Musante@Sun.COM 443811422SMark.Musante@Sun.COM /* flush everything */ 443911422SMark.Musante@Sun.COM txg = spa_vdev_config_enter(newspa); 444011422SMark.Musante@Sun.COM vdev_config_dirty(newspa->spa_root_vdev); 444111422SMark.Musante@Sun.COM (void) spa_vdev_config_exit(newspa, NULL, txg, 0, FTAG); 444211422SMark.Musante@Sun.COM 444311422SMark.Musante@Sun.COM if (zio_injection_enabled) 444411422SMark.Musante@Sun.COM zio_handle_panic_injection(spa, FTAG, 2); 444511422SMark.Musante@Sun.COM 444611422SMark.Musante@Sun.COM spa_async_resume(newspa); 444711422SMark.Musante@Sun.COM 444811422SMark.Musante@Sun.COM /* finally, update the original pool's config */ 444911422SMark.Musante@Sun.COM txg = spa_vdev_config_enter(spa); 445011422SMark.Musante@Sun.COM tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); 445111422SMark.Musante@Sun.COM error = dmu_tx_assign(tx, TXG_WAIT); 445211422SMark.Musante@Sun.COM if (error != 0) 445311422SMark.Musante@Sun.COM dmu_tx_abort(tx); 445411422SMark.Musante@Sun.COM for (c = 0; c < children; c++) { 445511422SMark.Musante@Sun.COM if (vml[c] != NULL) { 
445611422SMark.Musante@Sun.COM vdev_split(vml[c]); 445711422SMark.Musante@Sun.COM if (error == 0) 445812296SLin.Ling@Sun.COM spa_history_log_internal(LOG_POOL_VDEV_DETACH, 445912296SLin.Ling@Sun.COM spa, tx, "vdev=%s", 446011422SMark.Musante@Sun.COM vml[c]->vdev_path); 446111422SMark.Musante@Sun.COM vdev_free(vml[c]); 446211422SMark.Musante@Sun.COM } 446311422SMark.Musante@Sun.COM } 446411422SMark.Musante@Sun.COM vdev_config_dirty(spa->spa_root_vdev); 446511422SMark.Musante@Sun.COM spa->spa_config_splitting = NULL; 446611422SMark.Musante@Sun.COM nvlist_free(nvl); 446711422SMark.Musante@Sun.COM if (error == 0) 446811422SMark.Musante@Sun.COM dmu_tx_commit(tx); 446911422SMark.Musante@Sun.COM (void) spa_vdev_exit(spa, NULL, txg, 0); 447011422SMark.Musante@Sun.COM 447111422SMark.Musante@Sun.COM if (zio_injection_enabled) 447211422SMark.Musante@Sun.COM zio_handle_panic_injection(spa, FTAG, 3); 447311422SMark.Musante@Sun.COM 447411422SMark.Musante@Sun.COM /* split is complete; log a history record */ 447512296SLin.Ling@Sun.COM spa_history_log_internal(LOG_POOL_SPLIT, newspa, NULL, 447611422SMark.Musante@Sun.COM "split new pool %s from pool %s", newname, spa_name(spa)); 447711422SMark.Musante@Sun.COM 447811422SMark.Musante@Sun.COM kmem_free(vml, children * sizeof (vdev_t *)); 447911422SMark.Musante@Sun.COM 448011422SMark.Musante@Sun.COM /* if we're not going to mount the filesystems in userland, export */ 448111422SMark.Musante@Sun.COM if (exp) 448211422SMark.Musante@Sun.COM error = spa_export_common(newname, POOL_STATE_EXPORTED, NULL, 448311422SMark.Musante@Sun.COM B_FALSE, B_FALSE); 448411422SMark.Musante@Sun.COM 448511422SMark.Musante@Sun.COM return (error); 448611422SMark.Musante@Sun.COM 448711422SMark.Musante@Sun.COM out: 448811422SMark.Musante@Sun.COM spa_unload(newspa); 448911422SMark.Musante@Sun.COM spa_deactivate(newspa); 449011422SMark.Musante@Sun.COM spa_remove(newspa); 449111422SMark.Musante@Sun.COM 449211422SMark.Musante@Sun.COM txg = 
spa_vdev_config_enter(spa); 449311864SMark.Musante@Sun.COM 449411864SMark.Musante@Sun.COM /* re-online all offlined disks */ 449511864SMark.Musante@Sun.COM for (c = 0; c < children; c++) { 449611864SMark.Musante@Sun.COM if (vml[c] != NULL) 449711864SMark.Musante@Sun.COM vml[c]->vdev_offline = B_FALSE; 449811864SMark.Musante@Sun.COM } 449911864SMark.Musante@Sun.COM vdev_reopen(spa->spa_root_vdev); 450011864SMark.Musante@Sun.COM 450111422SMark.Musante@Sun.COM nvlist_free(spa->spa_config_splitting); 450211422SMark.Musante@Sun.COM spa->spa_config_splitting = NULL; 450311497SMark.Musante@Sun.COM (void) spa_vdev_exit(spa, NULL, txg, error); 450411422SMark.Musante@Sun.COM 450511422SMark.Musante@Sun.COM kmem_free(vml, children * sizeof (vdev_t *)); 450611422SMark.Musante@Sun.COM return (error); 450711422SMark.Musante@Sun.COM } 450811422SMark.Musante@Sun.COM 45097754SJeff.Bonwick@Sun.COM static nvlist_t * 45107754SJeff.Bonwick@Sun.COM spa_nvlist_lookup_by_guid(nvlist_t **nvpp, int count, uint64_t target_guid) 45112082Seschrock { 45127754SJeff.Bonwick@Sun.COM for (int i = 0; i < count; i++) { 45137754SJeff.Bonwick@Sun.COM uint64_t guid; 45147754SJeff.Bonwick@Sun.COM 45157754SJeff.Bonwick@Sun.COM VERIFY(nvlist_lookup_uint64(nvpp[i], ZPOOL_CONFIG_GUID, 45167754SJeff.Bonwick@Sun.COM &guid) == 0); 45177754SJeff.Bonwick@Sun.COM 45187754SJeff.Bonwick@Sun.COM if (guid == target_guid) 45197754SJeff.Bonwick@Sun.COM return (nvpp[i]); 45202082Seschrock } 45212082Seschrock 45227754SJeff.Bonwick@Sun.COM return (NULL); 45235450Sbrendan } 45245450Sbrendan 45257754SJeff.Bonwick@Sun.COM static void 45267754SJeff.Bonwick@Sun.COM spa_vdev_remove_aux(nvlist_t *config, char *name, nvlist_t **dev, int count, 45277754SJeff.Bonwick@Sun.COM nvlist_t *dev_to_remove) 45285450Sbrendan { 45297754SJeff.Bonwick@Sun.COM nvlist_t **newdev = NULL; 45307754SJeff.Bonwick@Sun.COM 45317754SJeff.Bonwick@Sun.COM if (count > 1) 45327754SJeff.Bonwick@Sun.COM newdev = kmem_alloc((count - 1) * sizeof (void *), 
KM_SLEEP); 45337754SJeff.Bonwick@Sun.COM 45347754SJeff.Bonwick@Sun.COM for (int i = 0, j = 0; i < count; i++) { 45357754SJeff.Bonwick@Sun.COM if (dev[i] == dev_to_remove) 45367754SJeff.Bonwick@Sun.COM continue; 45377754SJeff.Bonwick@Sun.COM VERIFY(nvlist_dup(dev[i], &newdev[j++], KM_SLEEP) == 0); 45385450Sbrendan } 45395450Sbrendan 45407754SJeff.Bonwick@Sun.COM VERIFY(nvlist_remove(config, name, DATA_TYPE_NVLIST_ARRAY) == 0); 45417754SJeff.Bonwick@Sun.COM VERIFY(nvlist_add_nvlist_array(config, name, newdev, count - 1) == 0); 45427754SJeff.Bonwick@Sun.COM 45437754SJeff.Bonwick@Sun.COM for (int i = 0; i < count - 1; i++) 45447754SJeff.Bonwick@Sun.COM nvlist_free(newdev[i]); 45457754SJeff.Bonwick@Sun.COM 45467754SJeff.Bonwick@Sun.COM if (count > 1) 45477754SJeff.Bonwick@Sun.COM kmem_free(newdev, (count - 1) * sizeof (void *)); 45485450Sbrendan } 45495450Sbrendan 45505450Sbrendan /* 455110594SGeorge.Wilson@Sun.COM * Evacuate the device. 455210594SGeorge.Wilson@Sun.COM */ 455312296SLin.Ling@Sun.COM static int 455410594SGeorge.Wilson@Sun.COM spa_vdev_remove_evacuate(spa_t *spa, vdev_t *vd) 455510594SGeorge.Wilson@Sun.COM { 455612296SLin.Ling@Sun.COM uint64_t txg; 455710974SJeff.Bonwick@Sun.COM int error = 0; 455810594SGeorge.Wilson@Sun.COM 455910594SGeorge.Wilson@Sun.COM ASSERT(MUTEX_HELD(&spa_namespace_lock)); 456010594SGeorge.Wilson@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); 456110922SJeff.Bonwick@Sun.COM ASSERT(vd == vd->vdev_top); 456210594SGeorge.Wilson@Sun.COM 456310594SGeorge.Wilson@Sun.COM /* 456410594SGeorge.Wilson@Sun.COM * Evacuate the device. We don't hold the config lock as writer 456510594SGeorge.Wilson@Sun.COM * since we need to do I/O but we do keep the 456610594SGeorge.Wilson@Sun.COM * spa_namespace_lock held. Once this completes the device 456710594SGeorge.Wilson@Sun.COM * should no longer have any blocks allocated on it. 
456810594SGeorge.Wilson@Sun.COM */ 456910594SGeorge.Wilson@Sun.COM if (vd->vdev_islog) { 457012296SLin.Ling@Sun.COM if (vd->vdev_stat.vs_alloc != 0) 457112296SLin.Ling@Sun.COM error = spa_offline_log(spa); 457210974SJeff.Bonwick@Sun.COM } else { 457312296SLin.Ling@Sun.COM error = ENOTSUP; 457410594SGeorge.Wilson@Sun.COM } 457510594SGeorge.Wilson@Sun.COM 457610974SJeff.Bonwick@Sun.COM if (error) 457710974SJeff.Bonwick@Sun.COM return (error); 457810974SJeff.Bonwick@Sun.COM 457910594SGeorge.Wilson@Sun.COM /* 458010974SJeff.Bonwick@Sun.COM * The evacuation succeeded. Remove any remaining MOS metadata 458110974SJeff.Bonwick@Sun.COM * associated with this vdev, and wait for these changes to sync. 458210594SGeorge.Wilson@Sun.COM */ 458312296SLin.Ling@Sun.COM ASSERT3U(vd->vdev_stat.vs_alloc, ==, 0); 458410594SGeorge.Wilson@Sun.COM txg = spa_vdev_config_enter(spa); 458510594SGeorge.Wilson@Sun.COM vd->vdev_removing = B_TRUE; 458610594SGeorge.Wilson@Sun.COM vdev_dirty(vd, 0, NULL, txg); 458710594SGeorge.Wilson@Sun.COM vdev_config_dirty(vd); 458810594SGeorge.Wilson@Sun.COM spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); 458910594SGeorge.Wilson@Sun.COM 459010594SGeorge.Wilson@Sun.COM return (0); 459110594SGeorge.Wilson@Sun.COM } 459210594SGeorge.Wilson@Sun.COM 459310594SGeorge.Wilson@Sun.COM /* 459410594SGeorge.Wilson@Sun.COM * Complete the removal by cleaning up the namespace. 
459510594SGeorge.Wilson@Sun.COM */ 459612296SLin.Ling@Sun.COM static void 459710974SJeff.Bonwick@Sun.COM spa_vdev_remove_from_namespace(spa_t *spa, vdev_t *vd) 459810594SGeorge.Wilson@Sun.COM { 459910594SGeorge.Wilson@Sun.COM vdev_t *rvd = spa->spa_root_vdev; 460010594SGeorge.Wilson@Sun.COM uint64_t id = vd->vdev_id; 460110594SGeorge.Wilson@Sun.COM boolean_t last_vdev = (id == (rvd->vdev_children - 1)); 460210594SGeorge.Wilson@Sun.COM 460310594SGeorge.Wilson@Sun.COM ASSERT(MUTEX_HELD(&spa_namespace_lock)); 460410594SGeorge.Wilson@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 460510922SJeff.Bonwick@Sun.COM ASSERT(vd == vd->vdev_top); 460610594SGeorge.Wilson@Sun.COM 460712296SLin.Ling@Sun.COM /* 460812296SLin.Ling@Sun.COM * Only remove any devices which are empty. 460912296SLin.Ling@Sun.COM */ 461012296SLin.Ling@Sun.COM if (vd->vdev_stat.vs_alloc != 0) 461112296SLin.Ling@Sun.COM return; 461212296SLin.Ling@Sun.COM 461310594SGeorge.Wilson@Sun.COM (void) vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); 461410922SJeff.Bonwick@Sun.COM 461510922SJeff.Bonwick@Sun.COM if (list_link_active(&vd->vdev_state_dirty_node)) 461610922SJeff.Bonwick@Sun.COM vdev_state_clean(vd); 461710922SJeff.Bonwick@Sun.COM if (list_link_active(&vd->vdev_config_dirty_node)) 461810922SJeff.Bonwick@Sun.COM vdev_config_clean(vd); 461910922SJeff.Bonwick@Sun.COM 462010594SGeorge.Wilson@Sun.COM vdev_free(vd); 462110594SGeorge.Wilson@Sun.COM 462210594SGeorge.Wilson@Sun.COM if (last_vdev) { 462310594SGeorge.Wilson@Sun.COM vdev_compact_children(rvd); 462410594SGeorge.Wilson@Sun.COM } else { 462510594SGeorge.Wilson@Sun.COM vd = vdev_alloc_common(spa, id, 0, &vdev_hole_ops); 462610594SGeorge.Wilson@Sun.COM vdev_add_child(rvd, vd); 462710594SGeorge.Wilson@Sun.COM } 462812352SLin.Ling@Sun.COM vdev_config_dirty(rvd); 462912352SLin.Ling@Sun.COM 463012352SLin.Ling@Sun.COM /* 463112352SLin.Ling@Sun.COM * Reassess the health of our root vdev. 
463212352SLin.Ling@Sun.COM */ 463312352SLin.Ling@Sun.COM vdev_reopen(rvd); 463410594SGeorge.Wilson@Sun.COM } 463510594SGeorge.Wilson@Sun.COM 463610594SGeorge.Wilson@Sun.COM /* 463712296SLin.Ling@Sun.COM * Remove a device from the pool - 463812296SLin.Ling@Sun.COM * 463912296SLin.Ling@Sun.COM * Removing a device from the vdev namespace requires several steps 464012296SLin.Ling@Sun.COM * and can take a significant amount of time. As a result we use 464112296SLin.Ling@Sun.COM * the spa_vdev_config_[enter/exit] functions which allow us to 464212296SLin.Ling@Sun.COM * grab and release the spa_config_lock while still holding the namespace 464312296SLin.Ling@Sun.COM * lock. During each step the configuration is synced out. 464412296SLin.Ling@Sun.COM */ 464512296SLin.Ling@Sun.COM 464612296SLin.Ling@Sun.COM /* 46475450Sbrendan * Remove a device from the pool. Currently, this supports removing only hot 464810594SGeorge.Wilson@Sun.COM * spares, slogs, and level 2 ARC devices. 46495450Sbrendan */ 46505450Sbrendan int 46515450Sbrendan spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) 46525450Sbrendan { 46535450Sbrendan vdev_t *vd; 465410974SJeff.Bonwick@Sun.COM metaslab_group_t *mg; 46557754SJeff.Bonwick@Sun.COM nvlist_t **spares, **l2cache, *nv; 465610594SGeorge.Wilson@Sun.COM uint64_t txg = 0; 46575450Sbrendan uint_t nspares, nl2cache; 46585450Sbrendan int error = 0; 46598241SJeff.Bonwick@Sun.COM boolean_t locked = MUTEX_HELD(&spa_namespace_lock); 46608241SJeff.Bonwick@Sun.COM 4661*13049SGeorge.Wilson@Sun.COM ASSERT(spa_writeable(spa)); 4662*13049SGeorge.Wilson@Sun.COM 46638241SJeff.Bonwick@Sun.COM if (!locked) 46648241SJeff.Bonwick@Sun.COM txg = spa_vdev_enter(spa); 46655450Sbrendan 46666643Seschrock vd = spa_lookup_by_guid(spa, guid, B_FALSE); 46675450Sbrendan 46685450Sbrendan if (spa->spa_spares.sav_vdevs != NULL && 46695450Sbrendan nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 46707754SJeff.Bonwick@Sun.COM ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0 
&& 46717754SJeff.Bonwick@Sun.COM (nv = spa_nvlist_lookup_by_guid(spares, nspares, guid)) != NULL) { 46727754SJeff.Bonwick@Sun.COM /* 46737754SJeff.Bonwick@Sun.COM * Only remove the hot spare if it's not currently in use 46747754SJeff.Bonwick@Sun.COM * in this pool. 46757754SJeff.Bonwick@Sun.COM */ 46767754SJeff.Bonwick@Sun.COM if (vd == NULL || unspare) { 46777754SJeff.Bonwick@Sun.COM spa_vdev_remove_aux(spa->spa_spares.sav_config, 46787754SJeff.Bonwick@Sun.COM ZPOOL_CONFIG_SPARES, spares, nspares, nv); 46797754SJeff.Bonwick@Sun.COM spa_load_spares(spa); 46807754SJeff.Bonwick@Sun.COM spa->spa_spares.sav_sync = B_TRUE; 46817754SJeff.Bonwick@Sun.COM } else { 46827754SJeff.Bonwick@Sun.COM error = EBUSY; 46837754SJeff.Bonwick@Sun.COM } 46847754SJeff.Bonwick@Sun.COM } else if (spa->spa_l2cache.sav_vdevs != NULL && 46855450Sbrendan nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, 46867754SJeff.Bonwick@Sun.COM ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0 && 46877754SJeff.Bonwick@Sun.COM (nv = spa_nvlist_lookup_by_guid(l2cache, nl2cache, guid)) != NULL) { 46887754SJeff.Bonwick@Sun.COM /* 46897754SJeff.Bonwick@Sun.COM * Cache devices can always be removed. 46907754SJeff.Bonwick@Sun.COM */ 46917754SJeff.Bonwick@Sun.COM spa_vdev_remove_aux(spa->spa_l2cache.sav_config, 46927754SJeff.Bonwick@Sun.COM ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache, nv); 46935450Sbrendan spa_load_l2cache(spa); 46945450Sbrendan spa->spa_l2cache.sav_sync = B_TRUE; 469510594SGeorge.Wilson@Sun.COM } else if (vd != NULL && vd->vdev_islog) { 469610594SGeorge.Wilson@Sun.COM ASSERT(!locked); 469710922SJeff.Bonwick@Sun.COM ASSERT(vd == vd->vdev_top); 469810594SGeorge.Wilson@Sun.COM 469910594SGeorge.Wilson@Sun.COM /* 470010594SGeorge.Wilson@Sun.COM * XXX - Once we have bp-rewrite this should 470110594SGeorge.Wilson@Sun.COM * become the common case. 
470210594SGeorge.Wilson@Sun.COM */ 470310594SGeorge.Wilson@Sun.COM 470410974SJeff.Bonwick@Sun.COM mg = vd->vdev_mg; 470510974SJeff.Bonwick@Sun.COM 470610594SGeorge.Wilson@Sun.COM /* 470710974SJeff.Bonwick@Sun.COM * Stop allocating from this vdev. 470810594SGeorge.Wilson@Sun.COM */ 470910974SJeff.Bonwick@Sun.COM metaslab_group_passivate(mg); 471010594SGeorge.Wilson@Sun.COM 471110922SJeff.Bonwick@Sun.COM /* 471210922SJeff.Bonwick@Sun.COM * Wait for the youngest allocations and frees to sync, 471310922SJeff.Bonwick@Sun.COM * and then wait for the deferral of those frees to finish. 471410922SJeff.Bonwick@Sun.COM */ 471510922SJeff.Bonwick@Sun.COM spa_vdev_config_exit(spa, NULL, 471610922SJeff.Bonwick@Sun.COM txg + TXG_CONCURRENT_STATES + TXG_DEFER_SIZE, 0, FTAG); 471710922SJeff.Bonwick@Sun.COM 471810974SJeff.Bonwick@Sun.COM /* 471910974SJeff.Bonwick@Sun.COM * Attempt to evacuate the vdev. 472010974SJeff.Bonwick@Sun.COM */ 472110974SJeff.Bonwick@Sun.COM error = spa_vdev_remove_evacuate(spa, vd); 472210974SJeff.Bonwick@Sun.COM 472310594SGeorge.Wilson@Sun.COM txg = spa_vdev_config_enter(spa); 472410594SGeorge.Wilson@Sun.COM 472510974SJeff.Bonwick@Sun.COM /* 472610974SJeff.Bonwick@Sun.COM * If we couldn't evacuate the vdev, unwind. 472710974SJeff.Bonwick@Sun.COM */ 472810974SJeff.Bonwick@Sun.COM if (error) { 472910974SJeff.Bonwick@Sun.COM metaslab_group_activate(mg); 473010974SJeff.Bonwick@Sun.COM return (spa_vdev_exit(spa, NULL, txg, error)); 473110974SJeff.Bonwick@Sun.COM } 473210974SJeff.Bonwick@Sun.COM 473310974SJeff.Bonwick@Sun.COM /* 473410974SJeff.Bonwick@Sun.COM * Clean up the vdev namespace. 473510974SJeff.Bonwick@Sun.COM */ 473610974SJeff.Bonwick@Sun.COM spa_vdev_remove_from_namespace(spa, vd); 473710594SGeorge.Wilson@Sun.COM 47387754SJeff.Bonwick@Sun.COM } else if (vd != NULL) { 47397754SJeff.Bonwick@Sun.COM /* 47407754SJeff.Bonwick@Sun.COM * Normal vdevs cannot be removed (yet). 
47417754SJeff.Bonwick@Sun.COM */ 47427754SJeff.Bonwick@Sun.COM error = ENOTSUP; 47437754SJeff.Bonwick@Sun.COM } else { 47447754SJeff.Bonwick@Sun.COM /* 47457754SJeff.Bonwick@Sun.COM * There is no vdev of any kind with the specified guid. 47467754SJeff.Bonwick@Sun.COM */ 47477754SJeff.Bonwick@Sun.COM error = ENOENT; 47485450Sbrendan } 47492082Seschrock 47508241SJeff.Bonwick@Sun.COM if (!locked) 47518241SJeff.Bonwick@Sun.COM return (spa_vdev_exit(spa, NULL, txg, error)); 47528241SJeff.Bonwick@Sun.COM 47538241SJeff.Bonwick@Sun.COM return (error); 4754789Sahrens } 4755789Sahrens 4756789Sahrens /* 47574451Seschrock * Find any device that's done replacing, or a vdev marked 'unspare' that's 47584451Seschrock * current spared, so we can detach it. 4759789Sahrens */ 47601544Seschrock static vdev_t * 47614451Seschrock spa_vdev_resilver_done_hunt(vdev_t *vd) 4762789Sahrens { 47631544Seschrock vdev_t *newvd, *oldvd; 47649816SGeorge.Wilson@Sun.COM 47659816SGeorge.Wilson@Sun.COM for (int c = 0; c < vd->vdev_children; c++) { 47664451Seschrock oldvd = spa_vdev_resilver_done_hunt(vd->vdev_child[c]); 47671544Seschrock if (oldvd != NULL) 47681544Seschrock return (oldvd); 47691544Seschrock } 4770789Sahrens 47714451Seschrock /* 477213037SMark.Musante@Sun.COM * Check for a completed replacement. We always consider the first 477313037SMark.Musante@Sun.COM * vdev in the list to be the oldest vdev, and the last one to be 477413037SMark.Musante@Sun.COM * the newest (see spa_vdev_attach() for how that works). In 477513037SMark.Musante@Sun.COM * the case where the newest vdev is faulted, we will not automatically 477613037SMark.Musante@Sun.COM * remove it after a resilver completes. This is OK as it will require 477713037SMark.Musante@Sun.COM * user intervention to determine which disk the admin wishes to keep. 
47784451Seschrock */ 477913037SMark.Musante@Sun.COM if (vd->vdev_ops == &vdev_replacing_ops) { 478013037SMark.Musante@Sun.COM ASSERT(vd->vdev_children > 1); 478113037SMark.Musante@Sun.COM 478213037SMark.Musante@Sun.COM newvd = vd->vdev_child[vd->vdev_children - 1]; 47831544Seschrock oldvd = vd->vdev_child[0]; 4784789Sahrens 47858241SJeff.Bonwick@Sun.COM if (vdev_dtl_empty(newvd, DTL_MISSING) && 478611820SVictor.Latushkin@Sun.COM vdev_dtl_empty(newvd, DTL_OUTAGE) && 47878241SJeff.Bonwick@Sun.COM !vdev_dtl_required(oldvd)) 47881544Seschrock return (oldvd); 47891544Seschrock } 4790789Sahrens 47914451Seschrock /* 47924451Seschrock * Check for a completed resilver with the 'unspare' flag set. 47934451Seschrock */ 479413037SMark.Musante@Sun.COM if (vd->vdev_ops == &vdev_spare_ops) { 479513037SMark.Musante@Sun.COM vdev_t *first = vd->vdev_child[0]; 479613037SMark.Musante@Sun.COM vdev_t *last = vd->vdev_child[vd->vdev_children - 1]; 479713037SMark.Musante@Sun.COM 479813037SMark.Musante@Sun.COM if (last->vdev_unspare) { 479913037SMark.Musante@Sun.COM oldvd = first; 480013037SMark.Musante@Sun.COM newvd = last; 480113037SMark.Musante@Sun.COM } else if (first->vdev_unspare) { 480213037SMark.Musante@Sun.COM oldvd = last; 480313037SMark.Musante@Sun.COM newvd = first; 480413037SMark.Musante@Sun.COM } else { 480513037SMark.Musante@Sun.COM oldvd = NULL; 480613037SMark.Musante@Sun.COM } 480713037SMark.Musante@Sun.COM 480813037SMark.Musante@Sun.COM if (oldvd != NULL && 48098241SJeff.Bonwick@Sun.COM vdev_dtl_empty(newvd, DTL_MISSING) && 481011820SVictor.Latushkin@Sun.COM vdev_dtl_empty(newvd, DTL_OUTAGE) && 481113037SMark.Musante@Sun.COM !vdev_dtl_required(oldvd)) 48124451Seschrock return (oldvd); 481313037SMark.Musante@Sun.COM 481413037SMark.Musante@Sun.COM /* 481513037SMark.Musante@Sun.COM * If there are more than two spares attached to a disk, 481613037SMark.Musante@Sun.COM * and those spares are not required, then we want to 481713037SMark.Musante@Sun.COM * attempt to free them up 
now so that they can be used 481813037SMark.Musante@Sun.COM * by other pools. Once we're back down to a single 481913037SMark.Musante@Sun.COM * disk+spare, we stop removing them. 482013037SMark.Musante@Sun.COM */ 482113037SMark.Musante@Sun.COM if (vd->vdev_children > 2) { 482213037SMark.Musante@Sun.COM newvd = vd->vdev_child[1]; 482313037SMark.Musante@Sun.COM 482413037SMark.Musante@Sun.COM if (newvd->vdev_isspare && last->vdev_isspare && 482513037SMark.Musante@Sun.COM vdev_dtl_empty(last, DTL_MISSING) && 482613037SMark.Musante@Sun.COM vdev_dtl_empty(last, DTL_OUTAGE) && 482713037SMark.Musante@Sun.COM !vdev_dtl_required(newvd)) 482813037SMark.Musante@Sun.COM return (newvd); 48294451Seschrock } 48304451Seschrock } 48314451Seschrock 48321544Seschrock return (NULL); 4833789Sahrens } 4834789Sahrens 48351544Seschrock static void 48364451Seschrock spa_vdev_resilver_done(spa_t *spa) 4837789Sahrens { 48388241SJeff.Bonwick@Sun.COM vdev_t *vd, *pvd, *ppvd; 48398241SJeff.Bonwick@Sun.COM uint64_t guid, sguid, pguid, ppguid; 48408241SJeff.Bonwick@Sun.COM 48418241SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 4842789Sahrens 48434451Seschrock while ((vd = spa_vdev_resilver_done_hunt(spa->spa_root_vdev)) != NULL) { 48448241SJeff.Bonwick@Sun.COM pvd = vd->vdev_parent; 48458241SJeff.Bonwick@Sun.COM ppvd = pvd->vdev_parent; 48461544Seschrock guid = vd->vdev_guid; 48478241SJeff.Bonwick@Sun.COM pguid = pvd->vdev_guid; 48488241SJeff.Bonwick@Sun.COM ppguid = ppvd->vdev_guid; 48498241SJeff.Bonwick@Sun.COM sguid = 0; 48502082Seschrock /* 48512082Seschrock * If we have just finished replacing a hot spared device, then 48522082Seschrock * we need to detach the parent's first child (the original hot 48532082Seschrock * spare) as well. 
48542082Seschrock */ 485513037SMark.Musante@Sun.COM if (ppvd->vdev_ops == &vdev_spare_ops && pvd->vdev_id == 0 && 485613037SMark.Musante@Sun.COM ppvd->vdev_children == 2) { 48572082Seschrock ASSERT(pvd->vdev_ops == &vdev_replacing_ops); 48588241SJeff.Bonwick@Sun.COM sguid = ppvd->vdev_child[1]->vdev_guid; 48592082Seschrock } 48608241SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 48618241SJeff.Bonwick@Sun.COM if (spa_vdev_detach(spa, guid, pguid, B_TRUE) != 0) 48621544Seschrock return; 48638241SJeff.Bonwick@Sun.COM if (sguid && spa_vdev_detach(spa, sguid, ppguid, B_TRUE) != 0) 48642082Seschrock return; 48658241SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 4866789Sahrens } 4867789Sahrens 48688241SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 4869789Sahrens } 4870789Sahrens 4871789Sahrens /* 487211041SEric.Taylor@Sun.COM * Update the stored path or FRU for this vdev. 48731354Seschrock */ 48741354Seschrock int 48759425SEric.Schrock@Sun.COM spa_vdev_set_common(spa_t *spa, uint64_t guid, const char *value, 48769425SEric.Schrock@Sun.COM boolean_t ispath) 48771354Seschrock { 48786643Seschrock vdev_t *vd; 487911817SGeorge.Wilson@Sun.COM boolean_t sync = B_FALSE; 488011041SEric.Taylor@Sun.COM 4881*13049SGeorge.Wilson@Sun.COM ASSERT(spa_writeable(spa)); 4882*13049SGeorge.Wilson@Sun.COM 488311041SEric.Taylor@Sun.COM spa_vdev_state_enter(spa, SCL_ALL); 48841354Seschrock 48859425SEric.Schrock@Sun.COM if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) 488611041SEric.Taylor@Sun.COM return (spa_vdev_state_exit(spa, NULL, ENOENT)); 48871354Seschrock 48881585Sbonwick if (!vd->vdev_ops->vdev_op_leaf) 488911041SEric.Taylor@Sun.COM return (spa_vdev_state_exit(spa, NULL, ENOTSUP)); 48901585Sbonwick 48919425SEric.Schrock@Sun.COM if (ispath) { 489211817SGeorge.Wilson@Sun.COM if (strcmp(value, vd->vdev_path) != 0) { 489311817SGeorge.Wilson@Sun.COM spa_strfree(vd->vdev_path); 489411817SGeorge.Wilson@Sun.COM vd->vdev_path = 
spa_strdup(value); 489511817SGeorge.Wilson@Sun.COM sync = B_TRUE; 489611817SGeorge.Wilson@Sun.COM } 48979425SEric.Schrock@Sun.COM } else { 489811817SGeorge.Wilson@Sun.COM if (vd->vdev_fru == NULL) { 489911817SGeorge.Wilson@Sun.COM vd->vdev_fru = spa_strdup(value); 490011817SGeorge.Wilson@Sun.COM sync = B_TRUE; 490111817SGeorge.Wilson@Sun.COM } else if (strcmp(value, vd->vdev_fru) != 0) { 49029425SEric.Schrock@Sun.COM spa_strfree(vd->vdev_fru); 490311817SGeorge.Wilson@Sun.COM vd->vdev_fru = spa_strdup(value); 490411817SGeorge.Wilson@Sun.COM sync = B_TRUE; 490511817SGeorge.Wilson@Sun.COM } 49069425SEric.Schrock@Sun.COM } 49071354Seschrock 490811817SGeorge.Wilson@Sun.COM return (spa_vdev_state_exit(spa, sync ? vd : NULL, 0)); 49091354Seschrock } 49101354Seschrock 49119425SEric.Schrock@Sun.COM int 49129425SEric.Schrock@Sun.COM spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath) 49139425SEric.Schrock@Sun.COM { 49149425SEric.Schrock@Sun.COM return (spa_vdev_set_common(spa, guid, newpath, B_TRUE)); 49159425SEric.Schrock@Sun.COM } 49169425SEric.Schrock@Sun.COM 49179425SEric.Schrock@Sun.COM int 49189425SEric.Schrock@Sun.COM spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru) 49199425SEric.Schrock@Sun.COM { 49209425SEric.Schrock@Sun.COM return (spa_vdev_set_common(spa, guid, newfru, B_FALSE)); 49219425SEric.Schrock@Sun.COM } 49229425SEric.Schrock@Sun.COM 49231354Seschrock /* 4924789Sahrens * ========================================================================== 492512296SLin.Ling@Sun.COM * SPA Scanning 4926789Sahrens * ========================================================================== 4927789Sahrens */ 4928789Sahrens 49297046Sahrens int 493012296SLin.Ling@Sun.COM spa_scan_stop(spa_t *spa) 4931789Sahrens { 49327754SJeff.Bonwick@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); 493312296SLin.Ling@Sun.COM if (dsl_scan_resilvering(spa->spa_dsl_pool)) 493412296SLin.Ling@Sun.COM return (EBUSY); 493512296SLin.Ling@Sun.COM return 
(dsl_scan_cancel(spa->spa_dsl_pool)); 493612296SLin.Ling@Sun.COM } 493712296SLin.Ling@Sun.COM 493812296SLin.Ling@Sun.COM int 493912296SLin.Ling@Sun.COM spa_scan(spa_t *spa, pool_scan_func_t func) 494012296SLin.Ling@Sun.COM { 494112296SLin.Ling@Sun.COM ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); 494212296SLin.Ling@Sun.COM 494312296SLin.Ling@Sun.COM if (func >= POOL_SCAN_FUNCS || func == POOL_SCAN_NONE) 4944789Sahrens return (ENOTSUP); 4945789Sahrens 4946789Sahrens /* 49477046Sahrens * If a resilver was requested, but there is no DTL on a 49487046Sahrens * writeable leaf device, we have nothing to do. 4949789Sahrens */ 495012296SLin.Ling@Sun.COM if (func == POOL_SCAN_RESILVER && 49517046Sahrens !vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) { 49527046Sahrens spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); 49531544Seschrock return (0); 49541544Seschrock } 4955789Sahrens 495612296SLin.Ling@Sun.COM return (dsl_scan(spa->spa_dsl_pool, func)); 4957789Sahrens } 4958789Sahrens 49591544Seschrock /* 49601544Seschrock * ========================================================================== 49611544Seschrock * SPA async task processing 49621544Seschrock * ========================================================================== 49631544Seschrock */ 49641544Seschrock 49651544Seschrock static void 49664451Seschrock spa_async_remove(spa_t *spa, vdev_t *vd) 4967789Sahrens { 49687361SBrendan.Gregg@Sun.COM if (vd->vdev_remove_wanted) { 496912247SGeorge.Wilson@Sun.COM vd->vdev_remove_wanted = B_FALSE; 497012247SGeorge.Wilson@Sun.COM vd->vdev_delayed_close = B_FALSE; 49717361SBrendan.Gregg@Sun.COM vdev_set_state(vd, B_FALSE, VDEV_STATE_REMOVED, VDEV_AUX_NONE); 497210575SEric.Schrock@Sun.COM 497310575SEric.Schrock@Sun.COM /* 497410575SEric.Schrock@Sun.COM * We want to clear the stats, but we don't want to do a full 497510575SEric.Schrock@Sun.COM * vdev_clear() as that will cause us to throw away 497610575SEric.Schrock@Sun.COM * degraded/faulted state as well as 
attempt to reopen the 497710575SEric.Schrock@Sun.COM * device, all of which is a waste. 497810575SEric.Schrock@Sun.COM */ 497910575SEric.Schrock@Sun.COM vd->vdev_stat.vs_read_errors = 0; 498010575SEric.Schrock@Sun.COM vd->vdev_stat.vs_write_errors = 0; 498110575SEric.Schrock@Sun.COM vd->vdev_stat.vs_checksum_errors = 0; 498210575SEric.Schrock@Sun.COM 49837754SJeff.Bonwick@Sun.COM vdev_state_dirty(vd->vdev_top); 49841544Seschrock } 49857361SBrendan.Gregg@Sun.COM 49867754SJeff.Bonwick@Sun.COM for (int c = 0; c < vd->vdev_children; c++) 49877361SBrendan.Gregg@Sun.COM spa_async_remove(spa, vd->vdev_child[c]); 49881544Seschrock } 49891544Seschrock 49901544Seschrock static void 49917754SJeff.Bonwick@Sun.COM spa_async_probe(spa_t *spa, vdev_t *vd) 49927754SJeff.Bonwick@Sun.COM { 49937754SJeff.Bonwick@Sun.COM if (vd->vdev_probe_wanted) { 499412247SGeorge.Wilson@Sun.COM vd->vdev_probe_wanted = B_FALSE; 49957754SJeff.Bonwick@Sun.COM vdev_reopen(vd); /* vdev_open() does the actual probe */ 49967754SJeff.Bonwick@Sun.COM } 49977754SJeff.Bonwick@Sun.COM 49987754SJeff.Bonwick@Sun.COM for (int c = 0; c < vd->vdev_children; c++) 49997754SJeff.Bonwick@Sun.COM spa_async_probe(spa, vd->vdev_child[c]); 50007754SJeff.Bonwick@Sun.COM } 50017754SJeff.Bonwick@Sun.COM 50027754SJeff.Bonwick@Sun.COM static void 50039816SGeorge.Wilson@Sun.COM spa_async_autoexpand(spa_t *spa, vdev_t *vd) 50049816SGeorge.Wilson@Sun.COM { 50059816SGeorge.Wilson@Sun.COM sysevent_id_t eid; 50069816SGeorge.Wilson@Sun.COM nvlist_t *attr; 50079816SGeorge.Wilson@Sun.COM char *physpath; 50089816SGeorge.Wilson@Sun.COM 50099816SGeorge.Wilson@Sun.COM if (!spa->spa_autoexpand) 50109816SGeorge.Wilson@Sun.COM return; 50119816SGeorge.Wilson@Sun.COM 50129816SGeorge.Wilson@Sun.COM for (int c = 0; c < vd->vdev_children; c++) { 50139816SGeorge.Wilson@Sun.COM vdev_t *cvd = vd->vdev_child[c]; 50149816SGeorge.Wilson@Sun.COM spa_async_autoexpand(spa, cvd); 50159816SGeorge.Wilson@Sun.COM } 50169816SGeorge.Wilson@Sun.COM 
50179816SGeorge.Wilson@Sun.COM if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_physpath == NULL) 50189816SGeorge.Wilson@Sun.COM return; 50199816SGeorge.Wilson@Sun.COM 50209816SGeorge.Wilson@Sun.COM physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 50219816SGeorge.Wilson@Sun.COM (void) snprintf(physpath, MAXPATHLEN, "/devices%s", vd->vdev_physpath); 50229816SGeorge.Wilson@Sun.COM 50239816SGeorge.Wilson@Sun.COM VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0); 50249816SGeorge.Wilson@Sun.COM VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0); 50259816SGeorge.Wilson@Sun.COM 50269816SGeorge.Wilson@Sun.COM (void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS, 50279816SGeorge.Wilson@Sun.COM ESC_DEV_DLE, attr, &eid, DDI_SLEEP); 50289816SGeorge.Wilson@Sun.COM 50299816SGeorge.Wilson@Sun.COM nvlist_free(attr); 50309816SGeorge.Wilson@Sun.COM kmem_free(physpath, MAXPATHLEN); 50319816SGeorge.Wilson@Sun.COM } 50329816SGeorge.Wilson@Sun.COM 50339816SGeorge.Wilson@Sun.COM static void 50341544Seschrock spa_async_thread(spa_t *spa) 50351544Seschrock { 50367754SJeff.Bonwick@Sun.COM int tasks; 50371544Seschrock 50381544Seschrock ASSERT(spa->spa_sync_on); 5039789Sahrens 50401544Seschrock mutex_enter(&spa->spa_async_lock); 50411544Seschrock tasks = spa->spa_async_tasks; 50421544Seschrock spa->spa_async_tasks = 0; 50431544Seschrock mutex_exit(&spa->spa_async_lock); 50441544Seschrock 50451544Seschrock /* 50461635Sbonwick * See if the config needs to be updated. 
50471635Sbonwick */ 50481635Sbonwick if (tasks & SPA_ASYNC_CONFIG_UPDATE) { 504910922SJeff.Bonwick@Sun.COM uint64_t old_space, new_space; 50509816SGeorge.Wilson@Sun.COM 50511635Sbonwick mutex_enter(&spa_namespace_lock); 505210922SJeff.Bonwick@Sun.COM old_space = metaslab_class_get_space(spa_normal_class(spa)); 50531635Sbonwick spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); 505410922SJeff.Bonwick@Sun.COM new_space = metaslab_class_get_space(spa_normal_class(spa)); 50551635Sbonwick mutex_exit(&spa_namespace_lock); 50569816SGeorge.Wilson@Sun.COM 50579816SGeorge.Wilson@Sun.COM /* 50589816SGeorge.Wilson@Sun.COM * If the pool grew as a result of the config update, 50599816SGeorge.Wilson@Sun.COM * then log an internal history event. 50609816SGeorge.Wilson@Sun.COM */ 506110922SJeff.Bonwick@Sun.COM if (new_space != old_space) { 506212296SLin.Ling@Sun.COM spa_history_log_internal(LOG_POOL_VDEV_ONLINE, 506312296SLin.Ling@Sun.COM spa, NULL, 50649946SMark.Musante@Sun.COM "pool '%s' size: %llu(+%llu)", 506510922SJeff.Bonwick@Sun.COM spa_name(spa), new_space, new_space - old_space); 50669816SGeorge.Wilson@Sun.COM } 50671635Sbonwick } 50681635Sbonwick 50691635Sbonwick /* 50704451Seschrock * See if any devices need to be marked REMOVED. 
50711544Seschrock */ 50727754SJeff.Bonwick@Sun.COM if (tasks & SPA_ASYNC_REMOVE) { 507310685SGeorge.Wilson@Sun.COM spa_vdev_state_enter(spa, SCL_NONE); 50744451Seschrock spa_async_remove(spa, spa->spa_root_vdev); 50757754SJeff.Bonwick@Sun.COM for (int i = 0; i < spa->spa_l2cache.sav_count; i++) 50767361SBrendan.Gregg@Sun.COM spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i]); 50777754SJeff.Bonwick@Sun.COM for (int i = 0; i < spa->spa_spares.sav_count; i++) 50787361SBrendan.Gregg@Sun.COM spa_async_remove(spa, spa->spa_spares.sav_vdevs[i]); 50797754SJeff.Bonwick@Sun.COM (void) spa_vdev_state_exit(spa, NULL, 0); 50807754SJeff.Bonwick@Sun.COM } 50817754SJeff.Bonwick@Sun.COM 50829816SGeorge.Wilson@Sun.COM if ((tasks & SPA_ASYNC_AUTOEXPAND) && !spa_suspended(spa)) { 50839816SGeorge.Wilson@Sun.COM spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 50849816SGeorge.Wilson@Sun.COM spa_async_autoexpand(spa, spa->spa_root_vdev); 50859816SGeorge.Wilson@Sun.COM spa_config_exit(spa, SCL_CONFIG, FTAG); 50869816SGeorge.Wilson@Sun.COM } 50879816SGeorge.Wilson@Sun.COM 50887754SJeff.Bonwick@Sun.COM /* 50897754SJeff.Bonwick@Sun.COM * See if any devices need to be probed. 50907754SJeff.Bonwick@Sun.COM */ 50917754SJeff.Bonwick@Sun.COM if (tasks & SPA_ASYNC_PROBE) { 509210685SGeorge.Wilson@Sun.COM spa_vdev_state_enter(spa, SCL_NONE); 50937754SJeff.Bonwick@Sun.COM spa_async_probe(spa, spa->spa_root_vdev); 50947754SJeff.Bonwick@Sun.COM (void) spa_vdev_state_exit(spa, NULL, 0); 50954451Seschrock } 50961544Seschrock 50971544Seschrock /* 50981544Seschrock * If any devices are done replacing, detach them. 50991544Seschrock */ 51004451Seschrock if (tasks & SPA_ASYNC_RESILVER_DONE) 51014451Seschrock spa_vdev_resilver_done(spa); 5102789Sahrens 51031544Seschrock /* 51041544Seschrock * Kick off a resilver. 
51051544Seschrock */ 51067046Sahrens if (tasks & SPA_ASYNC_RESILVER) 510712296SLin.Ling@Sun.COM dsl_resilver_restart(spa->spa_dsl_pool, 0); 51081544Seschrock 51091544Seschrock /* 51101544Seschrock * Let the world know that we're done. 51111544Seschrock */ 51121544Seschrock mutex_enter(&spa->spa_async_lock); 51131544Seschrock spa->spa_async_thread = NULL; 51141544Seschrock cv_broadcast(&spa->spa_async_cv); 51151544Seschrock mutex_exit(&spa->spa_async_lock); 51161544Seschrock thread_exit(); 51171544Seschrock } 51181544Seschrock 51191544Seschrock void 51201544Seschrock spa_async_suspend(spa_t *spa) 51211544Seschrock { 51221544Seschrock mutex_enter(&spa->spa_async_lock); 51231544Seschrock spa->spa_async_suspended++; 51241544Seschrock while (spa->spa_async_thread != NULL) 51251544Seschrock cv_wait(&spa->spa_async_cv, &spa->spa_async_lock); 51261544Seschrock mutex_exit(&spa->spa_async_lock); 51271544Seschrock } 51281544Seschrock 51291544Seschrock void 51301544Seschrock spa_async_resume(spa_t *spa) 51311544Seschrock { 51321544Seschrock mutex_enter(&spa->spa_async_lock); 51331544Seschrock ASSERT(spa->spa_async_suspended != 0); 51341544Seschrock spa->spa_async_suspended--; 51351544Seschrock mutex_exit(&spa->spa_async_lock); 51361544Seschrock } 51371544Seschrock 51381544Seschrock static void 51391544Seschrock spa_async_dispatch(spa_t *spa) 51401544Seschrock { 51411544Seschrock mutex_enter(&spa->spa_async_lock); 51421544Seschrock if (spa->spa_async_tasks && !spa->spa_async_suspended && 51431635Sbonwick spa->spa_async_thread == NULL && 51441635Sbonwick rootdir != NULL && !vn_is_readonly(rootdir)) 51451544Seschrock spa->spa_async_thread = thread_create(NULL, 0, 51461544Seschrock spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri); 51471544Seschrock mutex_exit(&spa->spa_async_lock); 51481544Seschrock } 51491544Seschrock 51501544Seschrock void 51511544Seschrock spa_async_request(spa_t *spa, int task) 51521544Seschrock { 515312296SLin.Ling@Sun.COM zfs_dbgmsg("spa=%s async request 
task=%u", spa->spa_name, task); 51541544Seschrock mutex_enter(&spa->spa_async_lock); 51551544Seschrock spa->spa_async_tasks |= task; 51561544Seschrock mutex_exit(&spa->spa_async_lock); 5157789Sahrens } 5158789Sahrens 5159789Sahrens /* 5160789Sahrens * ========================================================================== 5161789Sahrens * SPA syncing routines 5162789Sahrens * ========================================================================== 5163789Sahrens */ 516412470SMatthew.Ahrens@Sun.COM 516512470SMatthew.Ahrens@Sun.COM static int 516612470SMatthew.Ahrens@Sun.COM bpobj_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) 5167789Sahrens { 516812470SMatthew.Ahrens@Sun.COM bpobj_t *bpo = arg; 516912470SMatthew.Ahrens@Sun.COM bpobj_enqueue(bpo, bp, tx); 517012470SMatthew.Ahrens@Sun.COM return (0); 517110922SJeff.Bonwick@Sun.COM } 517210922SJeff.Bonwick@Sun.COM 517312470SMatthew.Ahrens@Sun.COM static int 517412470SMatthew.Ahrens@Sun.COM spa_free_sync_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) 517510922SJeff.Bonwick@Sun.COM { 517610922SJeff.Bonwick@Sun.COM zio_t *zio = arg; 517710922SJeff.Bonwick@Sun.COM 517810922SJeff.Bonwick@Sun.COM zio_nowait(zio_free_sync(zio, zio->io_spa, dmu_tx_get_txg(tx), bp, 517910922SJeff.Bonwick@Sun.COM zio->io_flags)); 518012470SMatthew.Ahrens@Sun.COM return (0); 5181789Sahrens } 5182789Sahrens 5183789Sahrens static void 51842082Seschrock spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx) 51852082Seschrock { 51862082Seschrock char *packed = NULL; 51877497STim.Haley@Sun.COM size_t bufsize; 51882082Seschrock size_t nvsize = 0; 51892082Seschrock dmu_buf_t *db; 51902082Seschrock 51912082Seschrock VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0); 51922082Seschrock 51937497STim.Haley@Sun.COM /* 51947497STim.Haley@Sun.COM * Write full (SPA_CONFIG_BLOCKSIZE) blocks of configuration 51957497STim.Haley@Sun.COM * information. 
This avoids the dbuf_will_dirty() path and 51967497STim.Haley@Sun.COM * saves us a pre-read to get data we don't actually care about. 51977497STim.Haley@Sun.COM */ 51987497STim.Haley@Sun.COM bufsize = P2ROUNDUP(nvsize, SPA_CONFIG_BLOCKSIZE); 51997497STim.Haley@Sun.COM packed = kmem_alloc(bufsize, KM_SLEEP); 52002082Seschrock 52012082Seschrock VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR, 52022082Seschrock KM_SLEEP) == 0); 52037497STim.Haley@Sun.COM bzero(packed + nvsize, bufsize - nvsize); 52047497STim.Haley@Sun.COM 52057497STim.Haley@Sun.COM dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx); 52067497STim.Haley@Sun.COM 52077497STim.Haley@Sun.COM kmem_free(packed, bufsize); 52082082Seschrock 52092082Seschrock VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); 52102082Seschrock dmu_buf_will_dirty(db, tx); 52112082Seschrock *(uint64_t *)db->db_data = nvsize; 52122082Seschrock dmu_buf_rele(db, FTAG); 52132082Seschrock } 52142082Seschrock 52152082Seschrock static void 52165450Sbrendan spa_sync_aux_dev(spa_t *spa, spa_aux_vdev_t *sav, dmu_tx_t *tx, 52175450Sbrendan const char *config, const char *entry) 52182082Seschrock { 52192082Seschrock nvlist_t *nvroot; 52205450Sbrendan nvlist_t **list; 52212082Seschrock int i; 52222082Seschrock 52235450Sbrendan if (!sav->sav_sync) 52242082Seschrock return; 52252082Seschrock 52262082Seschrock /* 52275450Sbrendan * Update the MOS nvlist describing the list of available devices. 52285450Sbrendan * spa_validate_aux() will have already made sure this nvlist is 52294451Seschrock * valid and the vdevs are labeled appropriately. 
52302082Seschrock */ 52315450Sbrendan if (sav->sav_object == 0) { 52325450Sbrendan sav->sav_object = dmu_object_alloc(spa->spa_meta_objset, 52335450Sbrendan DMU_OT_PACKED_NVLIST, 1 << 14, DMU_OT_PACKED_NVLIST_SIZE, 52345450Sbrendan sizeof (uint64_t), tx); 52352082Seschrock VERIFY(zap_update(spa->spa_meta_objset, 52365450Sbrendan DMU_POOL_DIRECTORY_OBJECT, entry, sizeof (uint64_t), 1, 52375450Sbrendan &sav->sav_object, tx) == 0); 52382082Seschrock } 52392082Seschrock 52402082Seschrock VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); 52415450Sbrendan if (sav->sav_count == 0) { 52425450Sbrendan VERIFY(nvlist_add_nvlist_array(nvroot, config, NULL, 0) == 0); 52432082Seschrock } else { 52445450Sbrendan list = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); 52455450Sbrendan for (i = 0; i < sav->sav_count; i++) 52465450Sbrendan list[i] = vdev_config_generate(spa, sav->sav_vdevs[i], 524712296SLin.Ling@Sun.COM B_FALSE, VDEV_CONFIG_L2CACHE); 52485450Sbrendan VERIFY(nvlist_add_nvlist_array(nvroot, config, list, 52495450Sbrendan sav->sav_count) == 0); 52505450Sbrendan for (i = 0; i < sav->sav_count; i++) 52515450Sbrendan nvlist_free(list[i]); 52525450Sbrendan kmem_free(list, sav->sav_count * sizeof (void *)); 52532082Seschrock } 52542082Seschrock 52555450Sbrendan spa_sync_nvlist(spa, sav->sav_object, nvroot, tx); 52562926Sek110237 nvlist_free(nvroot); 52572082Seschrock 52585450Sbrendan sav->sav_sync = B_FALSE; 52592082Seschrock } 52602082Seschrock 52612082Seschrock static void 5262789Sahrens spa_sync_config_object(spa_t *spa, dmu_tx_t *tx) 5263789Sahrens { 5264789Sahrens nvlist_t *config; 5265789Sahrens 52667754SJeff.Bonwick@Sun.COM if (list_is_empty(&spa->spa_config_dirty_list)) 5267789Sahrens return; 5268789Sahrens 52697754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 52707754SJeff.Bonwick@Sun.COM 52717754SJeff.Bonwick@Sun.COM config = spa_config_generate(spa, spa->spa_root_vdev, 52727754SJeff.Bonwick@Sun.COM dmu_tx_get_txg(tx), 
B_FALSE); 52737754SJeff.Bonwick@Sun.COM 52747754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_STATE, FTAG); 5275789Sahrens 52761635Sbonwick if (spa->spa_config_syncing) 52771635Sbonwick nvlist_free(spa->spa_config_syncing); 52781635Sbonwick spa->spa_config_syncing = config; 5279789Sahrens 52802082Seschrock spa_sync_nvlist(spa, spa->spa_config_object, config, tx); 5281789Sahrens } 5282789Sahrens 52835094Slling /* 52845094Slling * Set zpool properties. 52855094Slling */ 52863912Slling static void 528712296SLin.Ling@Sun.COM spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx) 52883912Slling { 52893912Slling spa_t *spa = arg1; 52905094Slling objset_t *mos = spa->spa_meta_objset; 52913912Slling nvlist_t *nvp = arg2; 52925094Slling nvpair_t *elem; 52934451Seschrock uint64_t intval; 52946643Seschrock char *strval; 52955094Slling zpool_prop_t prop; 52965094Slling const char *propname; 52975094Slling zprop_type_t proptype; 52985094Slling 52997754SJeff.Bonwick@Sun.COM mutex_enter(&spa->spa_props_lock); 53007754SJeff.Bonwick@Sun.COM 53015094Slling elem = NULL; 53025094Slling while ((elem = nvlist_next_nvpair(nvp, elem))) { 53035094Slling switch (prop = zpool_name_to_prop(nvpair_name(elem))) { 53045094Slling case ZPOOL_PROP_VERSION: 53055094Slling /* 53065094Slling * Only set version for non-zpool-creation cases 53075094Slling * (set/import). spa_create() needs special care 53085094Slling * for version setting. 53095094Slling */ 53105094Slling if (tx->tx_txg != TXG_INITIAL) { 53115094Slling VERIFY(nvpair_value_uint64(elem, 53125094Slling &intval) == 0); 53135094Slling ASSERT(intval <= SPA_VERSION); 53145094Slling ASSERT(intval >= spa_version(spa)); 53155094Slling spa->spa_uberblock.ub_version = intval; 53165094Slling vdev_config_dirty(spa->spa_root_vdev); 53175094Slling } 53185094Slling break; 53195094Slling 53205094Slling case ZPOOL_PROP_ALTROOT: 53215094Slling /* 53225094Slling * 'altroot' is a non-persistent property. 
It should 53235094Slling * have been set temporarily at creation or import time. 53245094Slling */ 53255094Slling ASSERT(spa->spa_root != NULL); 53265094Slling break; 53275094Slling 5328*13049SGeorge.Wilson@Sun.COM case ZPOOL_PROP_READONLY: 53295363Seschrock case ZPOOL_PROP_CACHEFILE: 53305094Slling /* 5331*13049SGeorge.Wilson@Sun.COM * 'readonly' and 'cachefile' are also non-persisitent 5332*13049SGeorge.Wilson@Sun.COM * properties. 53335094Slling */ 53344543Smarks break; 53355094Slling default: 53365094Slling /* 53375094Slling * Set pool property values in the poolprops mos object. 53385094Slling */ 53395094Slling if (spa->spa_pool_props_object == 0) { 53405094Slling VERIFY((spa->spa_pool_props_object = 53415094Slling zap_create(mos, DMU_OT_POOL_PROPS, 53425094Slling DMU_OT_NONE, 0, tx)) > 0); 53435094Slling 53445094Slling VERIFY(zap_update(mos, 53455094Slling DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS, 53465094Slling 8, 1, &spa->spa_pool_props_object, tx) 53475094Slling == 0); 53485094Slling } 53495094Slling 53505094Slling /* normalize the property name */ 53515094Slling propname = zpool_prop_to_name(prop); 53525094Slling proptype = zpool_prop_get_type(prop); 53535094Slling 53545094Slling if (nvpair_type(elem) == DATA_TYPE_STRING) { 53555094Slling ASSERT(proptype == PROP_TYPE_STRING); 53565094Slling VERIFY(nvpair_value_string(elem, &strval) == 0); 53575094Slling VERIFY(zap_update(mos, 53585094Slling spa->spa_pool_props_object, propname, 53595094Slling 1, strlen(strval) + 1, strval, tx) == 0); 53605094Slling 53615094Slling } else if (nvpair_type(elem) == DATA_TYPE_UINT64) { 53625094Slling VERIFY(nvpair_value_uint64(elem, &intval) == 0); 53635094Slling 53645094Slling if (proptype == PROP_TYPE_INDEX) { 53655094Slling const char *unused; 53665094Slling VERIFY(zpool_prop_index_to_string( 53675094Slling prop, intval, &unused) == 0); 53685094Slling } 53695094Slling VERIFY(zap_update(mos, 53705094Slling spa->spa_pool_props_object, propname, 53715094Slling 8, 1, &intval, 
tx) == 0); 53725094Slling } else { 53735094Slling ASSERT(0); /* not allowed */ 53745094Slling } 53755094Slling 53765329Sgw25295 switch (prop) { 53775329Sgw25295 case ZPOOL_PROP_DELEGATION: 53785094Slling spa->spa_delegation = intval; 53795329Sgw25295 break; 53805329Sgw25295 case ZPOOL_PROP_BOOTFS: 53815094Slling spa->spa_bootfs = intval; 53825329Sgw25295 break; 53835329Sgw25295 case ZPOOL_PROP_FAILUREMODE: 53845329Sgw25295 spa->spa_failmode = intval; 53855329Sgw25295 break; 53869816SGeorge.Wilson@Sun.COM case ZPOOL_PROP_AUTOEXPAND: 53879816SGeorge.Wilson@Sun.COM spa->spa_autoexpand = intval; 538812318SEric.Taylor@Sun.COM if (tx->tx_txg != TXG_INITIAL) 538912318SEric.Taylor@Sun.COM spa_async_request(spa, 539012318SEric.Taylor@Sun.COM SPA_ASYNC_AUTOEXPAND); 53919816SGeorge.Wilson@Sun.COM break; 539210922SJeff.Bonwick@Sun.COM case ZPOOL_PROP_DEDUPDITTO: 539310922SJeff.Bonwick@Sun.COM spa->spa_dedup_ditto = intval; 539410922SJeff.Bonwick@Sun.COM break; 53955329Sgw25295 default: 53965329Sgw25295 break; 53975329Sgw25295 } 53983912Slling } 53995094Slling 54005094Slling /* log internal history if this is not a zpool create */ 54015094Slling if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY && 54025094Slling tx->tx_txg != TXG_INITIAL) { 540312296SLin.Ling@Sun.COM spa_history_log_internal(LOG_POOL_PROPSET, 540412296SLin.Ling@Sun.COM spa, tx, "%s %lld %s", 54057754SJeff.Bonwick@Sun.COM nvpair_name(elem), intval, spa_name(spa)); 54065094Slling } 54073912Slling } 54087754SJeff.Bonwick@Sun.COM 54097754SJeff.Bonwick@Sun.COM mutex_exit(&spa->spa_props_lock); 54103912Slling } 54113912Slling 5412789Sahrens /* 541312470SMatthew.Ahrens@Sun.COM * Perform one-time upgrade on-disk changes. spa_version() does not 541412470SMatthew.Ahrens@Sun.COM * reflect the new version this txg, so there must be no changes this 541512470SMatthew.Ahrens@Sun.COM * txg to anything that the upgrade code depends on after it executes. 
541612470SMatthew.Ahrens@Sun.COM * Therefore this must be called after dsl_pool_sync() does the sync 541712470SMatthew.Ahrens@Sun.COM * tasks. 541812470SMatthew.Ahrens@Sun.COM */ 541912470SMatthew.Ahrens@Sun.COM static void 542012470SMatthew.Ahrens@Sun.COM spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx) 542112470SMatthew.Ahrens@Sun.COM { 542212470SMatthew.Ahrens@Sun.COM dsl_pool_t *dp = spa->spa_dsl_pool; 542312470SMatthew.Ahrens@Sun.COM 542412470SMatthew.Ahrens@Sun.COM ASSERT(spa->spa_sync_pass == 1); 542512470SMatthew.Ahrens@Sun.COM 542612470SMatthew.Ahrens@Sun.COM if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN && 542712470SMatthew.Ahrens@Sun.COM spa->spa_uberblock.ub_version >= SPA_VERSION_ORIGIN) { 542812470SMatthew.Ahrens@Sun.COM dsl_pool_create_origin(dp, tx); 542912470SMatthew.Ahrens@Sun.COM 543012470SMatthew.Ahrens@Sun.COM /* Keeping the origin open increases spa_minref */ 543112470SMatthew.Ahrens@Sun.COM spa->spa_minref += 3; 543212470SMatthew.Ahrens@Sun.COM } 543312470SMatthew.Ahrens@Sun.COM 543412470SMatthew.Ahrens@Sun.COM if (spa->spa_ubsync.ub_version < SPA_VERSION_NEXT_CLONES && 543512470SMatthew.Ahrens@Sun.COM spa->spa_uberblock.ub_version >= SPA_VERSION_NEXT_CLONES) { 543612470SMatthew.Ahrens@Sun.COM dsl_pool_upgrade_clones(dp, tx); 543712470SMatthew.Ahrens@Sun.COM } 543812470SMatthew.Ahrens@Sun.COM 543912470SMatthew.Ahrens@Sun.COM if (spa->spa_ubsync.ub_version < SPA_VERSION_DIR_CLONES && 544012470SMatthew.Ahrens@Sun.COM spa->spa_uberblock.ub_version >= SPA_VERSION_DIR_CLONES) { 544112470SMatthew.Ahrens@Sun.COM dsl_pool_upgrade_dir_clones(dp, tx); 544212470SMatthew.Ahrens@Sun.COM 544312470SMatthew.Ahrens@Sun.COM /* Keeping the freedir open increases spa_minref */ 544412470SMatthew.Ahrens@Sun.COM spa->spa_minref += 3; 544512470SMatthew.Ahrens@Sun.COM } 544612470SMatthew.Ahrens@Sun.COM } 544712470SMatthew.Ahrens@Sun.COM 544812470SMatthew.Ahrens@Sun.COM /* 5449789Sahrens * Sync the specified transaction group. 
New blocks may be dirtied as 5450789Sahrens * part of the process, so we iterate until it converges. 5451789Sahrens */ 5452789Sahrens void 5453789Sahrens spa_sync(spa_t *spa, uint64_t txg) 5454789Sahrens { 5455789Sahrens dsl_pool_t *dp = spa->spa_dsl_pool; 5456789Sahrens objset_t *mos = spa->spa_meta_objset; 545712470SMatthew.Ahrens@Sun.COM bpobj_t *defer_bpo = &spa->spa_deferred_bpobj; 545810922SJeff.Bonwick@Sun.COM bplist_t *free_bpl = &spa->spa_free_bplist[txg & TXG_MASK]; 54591635Sbonwick vdev_t *rvd = spa->spa_root_vdev; 5460789Sahrens vdev_t *vd; 5461789Sahrens dmu_tx_t *tx; 54627754SJeff.Bonwick@Sun.COM int error; 5463789Sahrens 5464*13049SGeorge.Wilson@Sun.COM VERIFY(spa_writeable(spa)); 5465*13049SGeorge.Wilson@Sun.COM 5466789Sahrens /* 5467789Sahrens * Lock out configuration changes. 5468789Sahrens */ 54697754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 5470789Sahrens 5471789Sahrens spa->spa_syncing_txg = txg; 5472789Sahrens spa->spa_sync_pass = 0; 5473789Sahrens 54747754SJeff.Bonwick@Sun.COM /* 54757754SJeff.Bonwick@Sun.COM * If there are any pending vdev state changes, convert them 54767754SJeff.Bonwick@Sun.COM * into config changes that go out with this transaction group. 54777754SJeff.Bonwick@Sun.COM */ 54787754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 54798241SJeff.Bonwick@Sun.COM while (list_head(&spa->spa_state_dirty_list) != NULL) { 54808241SJeff.Bonwick@Sun.COM /* 54818241SJeff.Bonwick@Sun.COM * We need the write lock here because, for aux vdevs, 54828241SJeff.Bonwick@Sun.COM * calling vdev_config_dirty() modifies sav_config. 54838241SJeff.Bonwick@Sun.COM * This is ugly and will become unnecessary when we 54848241SJeff.Bonwick@Sun.COM * eliminate the aux vdev wart by integrating all vdevs 54858241SJeff.Bonwick@Sun.COM * into the root vdev tree. 
54868241SJeff.Bonwick@Sun.COM */ 54878241SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); 54888241SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_WRITER); 54898241SJeff.Bonwick@Sun.COM while ((vd = list_head(&spa->spa_state_dirty_list)) != NULL) { 54908241SJeff.Bonwick@Sun.COM vdev_state_clean(vd); 54918241SJeff.Bonwick@Sun.COM vdev_config_dirty(vd); 54928241SJeff.Bonwick@Sun.COM } 54938241SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); 54948241SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER); 54957754SJeff.Bonwick@Sun.COM } 54967754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_STATE, FTAG); 54977754SJeff.Bonwick@Sun.COM 54982082Seschrock tx = dmu_tx_create_assigned(dp, txg); 54992082Seschrock 55002082Seschrock /* 55014577Sahrens * If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg, 55022082Seschrock * set spa_deflate if we have no raid-z vdevs. 55032082Seschrock */ 55044577Sahrens if (spa->spa_ubsync.ub_version < SPA_VERSION_RAIDZ_DEFLATE && 55054577Sahrens spa->spa_uberblock.ub_version >= SPA_VERSION_RAIDZ_DEFLATE) { 55062082Seschrock int i; 55072082Seschrock 55082082Seschrock for (i = 0; i < rvd->vdev_children; i++) { 55092082Seschrock vd = rvd->vdev_child[i]; 55102082Seschrock if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE) 55112082Seschrock break; 55122082Seschrock } 55132082Seschrock if (i == rvd->vdev_children) { 55142082Seschrock spa->spa_deflate = TRUE; 55152082Seschrock VERIFY(0 == zap_add(spa->spa_meta_objset, 55162082Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 55172082Seschrock sizeof (uint64_t), 1, &spa->spa_deflate, tx)); 55182082Seschrock } 55192082Seschrock } 55202082Seschrock 5521789Sahrens /* 552212296SLin.Ling@Sun.COM * If anything has changed in this txg, or if someone is waiting 552312296SLin.Ling@Sun.COM * for this txg to sync (eg, spa_vdev_remove()), push the 552412296SLin.Ling@Sun.COM * deferred frees from 
the previous txg. If not, leave them 552512296SLin.Ling@Sun.COM * alone so that we don't generate work on an otherwise idle 552612296SLin.Ling@Sun.COM * system. 5527789Sahrens */ 5528789Sahrens if (!txg_list_empty(&dp->dp_dirty_datasets, txg) || 55292329Sek110237 !txg_list_empty(&dp->dp_dirty_dirs, txg) || 553012296SLin.Ling@Sun.COM !txg_list_empty(&dp->dp_sync_tasks, txg) || 553112470SMatthew.Ahrens@Sun.COM ((dsl_scan_active(dp->dp_scan) || 553212470SMatthew.Ahrens@Sun.COM txg_sync_waiting(dp)) && !spa_shutting_down(spa))) { 553312470SMatthew.Ahrens@Sun.COM zio_t *zio = zio_root(spa, NULL, NULL, 0); 553412470SMatthew.Ahrens@Sun.COM VERIFY3U(bpobj_iterate(defer_bpo, 553512470SMatthew.Ahrens@Sun.COM spa_free_sync_cb, zio, tx), ==, 0); 553612470SMatthew.Ahrens@Sun.COM VERIFY3U(zio_wait(zio), ==, 0); 553712470SMatthew.Ahrens@Sun.COM } 5538789Sahrens 5539789Sahrens /* 5540789Sahrens * Iterate to convergence. 5541789Sahrens */ 5542789Sahrens do { 554310922SJeff.Bonwick@Sun.COM int pass = ++spa->spa_sync_pass; 5544789Sahrens 5545789Sahrens spa_sync_config_object(spa, tx); 55465450Sbrendan spa_sync_aux_dev(spa, &spa->spa_spares, tx, 55475450Sbrendan ZPOOL_CONFIG_SPARES, DMU_POOL_SPARES); 55485450Sbrendan spa_sync_aux_dev(spa, &spa->spa_l2cache, tx, 55495450Sbrendan ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE); 55501544Seschrock spa_errlog_sync(spa, txg); 5551789Sahrens dsl_pool_sync(dp, txg); 5552789Sahrens 555310922SJeff.Bonwick@Sun.COM if (pass <= SYNC_PASS_DEFERRED_FREE) { 555410922SJeff.Bonwick@Sun.COM zio_t *zio = zio_root(spa, NULL, NULL, 0); 555512470SMatthew.Ahrens@Sun.COM bplist_iterate(free_bpl, spa_free_sync_cb, 555612470SMatthew.Ahrens@Sun.COM zio, tx); 555710922SJeff.Bonwick@Sun.COM VERIFY(zio_wait(zio) == 0); 555810922SJeff.Bonwick@Sun.COM } else { 555912470SMatthew.Ahrens@Sun.COM bplist_iterate(free_bpl, bpobj_enqueue_cb, 556012470SMatthew.Ahrens@Sun.COM defer_bpo, tx); 5561789Sahrens } 5562789Sahrens 556310922SJeff.Bonwick@Sun.COM ddt_sync(spa, txg); 
556412296SLin.Ling@Sun.COM dsl_scan_sync(dp, tx); 556511619SGeorge.Wilson@Sun.COM 556610922SJeff.Bonwick@Sun.COM while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg)) 556710922SJeff.Bonwick@Sun.COM vdev_sync(vd, txg); 556810922SJeff.Bonwick@Sun.COM 556912470SMatthew.Ahrens@Sun.COM if (pass == 1) 557012470SMatthew.Ahrens@Sun.COM spa_sync_upgrades(spa, tx); 557112470SMatthew.Ahrens@Sun.COM 557210922SJeff.Bonwick@Sun.COM } while (dmu_objset_is_dirty(mos, txg)); 557310922SJeff.Bonwick@Sun.COM 5574789Sahrens /* 5575789Sahrens * Rewrite the vdev configuration (which includes the uberblock) 5576789Sahrens * to commit the transaction group. 55771635Sbonwick * 55785688Sbonwick * If there are no dirty vdevs, we sync the uberblock to a few 55795688Sbonwick * random top-level vdevs that are known to be visible in the 55807754SJeff.Bonwick@Sun.COM * config cache (see spa_vdev_add() for a complete description). 55817754SJeff.Bonwick@Sun.COM * If there *are* dirty vdevs, sync the uberblock to all vdevs. 5582789Sahrens */ 55837754SJeff.Bonwick@Sun.COM for (;;) { 55847754SJeff.Bonwick@Sun.COM /* 55857754SJeff.Bonwick@Sun.COM * We hold SCL_STATE to prevent vdev open/close/etc. 55867754SJeff.Bonwick@Sun.COM * while we're attempting to write the vdev labels. 
55877754SJeff.Bonwick@Sun.COM */ 55887754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 55897754SJeff.Bonwick@Sun.COM 55907754SJeff.Bonwick@Sun.COM if (list_is_empty(&spa->spa_config_dirty_list)) { 55917754SJeff.Bonwick@Sun.COM vdev_t *svd[SPA_DVAS_PER_BP]; 55927754SJeff.Bonwick@Sun.COM int svdcount = 0; 55937754SJeff.Bonwick@Sun.COM int children = rvd->vdev_children; 55947754SJeff.Bonwick@Sun.COM int c0 = spa_get_random(children); 55959816SGeorge.Wilson@Sun.COM 55969816SGeorge.Wilson@Sun.COM for (int c = 0; c < children; c++) { 55977754SJeff.Bonwick@Sun.COM vd = rvd->vdev_child[(c0 + c) % children]; 55987754SJeff.Bonwick@Sun.COM if (vd->vdev_ms_array == 0 || vd->vdev_islog) 55997754SJeff.Bonwick@Sun.COM continue; 56007754SJeff.Bonwick@Sun.COM svd[svdcount++] = vd; 56017754SJeff.Bonwick@Sun.COM if (svdcount == SPA_DVAS_PER_BP) 56027754SJeff.Bonwick@Sun.COM break; 56037754SJeff.Bonwick@Sun.COM } 56049725SEric.Schrock@Sun.COM error = vdev_config_sync(svd, svdcount, txg, B_FALSE); 56059725SEric.Schrock@Sun.COM if (error != 0) 56069725SEric.Schrock@Sun.COM error = vdev_config_sync(svd, svdcount, txg, 56079725SEric.Schrock@Sun.COM B_TRUE); 56087754SJeff.Bonwick@Sun.COM } else { 56097754SJeff.Bonwick@Sun.COM error = vdev_config_sync(rvd->vdev_child, 56109725SEric.Schrock@Sun.COM rvd->vdev_children, txg, B_FALSE); 56119725SEric.Schrock@Sun.COM if (error != 0) 56129725SEric.Schrock@Sun.COM error = vdev_config_sync(rvd->vdev_child, 56139725SEric.Schrock@Sun.COM rvd->vdev_children, txg, B_TRUE); 56141635Sbonwick } 56157754SJeff.Bonwick@Sun.COM 56167754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_STATE, FTAG); 56177754SJeff.Bonwick@Sun.COM 56187754SJeff.Bonwick@Sun.COM if (error == 0) 56197754SJeff.Bonwick@Sun.COM break; 56207754SJeff.Bonwick@Sun.COM zio_suspend(spa, NULL); 56217754SJeff.Bonwick@Sun.COM zio_resume_wait(spa); 56221635Sbonwick } 56232082Seschrock dmu_tx_commit(tx); 56242082Seschrock 56251635Sbonwick /* 56261635Sbonwick * Clear the 
dirty config list. 56271635Sbonwick */ 56287754SJeff.Bonwick@Sun.COM while ((vd = list_head(&spa->spa_config_dirty_list)) != NULL) 56291635Sbonwick vdev_config_clean(vd); 56301635Sbonwick 56311635Sbonwick /* 56321635Sbonwick * Now that the new config has synced transactionally, 56331635Sbonwick * let it become visible to the config cache. 56341635Sbonwick */ 56351635Sbonwick if (spa->spa_config_syncing != NULL) { 56361635Sbonwick spa_config_set(spa, spa->spa_config_syncing); 56371635Sbonwick spa->spa_config_txg = txg; 56381635Sbonwick spa->spa_config_syncing = NULL; 56391635Sbonwick } 5640789Sahrens 5641789Sahrens spa->spa_ubsync = spa->spa_uberblock; 5642789Sahrens 564310922SJeff.Bonwick@Sun.COM dsl_pool_sync_done(dp, txg); 5644789Sahrens 5645789Sahrens /* 5646789Sahrens * Update usable space statistics. 5647789Sahrens */ 5648789Sahrens while (vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg))) 5649789Sahrens vdev_sync_done(vd, txg); 5650789Sahrens 565110956SGeorge.Wilson@Sun.COM spa_update_dspace(spa); 565210956SGeorge.Wilson@Sun.COM 5653789Sahrens /* 5654789Sahrens * It had better be the case that we didn't dirty anything 56552082Seschrock * since vdev_config_sync(). 5656789Sahrens */ 5657789Sahrens ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg)); 5658789Sahrens ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg)); 5659789Sahrens ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg)); 566010922SJeff.Bonwick@Sun.COM 566110922SJeff.Bonwick@Sun.COM spa->spa_sync_pass = 0; 5662789Sahrens 56637754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_CONFIG, FTAG); 56641544Seschrock 566510921STim.Haley@Sun.COM spa_handle_ignored_writes(spa); 566610921STim.Haley@Sun.COM 56671544Seschrock /* 56681544Seschrock * If any async tasks have been requested, kick them off. 56691544Seschrock */ 56701544Seschrock spa_async_dispatch(spa); 5671789Sahrens } 5672789Sahrens 5673789Sahrens /* 5674789Sahrens * Sync all pools. 
We don't want to hold the namespace lock across these 5675789Sahrens * operations, so we take a reference on the spa_t and drop the lock during the 5676789Sahrens * sync. 5677789Sahrens */ 5678789Sahrens void 5679789Sahrens spa_sync_allpools(void) 5680789Sahrens { 5681789Sahrens spa_t *spa = NULL; 5682789Sahrens mutex_enter(&spa_namespace_lock); 5683789Sahrens while ((spa = spa_next(spa)) != NULL) { 5684*13049SGeorge.Wilson@Sun.COM if (spa_state(spa) != POOL_STATE_ACTIVE || 5685*13049SGeorge.Wilson@Sun.COM !spa_writeable(spa) || spa_suspended(spa)) 5686789Sahrens continue; 5687789Sahrens spa_open_ref(spa, FTAG); 5688789Sahrens mutex_exit(&spa_namespace_lock); 5689789Sahrens txg_wait_synced(spa_get_dsl(spa), 0); 5690789Sahrens mutex_enter(&spa_namespace_lock); 5691789Sahrens spa_close(spa, FTAG); 5692789Sahrens } 5693789Sahrens mutex_exit(&spa_namespace_lock); 5694789Sahrens } 5695789Sahrens 5696789Sahrens /* 5697789Sahrens * ========================================================================== 5698789Sahrens * Miscellaneous routines 5699789Sahrens * ========================================================================== 5700789Sahrens */ 5701789Sahrens 5702789Sahrens /* 5703789Sahrens * Remove all pools in the system. 5704789Sahrens */ 5705789Sahrens void 5706789Sahrens spa_evict_all(void) 5707789Sahrens { 5708789Sahrens spa_t *spa; 5709789Sahrens 5710789Sahrens /* 5711789Sahrens * Remove all cached state. All pools should be closed now, 5712789Sahrens * so every spa in the AVL tree should be unreferenced. 5713789Sahrens */ 5714789Sahrens mutex_enter(&spa_namespace_lock); 5715789Sahrens while ((spa = spa_next(NULL)) != NULL) { 5716789Sahrens /* 57171544Seschrock * Stop async tasks. The async thread may need to detach 57181544Seschrock * a device that's been replaced, which requires grabbing 57191544Seschrock * spa_namespace_lock, so we must drop it here. 
5720789Sahrens */ 5721789Sahrens spa_open_ref(spa, FTAG); 5722789Sahrens mutex_exit(&spa_namespace_lock); 57231544Seschrock spa_async_suspend(spa); 57244808Sek110237 mutex_enter(&spa_namespace_lock); 5725789Sahrens spa_close(spa, FTAG); 5726789Sahrens 5727789Sahrens if (spa->spa_state != POOL_STATE_UNINITIALIZED) { 5728789Sahrens spa_unload(spa); 5729789Sahrens spa_deactivate(spa); 5730789Sahrens } 5731789Sahrens spa_remove(spa); 5732789Sahrens } 5733789Sahrens mutex_exit(&spa_namespace_lock); 5734789Sahrens } 57351544Seschrock 57361544Seschrock vdev_t * 57379425SEric.Schrock@Sun.COM spa_lookup_by_guid(spa_t *spa, uint64_t guid, boolean_t aux) 57381544Seschrock { 57396643Seschrock vdev_t *vd; 57406643Seschrock int i; 57416643Seschrock 57426643Seschrock if ((vd = vdev_lookup_by_guid(spa->spa_root_vdev, guid)) != NULL) 57436643Seschrock return (vd); 57446643Seschrock 57459425SEric.Schrock@Sun.COM if (aux) { 57466643Seschrock for (i = 0; i < spa->spa_l2cache.sav_count; i++) { 57476643Seschrock vd = spa->spa_l2cache.sav_vdevs[i]; 57486643Seschrock if (vd->vdev_guid == guid) 57496643Seschrock return (vd); 57506643Seschrock } 57519425SEric.Schrock@Sun.COM 57529425SEric.Schrock@Sun.COM for (i = 0; i < spa->spa_spares.sav_count; i++) { 57539425SEric.Schrock@Sun.COM vd = spa->spa_spares.sav_vdevs[i]; 57549425SEric.Schrock@Sun.COM if (vd->vdev_guid == guid) 57559425SEric.Schrock@Sun.COM return (vd); 57569425SEric.Schrock@Sun.COM } 57576643Seschrock } 57586643Seschrock 57596643Seschrock return (NULL); 57601544Seschrock } 57611760Seschrock 57621760Seschrock void 57635094Slling spa_upgrade(spa_t *spa, uint64_t version) 57641760Seschrock { 5765*13049SGeorge.Wilson@Sun.COM ASSERT(spa_writeable(spa)); 5766*13049SGeorge.Wilson@Sun.COM 57677754SJeff.Bonwick@Sun.COM spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 57681760Seschrock 57691760Seschrock /* 57701760Seschrock * This should only be called for a non-faulted pool, and since a 57711760Seschrock * future version would result in 
an unopenable pool, this shouldn't be 57721760Seschrock * possible. 57731760Seschrock */ 57744577Sahrens ASSERT(spa->spa_uberblock.ub_version <= SPA_VERSION); 57755094Slling ASSERT(version >= spa->spa_uberblock.ub_version); 57765094Slling 57775094Slling spa->spa_uberblock.ub_version = version; 57781760Seschrock vdev_config_dirty(spa->spa_root_vdev); 57791760Seschrock 57807754SJeff.Bonwick@Sun.COM spa_config_exit(spa, SCL_ALL, FTAG); 57812082Seschrock 57822082Seschrock txg_wait_synced(spa_get_dsl(spa), 0); 57831760Seschrock } 57842082Seschrock 57852082Seschrock boolean_t 57862082Seschrock spa_has_spare(spa_t *spa, uint64_t guid) 57872082Seschrock { 57882082Seschrock int i; 57893377Seschrock uint64_t spareguid; 57905450Sbrendan spa_aux_vdev_t *sav = &spa->spa_spares; 57915450Sbrendan 57925450Sbrendan for (i = 0; i < sav->sav_count; i++) 57935450Sbrendan if (sav->sav_vdevs[i]->vdev_guid == guid) 57942082Seschrock return (B_TRUE); 57952082Seschrock 57965450Sbrendan for (i = 0; i < sav->sav_npending; i++) { 57975450Sbrendan if (nvlist_lookup_uint64(sav->sav_pending[i], ZPOOL_CONFIG_GUID, 57985450Sbrendan &spareguid) == 0 && spareguid == guid) 57993377Seschrock return (B_TRUE); 58003377Seschrock } 58013377Seschrock 58022082Seschrock return (B_FALSE); 58032082Seschrock } 58043912Slling 58054451Seschrock /* 58067214Slling * Check if a pool has an active shared spare device. 
58077214Slling * Note: reference count of an active spare is 2, as a spare and as a replace 58087214Slling */ 58097214Slling static boolean_t 58107214Slling spa_has_active_shared_spare(spa_t *spa) 58117214Slling { 58127214Slling int i, refcnt; 58137214Slling uint64_t pool; 58147214Slling spa_aux_vdev_t *sav = &spa->spa_spares; 58157214Slling 58167214Slling for (i = 0; i < sav->sav_count; i++) { 58177214Slling if (spa_spare_exists(sav->sav_vdevs[i]->vdev_guid, &pool, 58187214Slling &refcnt) && pool != 0ULL && pool == spa_guid(spa) && 58197214Slling refcnt > 2) 58207214Slling return (B_TRUE); 58217214Slling } 58227214Slling 58237214Slling return (B_FALSE); 58247214Slling } 58257214Slling 58267214Slling /* 58274451Seschrock * Post a sysevent corresponding to the given event. The 'name' must be one of 58284451Seschrock * the event definitions in sys/sysevent/eventdefs.h. The payload will be 58294451Seschrock * filled in from the spa and (optionally) the vdev. This doesn't do anything 58304451Seschrock * in the userland libzpool, as we don't want consumers to misinterpret ztest 58314451Seschrock * or zdb as real changes. 
58324451Seschrock */ 58334451Seschrock void 58344451Seschrock spa_event_notify(spa_t *spa, vdev_t *vd, const char *name) 58354451Seschrock { 58364451Seschrock #ifdef _KERNEL 58374451Seschrock sysevent_t *ev; 58384451Seschrock sysevent_attr_list_t *attr = NULL; 58394451Seschrock sysevent_value_t value; 58404451Seschrock sysevent_id_t eid; 58414451Seschrock 58424451Seschrock ev = sysevent_alloc(EC_ZFS, (char *)name, SUNW_KERN_PUB "zfs", 58434451Seschrock SE_SLEEP); 58444451Seschrock 58454451Seschrock value.value_type = SE_DATA_TYPE_STRING; 58464451Seschrock value.value.sv_string = spa_name(spa); 58474451Seschrock if (sysevent_add_attr(&attr, ZFS_EV_POOL_NAME, &value, SE_SLEEP) != 0) 58484451Seschrock goto done; 58494451Seschrock 58504451Seschrock value.value_type = SE_DATA_TYPE_UINT64; 58514451Seschrock value.value.sv_uint64 = spa_guid(spa); 58524451Seschrock if (sysevent_add_attr(&attr, ZFS_EV_POOL_GUID, &value, SE_SLEEP) != 0) 58534451Seschrock goto done; 58544451Seschrock 58554451Seschrock if (vd) { 58564451Seschrock value.value_type = SE_DATA_TYPE_UINT64; 58574451Seschrock value.value.sv_uint64 = vd->vdev_guid; 58584451Seschrock if (sysevent_add_attr(&attr, ZFS_EV_VDEV_GUID, &value, 58594451Seschrock SE_SLEEP) != 0) 58604451Seschrock goto done; 58614451Seschrock 58624451Seschrock if (vd->vdev_path) { 58634451Seschrock value.value_type = SE_DATA_TYPE_STRING; 58644451Seschrock value.value.sv_string = vd->vdev_path; 58654451Seschrock if (sysevent_add_attr(&attr, ZFS_EV_VDEV_PATH, 58664451Seschrock &value, SE_SLEEP) != 0) 58674451Seschrock goto done; 58684451Seschrock } 58694451Seschrock } 58704451Seschrock 58715756Seschrock if (sysevent_attach_attributes(ev, attr) != 0) 58725756Seschrock goto done; 58735756Seschrock attr = NULL; 58745756Seschrock 58754451Seschrock (void) log_sysevent(ev, SE_SLEEP, &eid); 58764451Seschrock 58774451Seschrock done: 58784451Seschrock if (attr) 58794451Seschrock sysevent_free_attr(attr); 58804451Seschrock sysevent_free(ev); 
58814451Seschrock #endif 58824451Seschrock } 5883