1789Sahrens /* 2789Sahrens * CDDL HEADER START 3789Sahrens * 4789Sahrens * The contents of this file are subject to the terms of the 51544Seschrock * Common Development and Distribution License (the "License"). 61544Seschrock * You may not use this file except in compliance with the License. 7789Sahrens * 8789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9789Sahrens * or http://www.opensolaris.org/os/licensing. 10789Sahrens * See the License for the specific language governing permissions 11789Sahrens * and limitations under the License. 12789Sahrens * 13789Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15789Sahrens * If applicable, add the following below this CDDL HEADER, with the 16789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18789Sahrens * 19789Sahrens * CDDL HEADER END 20789Sahrens */ 212082Seschrock 22789Sahrens /* 233912Slling * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24789Sahrens * Use is subject to license terms. 25789Sahrens */ 26789Sahrens 27789Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 28789Sahrens 29789Sahrens #include <sys/spa.h> 30789Sahrens #include <sys/spa_impl.h> 31789Sahrens #include <sys/nvpair.h> 32789Sahrens #include <sys/uio.h> 33789Sahrens #include <sys/fs/zfs.h> 34789Sahrens #include <sys/vdev_impl.h> 35789Sahrens #include <sys/zfs_ioctl.h> 363975Sek110237 #include <sys/utsname.h> 373975Sek110237 #include <sys/systeminfo.h> 383975Sek110237 #include <sys/sunddi.h> 391544Seschrock #ifdef _KERNEL 401544Seschrock #include <sys/kobj.h> 411544Seschrock #endif 421544Seschrock 43789Sahrens /* 44789Sahrens * Pool configuration repository. 45789Sahrens * 46*5363Seschrock * Pool configuration is stored as a packed nvlist on the filesystem. By 47*5363Seschrock * default, all pools are stored in /etc/zfs/zpool.cache and loaded on boot 48*5363Seschrock * (when the ZFS module is loaded). Pools can also have the 'cachefile' 49*5363Seschrock * property set that allows them to be stored in an alternate location until 50*5363Seschrock * the control of external software. 51789Sahrens * 52*5363Seschrock * For each cache file, we have a single nvlist which holds all the 53*5363Seschrock * configuration information. When the module loads, we read this information 54*5363Seschrock * from /etc/zfs/zpool.cache and populate the SPA namespace. This namespace is 55*5363Seschrock * maintained independently in spa.c. Whenever the namespace is modified, or 56*5363Seschrock * the configuration of a pool is changed, we call spa_config_sync(), which 57*5363Seschrock * walks through all the active pools and writes the configuration to disk. 58789Sahrens */ 59789Sahrens 60789Sahrens static uint64_t spa_config_generation = 1; 61789Sahrens 62789Sahrens /* 63789Sahrens * This can be overridden in userland to preserve an alternate namespace for 64789Sahrens * userland pools when doing testing. 65789Sahrens */ 66789Sahrens const char *spa_config_dir = ZPOOL_CACHE_DIR; 67789Sahrens 68789Sahrens /* 69789Sahrens * Called when the module is first loaded, this routine loads the configuration 70789Sahrens * file into the SPA namespace. It does not actually open or load the pools; it 71789Sahrens * only populates the namespace. 72789Sahrens */ 73789Sahrens void 74789Sahrens spa_config_load(void) 75789Sahrens { 76789Sahrens void *buf = NULL; 77789Sahrens nvlist_t *nvlist, *child; 78789Sahrens nvpair_t *nvpair; 79789Sahrens spa_t *spa; 80789Sahrens char pathname[128]; 811544Seschrock struct _buf *file; 823912Slling uint64_t fsize; 83789Sahrens 84789Sahrens /* 85789Sahrens * Open the configuration file. 86789Sahrens */ 871544Seschrock (void) snprintf(pathname, sizeof (pathname), "%s%s/%s", 881635Sbonwick (rootdir != NULL) ? "./" : "", spa_config_dir, ZPOOL_CACHE_FILE); 891544Seschrock 901544Seschrock file = kobj_open_file(pathname); 911544Seschrock if (file == (struct _buf *)-1) 92789Sahrens return; 93789Sahrens 943912Slling if (kobj_get_filesize(file, &fsize) != 0) 951544Seschrock goto out; 961544Seschrock 973912Slling buf = kmem_alloc(fsize, KM_SLEEP); 981544Seschrock 99789Sahrens /* 100789Sahrens * Read the nvlist from the file. 101789Sahrens */ 1023912Slling if (kobj_read_file(file, buf, fsize, 0) < 0) 103789Sahrens goto out; 104789Sahrens 105789Sahrens /* 106789Sahrens * Unpack the nvlist. 107789Sahrens */ 1083912Slling if (nvlist_unpack(buf, fsize, &nvlist, KM_SLEEP) != 0) 109789Sahrens goto out; 110789Sahrens 111789Sahrens /* 112789Sahrens * Iterate over all elements in the nvlist, creating a new spa_t for 113789Sahrens * each one with the specified configuration. 114789Sahrens */ 115789Sahrens mutex_enter(&spa_namespace_lock); 116789Sahrens nvpair = NULL; 117789Sahrens while ((nvpair = nvlist_next_nvpair(nvlist, nvpair)) != NULL) { 118789Sahrens 119789Sahrens if (nvpair_type(nvpair) != DATA_TYPE_NVLIST) 120789Sahrens continue; 121789Sahrens 122789Sahrens VERIFY(nvpair_value_nvlist(nvpair, &child) == 0); 123789Sahrens 124789Sahrens if (spa_lookup(nvpair_name(nvpair)) != NULL) 125789Sahrens continue; 1261635Sbonwick spa = spa_add(nvpair_name(nvpair), NULL); 127789Sahrens 128789Sahrens /* 129789Sahrens * We blindly duplicate the configuration here. If it's 130789Sahrens * invalid, we will catch it when the pool is first opened. 131789Sahrens */ 132789Sahrens VERIFY(nvlist_dup(child, &spa->spa_config, 0) == 0); 133789Sahrens } 134789Sahrens mutex_exit(&spa_namespace_lock); 135789Sahrens 136789Sahrens nvlist_free(nvlist); 137789Sahrens 138789Sahrens out: 139789Sahrens if (buf != NULL) 1403912Slling kmem_free(buf, fsize); 141789Sahrens 1421544Seschrock kobj_close_file(file); 143789Sahrens } 144789Sahrens 145789Sahrens /* 146*5363Seschrock * This function is called when destroying or exporting a pool. It walks the 147*5363Seschrock * list of active pools, and searches for any that match the given cache file. 148*5363Seschrock * If there is only one cachefile, then the file is removed immediately, 149*5363Seschrock * because we won't see the pool when iterating in spa_config_sync(). 150789Sahrens */ 151789Sahrens void 152*5363Seschrock spa_config_check(const char *dir, const char *file) 153*5363Seschrock { 154*5363Seschrock size_t count = 0; 155*5363Seschrock char pathname[128]; 156*5363Seschrock spa_t *spa; 157*5363Seschrock 158*5363Seschrock if (dir != NULL && strcmp(dir, "none") == 0) 159*5363Seschrock return; 160*5363Seschrock 161*5363Seschrock ASSERT(MUTEX_HELD(&spa_namespace_lock)); 162*5363Seschrock spa = NULL; 163*5363Seschrock while ((spa = spa_next(spa)) != NULL) { 164*5363Seschrock if (dir == NULL) { 165*5363Seschrock if (spa->spa_config_dir == NULL) 166*5363Seschrock count++; 167*5363Seschrock } else { 168*5363Seschrock if (spa->spa_config_dir && 169*5363Seschrock strcmp(spa->spa_config_dir, dir) == 0 && 170*5363Seschrock strcmp(spa->spa_config_file, file) == 0) 171*5363Seschrock count++; 172*5363Seschrock } 173*5363Seschrock } 174*5363Seschrock 175*5363Seschrock if (count == 1) { 176*5363Seschrock if (dir == NULL) { 177*5363Seschrock dir = spa_config_dir; 178*5363Seschrock file = ZPOOL_CACHE_FILE; 179*5363Seschrock } 180*5363Seschrock 181*5363Seschrock (void) snprintf(pathname, sizeof (pathname), 182*5363Seschrock "%s/%s", dir, file); 183*5363Seschrock (void) vn_remove(pathname, UIO_SYSSPACE, RMFILE); 184*5363Seschrock } 185*5363Seschrock } 186*5363Seschrock 187*5363Seschrock typedef struct spa_config_entry { 188*5363Seschrock list_t sc_link; 189*5363Seschrock const char *sc_dir; 190*5363Seschrock const char *sc_file; 191*5363Seschrock nvlist_t *sc_nvl; 192*5363Seschrock } spa_config_entry_t; 193*5363Seschrock 194*5363Seschrock static void 195*5363Seschrock spa_config_entry_add(list_t *listp, spa_t *spa) 196789Sahrens { 197*5363Seschrock spa_config_entry_t *entry; 198*5363Seschrock const char *dir, *file; 199*5363Seschrock 200*5363Seschrock mutex_enter(&spa->spa_config_cache_lock); 201*5363Seschrock if (!spa->spa_config || !spa->spa_name) { 202*5363Seschrock mutex_exit(&spa->spa_config_cache_lock); 203*5363Seschrock return; 204*5363Seschrock } 205*5363Seschrock 206*5363Seschrock if (spa->spa_config_dir) { 207*5363Seschrock dir = spa->spa_config_dir; 208*5363Seschrock file = spa->spa_config_file; 209*5363Seschrock } else { 210*5363Seschrock dir = spa_config_dir; 211*5363Seschrock file = ZPOOL_CACHE_FILE; 212*5363Seschrock } 213*5363Seschrock 214*5363Seschrock if (strcmp(dir, "none") == 0) { 215*5363Seschrock mutex_exit(&spa->spa_config_cache_lock); 216*5363Seschrock return; 217*5363Seschrock } 218*5363Seschrock 219*5363Seschrock for (entry = list_head(listp); entry != NULL; 220*5363Seschrock entry = list_next(listp, entry)) { 221*5363Seschrock if (strcmp(entry->sc_dir, dir) == 0 && 222*5363Seschrock strcmp(entry->sc_file, file) == 0) 223*5363Seschrock break; 224*5363Seschrock } 225*5363Seschrock 226*5363Seschrock if (entry == NULL) { 227*5363Seschrock entry = kmem_alloc(sizeof (spa_config_entry_t), KM_SLEEP); 228*5363Seschrock entry->sc_dir = dir; 229*5363Seschrock entry->sc_file = file; 230*5363Seschrock VERIFY(nvlist_alloc(&entry->sc_nvl, NV_UNIQUE_NAME, 231*5363Seschrock KM_SLEEP) == 0); 232*5363Seschrock list_insert_tail(listp, entry); 233*5363Seschrock } 234*5363Seschrock 235*5363Seschrock VERIFY(nvlist_add_nvlist(entry->sc_nvl, spa->spa_name, 236*5363Seschrock spa->spa_config) == 0); 237*5363Seschrock mutex_exit(&spa->spa_config_cache_lock); 238*5363Seschrock } 239*5363Seschrock 240*5363Seschrock static void 241*5363Seschrock spa_config_entry_write(spa_config_entry_t *entry) 242*5363Seschrock { 243*5363Seschrock nvlist_t *config = entry->sc_nvl; 244789Sahrens size_t buflen; 245789Sahrens char *buf; 246789Sahrens vnode_t *vp; 247789Sahrens int oflags = FWRITE | FTRUNC | FCREAT | FOFFMAX; 248789Sahrens char pathname[128]; 249789Sahrens char pathname2[128]; 250789Sahrens 251789Sahrens /* 252789Sahrens * Pack the configuration into a buffer. 253789Sahrens */ 254789Sahrens VERIFY(nvlist_size(config, &buflen, NV_ENCODE_XDR) == 0); 255789Sahrens 256789Sahrens buf = kmem_alloc(buflen, KM_SLEEP); 257789Sahrens 2581544Seschrock VERIFY(nvlist_pack(config, &buf, &buflen, NV_ENCODE_XDR, 2591544Seschrock KM_SLEEP) == 0); 260789Sahrens 261789Sahrens /* 262789Sahrens * Write the configuration to disk. We need to do the traditional 263789Sahrens * 'write to temporary file, sync, move over original' to make sure we 264789Sahrens * always have a consistent view of the data. 265789Sahrens */ 266*5363Seschrock (void) snprintf(pathname, sizeof (pathname), "%s/.%s", entry->sc_dir, 267*5363Seschrock entry->sc_file); 268789Sahrens 269789Sahrens if (vn_open(pathname, UIO_SYSSPACE, oflags, 0644, &vp, CRCREAT, 0) != 0) 270789Sahrens goto out; 271789Sahrens 272789Sahrens if (vn_rdwr(UIO_WRITE, vp, buf, buflen, 0, UIO_SYSSPACE, 273789Sahrens 0, RLIM64_INFINITY, kcred, NULL) == 0 && 2745331Samw VOP_FSYNC(vp, FSYNC, kcred, NULL) == 0) { 275789Sahrens (void) snprintf(pathname2, sizeof (pathname2), "%s/%s", 276*5363Seschrock entry->sc_dir, entry->sc_file); 277789Sahrens (void) vn_rename(pathname, pathname2, UIO_SYSSPACE); 278789Sahrens } 279789Sahrens 2805331Samw (void) VOP_CLOSE(vp, oflags, 1, 0, kcred, NULL); 281789Sahrens VN_RELE(vp); 282789Sahrens 283789Sahrens out: 284789Sahrens (void) vn_remove(pathname, UIO_SYSSPACE, RMFILE); 285*5363Seschrock kmem_free(buf, buflen); 286*5363Seschrock } 287*5363Seschrock 288*5363Seschrock /* 289*5363Seschrock * Synchronize all pools to disk. This must be called with the namespace lock 290*5363Seschrock * held. 291*5363Seschrock */ 292*5363Seschrock void 293*5363Seschrock spa_config_sync(void) 294*5363Seschrock { 295*5363Seschrock spa_t *spa = NULL; 296*5363Seschrock list_t files = { 0 }; 297*5363Seschrock spa_config_entry_t *entry; 298*5363Seschrock 299*5363Seschrock ASSERT(MUTEX_HELD(&spa_namespace_lock)); 300789Sahrens 301*5363Seschrock list_create(&files, sizeof (spa_config_entry_t), 302*5363Seschrock offsetof(spa_config_entry_t, sc_link)); 303*5363Seschrock 304*5363Seschrock /* 305*5363Seschrock * Add all known pools to the configuration list, ignoring those with 306*5363Seschrock * alternate root paths. 307*5363Seschrock */ 308*5363Seschrock spa = NULL; 309*5363Seschrock while ((spa = spa_next(spa)) != NULL) 310*5363Seschrock spa_config_entry_add(&files, spa); 311*5363Seschrock 312*5363Seschrock while ((entry = list_head(&files)) != NULL) { 313*5363Seschrock spa_config_entry_write(entry); 314*5363Seschrock list_remove(&files, entry); 315*5363Seschrock nvlist_free(entry->sc_nvl); 316*5363Seschrock kmem_free(entry, sizeof (spa_config_entry_t)); 317*5363Seschrock } 318*5363Seschrock 319*5363Seschrock spa_config_generation++; 320789Sahrens } 321789Sahrens 322789Sahrens /* 3231635Sbonwick * Sigh. Inside a local zone, we don't have access to /etc/zfs/zpool.cache, 324789Sahrens * and we don't want to allow the local zone to see all the pools anyway. 325789Sahrens * So we have to invent the ZFS_IOC_CONFIG ioctl to grab the configuration 326789Sahrens * information for all pool visible within the zone. 327789Sahrens */ 328789Sahrens nvlist_t * 329789Sahrens spa_all_configs(uint64_t *generation) 330789Sahrens { 331789Sahrens nvlist_t *pools; 332789Sahrens spa_t *spa; 333789Sahrens 334789Sahrens if (*generation == spa_config_generation) 335789Sahrens return (NULL); 336789Sahrens 3371544Seschrock VERIFY(nvlist_alloc(&pools, NV_UNIQUE_NAME, KM_SLEEP) == 0); 338789Sahrens 339789Sahrens spa = NULL; 340789Sahrens mutex_enter(&spa_namespace_lock); 341789Sahrens while ((spa = spa_next(spa)) != NULL) { 342789Sahrens if (INGLOBALZONE(curproc) || 343789Sahrens zone_dataset_visible(spa_name(spa), NULL)) { 344789Sahrens mutex_enter(&spa->spa_config_cache_lock); 345789Sahrens VERIFY(nvlist_add_nvlist(pools, spa_name(spa), 346789Sahrens spa->spa_config) == 0); 347789Sahrens mutex_exit(&spa->spa_config_cache_lock); 348789Sahrens } 349789Sahrens } 350789Sahrens mutex_exit(&spa_namespace_lock); 351789Sahrens 352789Sahrens *generation = spa_config_generation; 353789Sahrens 354789Sahrens return (pools); 355789Sahrens } 356789Sahrens 357789Sahrens void 358789Sahrens spa_config_set(spa_t *spa, nvlist_t *config) 359789Sahrens { 360789Sahrens mutex_enter(&spa->spa_config_cache_lock); 361789Sahrens if (spa->spa_config != NULL) 362789Sahrens nvlist_free(spa->spa_config); 363789Sahrens spa->spa_config = config; 364789Sahrens mutex_exit(&spa->spa_config_cache_lock); 365789Sahrens } 366789Sahrens 367789Sahrens /* 368789Sahrens * Generate the pool's configuration based on the current in-core state. 369789Sahrens * We infer whether to generate a complete config or just one top-level config 370789Sahrens * based on whether vd is the root vdev. 371789Sahrens */ 372789Sahrens nvlist_t * 373789Sahrens spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats) 374789Sahrens { 375789Sahrens nvlist_t *config, *nvroot; 376789Sahrens vdev_t *rvd = spa->spa_root_vdev; 3773975Sek110237 unsigned long hostid = 0; 378789Sahrens 3794787Sahrens ASSERT(spa_config_held(spa, RW_READER) || 3804787Sahrens spa_config_held(spa, RW_WRITER)); 3811635Sbonwick 382789Sahrens if (vd == NULL) 383789Sahrens vd = rvd; 384789Sahrens 385789Sahrens /* 386789Sahrens * If txg is -1, report the current value of spa->spa_config_txg. 387789Sahrens */ 388789Sahrens if (txg == -1ULL) 389789Sahrens txg = spa->spa_config_txg; 390789Sahrens 3911544Seschrock VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0); 392789Sahrens 393789Sahrens VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION, 3942082Seschrock spa_version(spa)) == 0); 395789Sahrens VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, 396789Sahrens spa_name(spa)) == 0); 397789Sahrens VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, 398789Sahrens spa_state(spa)) == 0); 399789Sahrens VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG, 400789Sahrens txg) == 0); 401789Sahrens VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID, 402789Sahrens spa_guid(spa)) == 0); 4033975Sek110237 (void) ddi_strtoul(hw_serial, NULL, 10, &hostid); 4044178Slling if (hostid != 0) { 4054178Slling VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID, 4064527Sperrin hostid) == 0); 4074178Slling } 4083975Sek110237 VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME, 4093975Sek110237 utsname.nodename) == 0); 410789Sahrens 411789Sahrens if (vd != rvd) { 412789Sahrens VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TOP_GUID, 413789Sahrens vd->vdev_top->vdev_guid) == 0); 414789Sahrens VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_GUID, 415789Sahrens vd->vdev_guid) == 0); 4162082Seschrock if (vd->vdev_isspare) 4172082Seschrock VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_SPARE, 4182082Seschrock 1ULL) == 0); 4194527Sperrin if (vd->vdev_islog) 4204527Sperrin VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_LOG, 4214527Sperrin 1ULL) == 0); 422789Sahrens vd = vd->vdev_top; /* label contains top config */ 423789Sahrens } 424789Sahrens 4252082Seschrock nvroot = vdev_config_generate(spa, vd, getstats, B_FALSE); 426789Sahrens VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); 427789Sahrens nvlist_free(nvroot); 428789Sahrens 429789Sahrens return (config); 430789Sahrens } 4311635Sbonwick 4321635Sbonwick /* 4331635Sbonwick * Update all disk labels, generate a fresh config based on the current 4341635Sbonwick * in-core state, and sync the global config cache. 4351635Sbonwick */ 4361635Sbonwick void 4371635Sbonwick spa_config_update(spa_t *spa, int what) 4381635Sbonwick { 4391635Sbonwick vdev_t *rvd = spa->spa_root_vdev; 4401635Sbonwick uint64_t txg; 4411635Sbonwick int c; 4421635Sbonwick 4431635Sbonwick ASSERT(MUTEX_HELD(&spa_namespace_lock)); 4441635Sbonwick 4451635Sbonwick spa_config_enter(spa, RW_WRITER, FTAG); 4461635Sbonwick txg = spa_last_synced_txg(spa) + 1; 4471635Sbonwick if (what == SPA_CONFIG_UPDATE_POOL) { 4481635Sbonwick vdev_config_dirty(rvd); 4491635Sbonwick } else { 4501635Sbonwick /* 4511635Sbonwick * If we have top-level vdevs that were added but have 4521635Sbonwick * not yet been prepared for allocation, do that now. 4531635Sbonwick * (It's safe now because the config cache is up to date, 4541635Sbonwick * so it will be able to translate the new DVAs.) 4551635Sbonwick * See comments in spa_vdev_add() for full details. 4561635Sbonwick */ 4571635Sbonwick for (c = 0; c < rvd->vdev_children; c++) { 4581635Sbonwick vdev_t *tvd = rvd->vdev_child[c]; 4591635Sbonwick if (tvd->vdev_ms_array == 0) { 4601635Sbonwick vdev_init(tvd, txg); 4611635Sbonwick vdev_config_dirty(tvd); 4621635Sbonwick } 4631635Sbonwick } 4641635Sbonwick } 4651635Sbonwick spa_config_exit(spa, FTAG); 4661635Sbonwick 4671635Sbonwick /* 4681635Sbonwick * Wait for the mosconfig to be regenerated and synced. 4691635Sbonwick */ 4701635Sbonwick txg_wait_synced(spa->spa_dsl_pool, txg); 4711635Sbonwick 4721635Sbonwick /* 4731635Sbonwick * Update the global config cache to reflect the new mosconfig. 4741635Sbonwick */ 4751635Sbonwick spa_config_sync(); 4761635Sbonwick 4771635Sbonwick if (what == SPA_CONFIG_UPDATE_POOL) 4781635Sbonwick spa_config_update(spa, SPA_CONFIG_UPDATE_VDEVS); 4791635Sbonwick } 480