1789Sahrens /* 2789Sahrens * CDDL HEADER START 3789Sahrens * 4789Sahrens * The contents of this file are subject to the terms of the 51544Seschrock * Common Development and Distribution License (the "License"). 61544Seschrock * You may not use this file except in compliance with the License. 7789Sahrens * 8789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9789Sahrens * or http://www.opensolaris.org/os/licensing. 10789Sahrens * See the License for the specific language governing permissions 11789Sahrens * and limitations under the License. 12789Sahrens * 13789Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15789Sahrens * If applicable, add the following below this CDDL HEADER, with the 16789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18789Sahrens * 19789Sahrens * CDDL HEADER END 20789Sahrens */ 21789Sahrens /* 221544Seschrock * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23789Sahrens * Use is subject to license terms. 24789Sahrens */ 25789Sahrens 26789Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 27789Sahrens 28789Sahrens #include <sys/zfs_context.h> 29789Sahrens #include <sys/dmu_objset.h> 30789Sahrens #include <sys/dsl_dir.h> 31789Sahrens #include <sys/dsl_dataset.h> 32789Sahrens #include <sys/dsl_prop.h> 33789Sahrens #include <sys/dsl_pool.h> 342199Sahrens #include <sys/dsl_synctask.h> 35789Sahrens #include <sys/dnode.h> 36789Sahrens #include <sys/dbuf.h> 37*2885Sahrens #include <sys/zvol.h> 38789Sahrens #include <sys/dmu_tx.h> 39789Sahrens #include <sys/zio_checksum.h> 40789Sahrens #include <sys/zap.h> 41789Sahrens #include <sys/zil.h> 42789Sahrens #include <sys/dmu_impl.h> 43789Sahrens 44789Sahrens 45789Sahrens spa_t * 46789Sahrens dmu_objset_spa(objset_t *os) 47789Sahrens { 48789Sahrens return (os->os->os_spa); 49789Sahrens } 50789Sahrens 51789Sahrens zilog_t * 52789Sahrens dmu_objset_zil(objset_t *os) 53789Sahrens { 54789Sahrens return (os->os->os_zil); 55789Sahrens } 56789Sahrens 57789Sahrens dsl_pool_t * 58789Sahrens dmu_objset_pool(objset_t *os) 59789Sahrens { 60789Sahrens dsl_dataset_t *ds; 61789Sahrens 62789Sahrens if ((ds = os->os->os_dsl_dataset) != NULL && ds->ds_dir) 63789Sahrens return (ds->ds_dir->dd_pool); 64789Sahrens else 65789Sahrens return (spa_get_dsl(os->os->os_spa)); 66789Sahrens } 67789Sahrens 68789Sahrens dsl_dataset_t * 69789Sahrens dmu_objset_ds(objset_t *os) 70789Sahrens { 71789Sahrens return (os->os->os_dsl_dataset); 72789Sahrens } 73789Sahrens 74789Sahrens dmu_objset_type_t 75789Sahrens dmu_objset_type(objset_t *os) 76789Sahrens { 77789Sahrens return (os->os->os_phys->os_type); 78789Sahrens } 79789Sahrens 80789Sahrens void 81789Sahrens dmu_objset_name(objset_t *os, char *buf) 82789Sahrens { 83789Sahrens dsl_dataset_name(os->os->os_dsl_dataset, buf); 84789Sahrens } 85789Sahrens 86789Sahrens uint64_t 87789Sahrens dmu_objset_id(objset_t *os) 88789Sahrens { 89789Sahrens dsl_dataset_t *ds = os->os->os_dsl_dataset; 90789Sahrens 91789Sahrens return (ds ? ds->ds_object : 0); 92789Sahrens } 93789Sahrens 94789Sahrens static void 95789Sahrens checksum_changed_cb(void *arg, uint64_t newval) 96789Sahrens { 97789Sahrens objset_impl_t *osi = arg; 98789Sahrens 99789Sahrens /* 100789Sahrens * Inheritance should have been done by now. 101789Sahrens */ 102789Sahrens ASSERT(newval != ZIO_CHECKSUM_INHERIT); 103789Sahrens 104789Sahrens osi->os_checksum = zio_checksum_select(newval, ZIO_CHECKSUM_ON_VALUE); 105789Sahrens } 106789Sahrens 107789Sahrens static void 108789Sahrens compression_changed_cb(void *arg, uint64_t newval) 109789Sahrens { 110789Sahrens objset_impl_t *osi = arg; 111789Sahrens 112789Sahrens /* 113789Sahrens * Inheritance and range checking should have been done by now. 114789Sahrens */ 115789Sahrens ASSERT(newval != ZIO_COMPRESS_INHERIT); 116789Sahrens 117789Sahrens osi->os_compress = zio_compress_select(newval, ZIO_COMPRESS_ON_VALUE); 118789Sahrens } 119789Sahrens 120789Sahrens void 121789Sahrens dmu_objset_byteswap(void *buf, size_t size) 122789Sahrens { 123789Sahrens objset_phys_t *osp = buf; 124789Sahrens 125789Sahrens ASSERT(size == sizeof (objset_phys_t)); 126789Sahrens dnode_byteswap(&osp->os_meta_dnode); 127789Sahrens byteswap_uint64_array(&osp->os_zil_header, sizeof (zil_header_t)); 128789Sahrens osp->os_type = BSWAP_64(osp->os_type); 129789Sahrens } 130789Sahrens 1311544Seschrock int 1321544Seschrock dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, 1331544Seschrock objset_impl_t **osip) 134789Sahrens { 135789Sahrens objset_impl_t *winner, *osi; 136789Sahrens int i, err, checksum; 137789Sahrens 138789Sahrens osi = kmem_zalloc(sizeof (objset_impl_t), KM_SLEEP); 139789Sahrens osi->os.os = osi; 140789Sahrens osi->os_dsl_dataset = ds; 141789Sahrens osi->os_spa = spa; 142789Sahrens if (bp) 143789Sahrens osi->os_rootbp = *bp; 144789Sahrens osi->os_phys = zio_buf_alloc(sizeof (objset_phys_t)); 145789Sahrens if (!BP_IS_HOLE(&osi->os_rootbp)) { 1462391Smaybee uint32_t aflags = ARC_WAIT; 1471544Seschrock zbookmark_t zb; 1481544Seschrock zb.zb_objset = ds ? ds->ds_object : 0; 1491544Seschrock zb.zb_object = 0; 1501544Seschrock zb.zb_level = -1; 1511544Seschrock zb.zb_blkid = 0; 1521544Seschrock 153789Sahrens dprintf_bp(&osi->os_rootbp, "reading %s", ""); 1541544Seschrock err = arc_read(NULL, spa, &osi->os_rootbp, 155789Sahrens dmu_ot[DMU_OT_OBJSET].ot_byteswap, 156789Sahrens arc_bcopy_func, osi->os_phys, 1572391Smaybee ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb); 1581544Seschrock if (err) { 1591544Seschrock zio_buf_free(osi->os_phys, sizeof (objset_phys_t)); 1601544Seschrock kmem_free(osi, sizeof (objset_impl_t)); 1611544Seschrock return (err); 1621544Seschrock } 163789Sahrens } else { 164789Sahrens bzero(osi->os_phys, sizeof (objset_phys_t)); 165789Sahrens } 166789Sahrens 167789Sahrens /* 168789Sahrens * Note: the changed_cb will be called once before the register 169789Sahrens * func returns, thus changing the checksum/compression from the 1702082Seschrock * default (fletcher2/off). Snapshots don't need to know, and 1712082Seschrock * registering would complicate clone promotion. 172789Sahrens */ 1732082Seschrock if (ds && ds->ds_phys->ds_num_children == 0) { 174789Sahrens err = dsl_prop_register(ds, "checksum", 175789Sahrens checksum_changed_cb, osi); 1761544Seschrock if (err == 0) 1771544Seschrock err = dsl_prop_register(ds, "compression", 1781544Seschrock compression_changed_cb, osi); 1791544Seschrock if (err) { 1801544Seschrock zio_buf_free(osi->os_phys, sizeof (objset_phys_t)); 1811544Seschrock kmem_free(osi, sizeof (objset_impl_t)); 1821544Seschrock return (err); 1831544Seschrock } 1842082Seschrock } else if (ds == NULL) { 185789Sahrens /* It's the meta-objset. */ 186789Sahrens osi->os_checksum = ZIO_CHECKSUM_FLETCHER_4; 1871544Seschrock osi->os_compress = ZIO_COMPRESS_LZJB; 188789Sahrens } 189789Sahrens 1901544Seschrock osi->os_zil = zil_alloc(&osi->os, &osi->os_phys->os_zil_header); 1911544Seschrock 192789Sahrens /* 193789Sahrens * Metadata always gets compressed and checksummed. 194789Sahrens * If the data checksum is multi-bit correctable, and it's not 195789Sahrens * a ZBT-style checksum, then it's suitable for metadata as well. 196789Sahrens * Otherwise, the metadata checksum defaults to fletcher4. 197789Sahrens */ 198789Sahrens checksum = osi->os_checksum; 199789Sahrens 200789Sahrens if (zio_checksum_table[checksum].ci_correctable && 201789Sahrens !zio_checksum_table[checksum].ci_zbt) 202789Sahrens osi->os_md_checksum = checksum; 203789Sahrens else 204789Sahrens osi->os_md_checksum = ZIO_CHECKSUM_FLETCHER_4; 2051544Seschrock osi->os_md_compress = ZIO_COMPRESS_LZJB; 206789Sahrens 207789Sahrens for (i = 0; i < TXG_SIZE; i++) { 208789Sahrens list_create(&osi->os_dirty_dnodes[i], sizeof (dnode_t), 209789Sahrens offsetof(dnode_t, dn_dirty_link[i])); 210789Sahrens list_create(&osi->os_free_dnodes[i], sizeof (dnode_t), 211789Sahrens offsetof(dnode_t, dn_dirty_link[i])); 212789Sahrens } 213789Sahrens list_create(&osi->os_dnodes, sizeof (dnode_t), 214789Sahrens offsetof(dnode_t, dn_link)); 215789Sahrens list_create(&osi->os_downgraded_dbufs, sizeof (dmu_buf_impl_t), 216789Sahrens offsetof(dmu_buf_impl_t, db_link)); 217789Sahrens 2182856Snd150628 mutex_init(&osi->os_lock, NULL, MUTEX_DEFAULT, NULL); 2192856Snd150628 mutex_init(&osi->os_obj_lock, NULL, MUTEX_DEFAULT, NULL); 2202856Snd150628 221789Sahrens osi->os_meta_dnode = dnode_special_open(osi, 222789Sahrens &osi->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT); 223789Sahrens 224789Sahrens if (ds != NULL) { 225789Sahrens winner = dsl_dataset_set_user_ptr(ds, osi, dmu_objset_evict); 226789Sahrens if (winner) { 227789Sahrens dmu_objset_evict(ds, osi); 228789Sahrens osi = winner; 229789Sahrens } 230789Sahrens } 231789Sahrens 2321544Seschrock *osip = osi; 2331544Seschrock return (0); 234789Sahrens } 235789Sahrens 236789Sahrens /* called from zpl */ 237789Sahrens int 238789Sahrens dmu_objset_open(const char *name, dmu_objset_type_t type, int mode, 239789Sahrens objset_t **osp) 240789Sahrens { 241789Sahrens dsl_dataset_t *ds; 242789Sahrens int err; 243789Sahrens objset_t *os; 244789Sahrens objset_impl_t *osi; 245789Sahrens 246789Sahrens os = kmem_alloc(sizeof (objset_t), KM_SLEEP); 247789Sahrens err = dsl_dataset_open(name, mode, os, &ds); 248789Sahrens if (err) { 249789Sahrens kmem_free(os, sizeof (objset_t)); 250789Sahrens return (err); 251789Sahrens } 252789Sahrens 253789Sahrens osi = dsl_dataset_get_user_ptr(ds); 254789Sahrens if (osi == NULL) { 255789Sahrens blkptr_t bp; 256789Sahrens 257789Sahrens dsl_dataset_get_blkptr(ds, &bp); 2581544Seschrock err = dmu_objset_open_impl(dsl_dataset_get_spa(ds), 2591544Seschrock ds, &bp, &osi); 2601544Seschrock if (err) { 2611544Seschrock dsl_dataset_close(ds, mode, os); 2621544Seschrock kmem_free(os, sizeof (objset_t)); 2631544Seschrock return (err); 2641544Seschrock } 265789Sahrens } 266789Sahrens 267789Sahrens os->os = osi; 268789Sahrens os->os_mode = mode; 269789Sahrens 270789Sahrens if (type != DMU_OST_ANY && type != os->os->os_phys->os_type) { 271789Sahrens dmu_objset_close(os); 272789Sahrens return (EINVAL); 273789Sahrens } 274789Sahrens *osp = os; 275789Sahrens return (0); 276789Sahrens } 277789Sahrens 278789Sahrens void 279789Sahrens dmu_objset_close(objset_t *os) 280789Sahrens { 281789Sahrens dsl_dataset_close(os->os->os_dsl_dataset, os->os_mode, os); 282789Sahrens kmem_free(os, sizeof (objset_t)); 283789Sahrens } 284789Sahrens 2851646Sperrin int 2861646Sperrin dmu_objset_evict_dbufs(objset_t *os, int try) 2871544Seschrock { 2881544Seschrock objset_impl_t *osi = os->os; 2891544Seschrock dnode_t *dn; 2901596Sahrens 2911596Sahrens mutex_enter(&osi->os_lock); 2921596Sahrens 2931596Sahrens /* process the mdn last, since the other dnodes have holds on it */ 2941596Sahrens list_remove(&osi->os_dnodes, osi->os_meta_dnode); 2951596Sahrens list_insert_tail(&osi->os_dnodes, osi->os_meta_dnode); 2961544Seschrock 2971544Seschrock /* 2981596Sahrens * Find the first dnode with holds. We have to do this dance 2991596Sahrens * because dnode_add_ref() only works if you already have a 3001596Sahrens * hold. If there are no holds then it has no dbufs so OK to 3011596Sahrens * skip. 3021544Seschrock */ 3031596Sahrens for (dn = list_head(&osi->os_dnodes); 3041596Sahrens dn && refcount_is_zero(&dn->dn_holds); 3051596Sahrens dn = list_next(&osi->os_dnodes, dn)) 3061596Sahrens continue; 3071596Sahrens if (dn) 3081596Sahrens dnode_add_ref(dn, FTAG); 3091596Sahrens 3101596Sahrens while (dn) { 3111596Sahrens dnode_t *next_dn = dn; 3121596Sahrens 3131596Sahrens do { 3141596Sahrens next_dn = list_next(&osi->os_dnodes, next_dn); 3151596Sahrens } while (next_dn && refcount_is_zero(&next_dn->dn_holds)); 3161596Sahrens if (next_dn) 3171596Sahrens dnode_add_ref(next_dn, FTAG); 3181596Sahrens 3191596Sahrens mutex_exit(&osi->os_lock); 3201646Sperrin if (dnode_evict_dbufs(dn, try)) { 3211646Sperrin dnode_rele(dn, FTAG); 3221646Sperrin if (next_dn) 3231646Sperrin dnode_rele(next_dn, FTAG); 3241646Sperrin return (1); 3251646Sperrin } 3261596Sahrens dnode_rele(dn, FTAG); 3271596Sahrens mutex_enter(&osi->os_lock); 3281596Sahrens dn = next_dn; 3291544Seschrock } 3301544Seschrock mutex_exit(&osi->os_lock); 3311646Sperrin return (0); 3321544Seschrock } 3331544Seschrock 3341544Seschrock void 335789Sahrens dmu_objset_evict(dsl_dataset_t *ds, void *arg) 336789Sahrens { 337789Sahrens objset_impl_t *osi = arg; 3381544Seschrock objset_t os; 3392082Seschrock int i; 340789Sahrens 341789Sahrens for (i = 0; i < TXG_SIZE; i++) { 342789Sahrens ASSERT(list_head(&osi->os_dirty_dnodes[i]) == NULL); 343789Sahrens ASSERT(list_head(&osi->os_free_dnodes[i]) == NULL); 344789Sahrens } 345789Sahrens 3462082Seschrock if (ds && ds->ds_phys->ds_num_children == 0) { 3472082Seschrock VERIFY(0 == dsl_prop_unregister(ds, "checksum", 3482082Seschrock checksum_changed_cb, osi)); 3492082Seschrock VERIFY(0 == dsl_prop_unregister(ds, "compression", 3502082Seschrock compression_changed_cb, osi)); 351789Sahrens } 352789Sahrens 3531544Seschrock /* 3541544Seschrock * We should need only a single pass over the dnode list, since 3551544Seschrock * nothing can be added to the list at this point. 3561544Seschrock */ 3571544Seschrock os.os = osi; 3581646Sperrin (void) dmu_objset_evict_dbufs(&os, 0); 3591544Seschrock 360789Sahrens ASSERT3P(list_head(&osi->os_dnodes), ==, osi->os_meta_dnode); 361789Sahrens ASSERT3P(list_tail(&osi->os_dnodes), ==, osi->os_meta_dnode); 362789Sahrens ASSERT3P(list_head(&osi->os_meta_dnode->dn_dbufs), ==, NULL); 363789Sahrens 364789Sahrens dnode_special_close(osi->os_meta_dnode); 365789Sahrens zil_free(osi->os_zil); 366789Sahrens 367789Sahrens zio_buf_free(osi->os_phys, sizeof (objset_phys_t)); 3682856Snd150628 mutex_destroy(&osi->os_lock); 3692856Snd150628 mutex_destroy(&osi->os_obj_lock); 370789Sahrens kmem_free(osi, sizeof (objset_impl_t)); 371789Sahrens } 372789Sahrens 373789Sahrens /* called from dsl for meta-objset */ 374789Sahrens objset_impl_t * 375789Sahrens dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, dmu_objset_type_t type, 376789Sahrens dmu_tx_t *tx) 377789Sahrens { 378789Sahrens objset_impl_t *osi; 379789Sahrens dnode_t *mdn; 380789Sahrens 381789Sahrens ASSERT(dmu_tx_is_syncing(tx)); 3821544Seschrock VERIFY(0 == dmu_objset_open_impl(spa, ds, NULL, &osi)); 383789Sahrens mdn = osi->os_meta_dnode; 384789Sahrens 385789Sahrens dnode_allocate(mdn, DMU_OT_DNODE, 1 << DNODE_BLOCK_SHIFT, 386789Sahrens DN_MAX_INDBLKSHIFT, DMU_OT_NONE, 0, tx); 387789Sahrens 388789Sahrens /* 389789Sahrens * We don't want to have to increase the meta-dnode's nlevels 390789Sahrens * later, because then we could do it in quescing context while 391789Sahrens * we are also accessing it in open context. 392789Sahrens * 393789Sahrens * This precaution is not necessary for the MOS (ds == NULL), 394789Sahrens * because the MOS is only updated in syncing context. 395789Sahrens * This is most fortunate: the MOS is the only objset that 396789Sahrens * needs to be synced multiple times as spa_sync() iterates 397789Sahrens * to convergence, so minimizing its dn_nlevels matters. 398789Sahrens */ 3991544Seschrock if (ds != NULL) { 4001544Seschrock int levels = 1; 4011544Seschrock 4021544Seschrock /* 4031544Seschrock * Determine the number of levels necessary for the meta-dnode 4041544Seschrock * to contain DN_MAX_OBJECT dnodes. 4051544Seschrock */ 4061544Seschrock while ((uint64_t)mdn->dn_nblkptr << (mdn->dn_datablkshift + 4071544Seschrock (levels - 1) * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) < 4081544Seschrock DN_MAX_OBJECT * sizeof (dnode_phys_t)) 4091544Seschrock levels++; 4101544Seschrock 411789Sahrens mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] = 4121544Seschrock mdn->dn_nlevels = levels; 4131544Seschrock } 414789Sahrens 415789Sahrens ASSERT(type != DMU_OST_NONE); 416789Sahrens ASSERT(type != DMU_OST_ANY); 417789Sahrens ASSERT(type < DMU_OST_NUMTYPES); 418789Sahrens osi->os_phys->os_type = type; 419789Sahrens 420789Sahrens dsl_dataset_dirty(ds, tx); 421789Sahrens 422789Sahrens return (osi); 423789Sahrens } 424789Sahrens 425789Sahrens struct oscarg { 426789Sahrens void (*userfunc)(objset_t *os, void *arg, dmu_tx_t *tx); 427789Sahrens void *userarg; 428789Sahrens dsl_dataset_t *clone_parent; 429789Sahrens const char *lastname; 430789Sahrens dmu_objset_type_t type; 431789Sahrens }; 432789Sahrens 4332199Sahrens /* ARGSUSED */ 434789Sahrens static int 4352199Sahrens dmu_objset_create_check(void *arg1, void *arg2, dmu_tx_t *tx) 436789Sahrens { 4372199Sahrens dsl_dir_t *dd = arg1; 4382199Sahrens struct oscarg *oa = arg2; 4392199Sahrens objset_t *mos = dd->dd_pool->dp_meta_objset; 4402199Sahrens int err; 4412199Sahrens uint64_t ddobj; 4422199Sahrens 4432199Sahrens err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj, 4442199Sahrens oa->lastname, sizeof (uint64_t), 1, &ddobj); 4452199Sahrens if (err != ENOENT) 4462199Sahrens return (err ? err : EEXIST); 4472199Sahrens 4482199Sahrens if (oa->clone_parent != NULL) { 4492199Sahrens /* 4502199Sahrens * You can't clone across pools. 4512199Sahrens */ 4522199Sahrens if (oa->clone_parent->ds_dir->dd_pool != dd->dd_pool) 4532199Sahrens return (EXDEV); 4542199Sahrens 4552199Sahrens /* 4562199Sahrens * You can only clone snapshots, not the head datasets. 4572199Sahrens */ 4582199Sahrens if (oa->clone_parent->ds_phys->ds_num_children == 0) 4592199Sahrens return (EINVAL); 4602199Sahrens } 4612199Sahrens return (0); 4622199Sahrens } 4632199Sahrens 4642199Sahrens static void 4652199Sahrens dmu_objset_create_sync(void *arg1, void *arg2, dmu_tx_t *tx) 4662199Sahrens { 4672199Sahrens dsl_dir_t *dd = arg1; 4682199Sahrens struct oscarg *oa = arg2; 469789Sahrens dsl_dataset_t *ds; 470789Sahrens blkptr_t bp; 4712199Sahrens uint64_t dsobj; 472789Sahrens 473789Sahrens ASSERT(dmu_tx_is_syncing(tx)); 474789Sahrens 4752199Sahrens dsobj = dsl_dataset_create_sync(dd, oa->lastname, 476789Sahrens oa->clone_parent, tx); 477789Sahrens 4782199Sahrens VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, dsobj, NULL, 4791544Seschrock DS_MODE_STANDARD | DS_MODE_READONLY, FTAG, &ds)); 480789Sahrens dsl_dataset_get_blkptr(ds, &bp); 481789Sahrens if (BP_IS_HOLE(&bp)) { 482789Sahrens objset_impl_t *osi; 483789Sahrens 484789Sahrens /* This is an empty dmu_objset; not a clone. */ 485789Sahrens osi = dmu_objset_create_impl(dsl_dataset_get_spa(ds), 486789Sahrens ds, oa->type, tx); 487789Sahrens 488789Sahrens if (oa->userfunc) 489789Sahrens oa->userfunc(&osi->os, oa->userarg, tx); 490789Sahrens } 491789Sahrens dsl_dataset_close(ds, DS_MODE_STANDARD | DS_MODE_READONLY, FTAG); 492789Sahrens } 493789Sahrens 494789Sahrens int 495789Sahrens dmu_objset_create(const char *name, dmu_objset_type_t type, 496789Sahrens objset_t *clone_parent, 497789Sahrens void (*func)(objset_t *os, void *arg, dmu_tx_t *tx), void *arg) 498789Sahrens { 4992199Sahrens dsl_dir_t *pdd; 500789Sahrens const char *tail; 501789Sahrens int err = 0; 5022199Sahrens struct oscarg oa = { 0 }; 503789Sahrens 5042199Sahrens ASSERT(strchr(name, '@') == NULL); 5052199Sahrens err = dsl_dir_open(name, FTAG, &pdd, &tail); 5061544Seschrock if (err) 5071544Seschrock return (err); 508789Sahrens if (tail == NULL) { 5092199Sahrens dsl_dir_close(pdd, FTAG); 510789Sahrens return (EEXIST); 511789Sahrens } 512789Sahrens 513789Sahrens dprintf("name=%s\n", name); 514789Sahrens 5152199Sahrens oa.userfunc = func; 5162199Sahrens oa.userarg = arg; 5172199Sahrens oa.lastname = tail; 5182199Sahrens oa.type = type; 5192199Sahrens if (clone_parent != NULL) { 520789Sahrens /* 5212199Sahrens * You can't clone to a different type. 522789Sahrens */ 5232199Sahrens if (clone_parent->os->os_phys->os_type != type) { 5242199Sahrens dsl_dir_close(pdd, FTAG); 5252199Sahrens return (EINVAL); 526789Sahrens } 5272199Sahrens oa.clone_parent = clone_parent->os->os_dsl_dataset; 528789Sahrens } 5292199Sahrens err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check, 5302199Sahrens dmu_objset_create_sync, pdd, &oa, 5); 5312199Sahrens dsl_dir_close(pdd, FTAG); 532789Sahrens return (err); 533789Sahrens } 534789Sahrens 535789Sahrens int 536789Sahrens dmu_objset_destroy(const char *name) 537789Sahrens { 538789Sahrens objset_t *os; 539789Sahrens int error; 540789Sahrens 541789Sahrens /* 542789Sahrens * If it looks like we'll be able to destroy it, and there's 543789Sahrens * an unplayed replay log sitting around, destroy the log. 544789Sahrens * It would be nicer to do this in dsl_dataset_destroy_sync(), 545789Sahrens * but the replay log objset is modified in open context. 546789Sahrens */ 547789Sahrens error = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_EXCLUSIVE, &os); 548789Sahrens if (error == 0) { 5491807Sbonwick zil_destroy(dmu_objset_zil(os), B_FALSE); 550789Sahrens dmu_objset_close(os); 551789Sahrens } 552789Sahrens 553789Sahrens return (dsl_dataset_destroy(name)); 554789Sahrens } 555789Sahrens 556789Sahrens int 557789Sahrens dmu_objset_rollback(const char *name) 558789Sahrens { 559789Sahrens int err; 560789Sahrens objset_t *os; 561789Sahrens 5622199Sahrens err = dmu_objset_open(name, DMU_OST_ANY, 5632199Sahrens DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, &os); 564789Sahrens if (err == 0) { 565789Sahrens err = zil_suspend(dmu_objset_zil(os)); 566789Sahrens if (err == 0) 567789Sahrens zil_resume(dmu_objset_zil(os)); 568789Sahrens if (err == 0) { 569789Sahrens /* XXX uncache everything? */ 5702199Sahrens err = dsl_dataset_rollback(os->os->os_dsl_dataset); 571789Sahrens } 5722199Sahrens dmu_objset_close(os); 573789Sahrens } 574789Sahrens return (err); 575789Sahrens } 576789Sahrens 5772199Sahrens struct snaparg { 5782199Sahrens dsl_sync_task_group_t *dstg; 5792199Sahrens char *snapname; 5802199Sahrens char failed[MAXPATHLEN]; 5812199Sahrens }; 5822199Sahrens 5832199Sahrens static int 5842199Sahrens dmu_objset_snapshot_one(char *name, void *arg) 5852199Sahrens { 5862199Sahrens struct snaparg *sn = arg; 5872199Sahrens objset_t *os; 5882199Sahrens int err; 5892199Sahrens 5902199Sahrens (void) strcpy(sn->failed, name); 5912199Sahrens 5922199Sahrens err = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_STANDARD, &os); 5932199Sahrens if (err != 0) 5942199Sahrens return (err); 5952199Sahrens 5962199Sahrens /* 5972199Sahrens * NB: we need to wait for all in-flight changes to get to disk, 5982199Sahrens * so that we snapshot those changes. zil_suspend does this as 5992199Sahrens * a side effect. 6002199Sahrens */ 6012199Sahrens err = zil_suspend(dmu_objset_zil(os)); 6022199Sahrens if (err == 0) { 6032199Sahrens dsl_sync_task_create(sn->dstg, dsl_dataset_snapshot_check, 6042199Sahrens dsl_dataset_snapshot_sync, os, sn->snapname, 3); 6052199Sahrens } 6062199Sahrens return (err); 6072199Sahrens } 6082199Sahrens 6092199Sahrens int 6102199Sahrens dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive) 6112199Sahrens { 6122199Sahrens dsl_sync_task_t *dst; 6132199Sahrens struct snaparg sn = { 0 }; 6142199Sahrens char *cp; 6152199Sahrens spa_t *spa; 6162199Sahrens int err; 6172199Sahrens 6182199Sahrens (void) strcpy(sn.failed, fsname); 6192199Sahrens 6202199Sahrens cp = strchr(fsname, '/'); 6212199Sahrens if (cp) { 6222199Sahrens *cp = '\0'; 6232199Sahrens err = spa_open(fsname, &spa, FTAG); 6242199Sahrens *cp = '/'; 6252199Sahrens } else { 6262199Sahrens err = spa_open(fsname, &spa, FTAG); 6272199Sahrens } 6282199Sahrens if (err) 6292199Sahrens return (err); 6302199Sahrens 6312199Sahrens sn.dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 6322199Sahrens sn.snapname = snapname; 6332199Sahrens 6342417Sahrens if (recursive) { 6352417Sahrens err = dmu_objset_find(fsname, 6362417Sahrens dmu_objset_snapshot_one, &sn, DS_FIND_CHILDREN); 6372417Sahrens } else { 6382199Sahrens err = dmu_objset_snapshot_one(fsname, &sn); 6392417Sahrens } 6402199Sahrens 6412199Sahrens if (err) 6422199Sahrens goto out; 6432199Sahrens 6442199Sahrens err = dsl_sync_task_group_wait(sn.dstg); 6452199Sahrens 6462199Sahrens for (dst = list_head(&sn.dstg->dstg_tasks); dst; 6472199Sahrens dst = list_next(&sn.dstg->dstg_tasks, dst)) { 6482199Sahrens objset_t *os = dst->dst_arg1; 6492199Sahrens if (dst->dst_err) 6502199Sahrens dmu_objset_name(os, sn.failed); 6512199Sahrens zil_resume(dmu_objset_zil(os)); 6522199Sahrens dmu_objset_close(os); 6532199Sahrens } 6542199Sahrens out: 6552199Sahrens if (err) 6562199Sahrens (void) strcpy(fsname, sn.failed); 6572199Sahrens dsl_sync_task_group_destroy(sn.dstg); 6582199Sahrens spa_close(spa, FTAG); 6592199Sahrens return (err); 6602199Sahrens } 6612199Sahrens 662789Sahrens static void 663789Sahrens dmu_objset_sync_dnodes(objset_impl_t *os, list_t *list, dmu_tx_t *tx) 664789Sahrens { 665789Sahrens dnode_t *dn = list_head(list); 666789Sahrens int level, err; 667789Sahrens 668789Sahrens for (level = 0; dn = list_head(list); level++) { 669789Sahrens zio_t *zio; 670789Sahrens zio = zio_root(os->os_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); 671789Sahrens 672789Sahrens ASSERT3U(level, <=, DN_MAX_LEVELS); 673789Sahrens 674789Sahrens while (dn) { 675789Sahrens dnode_t *next = list_next(list, dn); 676789Sahrens 677789Sahrens list_remove(list, dn); 678789Sahrens if (dnode_sync(dn, level, zio, tx) == 0) { 679789Sahrens /* 680789Sahrens * This dnode requires syncing at higher 681789Sahrens * levels; put it back onto the list. 682789Sahrens */ 683789Sahrens if (next) 684789Sahrens list_insert_before(list, next, dn); 685789Sahrens else 686789Sahrens list_insert_tail(list, dn); 687789Sahrens } 688789Sahrens dn = next; 689789Sahrens } 690789Sahrens err = zio_wait(zio); 691789Sahrens ASSERT(err == 0); 692789Sahrens } 693789Sahrens } 694789Sahrens 695789Sahrens /* ARGSUSED */ 696789Sahrens static void 697789Sahrens killer(zio_t *zio, arc_buf_t *abuf, void *arg) 698789Sahrens { 699789Sahrens objset_impl_t *os = arg; 700789Sahrens objset_phys_t *osphys = zio->io_data; 701789Sahrens dnode_phys_t *dnp = &osphys->os_meta_dnode; 702789Sahrens int i; 703789Sahrens 704789Sahrens ASSERT3U(zio->io_error, ==, 0); 705789Sahrens 706789Sahrens /* 707789Sahrens * Update rootbp fill count. 708789Sahrens */ 709789Sahrens os->os_rootbp.blk_fill = 1; /* count the meta-dnode */ 710789Sahrens for (i = 0; i < dnp->dn_nblkptr; i++) 711789Sahrens os->os_rootbp.blk_fill += dnp->dn_blkptr[i].blk_fill; 712789Sahrens 713789Sahrens BP_SET_TYPE(zio->io_bp, DMU_OT_OBJSET); 714789Sahrens BP_SET_LEVEL(zio->io_bp, 0); 715789Sahrens 716789Sahrens if (!DVA_EQUAL(BP_IDENTITY(zio->io_bp), 717789Sahrens BP_IDENTITY(&zio->io_bp_orig))) { 718789Sahrens dsl_dataset_block_kill(os->os_dsl_dataset, &zio->io_bp_orig, 719789Sahrens os->os_synctx); 720789Sahrens dsl_dataset_block_born(os->os_dsl_dataset, zio->io_bp, 721789Sahrens os->os_synctx); 722789Sahrens } 723789Sahrens } 724789Sahrens 725789Sahrens /* called from dsl */ 726789Sahrens void 727789Sahrens dmu_objset_sync(objset_impl_t *os, dmu_tx_t *tx) 728789Sahrens { 729789Sahrens extern taskq_t *dbuf_tq; 730789Sahrens int txgoff; 731789Sahrens list_t *dirty_list; 732789Sahrens int err; 7331544Seschrock zbookmark_t zb; 734789Sahrens arc_buf_t *abuf = 735789Sahrens arc_buf_alloc(os->os_spa, sizeof (objset_phys_t), FTAG); 736789Sahrens 737789Sahrens ASSERT(dmu_tx_is_syncing(tx)); 738789Sahrens ASSERT(os->os_synctx == NULL); 739789Sahrens /* XXX the write_done callback should really give us the tx... */ 740789Sahrens os->os_synctx = tx; 741789Sahrens 742789Sahrens dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg); 743789Sahrens 744789Sahrens txgoff = tx->tx_txg & TXG_MASK; 745789Sahrens 746789Sahrens dmu_objset_sync_dnodes(os, &os->os_free_dnodes[txgoff], tx); 747789Sahrens dmu_objset_sync_dnodes(os, &os->os_dirty_dnodes[txgoff], tx); 748789Sahrens 749789Sahrens /* 750789Sahrens * Free intent log blocks up to this tx. 751789Sahrens */ 752789Sahrens zil_sync(os->os_zil, tx); 753789Sahrens 754789Sahrens /* 755789Sahrens * Sync meta-dnode 756789Sahrens */ 757789Sahrens dirty_list = &os->os_dirty_dnodes[txgoff]; 758789Sahrens ASSERT(list_head(dirty_list) == NULL); 759789Sahrens list_insert_tail(dirty_list, os->os_meta_dnode); 760789Sahrens dmu_objset_sync_dnodes(os, dirty_list, tx); 761789Sahrens 762789Sahrens /* 763789Sahrens * Sync the root block. 764789Sahrens */ 765789Sahrens bcopy(os->os_phys, abuf->b_data, sizeof (objset_phys_t)); 7661544Seschrock zb.zb_objset = os->os_dsl_dataset ? os->os_dsl_dataset->ds_object : 0; 7671544Seschrock zb.zb_object = 0; 7681544Seschrock zb.zb_level = -1; 7691544Seschrock zb.zb_blkid = 0; 770789Sahrens err = arc_write(NULL, os->os_spa, os->os_md_checksum, 7711775Sbillm os->os_md_compress, 7721775Sbillm dmu_get_replication_level(os->os_spa, &zb, DMU_OT_OBJSET), 7731775Sbillm tx->tx_txg, &os->os_rootbp, abuf, killer, os, 7741544Seschrock ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, ARC_WAIT, &zb); 775789Sahrens ASSERT(err == 0); 7761544Seschrock VERIFY(arc_buf_remove_ref(abuf, FTAG) == 1); 777789Sahrens 778789Sahrens dsl_dataset_set_blkptr(os->os_dsl_dataset, &os->os_rootbp, tx); 779789Sahrens 780789Sahrens ASSERT3P(os->os_synctx, ==, tx); 781789Sahrens taskq_wait(dbuf_tq); 782789Sahrens os->os_synctx = NULL; 783789Sahrens } 784789Sahrens 785789Sahrens void 786*2885Sahrens dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp, 787*2885Sahrens uint64_t *usedobjsp, uint64_t *availobjsp) 788*2885Sahrens { 789*2885Sahrens dsl_dataset_space(os->os->os_dsl_dataset, refdbytesp, availbytesp, 790*2885Sahrens usedobjsp, availobjsp); 791*2885Sahrens } 792*2885Sahrens 793*2885Sahrens uint64_t 794*2885Sahrens dmu_objset_fsid_guid(objset_t *os) 795*2885Sahrens { 796*2885Sahrens return (dsl_dataset_fsid_guid(os->os->os_dsl_dataset)); 797*2885Sahrens } 798*2885Sahrens 799*2885Sahrens void 800*2885Sahrens dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat) 801789Sahrens { 802*2885Sahrens stat->dds_type = os->os->os_phys->os_type; 803*2885Sahrens if (os->os->os_dsl_dataset) 804*2885Sahrens dsl_dataset_fast_stat(os->os->os_dsl_dataset, stat); 805*2885Sahrens } 806*2885Sahrens 807*2885Sahrens void 808*2885Sahrens dmu_objset_stats(objset_t *os, nvlist_t *nv) 809*2885Sahrens { 810*2885Sahrens ASSERT(os->os->os_dsl_dataset || 811*2885Sahrens os->os->os_phys->os_type == DMU_OST_META); 812*2885Sahrens 813*2885Sahrens if (os->os->os_dsl_dataset != NULL) 814*2885Sahrens dsl_dataset_stats(os->os->os_dsl_dataset, nv); 815*2885Sahrens 816*2885Sahrens dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_TYPE, 817*2885Sahrens os->os->os_phys->os_type); 818789Sahrens } 819789Sahrens 820789Sahrens int 821789Sahrens dmu_objset_is_snapshot(objset_t *os) 822789Sahrens { 823789Sahrens if (os->os->os_dsl_dataset != NULL) 824789Sahrens return (dsl_dataset_is_snapshot(os->os->os_dsl_dataset)); 825789Sahrens else 826789Sahrens return (B_FALSE); 827789Sahrens } 828789Sahrens 829789Sahrens int 830789Sahrens dmu_snapshot_list_next(objset_t *os, int namelen, char *name, 831885Sahrens uint64_t *idp, uint64_t *offp) 832789Sahrens { 833789Sahrens dsl_dataset_t *ds = os->os->os_dsl_dataset; 834789Sahrens zap_cursor_t cursor; 835789Sahrens zap_attribute_t attr; 836789Sahrens 837789Sahrens if (ds->ds_phys->ds_snapnames_zapobj == 0) 838789Sahrens return (ENOENT); 839789Sahrens 840789Sahrens zap_cursor_init_serialized(&cursor, 841789Sahrens ds->ds_dir->dd_pool->dp_meta_objset, 842789Sahrens ds->ds_phys->ds_snapnames_zapobj, *offp); 843789Sahrens 844885Sahrens if (zap_cursor_retrieve(&cursor, &attr) != 0) { 845885Sahrens zap_cursor_fini(&cursor); 846885Sahrens return (ENOENT); 847885Sahrens } 848885Sahrens 849885Sahrens if (strlen(attr.za_name) + 1 > namelen) { 850885Sahrens zap_cursor_fini(&cursor); 851885Sahrens return (ENAMETOOLONG); 852885Sahrens } 853885Sahrens 854885Sahrens (void) strcpy(name, attr.za_name); 855885Sahrens if (idp) 856885Sahrens *idp = attr.za_first_integer; 857885Sahrens zap_cursor_advance(&cursor); 858885Sahrens *offp = zap_cursor_serialize(&cursor); 859885Sahrens zap_cursor_fini(&cursor); 860885Sahrens 861885Sahrens return (0); 862885Sahrens } 863885Sahrens 864885Sahrens int 865885Sahrens dmu_dir_list_next(objset_t *os, int namelen, char *name, 866885Sahrens uint64_t *idp, uint64_t *offp) 867885Sahrens { 868885Sahrens dsl_dir_t *dd = os->os->os_dsl_dataset->ds_dir; 869885Sahrens zap_cursor_t cursor; 870885Sahrens zap_attribute_t attr; 871885Sahrens 872885Sahrens /* there is no next dir on a snapshot! */ 873885Sahrens if (os->os->os_dsl_dataset->ds_object != 874885Sahrens dd->dd_phys->dd_head_dataset_obj) 875885Sahrens return (ENOENT); 876885Sahrens 877885Sahrens zap_cursor_init_serialized(&cursor, 878885Sahrens dd->dd_pool->dp_meta_objset, 879885Sahrens dd->dd_phys->dd_child_dir_zapobj, *offp); 880885Sahrens 881885Sahrens if (zap_cursor_retrieve(&cursor, &attr) != 0) { 882885Sahrens zap_cursor_fini(&cursor); 883885Sahrens return (ENOENT); 884885Sahrens } 885885Sahrens 886885Sahrens if (strlen(attr.za_name) + 1 > namelen) { 887885Sahrens zap_cursor_fini(&cursor); 888789Sahrens return (ENAMETOOLONG); 889885Sahrens } 890789Sahrens 891789Sahrens (void) strcpy(name, attr.za_name); 892885Sahrens if (idp) 893885Sahrens *idp = attr.za_first_integer; 894789Sahrens zap_cursor_advance(&cursor); 895789Sahrens *offp = zap_cursor_serialize(&cursor); 896885Sahrens zap_cursor_fini(&cursor); 897789Sahrens 898789Sahrens return (0); 899789Sahrens } 900789Sahrens 901789Sahrens /* 902789Sahrens * Find all objsets under name, and for each, call 'func(child_name, arg)'. 903789Sahrens */ 9042199Sahrens int 9052199Sahrens dmu_objset_find(char *name, int func(char *, void *), void *arg, int flags) 906789Sahrens { 907789Sahrens dsl_dir_t *dd; 908789Sahrens objset_t *os; 909789Sahrens uint64_t snapobj; 910789Sahrens zap_cursor_t zc; 911789Sahrens zap_attribute_t attr; 912789Sahrens char *child; 9131544Seschrock int do_self, err; 914789Sahrens 9151544Seschrock err = dsl_dir_open(name, FTAG, &dd, NULL); 9161544Seschrock if (err) 9172199Sahrens return (err); 918789Sahrens 9192199Sahrens /* NB: the $MOS dir doesn't have a head dataset */ 920789Sahrens do_self = (dd->dd_phys->dd_head_dataset_obj != 0); 921789Sahrens 922789Sahrens /* 923789Sahrens * Iterate over all children. 924789Sahrens */ 9252417Sahrens if (flags & DS_FIND_CHILDREN) { 9262417Sahrens for (zap_cursor_init(&zc, dd->dd_pool->dp_meta_objset, 9272417Sahrens dd->dd_phys->dd_child_dir_zapobj); 9282417Sahrens zap_cursor_retrieve(&zc, &attr) == 0; 9292417Sahrens (void) zap_cursor_advance(&zc)) { 9302417Sahrens ASSERT(attr.za_integer_length == sizeof (uint64_t)); 9312417Sahrens ASSERT(attr.za_num_integers == 1); 932789Sahrens 9332417Sahrens /* 9342417Sahrens * No separating '/' because parent's name ends in /. 9352417Sahrens */ 9362417Sahrens child = kmem_alloc(MAXPATHLEN, KM_SLEEP); 9372417Sahrens /* XXX could probably just use name here */ 9382417Sahrens dsl_dir_name(dd, child); 9392417Sahrens (void) strcat(child, "/"); 9402417Sahrens (void) strcat(child, attr.za_name); 9412417Sahrens err = dmu_objset_find(child, func, arg, flags); 9422417Sahrens kmem_free(child, MAXPATHLEN); 9432417Sahrens if (err) 9442417Sahrens break; 9452417Sahrens } 9462417Sahrens zap_cursor_fini(&zc); 9472199Sahrens 9482417Sahrens if (err) { 9492417Sahrens dsl_dir_close(dd, FTAG); 9502417Sahrens return (err); 9512417Sahrens } 952789Sahrens } 953789Sahrens 954789Sahrens /* 955789Sahrens * Iterate over all snapshots. 956789Sahrens */ 957789Sahrens if ((flags & DS_FIND_SNAPSHOTS) && 958789Sahrens dmu_objset_open(name, DMU_OST_ANY, 959789Sahrens DS_MODE_STANDARD | DS_MODE_READONLY, &os) == 0) { 960789Sahrens 961789Sahrens snapobj = os->os->os_dsl_dataset->ds_phys->ds_snapnames_zapobj; 962789Sahrens dmu_objset_close(os); 963789Sahrens 964789Sahrens for (zap_cursor_init(&zc, dd->dd_pool->dp_meta_objset, snapobj); 965789Sahrens zap_cursor_retrieve(&zc, &attr) == 0; 966789Sahrens (void) zap_cursor_advance(&zc)) { 967789Sahrens ASSERT(attr.za_integer_length == sizeof (uint64_t)); 968789Sahrens ASSERT(attr.za_num_integers == 1); 969789Sahrens 970789Sahrens child = kmem_alloc(MAXPATHLEN, KM_SLEEP); 971789Sahrens /* XXX could probably just use name here */ 972789Sahrens dsl_dir_name(dd, child); 973789Sahrens (void) strcat(child, "@"); 974789Sahrens (void) strcat(child, attr.za_name); 9752199Sahrens err = func(child, arg); 976789Sahrens kmem_free(child, MAXPATHLEN); 9772199Sahrens if (err) 9782199Sahrens break; 979789Sahrens } 980885Sahrens zap_cursor_fini(&zc); 981789Sahrens } 982789Sahrens 983789Sahrens dsl_dir_close(dd, FTAG); 984789Sahrens 9852199Sahrens if (err) 9862199Sahrens return (err); 9872199Sahrens 988789Sahrens /* 989789Sahrens * Apply to self if appropriate. 990789Sahrens */ 991789Sahrens if (do_self) 9922199Sahrens err = func(name, arg); 9932199Sahrens return (err); 994789Sahrens } 995