1789Sahrens /* 2789Sahrens * CDDL HEADER START 3789Sahrens * 4789Sahrens * The contents of this file are subject to the terms of the 51544Seschrock * Common Development and Distribution License (the "License"). 61544Seschrock * You may not use this file except in compliance with the License. 7789Sahrens * 8789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9789Sahrens * or http://www.opensolaris.org/os/licensing. 10789Sahrens * See the License for the specific language governing permissions 11789Sahrens * and limitations under the License. 12789Sahrens * 13789Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15789Sahrens * If applicable, add the following below this CDDL HEADER, with the 16789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18789Sahrens * 19789Sahrens * CDDL HEADER END 20789Sahrens */ 21789Sahrens /* 221544Seschrock * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23789Sahrens * Use is subject to license terms. 24789Sahrens */ 25789Sahrens 26789Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 27789Sahrens 28789Sahrens #include <sys/zfs_context.h> 29789Sahrens #include <sys/dmu_objset.h> 30789Sahrens #include <sys/dsl_dir.h> 31789Sahrens #include <sys/dsl_dataset.h> 32789Sahrens #include <sys/dsl_prop.h> 33789Sahrens #include <sys/dsl_pool.h> 342199Sahrens #include <sys/dsl_synctask.h> 35789Sahrens #include <sys/dnode.h> 36789Sahrens #include <sys/dbuf.h> 372885Sahrens #include <sys/zvol.h> 38789Sahrens #include <sys/dmu_tx.h> 39789Sahrens #include <sys/zio_checksum.h> 40789Sahrens #include <sys/zap.h> 41789Sahrens #include <sys/zil.h> 42789Sahrens #include <sys/dmu_impl.h> 43789Sahrens 44789Sahrens 45789Sahrens spa_t * 46789Sahrens dmu_objset_spa(objset_t *os) 47789Sahrens { 48789Sahrens return (os->os->os_spa); 49789Sahrens } 50789Sahrens 51789Sahrens zilog_t * 52789Sahrens dmu_objset_zil(objset_t *os) 53789Sahrens { 54789Sahrens return (os->os->os_zil); 55789Sahrens } 56789Sahrens 57789Sahrens dsl_pool_t * 58789Sahrens dmu_objset_pool(objset_t *os) 59789Sahrens { 60789Sahrens dsl_dataset_t *ds; 61789Sahrens 62789Sahrens if ((ds = os->os->os_dsl_dataset) != NULL && ds->ds_dir) 63789Sahrens return (ds->ds_dir->dd_pool); 64789Sahrens else 65789Sahrens return (spa_get_dsl(os->os->os_spa)); 66789Sahrens } 67789Sahrens 68789Sahrens dsl_dataset_t * 69789Sahrens dmu_objset_ds(objset_t *os) 70789Sahrens { 71789Sahrens return (os->os->os_dsl_dataset); 72789Sahrens } 73789Sahrens 74789Sahrens dmu_objset_type_t 75789Sahrens dmu_objset_type(objset_t *os) 76789Sahrens { 77789Sahrens return (os->os->os_phys->os_type); 78789Sahrens } 79789Sahrens 80789Sahrens void 81789Sahrens dmu_objset_name(objset_t *os, char *buf) 82789Sahrens { 83789Sahrens dsl_dataset_name(os->os->os_dsl_dataset, buf); 84789Sahrens } 85789Sahrens 86789Sahrens uint64_t 87789Sahrens dmu_objset_id(objset_t *os) 88789Sahrens { 89789Sahrens dsl_dataset_t *ds = os->os->os_dsl_dataset; 90789Sahrens 91789Sahrens return (ds ? ds->ds_object : 0); 92789Sahrens } 93789Sahrens 94789Sahrens static void 95789Sahrens checksum_changed_cb(void *arg, uint64_t newval) 96789Sahrens { 97789Sahrens objset_impl_t *osi = arg; 98789Sahrens 99789Sahrens /* 100789Sahrens * Inheritance should have been done by now. 101789Sahrens */ 102789Sahrens ASSERT(newval != ZIO_CHECKSUM_INHERIT); 103789Sahrens 104789Sahrens osi->os_checksum = zio_checksum_select(newval, ZIO_CHECKSUM_ON_VALUE); 105789Sahrens } 106789Sahrens 107789Sahrens static void 108789Sahrens compression_changed_cb(void *arg, uint64_t newval) 109789Sahrens { 110789Sahrens objset_impl_t *osi = arg; 111789Sahrens 112789Sahrens /* 113789Sahrens * Inheritance and range checking should have been done by now. 114789Sahrens */ 115789Sahrens ASSERT(newval != ZIO_COMPRESS_INHERIT); 116789Sahrens 117789Sahrens osi->os_compress = zio_compress_select(newval, ZIO_COMPRESS_ON_VALUE); 118789Sahrens } 119789Sahrens 120789Sahrens void 121789Sahrens dmu_objset_byteswap(void *buf, size_t size) 122789Sahrens { 123789Sahrens objset_phys_t *osp = buf; 124789Sahrens 125789Sahrens ASSERT(size == sizeof (objset_phys_t)); 126789Sahrens dnode_byteswap(&osp->os_meta_dnode); 127789Sahrens byteswap_uint64_array(&osp->os_zil_header, sizeof (zil_header_t)); 128789Sahrens osp->os_type = BSWAP_64(osp->os_type); 129789Sahrens } 130789Sahrens 1311544Seschrock int 1321544Seschrock dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, 1331544Seschrock objset_impl_t **osip) 134789Sahrens { 135789Sahrens objset_impl_t *winner, *osi; 136789Sahrens int i, err, checksum; 137789Sahrens 138789Sahrens osi = kmem_zalloc(sizeof (objset_impl_t), KM_SLEEP); 139789Sahrens osi->os.os = osi; 140789Sahrens osi->os_dsl_dataset = ds; 141789Sahrens osi->os_spa = spa; 142789Sahrens if (bp) 143789Sahrens osi->os_rootbp = *bp; 144789Sahrens osi->os_phys = zio_buf_alloc(sizeof (objset_phys_t)); 145789Sahrens if (!BP_IS_HOLE(&osi->os_rootbp)) { 1462391Smaybee uint32_t aflags = ARC_WAIT; 1471544Seschrock zbookmark_t zb; 1481544Seschrock zb.zb_objset = ds ? ds->ds_object : 0; 1491544Seschrock zb.zb_object = 0; 1501544Seschrock zb.zb_level = -1; 1511544Seschrock zb.zb_blkid = 0; 1521544Seschrock 153789Sahrens dprintf_bp(&osi->os_rootbp, "reading %s", ""); 1541544Seschrock err = arc_read(NULL, spa, &osi->os_rootbp, 155789Sahrens dmu_ot[DMU_OT_OBJSET].ot_byteswap, 156789Sahrens arc_bcopy_func, osi->os_phys, 1572391Smaybee ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb); 1581544Seschrock if (err) { 1591544Seschrock zio_buf_free(osi->os_phys, sizeof (objset_phys_t)); 1601544Seschrock kmem_free(osi, sizeof (objset_impl_t)); 1611544Seschrock return (err); 1621544Seschrock } 163789Sahrens } else { 164789Sahrens bzero(osi->os_phys, sizeof (objset_phys_t)); 165789Sahrens } 166789Sahrens 167789Sahrens /* 168789Sahrens * Note: the changed_cb will be called once before the register 169789Sahrens * func returns, thus changing the checksum/compression from the 1702082Seschrock * default (fletcher2/off). Snapshots don't need to know, and 1712082Seschrock * registering would complicate clone promotion. 172789Sahrens */ 1732082Seschrock if (ds && ds->ds_phys->ds_num_children == 0) { 174789Sahrens err = dsl_prop_register(ds, "checksum", 175789Sahrens checksum_changed_cb, osi); 1761544Seschrock if (err == 0) 1771544Seschrock err = dsl_prop_register(ds, "compression", 1781544Seschrock compression_changed_cb, osi); 1791544Seschrock if (err) { 1801544Seschrock zio_buf_free(osi->os_phys, sizeof (objset_phys_t)); 1811544Seschrock kmem_free(osi, sizeof (objset_impl_t)); 1821544Seschrock return (err); 1831544Seschrock } 1842082Seschrock } else if (ds == NULL) { 185789Sahrens /* It's the meta-objset. */ 186789Sahrens osi->os_checksum = ZIO_CHECKSUM_FLETCHER_4; 1871544Seschrock osi->os_compress = ZIO_COMPRESS_LZJB; 188789Sahrens } 189789Sahrens 1901544Seschrock osi->os_zil = zil_alloc(&osi->os, &osi->os_phys->os_zil_header); 1911544Seschrock 192789Sahrens /* 193789Sahrens * Metadata always gets compressed and checksummed. 194789Sahrens * If the data checksum is multi-bit correctable, and it's not 195789Sahrens * a ZBT-style checksum, then it's suitable for metadata as well. 196789Sahrens * Otherwise, the metadata checksum defaults to fletcher4. 197789Sahrens */ 198789Sahrens checksum = osi->os_checksum; 199789Sahrens 200789Sahrens if (zio_checksum_table[checksum].ci_correctable && 201789Sahrens !zio_checksum_table[checksum].ci_zbt) 202789Sahrens osi->os_md_checksum = checksum; 203789Sahrens else 204789Sahrens osi->os_md_checksum = ZIO_CHECKSUM_FLETCHER_4; 2051544Seschrock osi->os_md_compress = ZIO_COMPRESS_LZJB; 206789Sahrens 207789Sahrens for (i = 0; i < TXG_SIZE; i++) { 208789Sahrens list_create(&osi->os_dirty_dnodes[i], sizeof (dnode_t), 209789Sahrens offsetof(dnode_t, dn_dirty_link[i])); 210789Sahrens list_create(&osi->os_free_dnodes[i], sizeof (dnode_t), 211789Sahrens offsetof(dnode_t, dn_dirty_link[i])); 212789Sahrens } 213789Sahrens list_create(&osi->os_dnodes, sizeof (dnode_t), 214789Sahrens offsetof(dnode_t, dn_link)); 215789Sahrens list_create(&osi->os_downgraded_dbufs, sizeof (dmu_buf_impl_t), 216789Sahrens offsetof(dmu_buf_impl_t, db_link)); 217789Sahrens 2182856Snd150628 mutex_init(&osi->os_lock, NULL, MUTEX_DEFAULT, NULL); 2192856Snd150628 mutex_init(&osi->os_obj_lock, NULL, MUTEX_DEFAULT, NULL); 2202856Snd150628 221789Sahrens osi->os_meta_dnode = dnode_special_open(osi, 222789Sahrens &osi->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT); 223789Sahrens 224789Sahrens if (ds != NULL) { 225789Sahrens winner = dsl_dataset_set_user_ptr(ds, osi, dmu_objset_evict); 226789Sahrens if (winner) { 227789Sahrens dmu_objset_evict(ds, osi); 228789Sahrens osi = winner; 229789Sahrens } 230789Sahrens } 231789Sahrens 2321544Seschrock *osip = osi; 2331544Seschrock return (0); 234789Sahrens } 235789Sahrens 236789Sahrens /* called from zpl */ 237789Sahrens int 238789Sahrens dmu_objset_open(const char *name, dmu_objset_type_t type, int mode, 239789Sahrens objset_t **osp) 240789Sahrens { 241789Sahrens dsl_dataset_t *ds; 242789Sahrens int err; 243789Sahrens objset_t *os; 244789Sahrens objset_impl_t *osi; 245789Sahrens 246789Sahrens os = kmem_alloc(sizeof (objset_t), KM_SLEEP); 247789Sahrens err = dsl_dataset_open(name, mode, os, &ds); 248789Sahrens if (err) { 249789Sahrens kmem_free(os, sizeof (objset_t)); 250789Sahrens return (err); 251789Sahrens } 252789Sahrens 253789Sahrens osi = dsl_dataset_get_user_ptr(ds); 254789Sahrens if (osi == NULL) { 255789Sahrens blkptr_t bp; 256789Sahrens 257789Sahrens dsl_dataset_get_blkptr(ds, &bp); 2581544Seschrock err = dmu_objset_open_impl(dsl_dataset_get_spa(ds), 2591544Seschrock ds, &bp, &osi); 2601544Seschrock if (err) { 2611544Seschrock dsl_dataset_close(ds, mode, os); 2621544Seschrock kmem_free(os, sizeof (objset_t)); 2631544Seschrock return (err); 2641544Seschrock } 265789Sahrens } 266789Sahrens 267789Sahrens os->os = osi; 268789Sahrens os->os_mode = mode; 269789Sahrens 270789Sahrens if (type != DMU_OST_ANY && type != os->os->os_phys->os_type) { 271789Sahrens dmu_objset_close(os); 272789Sahrens return (EINVAL); 273789Sahrens } 274789Sahrens *osp = os; 275789Sahrens return (0); 276789Sahrens } 277789Sahrens 278789Sahrens void 279789Sahrens dmu_objset_close(objset_t *os) 280789Sahrens { 281789Sahrens dsl_dataset_close(os->os->os_dsl_dataset, os->os_mode, os); 282789Sahrens kmem_free(os, sizeof (objset_t)); 283789Sahrens } 284789Sahrens 2851646Sperrin int 2861646Sperrin dmu_objset_evict_dbufs(objset_t *os, int try) 2871544Seschrock { 2881544Seschrock objset_impl_t *osi = os->os; 2891544Seschrock dnode_t *dn; 2901596Sahrens 2911596Sahrens mutex_enter(&osi->os_lock); 2921596Sahrens 2931596Sahrens /* process the mdn last, since the other dnodes have holds on it */ 2941596Sahrens list_remove(&osi->os_dnodes, osi->os_meta_dnode); 2951596Sahrens list_insert_tail(&osi->os_dnodes, osi->os_meta_dnode); 2961544Seschrock 2971544Seschrock /* 2981596Sahrens * Find the first dnode with holds. We have to do this dance 2991596Sahrens * because dnode_add_ref() only works if you already have a 3001596Sahrens * hold. If there are no holds then it has no dbufs so OK to 3011596Sahrens * skip. 3021544Seschrock */ 3031596Sahrens for (dn = list_head(&osi->os_dnodes); 3041596Sahrens dn && refcount_is_zero(&dn->dn_holds); 3051596Sahrens dn = list_next(&osi->os_dnodes, dn)) 3061596Sahrens continue; 3071596Sahrens if (dn) 3081596Sahrens dnode_add_ref(dn, FTAG); 3091596Sahrens 3101596Sahrens while (dn) { 3111596Sahrens dnode_t *next_dn = dn; 3121596Sahrens 3131596Sahrens do { 3141596Sahrens next_dn = list_next(&osi->os_dnodes, next_dn); 3151596Sahrens } while (next_dn && refcount_is_zero(&next_dn->dn_holds)); 3161596Sahrens if (next_dn) 3171596Sahrens dnode_add_ref(next_dn, FTAG); 3181596Sahrens 3191596Sahrens mutex_exit(&osi->os_lock); 3201646Sperrin if (dnode_evict_dbufs(dn, try)) { 3211646Sperrin dnode_rele(dn, FTAG); 3221646Sperrin if (next_dn) 3231646Sperrin dnode_rele(next_dn, FTAG); 3241646Sperrin return (1); 3251646Sperrin } 3261596Sahrens dnode_rele(dn, FTAG); 3271596Sahrens mutex_enter(&osi->os_lock); 3281596Sahrens dn = next_dn; 3291544Seschrock } 3301544Seschrock mutex_exit(&osi->os_lock); 3311646Sperrin return (0); 3321544Seschrock } 3331544Seschrock 3341544Seschrock void 335789Sahrens dmu_objset_evict(dsl_dataset_t *ds, void *arg) 336789Sahrens { 337789Sahrens objset_impl_t *osi = arg; 3381544Seschrock objset_t os; 3392082Seschrock int i; 340789Sahrens 341789Sahrens for (i = 0; i < TXG_SIZE; i++) { 342789Sahrens ASSERT(list_head(&osi->os_dirty_dnodes[i]) == NULL); 343789Sahrens ASSERT(list_head(&osi->os_free_dnodes[i]) == NULL); 344789Sahrens } 345789Sahrens 3462082Seschrock if (ds && ds->ds_phys->ds_num_children == 0) { 3472082Seschrock VERIFY(0 == dsl_prop_unregister(ds, "checksum", 3482082Seschrock checksum_changed_cb, osi)); 3492082Seschrock VERIFY(0 == dsl_prop_unregister(ds, "compression", 3502082Seschrock compression_changed_cb, osi)); 351789Sahrens } 352789Sahrens 3531544Seschrock /* 3541544Seschrock * We should need only a single pass over the dnode list, since 3551544Seschrock * nothing can be added to the list at this point. 3561544Seschrock */ 3571544Seschrock os.os = osi; 3581646Sperrin (void) dmu_objset_evict_dbufs(&os, 0); 3591544Seschrock 360789Sahrens ASSERT3P(list_head(&osi->os_dnodes), ==, osi->os_meta_dnode); 361789Sahrens ASSERT3P(list_tail(&osi->os_dnodes), ==, osi->os_meta_dnode); 362789Sahrens ASSERT3P(list_head(&osi->os_meta_dnode->dn_dbufs), ==, NULL); 363789Sahrens 364789Sahrens dnode_special_close(osi->os_meta_dnode); 365789Sahrens zil_free(osi->os_zil); 366789Sahrens 367789Sahrens zio_buf_free(osi->os_phys, sizeof (objset_phys_t)); 3682856Snd150628 mutex_destroy(&osi->os_lock); 3692856Snd150628 mutex_destroy(&osi->os_obj_lock); 370789Sahrens kmem_free(osi, sizeof (objset_impl_t)); 371789Sahrens } 372789Sahrens 373789Sahrens /* called from dsl for meta-objset */ 374789Sahrens objset_impl_t * 375789Sahrens dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, dmu_objset_type_t type, 376789Sahrens dmu_tx_t *tx) 377789Sahrens { 378789Sahrens objset_impl_t *osi; 379789Sahrens dnode_t *mdn; 380789Sahrens 381789Sahrens ASSERT(dmu_tx_is_syncing(tx)); 3821544Seschrock VERIFY(0 == dmu_objset_open_impl(spa, ds, NULL, &osi)); 383789Sahrens mdn = osi->os_meta_dnode; 384789Sahrens 385789Sahrens dnode_allocate(mdn, DMU_OT_DNODE, 1 << DNODE_BLOCK_SHIFT, 386789Sahrens DN_MAX_INDBLKSHIFT, DMU_OT_NONE, 0, tx); 387789Sahrens 388789Sahrens /* 389789Sahrens * We don't want to have to increase the meta-dnode's nlevels 390789Sahrens * later, because then we could do it in quescing context while 391789Sahrens * we are also accessing it in open context. 392789Sahrens * 393789Sahrens * This precaution is not necessary for the MOS (ds == NULL), 394789Sahrens * because the MOS is only updated in syncing context. 395789Sahrens * This is most fortunate: the MOS is the only objset that 396789Sahrens * needs to be synced multiple times as spa_sync() iterates 397789Sahrens * to convergence, so minimizing its dn_nlevels matters. 398789Sahrens */ 3991544Seschrock if (ds != NULL) { 4001544Seschrock int levels = 1; 4011544Seschrock 4021544Seschrock /* 4031544Seschrock * Determine the number of levels necessary for the meta-dnode 4041544Seschrock * to contain DN_MAX_OBJECT dnodes. 4051544Seschrock */ 4061544Seschrock while ((uint64_t)mdn->dn_nblkptr << (mdn->dn_datablkshift + 4071544Seschrock (levels - 1) * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) < 4081544Seschrock DN_MAX_OBJECT * sizeof (dnode_phys_t)) 4091544Seschrock levels++; 4101544Seschrock 411789Sahrens mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] = 4121544Seschrock mdn->dn_nlevels = levels; 4131544Seschrock } 414789Sahrens 415789Sahrens ASSERT(type != DMU_OST_NONE); 416789Sahrens ASSERT(type != DMU_OST_ANY); 417789Sahrens ASSERT(type < DMU_OST_NUMTYPES); 418789Sahrens osi->os_phys->os_type = type; 419789Sahrens 420789Sahrens dsl_dataset_dirty(ds, tx); 421789Sahrens 422789Sahrens return (osi); 423789Sahrens } 424789Sahrens 425789Sahrens struct oscarg { 426789Sahrens void (*userfunc)(objset_t *os, void *arg, dmu_tx_t *tx); 427789Sahrens void *userarg; 428789Sahrens dsl_dataset_t *clone_parent; 429789Sahrens const char *lastname; 430789Sahrens dmu_objset_type_t type; 431789Sahrens }; 432789Sahrens 4332199Sahrens /* ARGSUSED */ 434789Sahrens static int 4352199Sahrens dmu_objset_create_check(void *arg1, void *arg2, dmu_tx_t *tx) 436789Sahrens { 4372199Sahrens dsl_dir_t *dd = arg1; 4382199Sahrens struct oscarg *oa = arg2; 4392199Sahrens objset_t *mos = dd->dd_pool->dp_meta_objset; 4402199Sahrens int err; 4412199Sahrens uint64_t ddobj; 4422199Sahrens 4432199Sahrens err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj, 4442199Sahrens oa->lastname, sizeof (uint64_t), 1, &ddobj); 4452199Sahrens if (err != ENOENT) 4462199Sahrens return (err ? err : EEXIST); 4472199Sahrens 4482199Sahrens if (oa->clone_parent != NULL) { 4492199Sahrens /* 4502199Sahrens * You can't clone across pools. 4512199Sahrens */ 4522199Sahrens if (oa->clone_parent->ds_dir->dd_pool != dd->dd_pool) 4532199Sahrens return (EXDEV); 4542199Sahrens 4552199Sahrens /* 4562199Sahrens * You can only clone snapshots, not the head datasets. 4572199Sahrens */ 4582199Sahrens if (oa->clone_parent->ds_phys->ds_num_children == 0) 4592199Sahrens return (EINVAL); 4602199Sahrens } 4612199Sahrens return (0); 4622199Sahrens } 4632199Sahrens 4642199Sahrens static void 4652199Sahrens dmu_objset_create_sync(void *arg1, void *arg2, dmu_tx_t *tx) 4662199Sahrens { 4672199Sahrens dsl_dir_t *dd = arg1; 4682199Sahrens struct oscarg *oa = arg2; 469789Sahrens dsl_dataset_t *ds; 470789Sahrens blkptr_t bp; 4712199Sahrens uint64_t dsobj; 472789Sahrens 473789Sahrens ASSERT(dmu_tx_is_syncing(tx)); 474789Sahrens 4752199Sahrens dsobj = dsl_dataset_create_sync(dd, oa->lastname, 476789Sahrens oa->clone_parent, tx); 477789Sahrens 4782199Sahrens VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, dsobj, NULL, 4791544Seschrock DS_MODE_STANDARD | DS_MODE_READONLY, FTAG, &ds)); 480789Sahrens dsl_dataset_get_blkptr(ds, &bp); 481789Sahrens if (BP_IS_HOLE(&bp)) { 482789Sahrens objset_impl_t *osi; 483789Sahrens 484789Sahrens /* This is an empty dmu_objset; not a clone. */ 485789Sahrens osi = dmu_objset_create_impl(dsl_dataset_get_spa(ds), 486789Sahrens ds, oa->type, tx); 487789Sahrens 488789Sahrens if (oa->userfunc) 489789Sahrens oa->userfunc(&osi->os, oa->userarg, tx); 490789Sahrens } 491789Sahrens dsl_dataset_close(ds, DS_MODE_STANDARD | DS_MODE_READONLY, FTAG); 492789Sahrens } 493789Sahrens 494789Sahrens int 495789Sahrens dmu_objset_create(const char *name, dmu_objset_type_t type, 496789Sahrens objset_t *clone_parent, 497789Sahrens void (*func)(objset_t *os, void *arg, dmu_tx_t *tx), void *arg) 498789Sahrens { 4992199Sahrens dsl_dir_t *pdd; 500789Sahrens const char *tail; 501789Sahrens int err = 0; 5022199Sahrens struct oscarg oa = { 0 }; 503789Sahrens 5042199Sahrens ASSERT(strchr(name, '@') == NULL); 5052199Sahrens err = dsl_dir_open(name, FTAG, &pdd, &tail); 5061544Seschrock if (err) 5071544Seschrock return (err); 508789Sahrens if (tail == NULL) { 5092199Sahrens dsl_dir_close(pdd, FTAG); 510789Sahrens return (EEXIST); 511789Sahrens } 512789Sahrens 513789Sahrens dprintf("name=%s\n", name); 514789Sahrens 5152199Sahrens oa.userfunc = func; 5162199Sahrens oa.userarg = arg; 5172199Sahrens oa.lastname = tail; 5182199Sahrens oa.type = type; 5192199Sahrens if (clone_parent != NULL) { 520789Sahrens /* 5212199Sahrens * You can't clone to a different type. 522789Sahrens */ 5232199Sahrens if (clone_parent->os->os_phys->os_type != type) { 5242199Sahrens dsl_dir_close(pdd, FTAG); 5252199Sahrens return (EINVAL); 526789Sahrens } 5272199Sahrens oa.clone_parent = clone_parent->os->os_dsl_dataset; 528789Sahrens } 5292199Sahrens err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check, 5302199Sahrens dmu_objset_create_sync, pdd, &oa, 5); 5312199Sahrens dsl_dir_close(pdd, FTAG); 532789Sahrens return (err); 533789Sahrens } 534789Sahrens 535789Sahrens int 536789Sahrens dmu_objset_destroy(const char *name) 537789Sahrens { 538789Sahrens objset_t *os; 539789Sahrens int error; 540789Sahrens 541789Sahrens /* 542789Sahrens * If it looks like we'll be able to destroy it, and there's 543789Sahrens * an unplayed replay log sitting around, destroy the log. 544789Sahrens * It would be nicer to do this in dsl_dataset_destroy_sync(), 545789Sahrens * but the replay log objset is modified in open context. 546789Sahrens */ 547789Sahrens error = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_EXCLUSIVE, &os); 548789Sahrens if (error == 0) { 5491807Sbonwick zil_destroy(dmu_objset_zil(os), B_FALSE); 550789Sahrens dmu_objset_close(os); 551789Sahrens } 552789Sahrens 553789Sahrens return (dsl_dataset_destroy(name)); 554789Sahrens } 555789Sahrens 556789Sahrens int 557789Sahrens dmu_objset_rollback(const char *name) 558789Sahrens { 559789Sahrens int err; 560789Sahrens objset_t *os; 561789Sahrens 5622199Sahrens err = dmu_objset_open(name, DMU_OST_ANY, 5632199Sahrens DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, &os); 564789Sahrens if (err == 0) { 565789Sahrens err = zil_suspend(dmu_objset_zil(os)); 566789Sahrens if (err == 0) 567789Sahrens zil_resume(dmu_objset_zil(os)); 568789Sahrens if (err == 0) { 569789Sahrens /* XXX uncache everything? */ 5702199Sahrens err = dsl_dataset_rollback(os->os->os_dsl_dataset); 571789Sahrens } 5722199Sahrens dmu_objset_close(os); 573789Sahrens } 574789Sahrens return (err); 575789Sahrens } 576789Sahrens 5772199Sahrens struct snaparg { 5782199Sahrens dsl_sync_task_group_t *dstg; 5792199Sahrens char *snapname; 5802199Sahrens char failed[MAXPATHLEN]; 5812199Sahrens }; 5822199Sahrens 5832199Sahrens static int 5842199Sahrens dmu_objset_snapshot_one(char *name, void *arg) 5852199Sahrens { 5862199Sahrens struct snaparg *sn = arg; 5872199Sahrens objset_t *os; 5882199Sahrens int err; 5892199Sahrens 5902199Sahrens (void) strcpy(sn->failed, name); 5912199Sahrens 5922199Sahrens err = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_STANDARD, &os); 5932199Sahrens if (err != 0) 5942199Sahrens return (err); 5952199Sahrens 5962199Sahrens /* 5972199Sahrens * NB: we need to wait for all in-flight changes to get to disk, 5982199Sahrens * so that we snapshot those changes. zil_suspend does this as 5992199Sahrens * a side effect. 6002199Sahrens */ 6012199Sahrens err = zil_suspend(dmu_objset_zil(os)); 6022199Sahrens if (err == 0) { 6032199Sahrens dsl_sync_task_create(sn->dstg, dsl_dataset_snapshot_check, 6042199Sahrens dsl_dataset_snapshot_sync, os, sn->snapname, 3); 6052199Sahrens } 6062199Sahrens return (err); 6072199Sahrens } 6082199Sahrens 6092199Sahrens int 6102199Sahrens dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive) 6112199Sahrens { 6122199Sahrens dsl_sync_task_t *dst; 6132199Sahrens struct snaparg sn = { 0 }; 6142199Sahrens char *cp; 6152199Sahrens spa_t *spa; 6162199Sahrens int err; 6172199Sahrens 6182199Sahrens (void) strcpy(sn.failed, fsname); 6192199Sahrens 6202199Sahrens cp = strchr(fsname, '/'); 6212199Sahrens if (cp) { 6222199Sahrens *cp = '\0'; 6232199Sahrens err = spa_open(fsname, &spa, FTAG); 6242199Sahrens *cp = '/'; 6252199Sahrens } else { 6262199Sahrens err = spa_open(fsname, &spa, FTAG); 6272199Sahrens } 6282199Sahrens if (err) 6292199Sahrens return (err); 6302199Sahrens 6312199Sahrens sn.dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 6322199Sahrens sn.snapname = snapname; 6332199Sahrens 6342417Sahrens if (recursive) { 6352417Sahrens err = dmu_objset_find(fsname, 6362417Sahrens dmu_objset_snapshot_one, &sn, DS_FIND_CHILDREN); 6372417Sahrens } else { 6382199Sahrens err = dmu_objset_snapshot_one(fsname, &sn); 6392417Sahrens } 6402199Sahrens 6412199Sahrens if (err) 6422199Sahrens goto out; 6432199Sahrens 6442199Sahrens err = dsl_sync_task_group_wait(sn.dstg); 6452199Sahrens 6462199Sahrens for (dst = list_head(&sn.dstg->dstg_tasks); dst; 6472199Sahrens dst = list_next(&sn.dstg->dstg_tasks, dst)) { 6482199Sahrens objset_t *os = dst->dst_arg1; 6492199Sahrens if (dst->dst_err) 6502199Sahrens dmu_objset_name(os, sn.failed); 6512199Sahrens zil_resume(dmu_objset_zil(os)); 6522199Sahrens dmu_objset_close(os); 6532199Sahrens } 6542199Sahrens out: 6552199Sahrens if (err) 6562199Sahrens (void) strcpy(fsname, sn.failed); 6572199Sahrens dsl_sync_task_group_destroy(sn.dstg); 6582199Sahrens spa_close(spa, FTAG); 6592199Sahrens return (err); 6602199Sahrens } 6612199Sahrens 662789Sahrens static void 663789Sahrens dmu_objset_sync_dnodes(objset_impl_t *os, list_t *list, dmu_tx_t *tx) 664789Sahrens { 665789Sahrens dnode_t *dn = list_head(list); 666789Sahrens int level, err; 667789Sahrens 668789Sahrens for (level = 0; dn = list_head(list); level++) { 669789Sahrens zio_t *zio; 670789Sahrens zio = zio_root(os->os_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); 671789Sahrens 672789Sahrens ASSERT3U(level, <=, DN_MAX_LEVELS); 673789Sahrens 674789Sahrens while (dn) { 675789Sahrens dnode_t *next = list_next(list, dn); 676789Sahrens 677789Sahrens list_remove(list, dn); 678789Sahrens if (dnode_sync(dn, level, zio, tx) == 0) { 679789Sahrens /* 680789Sahrens * This dnode requires syncing at higher 681789Sahrens * levels; put it back onto the list. 682789Sahrens */ 683789Sahrens if (next) 684789Sahrens list_insert_before(list, next, dn); 685789Sahrens else 686789Sahrens list_insert_tail(list, dn); 687789Sahrens } 688789Sahrens dn = next; 689789Sahrens } 690*2981Sahrens 691*2981Sahrens DTRACE_PROBE1(wait__begin, zio_t *, zio); 692789Sahrens err = zio_wait(zio); 693*2981Sahrens DTRACE_PROBE4(wait__end, zio_t *, zio, 694*2981Sahrens uint64_t, tx->tx_txg, objset_impl_t *, os, int, level); 695*2981Sahrens 696789Sahrens ASSERT(err == 0); 697789Sahrens } 698789Sahrens } 699789Sahrens 700789Sahrens /* ARGSUSED */ 701789Sahrens static void 702789Sahrens killer(zio_t *zio, arc_buf_t *abuf, void *arg) 703789Sahrens { 704789Sahrens objset_impl_t *os = arg; 705789Sahrens objset_phys_t *osphys = zio->io_data; 706789Sahrens dnode_phys_t *dnp = &osphys->os_meta_dnode; 707789Sahrens int i; 708789Sahrens 709789Sahrens ASSERT3U(zio->io_error, ==, 0); 710789Sahrens 711789Sahrens /* 712789Sahrens * Update rootbp fill count. 713789Sahrens */ 714789Sahrens os->os_rootbp.blk_fill = 1; /* count the meta-dnode */ 715789Sahrens for (i = 0; i < dnp->dn_nblkptr; i++) 716789Sahrens os->os_rootbp.blk_fill += dnp->dn_blkptr[i].blk_fill; 717789Sahrens 718789Sahrens BP_SET_TYPE(zio->io_bp, DMU_OT_OBJSET); 719789Sahrens BP_SET_LEVEL(zio->io_bp, 0); 720789Sahrens 721789Sahrens if (!DVA_EQUAL(BP_IDENTITY(zio->io_bp), 722789Sahrens BP_IDENTITY(&zio->io_bp_orig))) { 723789Sahrens dsl_dataset_block_kill(os->os_dsl_dataset, &zio->io_bp_orig, 724789Sahrens os->os_synctx); 725789Sahrens dsl_dataset_block_born(os->os_dsl_dataset, zio->io_bp, 726789Sahrens os->os_synctx); 727789Sahrens } 728789Sahrens } 729789Sahrens 730789Sahrens /* called from dsl */ 731789Sahrens void 732789Sahrens dmu_objset_sync(objset_impl_t *os, dmu_tx_t *tx) 733789Sahrens { 734789Sahrens extern taskq_t *dbuf_tq; 735789Sahrens int txgoff; 736789Sahrens list_t *dirty_list; 737789Sahrens int err; 7381544Seschrock zbookmark_t zb; 739789Sahrens arc_buf_t *abuf = 740789Sahrens arc_buf_alloc(os->os_spa, sizeof (objset_phys_t), FTAG); 741789Sahrens 742789Sahrens ASSERT(dmu_tx_is_syncing(tx)); 743789Sahrens ASSERT(os->os_synctx == NULL); 744789Sahrens /* XXX the write_done callback should really give us the tx... */ 745789Sahrens os->os_synctx = tx; 746789Sahrens 747789Sahrens dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg); 748789Sahrens 749789Sahrens txgoff = tx->tx_txg & TXG_MASK; 750789Sahrens 751789Sahrens dmu_objset_sync_dnodes(os, &os->os_free_dnodes[txgoff], tx); 752789Sahrens dmu_objset_sync_dnodes(os, &os->os_dirty_dnodes[txgoff], tx); 753789Sahrens 754789Sahrens /* 755789Sahrens * Free intent log blocks up to this tx. 756789Sahrens */ 757789Sahrens zil_sync(os->os_zil, tx); 758789Sahrens 759789Sahrens /* 760789Sahrens * Sync meta-dnode 761789Sahrens */ 762789Sahrens dirty_list = &os->os_dirty_dnodes[txgoff]; 763789Sahrens ASSERT(list_head(dirty_list) == NULL); 764789Sahrens list_insert_tail(dirty_list, os->os_meta_dnode); 765789Sahrens dmu_objset_sync_dnodes(os, dirty_list, tx); 766789Sahrens 767789Sahrens /* 768789Sahrens * Sync the root block. 769789Sahrens */ 770789Sahrens bcopy(os->os_phys, abuf->b_data, sizeof (objset_phys_t)); 7711544Seschrock zb.zb_objset = os->os_dsl_dataset ? os->os_dsl_dataset->ds_object : 0; 7721544Seschrock zb.zb_object = 0; 7731544Seschrock zb.zb_level = -1; 7741544Seschrock zb.zb_blkid = 0; 775789Sahrens err = arc_write(NULL, os->os_spa, os->os_md_checksum, 7761775Sbillm os->os_md_compress, 7771775Sbillm dmu_get_replication_level(os->os_spa, &zb, DMU_OT_OBJSET), 7781775Sbillm tx->tx_txg, &os->os_rootbp, abuf, killer, os, 7791544Seschrock ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, ARC_WAIT, &zb); 780789Sahrens ASSERT(err == 0); 7811544Seschrock VERIFY(arc_buf_remove_ref(abuf, FTAG) == 1); 782789Sahrens 783789Sahrens dsl_dataset_set_blkptr(os->os_dsl_dataset, &os->os_rootbp, tx); 784789Sahrens 785789Sahrens ASSERT3P(os->os_synctx, ==, tx); 786789Sahrens taskq_wait(dbuf_tq); 787789Sahrens os->os_synctx = NULL; 788789Sahrens } 789789Sahrens 790789Sahrens void 7912885Sahrens dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp, 7922885Sahrens uint64_t *usedobjsp, uint64_t *availobjsp) 7932885Sahrens { 7942885Sahrens dsl_dataset_space(os->os->os_dsl_dataset, refdbytesp, availbytesp, 7952885Sahrens usedobjsp, availobjsp); 7962885Sahrens } 7972885Sahrens 7982885Sahrens uint64_t 7992885Sahrens dmu_objset_fsid_guid(objset_t *os) 8002885Sahrens { 8012885Sahrens return (dsl_dataset_fsid_guid(os->os->os_dsl_dataset)); 8022885Sahrens } 8032885Sahrens 8042885Sahrens void 8052885Sahrens dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat) 806789Sahrens { 8072885Sahrens stat->dds_type = os->os->os_phys->os_type; 8082885Sahrens if (os->os->os_dsl_dataset) 8092885Sahrens dsl_dataset_fast_stat(os->os->os_dsl_dataset, stat); 8102885Sahrens } 8112885Sahrens 8122885Sahrens void 8132885Sahrens dmu_objset_stats(objset_t *os, nvlist_t *nv) 8142885Sahrens { 8152885Sahrens ASSERT(os->os->os_dsl_dataset || 8162885Sahrens os->os->os_phys->os_type == DMU_OST_META); 8172885Sahrens 8182885Sahrens if (os->os->os_dsl_dataset != NULL) 8192885Sahrens dsl_dataset_stats(os->os->os_dsl_dataset, nv); 8202885Sahrens 8212885Sahrens dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_TYPE, 8222885Sahrens os->os->os_phys->os_type); 823789Sahrens } 824789Sahrens 825789Sahrens int 826789Sahrens dmu_objset_is_snapshot(objset_t *os) 827789Sahrens { 828789Sahrens if (os->os->os_dsl_dataset != NULL) 829789Sahrens return (dsl_dataset_is_snapshot(os->os->os_dsl_dataset)); 830789Sahrens else 831789Sahrens return (B_FALSE); 832789Sahrens } 833789Sahrens 834789Sahrens int 835789Sahrens dmu_snapshot_list_next(objset_t *os, int namelen, char *name, 836885Sahrens uint64_t *idp, uint64_t *offp) 837789Sahrens { 838789Sahrens dsl_dataset_t *ds = os->os->os_dsl_dataset; 839789Sahrens zap_cursor_t cursor; 840789Sahrens zap_attribute_t attr; 841789Sahrens 842789Sahrens if (ds->ds_phys->ds_snapnames_zapobj == 0) 843789Sahrens return (ENOENT); 844789Sahrens 845789Sahrens zap_cursor_init_serialized(&cursor, 846789Sahrens ds->ds_dir->dd_pool->dp_meta_objset, 847789Sahrens ds->ds_phys->ds_snapnames_zapobj, *offp); 848789Sahrens 849885Sahrens if (zap_cursor_retrieve(&cursor, &attr) != 0) { 850885Sahrens zap_cursor_fini(&cursor); 851885Sahrens return (ENOENT); 852885Sahrens } 853885Sahrens 854885Sahrens if (strlen(attr.za_name) + 1 > namelen) { 855885Sahrens zap_cursor_fini(&cursor); 856885Sahrens return (ENAMETOOLONG); 857885Sahrens } 858885Sahrens 859885Sahrens (void) strcpy(name, attr.za_name); 860885Sahrens if (idp) 861885Sahrens *idp = attr.za_first_integer; 862885Sahrens zap_cursor_advance(&cursor); 863885Sahrens *offp = zap_cursor_serialize(&cursor); 864885Sahrens zap_cursor_fini(&cursor); 865885Sahrens 866885Sahrens return (0); 867885Sahrens } 868885Sahrens 869885Sahrens int 870885Sahrens dmu_dir_list_next(objset_t *os, int namelen, char *name, 871885Sahrens uint64_t *idp, uint64_t *offp) 872885Sahrens { 873885Sahrens dsl_dir_t *dd = os->os->os_dsl_dataset->ds_dir; 874885Sahrens zap_cursor_t cursor; 875885Sahrens zap_attribute_t attr; 876885Sahrens 877885Sahrens /* there is no next dir on a snapshot! */ 878885Sahrens if (os->os->os_dsl_dataset->ds_object != 879885Sahrens dd->dd_phys->dd_head_dataset_obj) 880885Sahrens return (ENOENT); 881885Sahrens 882885Sahrens zap_cursor_init_serialized(&cursor, 883885Sahrens dd->dd_pool->dp_meta_objset, 884885Sahrens dd->dd_phys->dd_child_dir_zapobj, *offp); 885885Sahrens 886885Sahrens if (zap_cursor_retrieve(&cursor, &attr) != 0) { 887885Sahrens zap_cursor_fini(&cursor); 888885Sahrens return (ENOENT); 889885Sahrens } 890885Sahrens 891885Sahrens if (strlen(attr.za_name) + 1 > namelen) { 892885Sahrens zap_cursor_fini(&cursor); 893789Sahrens return (ENAMETOOLONG); 894885Sahrens } 895789Sahrens 896789Sahrens (void) strcpy(name, attr.za_name); 897885Sahrens if (idp) 898885Sahrens *idp = attr.za_first_integer; 899789Sahrens zap_cursor_advance(&cursor); 900789Sahrens *offp = zap_cursor_serialize(&cursor); 901885Sahrens zap_cursor_fini(&cursor); 902789Sahrens 903789Sahrens return (0); 904789Sahrens } 905789Sahrens 906789Sahrens /* 907789Sahrens * Find all objsets under name, and for each, call 'func(child_name, arg)'. 908789Sahrens */ 9092199Sahrens int 9102199Sahrens dmu_objset_find(char *name, int func(char *, void *), void *arg, int flags) 911789Sahrens { 912789Sahrens dsl_dir_t *dd; 913789Sahrens objset_t *os; 914789Sahrens uint64_t snapobj; 915789Sahrens zap_cursor_t zc; 916789Sahrens zap_attribute_t attr; 917789Sahrens char *child; 9181544Seschrock int do_self, err; 919789Sahrens 9201544Seschrock err = dsl_dir_open(name, FTAG, &dd, NULL); 9211544Seschrock if (err) 9222199Sahrens return (err); 923789Sahrens 9242199Sahrens /* NB: the $MOS dir doesn't have a head dataset */ 925789Sahrens do_self = (dd->dd_phys->dd_head_dataset_obj != 0); 926789Sahrens 927789Sahrens /* 928789Sahrens * Iterate over all children. 929789Sahrens */ 9302417Sahrens if (flags & DS_FIND_CHILDREN) { 9312417Sahrens for (zap_cursor_init(&zc, dd->dd_pool->dp_meta_objset, 9322417Sahrens dd->dd_phys->dd_child_dir_zapobj); 9332417Sahrens zap_cursor_retrieve(&zc, &attr) == 0; 9342417Sahrens (void) zap_cursor_advance(&zc)) { 9352417Sahrens ASSERT(attr.za_integer_length == sizeof (uint64_t)); 9362417Sahrens ASSERT(attr.za_num_integers == 1); 937789Sahrens 9382417Sahrens /* 9392417Sahrens * No separating '/' because parent's name ends in /. 9402417Sahrens */ 9412417Sahrens child = kmem_alloc(MAXPATHLEN, KM_SLEEP); 9422417Sahrens /* XXX could probably just use name here */ 9432417Sahrens dsl_dir_name(dd, child); 9442417Sahrens (void) strcat(child, "/"); 9452417Sahrens (void) strcat(child, attr.za_name); 9462417Sahrens err = dmu_objset_find(child, func, arg, flags); 9472417Sahrens kmem_free(child, MAXPATHLEN); 9482417Sahrens if (err) 9492417Sahrens break; 9502417Sahrens } 9512417Sahrens zap_cursor_fini(&zc); 9522199Sahrens 9532417Sahrens if (err) { 9542417Sahrens dsl_dir_close(dd, FTAG); 9552417Sahrens return (err); 9562417Sahrens } 957789Sahrens } 958789Sahrens 959789Sahrens /* 960789Sahrens * Iterate over all snapshots. 961789Sahrens */ 962789Sahrens if ((flags & DS_FIND_SNAPSHOTS) && 963789Sahrens dmu_objset_open(name, DMU_OST_ANY, 964789Sahrens DS_MODE_STANDARD | DS_MODE_READONLY, &os) == 0) { 965789Sahrens 966789Sahrens snapobj = os->os->os_dsl_dataset->ds_phys->ds_snapnames_zapobj; 967789Sahrens dmu_objset_close(os); 968789Sahrens 969789Sahrens for (zap_cursor_init(&zc, dd->dd_pool->dp_meta_objset, snapobj); 970789Sahrens zap_cursor_retrieve(&zc, &attr) == 0; 971789Sahrens (void) zap_cursor_advance(&zc)) { 972789Sahrens ASSERT(attr.za_integer_length == sizeof (uint64_t)); 973789Sahrens ASSERT(attr.za_num_integers == 1); 974789Sahrens 975789Sahrens child = kmem_alloc(MAXPATHLEN, KM_SLEEP); 976789Sahrens /* XXX could probably just use name here */ 977789Sahrens dsl_dir_name(dd, child); 978789Sahrens (void) strcat(child, "@"); 979789Sahrens (void) strcat(child, attr.za_name); 9802199Sahrens err = func(child, arg); 981789Sahrens kmem_free(child, MAXPATHLEN); 9822199Sahrens if (err) 9832199Sahrens break; 984789Sahrens } 985885Sahrens zap_cursor_fini(&zc); 986789Sahrens } 987789Sahrens 988789Sahrens dsl_dir_close(dd, FTAG); 989789Sahrens 9902199Sahrens if (err) 9912199Sahrens return (err); 9922199Sahrens 993789Sahrens /* 994789Sahrens * Apply to self if appropriate. 995789Sahrens */ 996789Sahrens if (do_self) 9972199Sahrens err = func(name, arg); 9982199Sahrens return (err); 999789Sahrens } 1000