1789Sahrens /* 2789Sahrens * CDDL HEADER START 3789Sahrens * 4789Sahrens * The contents of this file are subject to the terms of the 51544Seschrock * Common Development and Distribution License (the "License"). 61544Seschrock * You may not use this file except in compliance with the License. 7789Sahrens * 8789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9789Sahrens * or http://www.opensolaris.org/os/licensing. 10789Sahrens * See the License for the specific language governing permissions 11789Sahrens * and limitations under the License. 12789Sahrens * 13789Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15789Sahrens * If applicable, add the following below this CDDL HEADER, with the 16789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18789Sahrens * 19789Sahrens * CDDL HEADER END 20789Sahrens */ 21789Sahrens /* 221544Seschrock * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23789Sahrens * Use is subject to license terms. 24789Sahrens */ 25789Sahrens 26789Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 27789Sahrens 28789Sahrens #include <sys/zfs_context.h> 29789Sahrens #include <sys/dmu_objset.h> 30789Sahrens #include <sys/dsl_dir.h> 31789Sahrens #include <sys/dsl_dataset.h> 32789Sahrens #include <sys/dsl_prop.h> 33789Sahrens #include <sys/dsl_pool.h> 34789Sahrens #include <sys/dnode.h> 35789Sahrens #include <sys/dbuf.h> 36789Sahrens #include <sys/dmu_tx.h> 37789Sahrens #include <sys/zio_checksum.h> 38789Sahrens #include <sys/zap.h> 39789Sahrens #include <sys/zil.h> 40789Sahrens #include <sys/dmu_impl.h> 41789Sahrens 42789Sahrens 43789Sahrens spa_t * 44789Sahrens dmu_objset_spa(objset_t *os) 45789Sahrens { 46789Sahrens return (os->os->os_spa); 47789Sahrens } 48789Sahrens 49789Sahrens zilog_t * 50789Sahrens dmu_objset_zil(objset_t *os) 51789Sahrens { 52789Sahrens return (os->os->os_zil); 53789Sahrens } 54789Sahrens 55789Sahrens dsl_pool_t * 56789Sahrens dmu_objset_pool(objset_t *os) 57789Sahrens { 58789Sahrens dsl_dataset_t *ds; 59789Sahrens 60789Sahrens if ((ds = os->os->os_dsl_dataset) != NULL && ds->ds_dir) 61789Sahrens return (ds->ds_dir->dd_pool); 62789Sahrens else 63789Sahrens return (spa_get_dsl(os->os->os_spa)); 64789Sahrens } 65789Sahrens 66789Sahrens dsl_dataset_t * 67789Sahrens dmu_objset_ds(objset_t *os) 68789Sahrens { 69789Sahrens return (os->os->os_dsl_dataset); 70789Sahrens } 71789Sahrens 72789Sahrens dmu_objset_type_t 73789Sahrens dmu_objset_type(objset_t *os) 74789Sahrens { 75789Sahrens return (os->os->os_phys->os_type); 76789Sahrens } 77789Sahrens 78789Sahrens void 79789Sahrens dmu_objset_name(objset_t *os, char *buf) 80789Sahrens { 81789Sahrens dsl_dataset_name(os->os->os_dsl_dataset, buf); 82789Sahrens } 83789Sahrens 84789Sahrens uint64_t 85789Sahrens dmu_objset_id(objset_t *os) 86789Sahrens { 87789Sahrens dsl_dataset_t *ds = os->os->os_dsl_dataset; 88789Sahrens 89789Sahrens return (ds ? ds->ds_object : 0); 90789Sahrens } 91789Sahrens 92789Sahrens static void 93789Sahrens checksum_changed_cb(void *arg, uint64_t newval) 94789Sahrens { 95789Sahrens objset_impl_t *osi = arg; 96789Sahrens 97789Sahrens /* 98789Sahrens * Inheritance should have been done by now. 99789Sahrens */ 100789Sahrens ASSERT(newval != ZIO_CHECKSUM_INHERIT); 101789Sahrens 102789Sahrens osi->os_checksum = zio_checksum_select(newval, ZIO_CHECKSUM_ON_VALUE); 103789Sahrens } 104789Sahrens 105789Sahrens static void 106789Sahrens compression_changed_cb(void *arg, uint64_t newval) 107789Sahrens { 108789Sahrens objset_impl_t *osi = arg; 109789Sahrens 110789Sahrens /* 111789Sahrens * Inheritance and range checking should have been done by now. 112789Sahrens */ 113789Sahrens ASSERT(newval != ZIO_COMPRESS_INHERIT); 114789Sahrens 115789Sahrens osi->os_compress = zio_compress_select(newval, ZIO_COMPRESS_ON_VALUE); 116789Sahrens } 117789Sahrens 118789Sahrens void 119789Sahrens dmu_objset_byteswap(void *buf, size_t size) 120789Sahrens { 121789Sahrens objset_phys_t *osp = buf; 122789Sahrens 123789Sahrens ASSERT(size == sizeof (objset_phys_t)); 124789Sahrens dnode_byteswap(&osp->os_meta_dnode); 125789Sahrens byteswap_uint64_array(&osp->os_zil_header, sizeof (zil_header_t)); 126789Sahrens osp->os_type = BSWAP_64(osp->os_type); 127789Sahrens } 128789Sahrens 1291544Seschrock int 1301544Seschrock dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, 1311544Seschrock objset_impl_t **osip) 132789Sahrens { 133789Sahrens objset_impl_t *winner, *osi; 134789Sahrens int i, err, checksum; 135789Sahrens 136789Sahrens osi = kmem_zalloc(sizeof (objset_impl_t), KM_SLEEP); 137789Sahrens osi->os.os = osi; 138789Sahrens osi->os_dsl_dataset = ds; 139789Sahrens osi->os_spa = spa; 140789Sahrens if (bp) 141789Sahrens osi->os_rootbp = *bp; 142789Sahrens osi->os_phys = zio_buf_alloc(sizeof (objset_phys_t)); 143789Sahrens if (!BP_IS_HOLE(&osi->os_rootbp)) { 1441544Seschrock zbookmark_t zb; 1451544Seschrock zb.zb_objset = ds ? ds->ds_object : 0; 1461544Seschrock zb.zb_object = 0; 1471544Seschrock zb.zb_level = -1; 1481544Seschrock zb.zb_blkid = 0; 1491544Seschrock 150789Sahrens dprintf_bp(&osi->os_rootbp, "reading %s", ""); 1511544Seschrock err = arc_read(NULL, spa, &osi->os_rootbp, 152789Sahrens dmu_ot[DMU_OT_OBJSET].ot_byteswap, 153789Sahrens arc_bcopy_func, osi->os_phys, 1541544Seschrock ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, ARC_WAIT, &zb); 1551544Seschrock if (err) { 1561544Seschrock zio_buf_free(osi->os_phys, sizeof (objset_phys_t)); 1571544Seschrock kmem_free(osi, sizeof (objset_impl_t)); 1581544Seschrock return (err); 1591544Seschrock } 160789Sahrens } else { 161789Sahrens bzero(osi->os_phys, sizeof (objset_phys_t)); 162789Sahrens } 163789Sahrens 164789Sahrens /* 165789Sahrens * Note: the changed_cb will be called once before the register 166789Sahrens * func returns, thus changing the checksum/compression from the 167*2082Seschrock * default (fletcher2/off). Snapshots don't need to know, and 168*2082Seschrock * registering would complicate clone promotion. 169789Sahrens */ 170*2082Seschrock if (ds && ds->ds_phys->ds_num_children == 0) { 171789Sahrens err = dsl_prop_register(ds, "checksum", 172789Sahrens checksum_changed_cb, osi); 1731544Seschrock if (err == 0) 1741544Seschrock err = dsl_prop_register(ds, "compression", 1751544Seschrock compression_changed_cb, osi); 1761544Seschrock if (err) { 1771544Seschrock zio_buf_free(osi->os_phys, sizeof (objset_phys_t)); 1781544Seschrock kmem_free(osi, sizeof (objset_impl_t)); 1791544Seschrock return (err); 1801544Seschrock } 181*2082Seschrock } else if (ds == NULL) { 182789Sahrens /* It's the meta-objset. */ 183789Sahrens osi->os_checksum = ZIO_CHECKSUM_FLETCHER_4; 1841544Seschrock osi->os_compress = ZIO_COMPRESS_LZJB; 185789Sahrens } 186789Sahrens 1871544Seschrock osi->os_zil = zil_alloc(&osi->os, &osi->os_phys->os_zil_header); 1881544Seschrock 189789Sahrens /* 190789Sahrens * Metadata always gets compressed and checksummed. 191789Sahrens * If the data checksum is multi-bit correctable, and it's not 192789Sahrens * a ZBT-style checksum, then it's suitable for metadata as well. 193789Sahrens * Otherwise, the metadata checksum defaults to fletcher4. 194789Sahrens */ 195789Sahrens checksum = osi->os_checksum; 196789Sahrens 197789Sahrens if (zio_checksum_table[checksum].ci_correctable && 198789Sahrens !zio_checksum_table[checksum].ci_zbt) 199789Sahrens osi->os_md_checksum = checksum; 200789Sahrens else 201789Sahrens osi->os_md_checksum = ZIO_CHECKSUM_FLETCHER_4; 2021544Seschrock osi->os_md_compress = ZIO_COMPRESS_LZJB; 203789Sahrens 204789Sahrens for (i = 0; i < TXG_SIZE; i++) { 205789Sahrens list_create(&osi->os_dirty_dnodes[i], sizeof (dnode_t), 206789Sahrens offsetof(dnode_t, dn_dirty_link[i])); 207789Sahrens list_create(&osi->os_free_dnodes[i], sizeof (dnode_t), 208789Sahrens offsetof(dnode_t, dn_dirty_link[i])); 209789Sahrens } 210789Sahrens list_create(&osi->os_dnodes, sizeof (dnode_t), 211789Sahrens offsetof(dnode_t, dn_link)); 212789Sahrens list_create(&osi->os_downgraded_dbufs, sizeof (dmu_buf_impl_t), 213789Sahrens offsetof(dmu_buf_impl_t, db_link)); 214789Sahrens 215789Sahrens osi->os_meta_dnode = dnode_special_open(osi, 216789Sahrens &osi->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT); 217789Sahrens 218789Sahrens if (ds != NULL) { 219789Sahrens winner = dsl_dataset_set_user_ptr(ds, osi, dmu_objset_evict); 220789Sahrens if (winner) { 221789Sahrens dmu_objset_evict(ds, osi); 222789Sahrens osi = winner; 223789Sahrens } 224789Sahrens } 225789Sahrens 2261544Seschrock *osip = osi; 2271544Seschrock return (0); 228789Sahrens } 229789Sahrens 230789Sahrens /* called from zpl */ 231789Sahrens int 232789Sahrens dmu_objset_open(const char *name, dmu_objset_type_t type, int mode, 233789Sahrens objset_t **osp) 234789Sahrens { 235789Sahrens dsl_dataset_t *ds; 236789Sahrens int err; 237789Sahrens objset_t *os; 238789Sahrens objset_impl_t *osi; 239789Sahrens 240789Sahrens os = kmem_alloc(sizeof (objset_t), KM_SLEEP); 241789Sahrens err = dsl_dataset_open(name, mode, os, &ds); 242789Sahrens if (err) { 243789Sahrens kmem_free(os, sizeof (objset_t)); 244789Sahrens return (err); 245789Sahrens } 246789Sahrens 247789Sahrens osi = dsl_dataset_get_user_ptr(ds); 248789Sahrens if (osi == NULL) { 249789Sahrens blkptr_t bp; 250789Sahrens 251789Sahrens dsl_dataset_get_blkptr(ds, &bp); 2521544Seschrock err = dmu_objset_open_impl(dsl_dataset_get_spa(ds), 2531544Seschrock ds, &bp, &osi); 2541544Seschrock if (err) { 2551544Seschrock dsl_dataset_close(ds, mode, os); 2561544Seschrock kmem_free(os, sizeof (objset_t)); 2571544Seschrock return (err); 2581544Seschrock } 259789Sahrens } 260789Sahrens 261789Sahrens os->os = osi; 262789Sahrens os->os_mode = mode; 263789Sahrens 264789Sahrens if (type != DMU_OST_ANY && type != os->os->os_phys->os_type) { 265789Sahrens dmu_objset_close(os); 266789Sahrens return (EINVAL); 267789Sahrens } 268789Sahrens *osp = os; 269789Sahrens return (0); 270789Sahrens } 271789Sahrens 272789Sahrens void 273789Sahrens dmu_objset_close(objset_t *os) 274789Sahrens { 275789Sahrens dsl_dataset_close(os->os->os_dsl_dataset, os->os_mode, os); 276789Sahrens kmem_free(os, sizeof (objset_t)); 277789Sahrens } 278789Sahrens 2791646Sperrin int 2801646Sperrin dmu_objset_evict_dbufs(objset_t *os, int try) 2811544Seschrock { 2821544Seschrock objset_impl_t *osi = os->os; 2831544Seschrock dnode_t *dn; 2841596Sahrens 2851596Sahrens mutex_enter(&osi->os_lock); 2861596Sahrens 2871596Sahrens /* process the mdn last, since the other dnodes have holds on it */ 2881596Sahrens list_remove(&osi->os_dnodes, osi->os_meta_dnode); 2891596Sahrens list_insert_tail(&osi->os_dnodes, osi->os_meta_dnode); 2901544Seschrock 2911544Seschrock /* 2921596Sahrens * Find the first dnode with holds. We have to do this dance 2931596Sahrens * because dnode_add_ref() only works if you already have a 2941596Sahrens * hold. If there are no holds then it has no dbufs so OK to 2951596Sahrens * skip. 2961544Seschrock */ 2971596Sahrens for (dn = list_head(&osi->os_dnodes); 2981596Sahrens dn && refcount_is_zero(&dn->dn_holds); 2991596Sahrens dn = list_next(&osi->os_dnodes, dn)) 3001596Sahrens continue; 3011596Sahrens if (dn) 3021596Sahrens dnode_add_ref(dn, FTAG); 3031596Sahrens 3041596Sahrens while (dn) { 3051596Sahrens dnode_t *next_dn = dn; 3061596Sahrens 3071596Sahrens do { 3081596Sahrens next_dn = list_next(&osi->os_dnodes, next_dn); 3091596Sahrens } while (next_dn && refcount_is_zero(&next_dn->dn_holds)); 3101596Sahrens if (next_dn) 3111596Sahrens dnode_add_ref(next_dn, FTAG); 3121596Sahrens 3131596Sahrens mutex_exit(&osi->os_lock); 3141646Sperrin if (dnode_evict_dbufs(dn, try)) { 3151646Sperrin dnode_rele(dn, FTAG); 3161646Sperrin if (next_dn) 3171646Sperrin dnode_rele(next_dn, FTAG); 3181646Sperrin return (1); 3191646Sperrin } 3201596Sahrens dnode_rele(dn, FTAG); 3211596Sahrens mutex_enter(&osi->os_lock); 3221596Sahrens dn = next_dn; 3231544Seschrock } 3241544Seschrock mutex_exit(&osi->os_lock); 3251646Sperrin return (0); 3261544Seschrock } 3271544Seschrock 3281544Seschrock void 329789Sahrens dmu_objset_evict(dsl_dataset_t *ds, void *arg) 330789Sahrens { 331789Sahrens objset_impl_t *osi = arg; 3321544Seschrock objset_t os; 333*2082Seschrock int i; 334789Sahrens 335789Sahrens for (i = 0; i < TXG_SIZE; i++) { 336789Sahrens ASSERT(list_head(&osi->os_dirty_dnodes[i]) == NULL); 337789Sahrens ASSERT(list_head(&osi->os_free_dnodes[i]) == NULL); 338789Sahrens } 339789Sahrens 340*2082Seschrock if (ds && ds->ds_phys->ds_num_children == 0) { 341*2082Seschrock VERIFY(0 == dsl_prop_unregister(ds, "checksum", 342*2082Seschrock checksum_changed_cb, osi)); 343*2082Seschrock VERIFY(0 == dsl_prop_unregister(ds, "compression", 344*2082Seschrock compression_changed_cb, osi)); 345789Sahrens } 346789Sahrens 3471544Seschrock /* 3481544Seschrock * We should need only a single pass over the dnode list, since 3491544Seschrock * nothing can be added to the list at this point. 3501544Seschrock */ 3511544Seschrock os.os = osi; 3521646Sperrin (void) dmu_objset_evict_dbufs(&os, 0); 3531544Seschrock 354789Sahrens ASSERT3P(list_head(&osi->os_dnodes), ==, osi->os_meta_dnode); 355789Sahrens ASSERT3P(list_tail(&osi->os_dnodes), ==, osi->os_meta_dnode); 356789Sahrens ASSERT3P(list_head(&osi->os_meta_dnode->dn_dbufs), ==, NULL); 357789Sahrens 358789Sahrens dnode_special_close(osi->os_meta_dnode); 359789Sahrens zil_free(osi->os_zil); 360789Sahrens 361789Sahrens zio_buf_free(osi->os_phys, sizeof (objset_phys_t)); 362789Sahrens kmem_free(osi, sizeof (objset_impl_t)); 363789Sahrens } 364789Sahrens 365789Sahrens /* called from dsl for meta-objset */ 366789Sahrens objset_impl_t * 367789Sahrens dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, dmu_objset_type_t type, 368789Sahrens dmu_tx_t *tx) 369789Sahrens { 370789Sahrens objset_impl_t *osi; 371789Sahrens dnode_t *mdn; 372789Sahrens 373789Sahrens ASSERT(dmu_tx_is_syncing(tx)); 3741544Seschrock VERIFY(0 == dmu_objset_open_impl(spa, ds, NULL, &osi)); 375789Sahrens mdn = osi->os_meta_dnode; 376789Sahrens 377789Sahrens dnode_allocate(mdn, DMU_OT_DNODE, 1 << DNODE_BLOCK_SHIFT, 378789Sahrens DN_MAX_INDBLKSHIFT, DMU_OT_NONE, 0, tx); 379789Sahrens 380789Sahrens /* 381789Sahrens * We don't want to have to increase the meta-dnode's nlevels 382789Sahrens * later, because then we could do it in quescing context while 383789Sahrens * we are also accessing it in open context. 384789Sahrens * 385789Sahrens * This precaution is not necessary for the MOS (ds == NULL), 386789Sahrens * because the MOS is only updated in syncing context. 387789Sahrens * This is most fortunate: the MOS is the only objset that 388789Sahrens * needs to be synced multiple times as spa_sync() iterates 389789Sahrens * to convergence, so minimizing its dn_nlevels matters. 390789Sahrens */ 3911544Seschrock if (ds != NULL) { 3921544Seschrock int levels = 1; 3931544Seschrock 3941544Seschrock /* 3951544Seschrock * Determine the number of levels necessary for the meta-dnode 3961544Seschrock * to contain DN_MAX_OBJECT dnodes. 3971544Seschrock */ 3981544Seschrock while ((uint64_t)mdn->dn_nblkptr << (mdn->dn_datablkshift + 3991544Seschrock (levels - 1) * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) < 4001544Seschrock DN_MAX_OBJECT * sizeof (dnode_phys_t)) 4011544Seschrock levels++; 4021544Seschrock 403789Sahrens mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] = 4041544Seschrock mdn->dn_nlevels = levels; 4051544Seschrock } 406789Sahrens 407789Sahrens ASSERT(type != DMU_OST_NONE); 408789Sahrens ASSERT(type != DMU_OST_ANY); 409789Sahrens ASSERT(type < DMU_OST_NUMTYPES); 410789Sahrens osi->os_phys->os_type = type; 411789Sahrens 412789Sahrens dsl_dataset_dirty(ds, tx); 413789Sahrens 414789Sahrens return (osi); 415789Sahrens } 416789Sahrens 417789Sahrens struct oscarg { 418789Sahrens void (*userfunc)(objset_t *os, void *arg, dmu_tx_t *tx); 419789Sahrens void *userarg; 420789Sahrens dsl_dataset_t *clone_parent; 421789Sahrens const char *fullname; 422789Sahrens const char *lastname; 423789Sahrens dmu_objset_type_t type; 424789Sahrens }; 425789Sahrens 426789Sahrens static int 427789Sahrens dmu_objset_create_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) 428789Sahrens { 429789Sahrens struct oscarg *oa = arg; 430789Sahrens dsl_dataset_t *ds; 431789Sahrens int err; 432789Sahrens blkptr_t bp; 433789Sahrens 434789Sahrens ASSERT(dmu_tx_is_syncing(tx)); 435789Sahrens 436789Sahrens err = dsl_dataset_create_sync(dd, oa->fullname, oa->lastname, 437789Sahrens oa->clone_parent, tx); 438789Sahrens dprintf_dd(dd, "fn=%s ln=%s err=%d\n", 439789Sahrens oa->fullname, oa->lastname, err); 440789Sahrens if (err) 441789Sahrens return (err); 442789Sahrens 4431544Seschrock VERIFY(0 == dsl_dataset_open_spa(dd->dd_pool->dp_spa, oa->fullname, 4441544Seschrock DS_MODE_STANDARD | DS_MODE_READONLY, FTAG, &ds)); 445789Sahrens dsl_dataset_get_blkptr(ds, &bp); 446789Sahrens if (BP_IS_HOLE(&bp)) { 447789Sahrens objset_impl_t *osi; 448789Sahrens 449789Sahrens /* This is an empty dmu_objset; not a clone. */ 450789Sahrens osi = dmu_objset_create_impl(dsl_dataset_get_spa(ds), 451789Sahrens ds, oa->type, tx); 452789Sahrens 453789Sahrens if (oa->userfunc) 454789Sahrens oa->userfunc(&osi->os, oa->userarg, tx); 455789Sahrens } 456789Sahrens dsl_dataset_close(ds, DS_MODE_STANDARD | DS_MODE_READONLY, FTAG); 457789Sahrens 458789Sahrens return (0); 459789Sahrens } 460789Sahrens 461789Sahrens int 462789Sahrens dmu_objset_create(const char *name, dmu_objset_type_t type, 463789Sahrens objset_t *clone_parent, 464789Sahrens void (*func)(objset_t *os, void *arg, dmu_tx_t *tx), void *arg) 465789Sahrens { 466789Sahrens dsl_dir_t *pds; 467789Sahrens const char *tail; 468789Sahrens int err = 0; 469789Sahrens 4701544Seschrock err = dsl_dir_open(name, FTAG, &pds, &tail); 4711544Seschrock if (err) 4721544Seschrock return (err); 473789Sahrens if (tail == NULL) { 474789Sahrens dsl_dir_close(pds, FTAG); 475789Sahrens return (EEXIST); 476789Sahrens } 477789Sahrens 478789Sahrens dprintf("name=%s\n", name); 479789Sahrens 480789Sahrens if (tail[0] == '@') { 481789Sahrens /* 482789Sahrens * If we're creating a snapshot, make sure everything 483789Sahrens * they might want is on disk. XXX Sketchy to know 484789Sahrens * about snapshots here, better to put in DSL. 485789Sahrens */ 486789Sahrens objset_t *os; 487789Sahrens size_t plen = strchr(name, '@') - name + 1; 488789Sahrens char *pbuf = kmem_alloc(plen, KM_SLEEP); 489789Sahrens bcopy(name, pbuf, plen - 1); 490789Sahrens pbuf[plen - 1] = '\0'; 491789Sahrens 492789Sahrens err = dmu_objset_open(pbuf, DMU_OST_ANY, DS_MODE_STANDARD, &os); 493789Sahrens if (err == 0) { 494789Sahrens err = zil_suspend(dmu_objset_zil(os)); 495789Sahrens if (err == 0) { 496789Sahrens err = dsl_dir_sync_task(pds, 497789Sahrens dsl_dataset_snapshot_sync, 498789Sahrens (void*)(tail+1), 16*1024); 499789Sahrens zil_resume(dmu_objset_zil(os)); 500789Sahrens } 501789Sahrens dmu_objset_close(os); 502789Sahrens } 503789Sahrens kmem_free(pbuf, plen); 504789Sahrens } else { 505789Sahrens struct oscarg oa = { 0 }; 506789Sahrens oa.userfunc = func; 507789Sahrens oa.userarg = arg; 508789Sahrens oa.fullname = name; 509789Sahrens oa.lastname = tail; 510789Sahrens oa.type = type; 511789Sahrens if (clone_parent != NULL) { 512789Sahrens /* 513789Sahrens * You can't clone to a different type. 514789Sahrens */ 515789Sahrens if (clone_parent->os->os_phys->os_type != type) { 516789Sahrens dsl_dir_close(pds, FTAG); 517789Sahrens return (EINVAL); 518789Sahrens } 519789Sahrens oa.clone_parent = clone_parent->os->os_dsl_dataset; 520789Sahrens } 521789Sahrens err = dsl_dir_sync_task(pds, dmu_objset_create_sync, &oa, 522789Sahrens 256*1024); 523789Sahrens } 524789Sahrens dsl_dir_close(pds, FTAG); 525789Sahrens return (err); 526789Sahrens } 527789Sahrens 528789Sahrens int 529789Sahrens dmu_objset_destroy(const char *name) 530789Sahrens { 531789Sahrens objset_t *os; 532789Sahrens int error; 533789Sahrens 534789Sahrens /* 535789Sahrens * If it looks like we'll be able to destroy it, and there's 536789Sahrens * an unplayed replay log sitting around, destroy the log. 537789Sahrens * It would be nicer to do this in dsl_dataset_destroy_sync(), 538789Sahrens * but the replay log objset is modified in open context. 539789Sahrens */ 540789Sahrens error = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_EXCLUSIVE, &os); 541789Sahrens if (error == 0) { 5421807Sbonwick zil_destroy(dmu_objset_zil(os), B_FALSE); 543789Sahrens dmu_objset_close(os); 544789Sahrens } 545789Sahrens 546789Sahrens /* XXX uncache everything? */ 547789Sahrens return (dsl_dataset_destroy(name)); 548789Sahrens } 549789Sahrens 550789Sahrens int 551789Sahrens dmu_objset_rollback(const char *name) 552789Sahrens { 553789Sahrens int err; 554789Sahrens objset_t *os; 555789Sahrens 556789Sahrens err = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_EXCLUSIVE, &os); 557789Sahrens if (err == 0) { 558789Sahrens err = zil_suspend(dmu_objset_zil(os)); 559789Sahrens if (err == 0) 560789Sahrens zil_resume(dmu_objset_zil(os)); 561789Sahrens dmu_objset_close(os); 562789Sahrens if (err == 0) { 563789Sahrens /* XXX uncache everything? */ 564789Sahrens err = dsl_dataset_rollback(name); 565789Sahrens } 566789Sahrens } 567789Sahrens return (err); 568789Sahrens } 569789Sahrens 570789Sahrens static void 571789Sahrens dmu_objset_sync_dnodes(objset_impl_t *os, list_t *list, dmu_tx_t *tx) 572789Sahrens { 573789Sahrens dnode_t *dn = list_head(list); 574789Sahrens int level, err; 575789Sahrens 576789Sahrens for (level = 0; dn = list_head(list); level++) { 577789Sahrens zio_t *zio; 578789Sahrens zio = zio_root(os->os_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); 579789Sahrens 580789Sahrens ASSERT3U(level, <=, DN_MAX_LEVELS); 581789Sahrens 582789Sahrens while (dn) { 583789Sahrens dnode_t *next = list_next(list, dn); 584789Sahrens 585789Sahrens list_remove(list, dn); 586789Sahrens if (dnode_sync(dn, level, zio, tx) == 0) { 587789Sahrens /* 588789Sahrens * This dnode requires syncing at higher 589789Sahrens * levels; put it back onto the list. 590789Sahrens */ 591789Sahrens if (next) 592789Sahrens list_insert_before(list, next, dn); 593789Sahrens else 594789Sahrens list_insert_tail(list, dn); 595789Sahrens } 596789Sahrens dn = next; 597789Sahrens } 598789Sahrens err = zio_wait(zio); 599789Sahrens ASSERT(err == 0); 600789Sahrens } 601789Sahrens } 602789Sahrens 603789Sahrens /* ARGSUSED */ 604789Sahrens static void 605789Sahrens killer(zio_t *zio, arc_buf_t *abuf, void *arg) 606789Sahrens { 607789Sahrens objset_impl_t *os = arg; 608789Sahrens objset_phys_t *osphys = zio->io_data; 609789Sahrens dnode_phys_t *dnp = &osphys->os_meta_dnode; 610789Sahrens int i; 611789Sahrens 612789Sahrens ASSERT3U(zio->io_error, ==, 0); 613789Sahrens 614789Sahrens /* 615789Sahrens * Update rootbp fill count. 616789Sahrens */ 617789Sahrens os->os_rootbp.blk_fill = 1; /* count the meta-dnode */ 618789Sahrens for (i = 0; i < dnp->dn_nblkptr; i++) 619789Sahrens os->os_rootbp.blk_fill += dnp->dn_blkptr[i].blk_fill; 620789Sahrens 621789Sahrens BP_SET_TYPE(zio->io_bp, DMU_OT_OBJSET); 622789Sahrens BP_SET_LEVEL(zio->io_bp, 0); 623789Sahrens 624789Sahrens if (!DVA_EQUAL(BP_IDENTITY(zio->io_bp), 625789Sahrens BP_IDENTITY(&zio->io_bp_orig))) { 626789Sahrens dsl_dataset_block_kill(os->os_dsl_dataset, &zio->io_bp_orig, 627789Sahrens os->os_synctx); 628789Sahrens dsl_dataset_block_born(os->os_dsl_dataset, zio->io_bp, 629789Sahrens os->os_synctx); 630789Sahrens } 631789Sahrens } 632789Sahrens 633789Sahrens 634789Sahrens /* called from dsl */ 635789Sahrens void 636789Sahrens dmu_objset_sync(objset_impl_t *os, dmu_tx_t *tx) 637789Sahrens { 638789Sahrens extern taskq_t *dbuf_tq; 639789Sahrens int txgoff; 640789Sahrens list_t *dirty_list; 641789Sahrens int err; 6421544Seschrock zbookmark_t zb; 643789Sahrens arc_buf_t *abuf = 644789Sahrens arc_buf_alloc(os->os_spa, sizeof (objset_phys_t), FTAG); 645789Sahrens 646789Sahrens ASSERT(dmu_tx_is_syncing(tx)); 647789Sahrens ASSERT(os->os_synctx == NULL); 648789Sahrens /* XXX the write_done callback should really give us the tx... */ 649789Sahrens os->os_synctx = tx; 650789Sahrens 651789Sahrens dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg); 652789Sahrens 653789Sahrens txgoff = tx->tx_txg & TXG_MASK; 654789Sahrens 655789Sahrens dmu_objset_sync_dnodes(os, &os->os_free_dnodes[txgoff], tx); 656789Sahrens dmu_objset_sync_dnodes(os, &os->os_dirty_dnodes[txgoff], tx); 657789Sahrens 658789Sahrens /* 659789Sahrens * Free intent log blocks up to this tx. 660789Sahrens */ 661789Sahrens zil_sync(os->os_zil, tx); 662789Sahrens 663789Sahrens /* 664789Sahrens * Sync meta-dnode 665789Sahrens */ 666789Sahrens dirty_list = &os->os_dirty_dnodes[txgoff]; 667789Sahrens ASSERT(list_head(dirty_list) == NULL); 668789Sahrens list_insert_tail(dirty_list, os->os_meta_dnode); 669789Sahrens dmu_objset_sync_dnodes(os, dirty_list, tx); 670789Sahrens 671789Sahrens /* 672789Sahrens * Sync the root block. 673789Sahrens */ 674789Sahrens bcopy(os->os_phys, abuf->b_data, sizeof (objset_phys_t)); 6751544Seschrock zb.zb_objset = os->os_dsl_dataset ? os->os_dsl_dataset->ds_object : 0; 6761544Seschrock zb.zb_object = 0; 6771544Seschrock zb.zb_level = -1; 6781544Seschrock zb.zb_blkid = 0; 679789Sahrens err = arc_write(NULL, os->os_spa, os->os_md_checksum, 6801775Sbillm os->os_md_compress, 6811775Sbillm dmu_get_replication_level(os->os_spa, &zb, DMU_OT_OBJSET), 6821775Sbillm tx->tx_txg, &os->os_rootbp, abuf, killer, os, 6831544Seschrock ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, ARC_WAIT, &zb); 684789Sahrens ASSERT(err == 0); 6851544Seschrock VERIFY(arc_buf_remove_ref(abuf, FTAG) == 1); 686789Sahrens 687789Sahrens dsl_dataset_set_blkptr(os->os_dsl_dataset, &os->os_rootbp, tx); 688789Sahrens 689789Sahrens ASSERT3P(os->os_synctx, ==, tx); 690789Sahrens taskq_wait(dbuf_tq); 691789Sahrens os->os_synctx = NULL; 692789Sahrens } 693789Sahrens 694789Sahrens void 695789Sahrens dmu_objset_stats(objset_t *os, dmu_objset_stats_t *dds) 696789Sahrens { 697789Sahrens if (os->os->os_dsl_dataset != NULL) { 698789Sahrens dsl_dataset_stats(os->os->os_dsl_dataset, dds); 699789Sahrens } else { 700789Sahrens ASSERT(os->os->os_phys->os_type == DMU_OST_META); 701789Sahrens bzero(dds, sizeof (*dds)); 702789Sahrens } 703789Sahrens dds->dds_type = os->os->os_phys->os_type; 704789Sahrens } 705789Sahrens 706789Sahrens int 707789Sahrens dmu_objset_is_snapshot(objset_t *os) 708789Sahrens { 709789Sahrens if (os->os->os_dsl_dataset != NULL) 710789Sahrens return (dsl_dataset_is_snapshot(os->os->os_dsl_dataset)); 711789Sahrens else 712789Sahrens return (B_FALSE); 713789Sahrens } 714789Sahrens 715789Sahrens int 716789Sahrens dmu_snapshot_list_next(objset_t *os, int namelen, char *name, 717885Sahrens uint64_t *idp, uint64_t *offp) 718789Sahrens { 719789Sahrens dsl_dataset_t *ds = os->os->os_dsl_dataset; 720789Sahrens zap_cursor_t cursor; 721789Sahrens zap_attribute_t attr; 722789Sahrens 723789Sahrens if (ds->ds_phys->ds_snapnames_zapobj == 0) 724789Sahrens return (ENOENT); 725789Sahrens 726789Sahrens zap_cursor_init_serialized(&cursor, 727789Sahrens ds->ds_dir->dd_pool->dp_meta_objset, 728789Sahrens ds->ds_phys->ds_snapnames_zapobj, *offp); 729789Sahrens 730885Sahrens if (zap_cursor_retrieve(&cursor, &attr) != 0) { 731885Sahrens zap_cursor_fini(&cursor); 732885Sahrens return (ENOENT); 733885Sahrens } 734885Sahrens 735885Sahrens if (strlen(attr.za_name) + 1 > namelen) { 736885Sahrens zap_cursor_fini(&cursor); 737885Sahrens return (ENAMETOOLONG); 738885Sahrens } 739885Sahrens 740885Sahrens (void) strcpy(name, attr.za_name); 741885Sahrens if (idp) 742885Sahrens *idp = attr.za_first_integer; 743885Sahrens zap_cursor_advance(&cursor); 744885Sahrens *offp = zap_cursor_serialize(&cursor); 745885Sahrens zap_cursor_fini(&cursor); 746885Sahrens 747885Sahrens return (0); 748885Sahrens } 749885Sahrens 750885Sahrens int 751885Sahrens dmu_dir_list_next(objset_t *os, int namelen, char *name, 752885Sahrens uint64_t *idp, uint64_t *offp) 753885Sahrens { 754885Sahrens dsl_dir_t *dd = os->os->os_dsl_dataset->ds_dir; 755885Sahrens zap_cursor_t cursor; 756885Sahrens zap_attribute_t attr; 757885Sahrens 758885Sahrens if (dd->dd_phys->dd_child_dir_zapobj == 0) 759789Sahrens return (ENOENT); 760789Sahrens 761885Sahrens /* there is no next dir on a snapshot! */ 762885Sahrens if (os->os->os_dsl_dataset->ds_object != 763885Sahrens dd->dd_phys->dd_head_dataset_obj) 764885Sahrens return (ENOENT); 765885Sahrens 766885Sahrens zap_cursor_init_serialized(&cursor, 767885Sahrens dd->dd_pool->dp_meta_objset, 768885Sahrens dd->dd_phys->dd_child_dir_zapobj, *offp); 769885Sahrens 770885Sahrens if (zap_cursor_retrieve(&cursor, &attr) != 0) { 771885Sahrens zap_cursor_fini(&cursor); 772885Sahrens return (ENOENT); 773885Sahrens } 774885Sahrens 775885Sahrens if (strlen(attr.za_name) + 1 > namelen) { 776885Sahrens zap_cursor_fini(&cursor); 777789Sahrens return (ENAMETOOLONG); 778885Sahrens } 779789Sahrens 780789Sahrens (void) strcpy(name, attr.za_name); 781885Sahrens if (idp) 782885Sahrens *idp = attr.za_first_integer; 783789Sahrens zap_cursor_advance(&cursor); 784789Sahrens *offp = zap_cursor_serialize(&cursor); 785885Sahrens zap_cursor_fini(&cursor); 786789Sahrens 787789Sahrens return (0); 788789Sahrens } 789789Sahrens 790789Sahrens /* 791789Sahrens * Find all objsets under name, and for each, call 'func(child_name, arg)'. 792789Sahrens */ 793789Sahrens void 794789Sahrens dmu_objset_find(char *name, void func(char *, void *), void *arg, int flags) 795789Sahrens { 796789Sahrens dsl_dir_t *dd; 797789Sahrens objset_t *os; 798789Sahrens uint64_t snapobj; 799789Sahrens zap_cursor_t zc; 800789Sahrens zap_attribute_t attr; 801789Sahrens char *child; 8021544Seschrock int do_self, err; 803789Sahrens 8041544Seschrock err = dsl_dir_open(name, FTAG, &dd, NULL); 8051544Seschrock if (err) 806789Sahrens return; 807789Sahrens 808789Sahrens do_self = (dd->dd_phys->dd_head_dataset_obj != 0); 809789Sahrens 810789Sahrens /* 811789Sahrens * Iterate over all children. 812789Sahrens */ 813789Sahrens if (dd->dd_phys->dd_child_dir_zapobj != 0) { 814789Sahrens for (zap_cursor_init(&zc, dd->dd_pool->dp_meta_objset, 815789Sahrens dd->dd_phys->dd_child_dir_zapobj); 816789Sahrens zap_cursor_retrieve(&zc, &attr) == 0; 817789Sahrens (void) zap_cursor_advance(&zc)) { 818789Sahrens ASSERT(attr.za_integer_length == sizeof (uint64_t)); 819789Sahrens ASSERT(attr.za_num_integers == 1); 820789Sahrens 821789Sahrens /* 822789Sahrens * No separating '/' because parent's name ends in /. 823789Sahrens */ 824789Sahrens child = kmem_alloc(MAXPATHLEN, KM_SLEEP); 825789Sahrens /* XXX could probably just use name here */ 826789Sahrens dsl_dir_name(dd, child); 827789Sahrens (void) strcat(child, "/"); 828789Sahrens (void) strcat(child, attr.za_name); 829789Sahrens dmu_objset_find(child, func, arg, flags); 830789Sahrens kmem_free(child, MAXPATHLEN); 831789Sahrens } 832885Sahrens zap_cursor_fini(&zc); 833789Sahrens } 834789Sahrens 835789Sahrens /* 836789Sahrens * Iterate over all snapshots. 837789Sahrens */ 838789Sahrens if ((flags & DS_FIND_SNAPSHOTS) && 839789Sahrens dmu_objset_open(name, DMU_OST_ANY, 840789Sahrens DS_MODE_STANDARD | DS_MODE_READONLY, &os) == 0) { 841789Sahrens 842789Sahrens snapobj = os->os->os_dsl_dataset->ds_phys->ds_snapnames_zapobj; 843789Sahrens dmu_objset_close(os); 844789Sahrens 845789Sahrens for (zap_cursor_init(&zc, dd->dd_pool->dp_meta_objset, snapobj); 846789Sahrens zap_cursor_retrieve(&zc, &attr) == 0; 847789Sahrens (void) zap_cursor_advance(&zc)) { 848789Sahrens ASSERT(attr.za_integer_length == sizeof (uint64_t)); 849789Sahrens ASSERT(attr.za_num_integers == 1); 850789Sahrens 851789Sahrens child = kmem_alloc(MAXPATHLEN, KM_SLEEP); 852789Sahrens /* XXX could probably just use name here */ 853789Sahrens dsl_dir_name(dd, child); 854789Sahrens (void) strcat(child, "@"); 855789Sahrens (void) strcat(child, attr.za_name); 856789Sahrens func(child, arg); 857789Sahrens kmem_free(child, MAXPATHLEN); 858789Sahrens } 859885Sahrens zap_cursor_fini(&zc); 860789Sahrens } 861789Sahrens 862789Sahrens dsl_dir_close(dd, FTAG); 863789Sahrens 864789Sahrens /* 865789Sahrens * Apply to self if appropriate. 866789Sahrens */ 867789Sahrens if (do_self) 868789Sahrens func(name, arg); 869789Sahrens } 870