1789Sahrens /* 2789Sahrens * CDDL HEADER START 3789Sahrens * 4789Sahrens * The contents of this file are subject to the terms of the 51544Seschrock * Common Development and Distribution License (the "License"). 61544Seschrock * You may not use this file except in compliance with the License. 7789Sahrens * 8789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9789Sahrens * or http://www.opensolaris.org/os/licensing. 10789Sahrens * See the License for the specific language governing permissions 11789Sahrens * and limitations under the License. 12789Sahrens * 13789Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15789Sahrens * If applicable, add the following below this CDDL HEADER, with the 16789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18789Sahrens * 19789Sahrens * CDDL HEADER END 20789Sahrens */ 21789Sahrens /* 221544Seschrock * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23789Sahrens * Use is subject to license terms. 24789Sahrens */ 25789Sahrens 26789Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 27789Sahrens 28789Sahrens #include <sys/zfs_context.h> 29789Sahrens #include <sys/dmu_objset.h> 30789Sahrens #include <sys/dsl_dir.h> 31789Sahrens #include <sys/dsl_dataset.h> 32789Sahrens #include <sys/dsl_prop.h> 33789Sahrens #include <sys/dsl_pool.h> 342199Sahrens #include <sys/dsl_synctask.h> 35789Sahrens #include <sys/dnode.h> 36789Sahrens #include <sys/dbuf.h> 37789Sahrens #include <sys/dmu_tx.h> 38789Sahrens #include <sys/zio_checksum.h> 39789Sahrens #include <sys/zap.h> 40789Sahrens #include <sys/zil.h> 41789Sahrens #include <sys/dmu_impl.h> 42789Sahrens 43789Sahrens 44789Sahrens spa_t * 45789Sahrens dmu_objset_spa(objset_t *os) 46789Sahrens { 47789Sahrens return (os->os->os_spa); 48789Sahrens } 49789Sahrens 50789Sahrens zilog_t * 51789Sahrens dmu_objset_zil(objset_t *os) 52789Sahrens { 53789Sahrens return (os->os->os_zil); 54789Sahrens } 55789Sahrens 56789Sahrens dsl_pool_t * 57789Sahrens dmu_objset_pool(objset_t *os) 58789Sahrens { 59789Sahrens dsl_dataset_t *ds; 60789Sahrens 61789Sahrens if ((ds = os->os->os_dsl_dataset) != NULL && ds->ds_dir) 62789Sahrens return (ds->ds_dir->dd_pool); 63789Sahrens else 64789Sahrens return (spa_get_dsl(os->os->os_spa)); 65789Sahrens } 66789Sahrens 67789Sahrens dsl_dataset_t * 68789Sahrens dmu_objset_ds(objset_t *os) 69789Sahrens { 70789Sahrens return (os->os->os_dsl_dataset); 71789Sahrens } 72789Sahrens 73789Sahrens dmu_objset_type_t 74789Sahrens dmu_objset_type(objset_t *os) 75789Sahrens { 76789Sahrens return (os->os->os_phys->os_type); 77789Sahrens } 78789Sahrens 79789Sahrens void 80789Sahrens dmu_objset_name(objset_t *os, char *buf) 81789Sahrens { 82789Sahrens dsl_dataset_name(os->os->os_dsl_dataset, buf); 83789Sahrens } 84789Sahrens 85789Sahrens uint64_t 86789Sahrens dmu_objset_id(objset_t *os) 87789Sahrens { 88789Sahrens dsl_dataset_t *ds = os->os->os_dsl_dataset; 89789Sahrens 90789Sahrens return (ds ? ds->ds_object : 0); 91789Sahrens } 92789Sahrens 93789Sahrens static void 94789Sahrens checksum_changed_cb(void *arg, uint64_t newval) 95789Sahrens { 96789Sahrens objset_impl_t *osi = arg; 97789Sahrens 98789Sahrens /* 99789Sahrens * Inheritance should have been done by now. 100789Sahrens */ 101789Sahrens ASSERT(newval != ZIO_CHECKSUM_INHERIT); 102789Sahrens 103789Sahrens osi->os_checksum = zio_checksum_select(newval, ZIO_CHECKSUM_ON_VALUE); 104789Sahrens } 105789Sahrens 106789Sahrens static void 107789Sahrens compression_changed_cb(void *arg, uint64_t newval) 108789Sahrens { 109789Sahrens objset_impl_t *osi = arg; 110789Sahrens 111789Sahrens /* 112789Sahrens * Inheritance and range checking should have been done by now. 113789Sahrens */ 114789Sahrens ASSERT(newval != ZIO_COMPRESS_INHERIT); 115789Sahrens 116789Sahrens osi->os_compress = zio_compress_select(newval, ZIO_COMPRESS_ON_VALUE); 117789Sahrens } 118789Sahrens 119789Sahrens void 120789Sahrens dmu_objset_byteswap(void *buf, size_t size) 121789Sahrens { 122789Sahrens objset_phys_t *osp = buf; 123789Sahrens 124789Sahrens ASSERT(size == sizeof (objset_phys_t)); 125789Sahrens dnode_byteswap(&osp->os_meta_dnode); 126789Sahrens byteswap_uint64_array(&osp->os_zil_header, sizeof (zil_header_t)); 127789Sahrens osp->os_type = BSWAP_64(osp->os_type); 128789Sahrens } 129789Sahrens 1301544Seschrock int 1311544Seschrock dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, 1321544Seschrock objset_impl_t **osip) 133789Sahrens { 134789Sahrens objset_impl_t *winner, *osi; 135789Sahrens int i, err, checksum; 136789Sahrens 137789Sahrens osi = kmem_zalloc(sizeof (objset_impl_t), KM_SLEEP); 138789Sahrens osi->os.os = osi; 139789Sahrens osi->os_dsl_dataset = ds; 140789Sahrens osi->os_spa = spa; 141789Sahrens if (bp) 142789Sahrens osi->os_rootbp = *bp; 143789Sahrens osi->os_phys = zio_buf_alloc(sizeof (objset_phys_t)); 144789Sahrens if (!BP_IS_HOLE(&osi->os_rootbp)) { 1452391Smaybee uint32_t aflags = ARC_WAIT; 1461544Seschrock zbookmark_t zb; 1471544Seschrock zb.zb_objset = ds ? ds->ds_object : 0; 1481544Seschrock zb.zb_object = 0; 1491544Seschrock zb.zb_level = -1; 1501544Seschrock zb.zb_blkid = 0; 1511544Seschrock 152789Sahrens dprintf_bp(&osi->os_rootbp, "reading %s", ""); 1531544Seschrock err = arc_read(NULL, spa, &osi->os_rootbp, 154789Sahrens dmu_ot[DMU_OT_OBJSET].ot_byteswap, 155789Sahrens arc_bcopy_func, osi->os_phys, 1562391Smaybee ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb); 1571544Seschrock if (err) { 1581544Seschrock zio_buf_free(osi->os_phys, sizeof (objset_phys_t)); 1591544Seschrock kmem_free(osi, sizeof (objset_impl_t)); 1601544Seschrock return (err); 1611544Seschrock } 162789Sahrens } else { 163789Sahrens bzero(osi->os_phys, sizeof (objset_phys_t)); 164789Sahrens } 165789Sahrens 166789Sahrens /* 167789Sahrens * Note: the changed_cb will be called once before the register 168789Sahrens * func returns, thus changing the checksum/compression from the 1692082Seschrock * default (fletcher2/off). Snapshots don't need to know, and 1702082Seschrock * registering would complicate clone promotion. 171789Sahrens */ 1722082Seschrock if (ds && ds->ds_phys->ds_num_children == 0) { 173789Sahrens err = dsl_prop_register(ds, "checksum", 174789Sahrens checksum_changed_cb, osi); 1751544Seschrock if (err == 0) 1761544Seschrock err = dsl_prop_register(ds, "compression", 1771544Seschrock compression_changed_cb, osi); 1781544Seschrock if (err) { 1791544Seschrock zio_buf_free(osi->os_phys, sizeof (objset_phys_t)); 1801544Seschrock kmem_free(osi, sizeof (objset_impl_t)); 1811544Seschrock return (err); 1821544Seschrock } 1832082Seschrock } else if (ds == NULL) { 184789Sahrens /* It's the meta-objset. */ 185789Sahrens osi->os_checksum = ZIO_CHECKSUM_FLETCHER_4; 1861544Seschrock osi->os_compress = ZIO_COMPRESS_LZJB; 187789Sahrens } 188789Sahrens 1891544Seschrock osi->os_zil = zil_alloc(&osi->os, &osi->os_phys->os_zil_header); 1901544Seschrock 191789Sahrens /* 192789Sahrens * Metadata always gets compressed and checksummed. 193789Sahrens * If the data checksum is multi-bit correctable, and it's not 194789Sahrens * a ZBT-style checksum, then it's suitable for metadata as well. 195789Sahrens * Otherwise, the metadata checksum defaults to fletcher4. 196789Sahrens */ 197789Sahrens checksum = osi->os_checksum; 198789Sahrens 199789Sahrens if (zio_checksum_table[checksum].ci_correctable && 200789Sahrens !zio_checksum_table[checksum].ci_zbt) 201789Sahrens osi->os_md_checksum = checksum; 202789Sahrens else 203789Sahrens osi->os_md_checksum = ZIO_CHECKSUM_FLETCHER_4; 2041544Seschrock osi->os_md_compress = ZIO_COMPRESS_LZJB; 205789Sahrens 206789Sahrens for (i = 0; i < TXG_SIZE; i++) { 207789Sahrens list_create(&osi->os_dirty_dnodes[i], sizeof (dnode_t), 208789Sahrens offsetof(dnode_t, dn_dirty_link[i])); 209789Sahrens list_create(&osi->os_free_dnodes[i], sizeof (dnode_t), 210789Sahrens offsetof(dnode_t, dn_dirty_link[i])); 211789Sahrens } 212789Sahrens list_create(&osi->os_dnodes, sizeof (dnode_t), 213789Sahrens offsetof(dnode_t, dn_link)); 214789Sahrens list_create(&osi->os_downgraded_dbufs, sizeof (dmu_buf_impl_t), 215789Sahrens offsetof(dmu_buf_impl_t, db_link)); 216789Sahrens 217*2856Snd150628 mutex_init(&osi->os_lock, NULL, MUTEX_DEFAULT, NULL); 218*2856Snd150628 mutex_init(&osi->os_obj_lock, NULL, MUTEX_DEFAULT, NULL); 219*2856Snd150628 220789Sahrens osi->os_meta_dnode = dnode_special_open(osi, 221789Sahrens &osi->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT); 222789Sahrens 223789Sahrens if (ds != NULL) { 224789Sahrens winner = dsl_dataset_set_user_ptr(ds, osi, dmu_objset_evict); 225789Sahrens if (winner) { 226789Sahrens dmu_objset_evict(ds, osi); 227789Sahrens osi = winner; 228789Sahrens } 229789Sahrens } 230789Sahrens 2311544Seschrock *osip = osi; 2321544Seschrock return (0); 233789Sahrens } 234789Sahrens 235789Sahrens /* called from zpl */ 236789Sahrens int 237789Sahrens dmu_objset_open(const char *name, dmu_objset_type_t type, int mode, 238789Sahrens objset_t **osp) 239789Sahrens { 240789Sahrens dsl_dataset_t *ds; 241789Sahrens int err; 242789Sahrens objset_t *os; 243789Sahrens objset_impl_t *osi; 244789Sahrens 245789Sahrens os = kmem_alloc(sizeof (objset_t), KM_SLEEP); 246789Sahrens err = dsl_dataset_open(name, mode, os, &ds); 247789Sahrens if (err) { 248789Sahrens kmem_free(os, sizeof (objset_t)); 249789Sahrens return (err); 250789Sahrens } 251789Sahrens 252789Sahrens osi = dsl_dataset_get_user_ptr(ds); 253789Sahrens if (osi == NULL) { 254789Sahrens blkptr_t bp; 255789Sahrens 256789Sahrens dsl_dataset_get_blkptr(ds, &bp); 2571544Seschrock err = dmu_objset_open_impl(dsl_dataset_get_spa(ds), 2581544Seschrock ds, &bp, &osi); 2591544Seschrock if (err) { 2601544Seschrock dsl_dataset_close(ds, mode, os); 2611544Seschrock kmem_free(os, sizeof (objset_t)); 2621544Seschrock return (err); 2631544Seschrock } 264789Sahrens } 265789Sahrens 266789Sahrens os->os = osi; 267789Sahrens os->os_mode = mode; 268789Sahrens 269789Sahrens if (type != DMU_OST_ANY && type != os->os->os_phys->os_type) { 270789Sahrens dmu_objset_close(os); 271789Sahrens return (EINVAL); 272789Sahrens } 273789Sahrens *osp = os; 274789Sahrens return (0); 275789Sahrens } 276789Sahrens 277789Sahrens void 278789Sahrens dmu_objset_close(objset_t *os) 279789Sahrens { 280789Sahrens dsl_dataset_close(os->os->os_dsl_dataset, os->os_mode, os); 281789Sahrens kmem_free(os, sizeof (objset_t)); 282789Sahrens } 283789Sahrens 2841646Sperrin int 2851646Sperrin dmu_objset_evict_dbufs(objset_t *os, int try) 2861544Seschrock { 2871544Seschrock objset_impl_t *osi = os->os; 2881544Seschrock dnode_t *dn; 2891596Sahrens 2901596Sahrens mutex_enter(&osi->os_lock); 2911596Sahrens 2921596Sahrens /* process the mdn last, since the other dnodes have holds on it */ 2931596Sahrens list_remove(&osi->os_dnodes, osi->os_meta_dnode); 2941596Sahrens list_insert_tail(&osi->os_dnodes, osi->os_meta_dnode); 2951544Seschrock 2961544Seschrock /* 2971596Sahrens * Find the first dnode with holds. We have to do this dance 2981596Sahrens * because dnode_add_ref() only works if you already have a 2991596Sahrens * hold. If there are no holds then it has no dbufs so OK to 3001596Sahrens * skip. 3011544Seschrock */ 3021596Sahrens for (dn = list_head(&osi->os_dnodes); 3031596Sahrens dn && refcount_is_zero(&dn->dn_holds); 3041596Sahrens dn = list_next(&osi->os_dnodes, dn)) 3051596Sahrens continue; 3061596Sahrens if (dn) 3071596Sahrens dnode_add_ref(dn, FTAG); 3081596Sahrens 3091596Sahrens while (dn) { 3101596Sahrens dnode_t *next_dn = dn; 3111596Sahrens 3121596Sahrens do { 3131596Sahrens next_dn = list_next(&osi->os_dnodes, next_dn); 3141596Sahrens } while (next_dn && refcount_is_zero(&next_dn->dn_holds)); 3151596Sahrens if (next_dn) 3161596Sahrens dnode_add_ref(next_dn, FTAG); 3171596Sahrens 3181596Sahrens mutex_exit(&osi->os_lock); 3191646Sperrin if (dnode_evict_dbufs(dn, try)) { 3201646Sperrin dnode_rele(dn, FTAG); 3211646Sperrin if (next_dn) 3221646Sperrin dnode_rele(next_dn, FTAG); 3231646Sperrin return (1); 3241646Sperrin } 3251596Sahrens dnode_rele(dn, FTAG); 3261596Sahrens mutex_enter(&osi->os_lock); 3271596Sahrens dn = next_dn; 3281544Seschrock } 3291544Seschrock mutex_exit(&osi->os_lock); 3301646Sperrin return (0); 3311544Seschrock } 3321544Seschrock 3331544Seschrock void 334789Sahrens dmu_objset_evict(dsl_dataset_t *ds, void *arg) 335789Sahrens { 336789Sahrens objset_impl_t *osi = arg; 3371544Seschrock objset_t os; 3382082Seschrock int i; 339789Sahrens 340789Sahrens for (i = 0; i < TXG_SIZE; i++) { 341789Sahrens ASSERT(list_head(&osi->os_dirty_dnodes[i]) == NULL); 342789Sahrens ASSERT(list_head(&osi->os_free_dnodes[i]) == NULL); 343789Sahrens } 344789Sahrens 3452082Seschrock if (ds && ds->ds_phys->ds_num_children == 0) { 3462082Seschrock VERIFY(0 == dsl_prop_unregister(ds, "checksum", 3472082Seschrock checksum_changed_cb, osi)); 3482082Seschrock VERIFY(0 == dsl_prop_unregister(ds, "compression", 3492082Seschrock compression_changed_cb, osi)); 350789Sahrens } 351789Sahrens 3521544Seschrock /* 3531544Seschrock * We should need only a single pass over the dnode list, since 3541544Seschrock * nothing can be added to the list at this point. 3551544Seschrock */ 3561544Seschrock os.os = osi; 3571646Sperrin (void) dmu_objset_evict_dbufs(&os, 0); 3581544Seschrock 359789Sahrens ASSERT3P(list_head(&osi->os_dnodes), ==, osi->os_meta_dnode); 360789Sahrens ASSERT3P(list_tail(&osi->os_dnodes), ==, osi->os_meta_dnode); 361789Sahrens ASSERT3P(list_head(&osi->os_meta_dnode->dn_dbufs), ==, NULL); 362789Sahrens 363789Sahrens dnode_special_close(osi->os_meta_dnode); 364789Sahrens zil_free(osi->os_zil); 365789Sahrens 366789Sahrens zio_buf_free(osi->os_phys, sizeof (objset_phys_t)); 367*2856Snd150628 mutex_destroy(&osi->os_lock); 368*2856Snd150628 mutex_destroy(&osi->os_obj_lock); 369789Sahrens kmem_free(osi, sizeof (objset_impl_t)); 370789Sahrens } 371789Sahrens 372789Sahrens /* called from dsl for meta-objset */ 373789Sahrens objset_impl_t * 374789Sahrens dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, dmu_objset_type_t type, 375789Sahrens dmu_tx_t *tx) 376789Sahrens { 377789Sahrens objset_impl_t *osi; 378789Sahrens dnode_t *mdn; 379789Sahrens 380789Sahrens ASSERT(dmu_tx_is_syncing(tx)); 3811544Seschrock VERIFY(0 == dmu_objset_open_impl(spa, ds, NULL, &osi)); 382789Sahrens mdn = osi->os_meta_dnode; 383789Sahrens 384789Sahrens dnode_allocate(mdn, DMU_OT_DNODE, 1 << DNODE_BLOCK_SHIFT, 385789Sahrens DN_MAX_INDBLKSHIFT, DMU_OT_NONE, 0, tx); 386789Sahrens 387789Sahrens /* 388789Sahrens * We don't want to have to increase the meta-dnode's nlevels 389789Sahrens * later, because then we could do it in quescing context while 390789Sahrens * we are also accessing it in open context. 391789Sahrens * 392789Sahrens * This precaution is not necessary for the MOS (ds == NULL), 393789Sahrens * because the MOS is only updated in syncing context. 394789Sahrens * This is most fortunate: the MOS is the only objset that 395789Sahrens * needs to be synced multiple times as spa_sync() iterates 396789Sahrens * to convergence, so minimizing its dn_nlevels matters. 397789Sahrens */ 3981544Seschrock if (ds != NULL) { 3991544Seschrock int levels = 1; 4001544Seschrock 4011544Seschrock /* 4021544Seschrock * Determine the number of levels necessary for the meta-dnode 4031544Seschrock * to contain DN_MAX_OBJECT dnodes. 4041544Seschrock */ 4051544Seschrock while ((uint64_t)mdn->dn_nblkptr << (mdn->dn_datablkshift + 4061544Seschrock (levels - 1) * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) < 4071544Seschrock DN_MAX_OBJECT * sizeof (dnode_phys_t)) 4081544Seschrock levels++; 4091544Seschrock 410789Sahrens mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] = 4111544Seschrock mdn->dn_nlevels = levels; 4121544Seschrock } 413789Sahrens 414789Sahrens ASSERT(type != DMU_OST_NONE); 415789Sahrens ASSERT(type != DMU_OST_ANY); 416789Sahrens ASSERT(type < DMU_OST_NUMTYPES); 417789Sahrens osi->os_phys->os_type = type; 418789Sahrens 419789Sahrens dsl_dataset_dirty(ds, tx); 420789Sahrens 421789Sahrens return (osi); 422789Sahrens } 423789Sahrens 424789Sahrens struct oscarg { 425789Sahrens void (*userfunc)(objset_t *os, void *arg, dmu_tx_t *tx); 426789Sahrens void *userarg; 427789Sahrens dsl_dataset_t *clone_parent; 428789Sahrens const char *lastname; 429789Sahrens dmu_objset_type_t type; 430789Sahrens }; 431789Sahrens 4322199Sahrens /* ARGSUSED */ 433789Sahrens static int 4342199Sahrens dmu_objset_create_check(void *arg1, void *arg2, dmu_tx_t *tx) 435789Sahrens { 4362199Sahrens dsl_dir_t *dd = arg1; 4372199Sahrens struct oscarg *oa = arg2; 4382199Sahrens objset_t *mos = dd->dd_pool->dp_meta_objset; 4392199Sahrens int err; 4402199Sahrens uint64_t ddobj; 4412199Sahrens 4422199Sahrens err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj, 4432199Sahrens oa->lastname, sizeof (uint64_t), 1, &ddobj); 4442199Sahrens if (err != ENOENT) 4452199Sahrens return (err ? err : EEXIST); 4462199Sahrens 4472199Sahrens if (oa->clone_parent != NULL) { 4482199Sahrens /* 4492199Sahrens * You can't clone across pools. 4502199Sahrens */ 4512199Sahrens if (oa->clone_parent->ds_dir->dd_pool != dd->dd_pool) 4522199Sahrens return (EXDEV); 4532199Sahrens 4542199Sahrens /* 4552199Sahrens * You can only clone snapshots, not the head datasets. 4562199Sahrens */ 4572199Sahrens if (oa->clone_parent->ds_phys->ds_num_children == 0) 4582199Sahrens return (EINVAL); 4592199Sahrens } 4602199Sahrens return (0); 4612199Sahrens } 4622199Sahrens 4632199Sahrens static void 4642199Sahrens dmu_objset_create_sync(void *arg1, void *arg2, dmu_tx_t *tx) 4652199Sahrens { 4662199Sahrens dsl_dir_t *dd = arg1; 4672199Sahrens struct oscarg *oa = arg2; 468789Sahrens dsl_dataset_t *ds; 469789Sahrens blkptr_t bp; 4702199Sahrens uint64_t dsobj; 471789Sahrens 472789Sahrens ASSERT(dmu_tx_is_syncing(tx)); 473789Sahrens 4742199Sahrens dsobj = dsl_dataset_create_sync(dd, oa->lastname, 475789Sahrens oa->clone_parent, tx); 476789Sahrens 4772199Sahrens VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, dsobj, NULL, 4781544Seschrock DS_MODE_STANDARD | DS_MODE_READONLY, FTAG, &ds)); 479789Sahrens dsl_dataset_get_blkptr(ds, &bp); 480789Sahrens if (BP_IS_HOLE(&bp)) { 481789Sahrens objset_impl_t *osi; 482789Sahrens 483789Sahrens /* This is an empty dmu_objset; not a clone. */ 484789Sahrens osi = dmu_objset_create_impl(dsl_dataset_get_spa(ds), 485789Sahrens ds, oa->type, tx); 486789Sahrens 487789Sahrens if (oa->userfunc) 488789Sahrens oa->userfunc(&osi->os, oa->userarg, tx); 489789Sahrens } 490789Sahrens dsl_dataset_close(ds, DS_MODE_STANDARD | DS_MODE_READONLY, FTAG); 491789Sahrens } 492789Sahrens 493789Sahrens int 494789Sahrens dmu_objset_create(const char *name, dmu_objset_type_t type, 495789Sahrens objset_t *clone_parent, 496789Sahrens void (*func)(objset_t *os, void *arg, dmu_tx_t *tx), void *arg) 497789Sahrens { 4982199Sahrens dsl_dir_t *pdd; 499789Sahrens const char *tail; 500789Sahrens int err = 0; 5012199Sahrens struct oscarg oa = { 0 }; 502789Sahrens 5032199Sahrens ASSERT(strchr(name, '@') == NULL); 5042199Sahrens err = dsl_dir_open(name, FTAG, &pdd, &tail); 5051544Seschrock if (err) 5061544Seschrock return (err); 507789Sahrens if (tail == NULL) { 5082199Sahrens dsl_dir_close(pdd, FTAG); 509789Sahrens return (EEXIST); 510789Sahrens } 511789Sahrens 512789Sahrens dprintf("name=%s\n", name); 513789Sahrens 5142199Sahrens oa.userfunc = func; 5152199Sahrens oa.userarg = arg; 5162199Sahrens oa.lastname = tail; 5172199Sahrens oa.type = type; 5182199Sahrens if (clone_parent != NULL) { 519789Sahrens /* 5202199Sahrens * You can't clone to a different type. 521789Sahrens */ 5222199Sahrens if (clone_parent->os->os_phys->os_type != type) { 5232199Sahrens dsl_dir_close(pdd, FTAG); 5242199Sahrens return (EINVAL); 525789Sahrens } 5262199Sahrens oa.clone_parent = clone_parent->os->os_dsl_dataset; 527789Sahrens } 5282199Sahrens err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check, 5292199Sahrens dmu_objset_create_sync, pdd, &oa, 5); 5302199Sahrens dsl_dir_close(pdd, FTAG); 531789Sahrens return (err); 532789Sahrens } 533789Sahrens 534789Sahrens int 535789Sahrens dmu_objset_destroy(const char *name) 536789Sahrens { 537789Sahrens objset_t *os; 538789Sahrens int error; 539789Sahrens 540789Sahrens /* 541789Sahrens * If it looks like we'll be able to destroy it, and there's 542789Sahrens * an unplayed replay log sitting around, destroy the log. 543789Sahrens * It would be nicer to do this in dsl_dataset_destroy_sync(), 544789Sahrens * but the replay log objset is modified in open context. 545789Sahrens */ 546789Sahrens error = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_EXCLUSIVE, &os); 547789Sahrens if (error == 0) { 5481807Sbonwick zil_destroy(dmu_objset_zil(os), B_FALSE); 549789Sahrens dmu_objset_close(os); 550789Sahrens } 551789Sahrens 552789Sahrens return (dsl_dataset_destroy(name)); 553789Sahrens } 554789Sahrens 555789Sahrens int 556789Sahrens dmu_objset_rollback(const char *name) 557789Sahrens { 558789Sahrens int err; 559789Sahrens objset_t *os; 560789Sahrens 5612199Sahrens err = dmu_objset_open(name, DMU_OST_ANY, 5622199Sahrens DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, &os); 563789Sahrens if (err == 0) { 564789Sahrens err = zil_suspend(dmu_objset_zil(os)); 565789Sahrens if (err == 0) 566789Sahrens zil_resume(dmu_objset_zil(os)); 567789Sahrens if (err == 0) { 568789Sahrens /* XXX uncache everything? */ 5692199Sahrens err = dsl_dataset_rollback(os->os->os_dsl_dataset); 570789Sahrens } 5712199Sahrens dmu_objset_close(os); 572789Sahrens } 573789Sahrens return (err); 574789Sahrens } 575789Sahrens 5762199Sahrens struct snaparg { 5772199Sahrens dsl_sync_task_group_t *dstg; 5782199Sahrens char *snapname; 5792199Sahrens char failed[MAXPATHLEN]; 5802199Sahrens }; 5812199Sahrens 5822199Sahrens static int 5832199Sahrens dmu_objset_snapshot_one(char *name, void *arg) 5842199Sahrens { 5852199Sahrens struct snaparg *sn = arg; 5862199Sahrens objset_t *os; 5872199Sahrens int err; 5882199Sahrens 5892199Sahrens (void) strcpy(sn->failed, name); 5902199Sahrens 5912199Sahrens err = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_STANDARD, &os); 5922199Sahrens if (err != 0) 5932199Sahrens return (err); 5942199Sahrens 5952199Sahrens /* 5962199Sahrens * NB: we need to wait for all in-flight changes to get to disk, 5972199Sahrens * so that we snapshot those changes. zil_suspend does this as 5982199Sahrens * a side effect. 5992199Sahrens */ 6002199Sahrens err = zil_suspend(dmu_objset_zil(os)); 6012199Sahrens if (err == 0) { 6022199Sahrens dsl_sync_task_create(sn->dstg, dsl_dataset_snapshot_check, 6032199Sahrens dsl_dataset_snapshot_sync, os, sn->snapname, 3); 6042199Sahrens } 6052199Sahrens return (err); 6062199Sahrens } 6072199Sahrens 6082199Sahrens int 6092199Sahrens dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive) 6102199Sahrens { 6112199Sahrens dsl_sync_task_t *dst; 6122199Sahrens struct snaparg sn = { 0 }; 6132199Sahrens char *cp; 6142199Sahrens spa_t *spa; 6152199Sahrens int err; 6162199Sahrens 6172199Sahrens (void) strcpy(sn.failed, fsname); 6182199Sahrens 6192199Sahrens cp = strchr(fsname, '/'); 6202199Sahrens if (cp) { 6212199Sahrens *cp = '\0'; 6222199Sahrens err = spa_open(fsname, &spa, FTAG); 6232199Sahrens *cp = '/'; 6242199Sahrens } else { 6252199Sahrens err = spa_open(fsname, &spa, FTAG); 6262199Sahrens } 6272199Sahrens if (err) 6282199Sahrens return (err); 6292199Sahrens 6302199Sahrens sn.dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 6312199Sahrens sn.snapname = snapname; 6322199Sahrens 6332417Sahrens if (recursive) { 6342417Sahrens err = dmu_objset_find(fsname, 6352417Sahrens dmu_objset_snapshot_one, &sn, DS_FIND_CHILDREN); 6362417Sahrens } else { 6372199Sahrens err = dmu_objset_snapshot_one(fsname, &sn); 6382417Sahrens } 6392199Sahrens 6402199Sahrens if (err) 6412199Sahrens goto out; 6422199Sahrens 6432199Sahrens err = dsl_sync_task_group_wait(sn.dstg); 6442199Sahrens 6452199Sahrens for (dst = list_head(&sn.dstg->dstg_tasks); dst; 6462199Sahrens dst = list_next(&sn.dstg->dstg_tasks, dst)) { 6472199Sahrens objset_t *os = dst->dst_arg1; 6482199Sahrens if (dst->dst_err) 6492199Sahrens dmu_objset_name(os, sn.failed); 6502199Sahrens zil_resume(dmu_objset_zil(os)); 6512199Sahrens dmu_objset_close(os); 6522199Sahrens } 6532199Sahrens out: 6542199Sahrens if (err) 6552199Sahrens (void) strcpy(fsname, sn.failed); 6562199Sahrens dsl_sync_task_group_destroy(sn.dstg); 6572199Sahrens spa_close(spa, FTAG); 6582199Sahrens return (err); 6592199Sahrens } 6602199Sahrens 661789Sahrens static void 662789Sahrens dmu_objset_sync_dnodes(objset_impl_t *os, list_t *list, dmu_tx_t *tx) 663789Sahrens { 664789Sahrens dnode_t *dn = list_head(list); 665789Sahrens int level, err; 666789Sahrens 667789Sahrens for (level = 0; dn = list_head(list); level++) { 668789Sahrens zio_t *zio; 669789Sahrens zio = zio_root(os->os_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); 670789Sahrens 671789Sahrens ASSERT3U(level, <=, DN_MAX_LEVELS); 672789Sahrens 673789Sahrens while (dn) { 674789Sahrens dnode_t *next = list_next(list, dn); 675789Sahrens 676789Sahrens list_remove(list, dn); 677789Sahrens if (dnode_sync(dn, level, zio, tx) == 0) { 678789Sahrens /* 679789Sahrens * This dnode requires syncing at higher 680789Sahrens * levels; put it back onto the list. 681789Sahrens */ 682789Sahrens if (next) 683789Sahrens list_insert_before(list, next, dn); 684789Sahrens else 685789Sahrens list_insert_tail(list, dn); 686789Sahrens } 687789Sahrens dn = next; 688789Sahrens } 689789Sahrens err = zio_wait(zio); 690789Sahrens ASSERT(err == 0); 691789Sahrens } 692789Sahrens } 693789Sahrens 694789Sahrens /* ARGSUSED */ 695789Sahrens static void 696789Sahrens killer(zio_t *zio, arc_buf_t *abuf, void *arg) 697789Sahrens { 698789Sahrens objset_impl_t *os = arg; 699789Sahrens objset_phys_t *osphys = zio->io_data; 700789Sahrens dnode_phys_t *dnp = &osphys->os_meta_dnode; 701789Sahrens int i; 702789Sahrens 703789Sahrens ASSERT3U(zio->io_error, ==, 0); 704789Sahrens 705789Sahrens /* 706789Sahrens * Update rootbp fill count. 707789Sahrens */ 708789Sahrens os->os_rootbp.blk_fill = 1; /* count the meta-dnode */ 709789Sahrens for (i = 0; i < dnp->dn_nblkptr; i++) 710789Sahrens os->os_rootbp.blk_fill += dnp->dn_blkptr[i].blk_fill; 711789Sahrens 712789Sahrens BP_SET_TYPE(zio->io_bp, DMU_OT_OBJSET); 713789Sahrens BP_SET_LEVEL(zio->io_bp, 0); 714789Sahrens 715789Sahrens if (!DVA_EQUAL(BP_IDENTITY(zio->io_bp), 716789Sahrens BP_IDENTITY(&zio->io_bp_orig))) { 717789Sahrens dsl_dataset_block_kill(os->os_dsl_dataset, &zio->io_bp_orig, 718789Sahrens os->os_synctx); 719789Sahrens dsl_dataset_block_born(os->os_dsl_dataset, zio->io_bp, 720789Sahrens os->os_synctx); 721789Sahrens } 722789Sahrens } 723789Sahrens 724789Sahrens 725789Sahrens /* called from dsl */ 726789Sahrens void 727789Sahrens dmu_objset_sync(objset_impl_t *os, dmu_tx_t *tx) 728789Sahrens { 729789Sahrens extern taskq_t *dbuf_tq; 730789Sahrens int txgoff; 731789Sahrens list_t *dirty_list; 732789Sahrens int err; 7331544Seschrock zbookmark_t zb; 734789Sahrens arc_buf_t *abuf = 735789Sahrens arc_buf_alloc(os->os_spa, sizeof (objset_phys_t), FTAG); 736789Sahrens 737789Sahrens ASSERT(dmu_tx_is_syncing(tx)); 738789Sahrens ASSERT(os->os_synctx == NULL); 739789Sahrens /* XXX the write_done callback should really give us the tx... */ 740789Sahrens os->os_synctx = tx; 741789Sahrens 742789Sahrens dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg); 743789Sahrens 744789Sahrens txgoff = tx->tx_txg & TXG_MASK; 745789Sahrens 746789Sahrens dmu_objset_sync_dnodes(os, &os->os_free_dnodes[txgoff], tx); 747789Sahrens dmu_objset_sync_dnodes(os, &os->os_dirty_dnodes[txgoff], tx); 748789Sahrens 749789Sahrens /* 750789Sahrens * Free intent log blocks up to this tx. 751789Sahrens */ 752789Sahrens zil_sync(os->os_zil, tx); 753789Sahrens 754789Sahrens /* 755789Sahrens * Sync meta-dnode 756789Sahrens */ 757789Sahrens dirty_list = &os->os_dirty_dnodes[txgoff]; 758789Sahrens ASSERT(list_head(dirty_list) == NULL); 759789Sahrens list_insert_tail(dirty_list, os->os_meta_dnode); 760789Sahrens dmu_objset_sync_dnodes(os, dirty_list, tx); 761789Sahrens 762789Sahrens /* 763789Sahrens * Sync the root block. 764789Sahrens */ 765789Sahrens bcopy(os->os_phys, abuf->b_data, sizeof (objset_phys_t)); 7661544Seschrock zb.zb_objset = os->os_dsl_dataset ? os->os_dsl_dataset->ds_object : 0; 7671544Seschrock zb.zb_object = 0; 7681544Seschrock zb.zb_level = -1; 7691544Seschrock zb.zb_blkid = 0; 770789Sahrens err = arc_write(NULL, os->os_spa, os->os_md_checksum, 7711775Sbillm os->os_md_compress, 7721775Sbillm dmu_get_replication_level(os->os_spa, &zb, DMU_OT_OBJSET), 7731775Sbillm tx->tx_txg, &os->os_rootbp, abuf, killer, os, 7741544Seschrock ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, ARC_WAIT, &zb); 775789Sahrens ASSERT(err == 0); 7761544Seschrock VERIFY(arc_buf_remove_ref(abuf, FTAG) == 1); 777789Sahrens 778789Sahrens dsl_dataset_set_blkptr(os->os_dsl_dataset, &os->os_rootbp, tx); 779789Sahrens 780789Sahrens ASSERT3P(os->os_synctx, ==, tx); 781789Sahrens taskq_wait(dbuf_tq); 782789Sahrens os->os_synctx = NULL; 783789Sahrens } 784789Sahrens 785789Sahrens void 786789Sahrens dmu_objset_stats(objset_t *os, dmu_objset_stats_t *dds) 787789Sahrens { 788789Sahrens if (os->os->os_dsl_dataset != NULL) { 789789Sahrens dsl_dataset_stats(os->os->os_dsl_dataset, dds); 790789Sahrens } else { 791789Sahrens ASSERT(os->os->os_phys->os_type == DMU_OST_META); 792789Sahrens bzero(dds, sizeof (*dds)); 793789Sahrens } 794789Sahrens dds->dds_type = os->os->os_phys->os_type; 795789Sahrens } 796789Sahrens 797789Sahrens int 798789Sahrens dmu_objset_is_snapshot(objset_t *os) 799789Sahrens { 800789Sahrens if (os->os->os_dsl_dataset != NULL) 801789Sahrens return (dsl_dataset_is_snapshot(os->os->os_dsl_dataset)); 802789Sahrens else 803789Sahrens return (B_FALSE); 804789Sahrens } 805789Sahrens 806789Sahrens int 807789Sahrens dmu_snapshot_list_next(objset_t *os, int namelen, char *name, 808885Sahrens uint64_t *idp, uint64_t *offp) 809789Sahrens { 810789Sahrens dsl_dataset_t *ds = os->os->os_dsl_dataset; 811789Sahrens zap_cursor_t cursor; 812789Sahrens zap_attribute_t attr; 813789Sahrens 814789Sahrens if (ds->ds_phys->ds_snapnames_zapobj == 0) 815789Sahrens return (ENOENT); 816789Sahrens 817789Sahrens zap_cursor_init_serialized(&cursor, 818789Sahrens ds->ds_dir->dd_pool->dp_meta_objset, 819789Sahrens ds->ds_phys->ds_snapnames_zapobj, *offp); 820789Sahrens 821885Sahrens if (zap_cursor_retrieve(&cursor, &attr) != 0) { 822885Sahrens zap_cursor_fini(&cursor); 823885Sahrens return (ENOENT); 824885Sahrens } 825885Sahrens 826885Sahrens if (strlen(attr.za_name) + 1 > namelen) { 827885Sahrens zap_cursor_fini(&cursor); 828885Sahrens return (ENAMETOOLONG); 829885Sahrens } 830885Sahrens 831885Sahrens (void) strcpy(name, attr.za_name); 832885Sahrens if (idp) 833885Sahrens *idp = attr.za_first_integer; 834885Sahrens zap_cursor_advance(&cursor); 835885Sahrens *offp = zap_cursor_serialize(&cursor); 836885Sahrens zap_cursor_fini(&cursor); 837885Sahrens 838885Sahrens return (0); 839885Sahrens } 840885Sahrens 841885Sahrens int 842885Sahrens dmu_dir_list_next(objset_t *os, int namelen, char *name, 843885Sahrens uint64_t *idp, uint64_t *offp) 844885Sahrens { 845885Sahrens dsl_dir_t *dd = os->os->os_dsl_dataset->ds_dir; 846885Sahrens zap_cursor_t cursor; 847885Sahrens zap_attribute_t attr; 848885Sahrens 849885Sahrens /* there is no next dir on a snapshot! */ 850885Sahrens if (os->os->os_dsl_dataset->ds_object != 851885Sahrens dd->dd_phys->dd_head_dataset_obj) 852885Sahrens return (ENOENT); 853885Sahrens 854885Sahrens zap_cursor_init_serialized(&cursor, 855885Sahrens dd->dd_pool->dp_meta_objset, 856885Sahrens dd->dd_phys->dd_child_dir_zapobj, *offp); 857885Sahrens 858885Sahrens if (zap_cursor_retrieve(&cursor, &attr) != 0) { 859885Sahrens zap_cursor_fini(&cursor); 860885Sahrens return (ENOENT); 861885Sahrens } 862885Sahrens 863885Sahrens if (strlen(attr.za_name) + 1 > namelen) { 864885Sahrens zap_cursor_fini(&cursor); 865789Sahrens return (ENAMETOOLONG); 866885Sahrens } 867789Sahrens 868789Sahrens (void) strcpy(name, attr.za_name); 869885Sahrens if (idp) 870885Sahrens *idp = attr.za_first_integer; 871789Sahrens zap_cursor_advance(&cursor); 872789Sahrens *offp = zap_cursor_serialize(&cursor); 873885Sahrens zap_cursor_fini(&cursor); 874789Sahrens 875789Sahrens return (0); 876789Sahrens } 877789Sahrens 878789Sahrens /* 879789Sahrens * Find all objsets under name, and for each, call 'func(child_name, arg)'. 880789Sahrens */ 8812199Sahrens int 8822199Sahrens dmu_objset_find(char *name, int func(char *, void *), void *arg, int flags) 883789Sahrens { 884789Sahrens dsl_dir_t *dd; 885789Sahrens objset_t *os; 886789Sahrens uint64_t snapobj; 887789Sahrens zap_cursor_t zc; 888789Sahrens zap_attribute_t attr; 889789Sahrens char *child; 8901544Seschrock int do_self, err; 891789Sahrens 8921544Seschrock err = dsl_dir_open(name, FTAG, &dd, NULL); 8931544Seschrock if (err) 8942199Sahrens return (err); 895789Sahrens 8962199Sahrens /* NB: the $MOS dir doesn't have a head dataset */ 897789Sahrens do_self = (dd->dd_phys->dd_head_dataset_obj != 0); 898789Sahrens 899789Sahrens /* 900789Sahrens * Iterate over all children. 901789Sahrens */ 9022417Sahrens if (flags & DS_FIND_CHILDREN) { 9032417Sahrens for (zap_cursor_init(&zc, dd->dd_pool->dp_meta_objset, 9042417Sahrens dd->dd_phys->dd_child_dir_zapobj); 9052417Sahrens zap_cursor_retrieve(&zc, &attr) == 0; 9062417Sahrens (void) zap_cursor_advance(&zc)) { 9072417Sahrens ASSERT(attr.za_integer_length == sizeof (uint64_t)); 9082417Sahrens ASSERT(attr.za_num_integers == 1); 909789Sahrens 9102417Sahrens /* 9112417Sahrens * No separating '/' because parent's name ends in /. 9122417Sahrens */ 9132417Sahrens child = kmem_alloc(MAXPATHLEN, KM_SLEEP); 9142417Sahrens /* XXX could probably just use name here */ 9152417Sahrens dsl_dir_name(dd, child); 9162417Sahrens (void) strcat(child, "/"); 9172417Sahrens (void) strcat(child, attr.za_name); 9182417Sahrens err = dmu_objset_find(child, func, arg, flags); 9192417Sahrens kmem_free(child, MAXPATHLEN); 9202417Sahrens if (err) 9212417Sahrens break; 9222417Sahrens } 9232417Sahrens zap_cursor_fini(&zc); 9242199Sahrens 9252417Sahrens if (err) { 9262417Sahrens dsl_dir_close(dd, FTAG); 9272417Sahrens return (err); 9282417Sahrens } 929789Sahrens } 930789Sahrens 931789Sahrens /* 932789Sahrens * Iterate over all snapshots. 933789Sahrens */ 934789Sahrens if ((flags & DS_FIND_SNAPSHOTS) && 935789Sahrens dmu_objset_open(name, DMU_OST_ANY, 936789Sahrens DS_MODE_STANDARD | DS_MODE_READONLY, &os) == 0) { 937789Sahrens 938789Sahrens snapobj = os->os->os_dsl_dataset->ds_phys->ds_snapnames_zapobj; 939789Sahrens dmu_objset_close(os); 940789Sahrens 941789Sahrens for (zap_cursor_init(&zc, dd->dd_pool->dp_meta_objset, snapobj); 942789Sahrens zap_cursor_retrieve(&zc, &attr) == 0; 943789Sahrens (void) zap_cursor_advance(&zc)) { 944789Sahrens ASSERT(attr.za_integer_length == sizeof (uint64_t)); 945789Sahrens ASSERT(attr.za_num_integers == 1); 946789Sahrens 947789Sahrens child = kmem_alloc(MAXPATHLEN, KM_SLEEP); 948789Sahrens /* XXX could probably just use name here */ 949789Sahrens dsl_dir_name(dd, child); 950789Sahrens (void) strcat(child, "@"); 951789Sahrens (void) strcat(child, attr.za_name); 9522199Sahrens err = func(child, arg); 953789Sahrens kmem_free(child, MAXPATHLEN); 9542199Sahrens if (err) 9552199Sahrens break; 956789Sahrens } 957885Sahrens zap_cursor_fini(&zc); 958789Sahrens } 959789Sahrens 960789Sahrens dsl_dir_close(dd, FTAG); 961789Sahrens 9622199Sahrens if (err) 9632199Sahrens return (err); 9642199Sahrens 965789Sahrens /* 966789Sahrens * Apply to self if appropriate. 967789Sahrens */ 968789Sahrens if (do_self) 9692199Sahrens err = func(name, arg); 9702199Sahrens return (err); 971789Sahrens } 972