/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
24789Sahrens */ 25789Sahrens 26789Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 27789Sahrens 28789Sahrens #include <sys/zfs_context.h> 29789Sahrens #include <sys/dmu_objset.h> 30789Sahrens #include <sys/dsl_dir.h> 31789Sahrens #include <sys/dsl_dataset.h> 32789Sahrens #include <sys/dsl_prop.h> 33789Sahrens #include <sys/dsl_pool.h> 342199Sahrens #include <sys/dsl_synctask.h> 35789Sahrens #include <sys/dnode.h> 36789Sahrens #include <sys/dbuf.h> 372885Sahrens #include <sys/zvol.h> 38789Sahrens #include <sys/dmu_tx.h> 39789Sahrens #include <sys/zio_checksum.h> 40789Sahrens #include <sys/zap.h> 41789Sahrens #include <sys/zil.h> 42789Sahrens #include <sys/dmu_impl.h> 43789Sahrens 44789Sahrens 45789Sahrens spa_t * 46789Sahrens dmu_objset_spa(objset_t *os) 47789Sahrens { 48789Sahrens return (os->os->os_spa); 49789Sahrens } 50789Sahrens 51789Sahrens zilog_t * 52789Sahrens dmu_objset_zil(objset_t *os) 53789Sahrens { 54789Sahrens return (os->os->os_zil); 55789Sahrens } 56789Sahrens 57789Sahrens dsl_pool_t * 58789Sahrens dmu_objset_pool(objset_t *os) 59789Sahrens { 60789Sahrens dsl_dataset_t *ds; 61789Sahrens 62789Sahrens if ((ds = os->os->os_dsl_dataset) != NULL && ds->ds_dir) 63789Sahrens return (ds->ds_dir->dd_pool); 64789Sahrens else 65789Sahrens return (spa_get_dsl(os->os->os_spa)); 66789Sahrens } 67789Sahrens 68789Sahrens dsl_dataset_t * 69789Sahrens dmu_objset_ds(objset_t *os) 70789Sahrens { 71789Sahrens return (os->os->os_dsl_dataset); 72789Sahrens } 73789Sahrens 74789Sahrens dmu_objset_type_t 75789Sahrens dmu_objset_type(objset_t *os) 76789Sahrens { 77789Sahrens return (os->os->os_phys->os_type); 78789Sahrens } 79789Sahrens 80789Sahrens void 81789Sahrens dmu_objset_name(objset_t *os, char *buf) 82789Sahrens { 83789Sahrens dsl_dataset_name(os->os->os_dsl_dataset, buf); 84789Sahrens } 85789Sahrens 86789Sahrens uint64_t 87789Sahrens dmu_objset_id(objset_t *os) 88789Sahrens { 89789Sahrens dsl_dataset_t *ds = os->os->os_dsl_dataset; 90789Sahrens 91789Sahrens return (ds ? 
ds->ds_object : 0); 92789Sahrens } 93789Sahrens 94789Sahrens static void 95789Sahrens checksum_changed_cb(void *arg, uint64_t newval) 96789Sahrens { 97789Sahrens objset_impl_t *osi = arg; 98789Sahrens 99789Sahrens /* 100789Sahrens * Inheritance should have been done by now. 101789Sahrens */ 102789Sahrens ASSERT(newval != ZIO_CHECKSUM_INHERIT); 103789Sahrens 104789Sahrens osi->os_checksum = zio_checksum_select(newval, ZIO_CHECKSUM_ON_VALUE); 105789Sahrens } 106789Sahrens 107789Sahrens static void 108789Sahrens compression_changed_cb(void *arg, uint64_t newval) 109789Sahrens { 110789Sahrens objset_impl_t *osi = arg; 111789Sahrens 112789Sahrens /* 113789Sahrens * Inheritance and range checking should have been done by now. 114789Sahrens */ 115789Sahrens ASSERT(newval != ZIO_COMPRESS_INHERIT); 116789Sahrens 117789Sahrens osi->os_compress = zio_compress_select(newval, ZIO_COMPRESS_ON_VALUE); 118789Sahrens } 119789Sahrens 120789Sahrens void 121789Sahrens dmu_objset_byteswap(void *buf, size_t size) 122789Sahrens { 123789Sahrens objset_phys_t *osp = buf; 124789Sahrens 125789Sahrens ASSERT(size == sizeof (objset_phys_t)); 126789Sahrens dnode_byteswap(&osp->os_meta_dnode); 127789Sahrens byteswap_uint64_array(&osp->os_zil_header, sizeof (zil_header_t)); 128789Sahrens osp->os_type = BSWAP_64(osp->os_type); 129789Sahrens } 130789Sahrens 1311544Seschrock int 1321544Seschrock dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, 1331544Seschrock objset_impl_t **osip) 134789Sahrens { 135789Sahrens objset_impl_t *winner, *osi; 136789Sahrens int i, err, checksum; 137789Sahrens 138789Sahrens osi = kmem_zalloc(sizeof (objset_impl_t), KM_SLEEP); 139789Sahrens osi->os.os = osi; 140789Sahrens osi->os_dsl_dataset = ds; 141789Sahrens osi->os_spa = spa; 142*3547Smaybee osi->os_rootbp = bp; 143*3547Smaybee if (!BP_IS_HOLE(osi->os_rootbp)) { 1442391Smaybee uint32_t aflags = ARC_WAIT; 1451544Seschrock zbookmark_t zb; 1461544Seschrock zb.zb_objset = ds ? 
ds->ds_object : 0; 1471544Seschrock zb.zb_object = 0; 1481544Seschrock zb.zb_level = -1; 1491544Seschrock zb.zb_blkid = 0; 1501544Seschrock 151*3547Smaybee dprintf_bp(osi->os_rootbp, "reading %s", ""); 152*3547Smaybee err = arc_read(NULL, spa, osi->os_rootbp, 153789Sahrens dmu_ot[DMU_OT_OBJSET].ot_byteswap, 154*3547Smaybee arc_getbuf_func, &osi->os_phys_buf, 1552391Smaybee ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb); 1561544Seschrock if (err) { 1571544Seschrock kmem_free(osi, sizeof (objset_impl_t)); 1581544Seschrock return (err); 1591544Seschrock } 160*3547Smaybee osi->os_phys = osi->os_phys_buf->b_data; 161*3547Smaybee arc_release(osi->os_phys_buf, &osi->os_phys_buf); 162789Sahrens } else { 163*3547Smaybee osi->os_phys_buf = arc_buf_alloc(spa, sizeof (objset_phys_t), 164*3547Smaybee &osi->os_phys_buf, ARC_BUFC_METADATA); 165*3547Smaybee osi->os_phys = osi->os_phys_buf->b_data; 166789Sahrens bzero(osi->os_phys, sizeof (objset_phys_t)); 167789Sahrens } 168789Sahrens 169789Sahrens /* 170789Sahrens * Note: the changed_cb will be called once before the register 171789Sahrens * func returns, thus changing the checksum/compression from the 1722082Seschrock * default (fletcher2/off). Snapshots don't need to know, and 1732082Seschrock * registering would complicate clone promotion. 174789Sahrens */ 1752082Seschrock if (ds && ds->ds_phys->ds_num_children == 0) { 176789Sahrens err = dsl_prop_register(ds, "checksum", 177789Sahrens checksum_changed_cb, osi); 1781544Seschrock if (err == 0) 1791544Seschrock err = dsl_prop_register(ds, "compression", 1801544Seschrock compression_changed_cb, osi); 1811544Seschrock if (err) { 182*3547Smaybee VERIFY(arc_buf_remove_ref(osi->os_phys_buf, 183*3547Smaybee &osi->os_phys_buf) == 1); 1841544Seschrock kmem_free(osi, sizeof (objset_impl_t)); 1851544Seschrock return (err); 1861544Seschrock } 1872082Seschrock } else if (ds == NULL) { 188789Sahrens /* It's the meta-objset. 
*/ 189789Sahrens osi->os_checksum = ZIO_CHECKSUM_FLETCHER_4; 1901544Seschrock osi->os_compress = ZIO_COMPRESS_LZJB; 191789Sahrens } 192789Sahrens 1931544Seschrock osi->os_zil = zil_alloc(&osi->os, &osi->os_phys->os_zil_header); 1941544Seschrock 195789Sahrens /* 196789Sahrens * Metadata always gets compressed and checksummed. 197789Sahrens * If the data checksum is multi-bit correctable, and it's not 198789Sahrens * a ZBT-style checksum, then it's suitable for metadata as well. 199789Sahrens * Otherwise, the metadata checksum defaults to fletcher4. 200789Sahrens */ 201789Sahrens checksum = osi->os_checksum; 202789Sahrens 203789Sahrens if (zio_checksum_table[checksum].ci_correctable && 204789Sahrens !zio_checksum_table[checksum].ci_zbt) 205789Sahrens osi->os_md_checksum = checksum; 206789Sahrens else 207789Sahrens osi->os_md_checksum = ZIO_CHECKSUM_FLETCHER_4; 2081544Seschrock osi->os_md_compress = ZIO_COMPRESS_LZJB; 209789Sahrens 210789Sahrens for (i = 0; i < TXG_SIZE; i++) { 211789Sahrens list_create(&osi->os_dirty_dnodes[i], sizeof (dnode_t), 212789Sahrens offsetof(dnode_t, dn_dirty_link[i])); 213789Sahrens list_create(&osi->os_free_dnodes[i], sizeof (dnode_t), 214789Sahrens offsetof(dnode_t, dn_dirty_link[i])); 215789Sahrens } 216789Sahrens list_create(&osi->os_dnodes, sizeof (dnode_t), 217789Sahrens offsetof(dnode_t, dn_link)); 218789Sahrens list_create(&osi->os_downgraded_dbufs, sizeof (dmu_buf_impl_t), 219789Sahrens offsetof(dmu_buf_impl_t, db_link)); 220789Sahrens 2212856Snd150628 mutex_init(&osi->os_lock, NULL, MUTEX_DEFAULT, NULL); 2222856Snd150628 mutex_init(&osi->os_obj_lock, NULL, MUTEX_DEFAULT, NULL); 2232856Snd150628 224789Sahrens osi->os_meta_dnode = dnode_special_open(osi, 225789Sahrens &osi->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT); 226789Sahrens 227789Sahrens if (ds != NULL) { 228789Sahrens winner = dsl_dataset_set_user_ptr(ds, osi, dmu_objset_evict); 229789Sahrens if (winner) { 230789Sahrens dmu_objset_evict(ds, osi); 231789Sahrens osi = 
winner; 232789Sahrens } 233789Sahrens } 234789Sahrens 2351544Seschrock *osip = osi; 2361544Seschrock return (0); 237789Sahrens } 238789Sahrens 239789Sahrens /* called from zpl */ 240789Sahrens int 241789Sahrens dmu_objset_open(const char *name, dmu_objset_type_t type, int mode, 242789Sahrens objset_t **osp) 243789Sahrens { 244789Sahrens dsl_dataset_t *ds; 245789Sahrens int err; 246789Sahrens objset_t *os; 247789Sahrens objset_impl_t *osi; 248789Sahrens 249789Sahrens os = kmem_alloc(sizeof (objset_t), KM_SLEEP); 250789Sahrens err = dsl_dataset_open(name, mode, os, &ds); 251789Sahrens if (err) { 252789Sahrens kmem_free(os, sizeof (objset_t)); 253789Sahrens return (err); 254789Sahrens } 255789Sahrens 256789Sahrens osi = dsl_dataset_get_user_ptr(ds); 257789Sahrens if (osi == NULL) { 2581544Seschrock err = dmu_objset_open_impl(dsl_dataset_get_spa(ds), 259*3547Smaybee ds, &ds->ds_phys->ds_bp, &osi); 2601544Seschrock if (err) { 2611544Seschrock dsl_dataset_close(ds, mode, os); 2621544Seschrock kmem_free(os, sizeof (objset_t)); 2631544Seschrock return (err); 2641544Seschrock } 265789Sahrens } 266789Sahrens 267789Sahrens os->os = osi; 268789Sahrens os->os_mode = mode; 269789Sahrens 270789Sahrens if (type != DMU_OST_ANY && type != os->os->os_phys->os_type) { 271789Sahrens dmu_objset_close(os); 272789Sahrens return (EINVAL); 273789Sahrens } 274789Sahrens *osp = os; 275789Sahrens return (0); 276789Sahrens } 277789Sahrens 278789Sahrens void 279789Sahrens dmu_objset_close(objset_t *os) 280789Sahrens { 281789Sahrens dsl_dataset_close(os->os->os_dsl_dataset, os->os_mode, os); 282789Sahrens kmem_free(os, sizeof (objset_t)); 283789Sahrens } 284789Sahrens 2851646Sperrin int 2861646Sperrin dmu_objset_evict_dbufs(objset_t *os, int try) 2871544Seschrock { 2881544Seschrock objset_impl_t *osi = os->os; 2891544Seschrock dnode_t *dn; 2901596Sahrens 2911596Sahrens mutex_enter(&osi->os_lock); 2921596Sahrens 2931596Sahrens /* process the mdn last, since the other dnodes have holds on it */ 
2941596Sahrens list_remove(&osi->os_dnodes, osi->os_meta_dnode); 2951596Sahrens list_insert_tail(&osi->os_dnodes, osi->os_meta_dnode); 2961544Seschrock 2971544Seschrock /* 2981596Sahrens * Find the first dnode with holds. We have to do this dance 2991596Sahrens * because dnode_add_ref() only works if you already have a 3001596Sahrens * hold. If there are no holds then it has no dbufs so OK to 3011596Sahrens * skip. 3021544Seschrock */ 3031596Sahrens for (dn = list_head(&osi->os_dnodes); 3041596Sahrens dn && refcount_is_zero(&dn->dn_holds); 3051596Sahrens dn = list_next(&osi->os_dnodes, dn)) 3061596Sahrens continue; 3071596Sahrens if (dn) 3081596Sahrens dnode_add_ref(dn, FTAG); 3091596Sahrens 3101596Sahrens while (dn) { 3111596Sahrens dnode_t *next_dn = dn; 3121596Sahrens 3131596Sahrens do { 3141596Sahrens next_dn = list_next(&osi->os_dnodes, next_dn); 3151596Sahrens } while (next_dn && refcount_is_zero(&next_dn->dn_holds)); 3161596Sahrens if (next_dn) 3171596Sahrens dnode_add_ref(next_dn, FTAG); 3181596Sahrens 3191596Sahrens mutex_exit(&osi->os_lock); 3201646Sperrin if (dnode_evict_dbufs(dn, try)) { 3211646Sperrin dnode_rele(dn, FTAG); 3221646Sperrin if (next_dn) 3231646Sperrin dnode_rele(next_dn, FTAG); 3241646Sperrin return (1); 3251646Sperrin } 3261596Sahrens dnode_rele(dn, FTAG); 3271596Sahrens mutex_enter(&osi->os_lock); 3281596Sahrens dn = next_dn; 3291544Seschrock } 3301544Seschrock mutex_exit(&osi->os_lock); 3311646Sperrin return (0); 3321544Seschrock } 3331544Seschrock 3341544Seschrock void 335789Sahrens dmu_objset_evict(dsl_dataset_t *ds, void *arg) 336789Sahrens { 337789Sahrens objset_impl_t *osi = arg; 3381544Seschrock objset_t os; 3392082Seschrock int i; 340789Sahrens 341789Sahrens for (i = 0; i < TXG_SIZE; i++) { 342789Sahrens ASSERT(list_head(&osi->os_dirty_dnodes[i]) == NULL); 343789Sahrens ASSERT(list_head(&osi->os_free_dnodes[i]) == NULL); 344789Sahrens } 345789Sahrens 3462082Seschrock if (ds && ds->ds_phys->ds_num_children == 0) { 
3472082Seschrock VERIFY(0 == dsl_prop_unregister(ds, "checksum", 3482082Seschrock checksum_changed_cb, osi)); 3492082Seschrock VERIFY(0 == dsl_prop_unregister(ds, "compression", 3502082Seschrock compression_changed_cb, osi)); 351789Sahrens } 352789Sahrens 3531544Seschrock /* 3541544Seschrock * We should need only a single pass over the dnode list, since 3551544Seschrock * nothing can be added to the list at this point. 3561544Seschrock */ 3571544Seschrock os.os = osi; 3581646Sperrin (void) dmu_objset_evict_dbufs(&os, 0); 3591544Seschrock 360789Sahrens ASSERT3P(list_head(&osi->os_dnodes), ==, osi->os_meta_dnode); 361789Sahrens ASSERT3P(list_tail(&osi->os_dnodes), ==, osi->os_meta_dnode); 362789Sahrens ASSERT3P(list_head(&osi->os_meta_dnode->dn_dbufs), ==, NULL); 363789Sahrens 364789Sahrens dnode_special_close(osi->os_meta_dnode); 365789Sahrens zil_free(osi->os_zil); 366789Sahrens 367*3547Smaybee VERIFY(arc_buf_remove_ref(osi->os_phys_buf, &osi->os_phys_buf) == 1); 3682856Snd150628 mutex_destroy(&osi->os_lock); 3692856Snd150628 mutex_destroy(&osi->os_obj_lock); 370789Sahrens kmem_free(osi, sizeof (objset_impl_t)); 371789Sahrens } 372789Sahrens 373789Sahrens /* called from dsl for meta-objset */ 374789Sahrens objset_impl_t * 375*3547Smaybee dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, 376*3547Smaybee dmu_objset_type_t type, dmu_tx_t *tx) 377789Sahrens { 378789Sahrens objset_impl_t *osi; 379789Sahrens dnode_t *mdn; 380789Sahrens 381789Sahrens ASSERT(dmu_tx_is_syncing(tx)); 382*3547Smaybee VERIFY(0 == dmu_objset_open_impl(spa, ds, bp, &osi)); 383789Sahrens mdn = osi->os_meta_dnode; 384789Sahrens 385789Sahrens dnode_allocate(mdn, DMU_OT_DNODE, 1 << DNODE_BLOCK_SHIFT, 386789Sahrens DN_MAX_INDBLKSHIFT, DMU_OT_NONE, 0, tx); 387789Sahrens 388789Sahrens /* 389789Sahrens * We don't want to have to increase the meta-dnode's nlevels 390789Sahrens * later, because then we could do it in quescing context while 391789Sahrens * we are also accessing it in open 
context. 392789Sahrens * 393789Sahrens * This precaution is not necessary for the MOS (ds == NULL), 394789Sahrens * because the MOS is only updated in syncing context. 395789Sahrens * This is most fortunate: the MOS is the only objset that 396789Sahrens * needs to be synced multiple times as spa_sync() iterates 397789Sahrens * to convergence, so minimizing its dn_nlevels matters. 398789Sahrens */ 3991544Seschrock if (ds != NULL) { 4001544Seschrock int levels = 1; 4011544Seschrock 4021544Seschrock /* 4031544Seschrock * Determine the number of levels necessary for the meta-dnode 4041544Seschrock * to contain DN_MAX_OBJECT dnodes. 4051544Seschrock */ 4061544Seschrock while ((uint64_t)mdn->dn_nblkptr << (mdn->dn_datablkshift + 4071544Seschrock (levels - 1) * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) < 4081544Seschrock DN_MAX_OBJECT * sizeof (dnode_phys_t)) 4091544Seschrock levels++; 4101544Seschrock 411789Sahrens mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] = 4121544Seschrock mdn->dn_nlevels = levels; 4131544Seschrock } 414789Sahrens 415789Sahrens ASSERT(type != DMU_OST_NONE); 416789Sahrens ASSERT(type != DMU_OST_ANY); 417789Sahrens ASSERT(type < DMU_OST_NUMTYPES); 418789Sahrens osi->os_phys->os_type = type; 419789Sahrens 420789Sahrens dsl_dataset_dirty(ds, tx); 421789Sahrens 422789Sahrens return (osi); 423789Sahrens } 424789Sahrens 425789Sahrens struct oscarg { 426789Sahrens void (*userfunc)(objset_t *os, void *arg, dmu_tx_t *tx); 427789Sahrens void *userarg; 428789Sahrens dsl_dataset_t *clone_parent; 429789Sahrens const char *lastname; 430789Sahrens dmu_objset_type_t type; 431789Sahrens }; 432789Sahrens 4332199Sahrens /* ARGSUSED */ 434789Sahrens static int 4352199Sahrens dmu_objset_create_check(void *arg1, void *arg2, dmu_tx_t *tx) 436789Sahrens { 4372199Sahrens dsl_dir_t *dd = arg1; 4382199Sahrens struct oscarg *oa = arg2; 4392199Sahrens objset_t *mos = dd->dd_pool->dp_meta_objset; 4402199Sahrens int err; 4412199Sahrens uint64_t ddobj; 4422199Sahrens 4432199Sahrens 
err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj, 4442199Sahrens oa->lastname, sizeof (uint64_t), 1, &ddobj); 4452199Sahrens if (err != ENOENT) 4462199Sahrens return (err ? err : EEXIST); 4472199Sahrens 4482199Sahrens if (oa->clone_parent != NULL) { 4492199Sahrens /* 4502199Sahrens * You can't clone across pools. 4512199Sahrens */ 4522199Sahrens if (oa->clone_parent->ds_dir->dd_pool != dd->dd_pool) 4532199Sahrens return (EXDEV); 4542199Sahrens 4552199Sahrens /* 4562199Sahrens * You can only clone snapshots, not the head datasets. 4572199Sahrens */ 4582199Sahrens if (oa->clone_parent->ds_phys->ds_num_children == 0) 4592199Sahrens return (EINVAL); 4602199Sahrens } 4612199Sahrens return (0); 4622199Sahrens } 4632199Sahrens 4642199Sahrens static void 4652199Sahrens dmu_objset_create_sync(void *arg1, void *arg2, dmu_tx_t *tx) 4662199Sahrens { 4672199Sahrens dsl_dir_t *dd = arg1; 4682199Sahrens struct oscarg *oa = arg2; 469789Sahrens dsl_dataset_t *ds; 470*3547Smaybee blkptr_t *bp; 4712199Sahrens uint64_t dsobj; 472789Sahrens 473789Sahrens ASSERT(dmu_tx_is_syncing(tx)); 474789Sahrens 4752199Sahrens dsobj = dsl_dataset_create_sync(dd, oa->lastname, 476789Sahrens oa->clone_parent, tx); 477789Sahrens 4782199Sahrens VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, dsobj, NULL, 4791544Seschrock DS_MODE_STANDARD | DS_MODE_READONLY, FTAG, &ds)); 480*3547Smaybee bp = dsl_dataset_get_blkptr(ds); 481*3547Smaybee if (BP_IS_HOLE(bp)) { 482789Sahrens objset_impl_t *osi; 483789Sahrens 484789Sahrens /* This is an empty dmu_objset; not a clone. 
*/ 485789Sahrens osi = dmu_objset_create_impl(dsl_dataset_get_spa(ds), 486*3547Smaybee ds, bp, oa->type, tx); 487789Sahrens 488789Sahrens if (oa->userfunc) 489789Sahrens oa->userfunc(&osi->os, oa->userarg, tx); 490789Sahrens } 491789Sahrens dsl_dataset_close(ds, DS_MODE_STANDARD | DS_MODE_READONLY, FTAG); 492789Sahrens } 493789Sahrens 494789Sahrens int 495789Sahrens dmu_objset_create(const char *name, dmu_objset_type_t type, 496789Sahrens objset_t *clone_parent, 497789Sahrens void (*func)(objset_t *os, void *arg, dmu_tx_t *tx), void *arg) 498789Sahrens { 4992199Sahrens dsl_dir_t *pdd; 500789Sahrens const char *tail; 501789Sahrens int err = 0; 5022199Sahrens struct oscarg oa = { 0 }; 503789Sahrens 5042199Sahrens ASSERT(strchr(name, '@') == NULL); 5052199Sahrens err = dsl_dir_open(name, FTAG, &pdd, &tail); 5061544Seschrock if (err) 5071544Seschrock return (err); 508789Sahrens if (tail == NULL) { 5092199Sahrens dsl_dir_close(pdd, FTAG); 510789Sahrens return (EEXIST); 511789Sahrens } 512789Sahrens 513789Sahrens dprintf("name=%s\n", name); 514789Sahrens 5152199Sahrens oa.userfunc = func; 5162199Sahrens oa.userarg = arg; 5172199Sahrens oa.lastname = tail; 5182199Sahrens oa.type = type; 5192199Sahrens if (clone_parent != NULL) { 520789Sahrens /* 5212199Sahrens * You can't clone to a different type. 
522789Sahrens */ 5232199Sahrens if (clone_parent->os->os_phys->os_type != type) { 5242199Sahrens dsl_dir_close(pdd, FTAG); 5252199Sahrens return (EINVAL); 526789Sahrens } 5272199Sahrens oa.clone_parent = clone_parent->os->os_dsl_dataset; 528789Sahrens } 5292199Sahrens err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check, 5302199Sahrens dmu_objset_create_sync, pdd, &oa, 5); 5312199Sahrens dsl_dir_close(pdd, FTAG); 532789Sahrens return (err); 533789Sahrens } 534789Sahrens 535789Sahrens int 536789Sahrens dmu_objset_destroy(const char *name) 537789Sahrens { 538789Sahrens objset_t *os; 539789Sahrens int error; 540789Sahrens 541789Sahrens /* 542789Sahrens * If it looks like we'll be able to destroy it, and there's 543789Sahrens * an unplayed replay log sitting around, destroy the log. 544789Sahrens * It would be nicer to do this in dsl_dataset_destroy_sync(), 545789Sahrens * but the replay log objset is modified in open context. 546789Sahrens */ 547789Sahrens error = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_EXCLUSIVE, &os); 548789Sahrens if (error == 0) { 5491807Sbonwick zil_destroy(dmu_objset_zil(os), B_FALSE); 550789Sahrens dmu_objset_close(os); 551789Sahrens } 552789Sahrens 553789Sahrens return (dsl_dataset_destroy(name)); 554789Sahrens } 555789Sahrens 556789Sahrens int 557789Sahrens dmu_objset_rollback(const char *name) 558789Sahrens { 559789Sahrens int err; 560789Sahrens objset_t *os; 561789Sahrens 5622199Sahrens err = dmu_objset_open(name, DMU_OST_ANY, 5632199Sahrens DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, &os); 564789Sahrens if (err == 0) { 565789Sahrens err = zil_suspend(dmu_objset_zil(os)); 566789Sahrens if (err == 0) 567789Sahrens zil_resume(dmu_objset_zil(os)); 568789Sahrens if (err == 0) { 569789Sahrens /* XXX uncache everything? 
*/ 5702199Sahrens err = dsl_dataset_rollback(os->os->os_dsl_dataset); 571789Sahrens } 5722199Sahrens dmu_objset_close(os); 573789Sahrens } 574789Sahrens return (err); 575789Sahrens } 576789Sahrens 5772199Sahrens struct snaparg { 5782199Sahrens dsl_sync_task_group_t *dstg; 5792199Sahrens char *snapname; 5802199Sahrens char failed[MAXPATHLEN]; 5812199Sahrens }; 5822199Sahrens 5832199Sahrens static int 5842199Sahrens dmu_objset_snapshot_one(char *name, void *arg) 5852199Sahrens { 5862199Sahrens struct snaparg *sn = arg; 5872199Sahrens objset_t *os; 5882199Sahrens int err; 5892199Sahrens 5902199Sahrens (void) strcpy(sn->failed, name); 5912199Sahrens 5922199Sahrens err = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_STANDARD, &os); 5932199Sahrens if (err != 0) 5942199Sahrens return (err); 5952199Sahrens 5962199Sahrens /* 5972199Sahrens * NB: we need to wait for all in-flight changes to get to disk, 5982199Sahrens * so that we snapshot those changes. zil_suspend does this as 5992199Sahrens * a side effect. 
6002199Sahrens */ 6012199Sahrens err = zil_suspend(dmu_objset_zil(os)); 6022199Sahrens if (err == 0) { 6032199Sahrens dsl_sync_task_create(sn->dstg, dsl_dataset_snapshot_check, 6042199Sahrens dsl_dataset_snapshot_sync, os, sn->snapname, 3); 6052199Sahrens } 6062199Sahrens return (err); 6072199Sahrens } 6082199Sahrens 6092199Sahrens int 6102199Sahrens dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive) 6112199Sahrens { 6122199Sahrens dsl_sync_task_t *dst; 6132199Sahrens struct snaparg sn = { 0 }; 6142199Sahrens char *cp; 6152199Sahrens spa_t *spa; 6162199Sahrens int err; 6172199Sahrens 6182199Sahrens (void) strcpy(sn.failed, fsname); 6192199Sahrens 6202199Sahrens cp = strchr(fsname, '/'); 6212199Sahrens if (cp) { 6222199Sahrens *cp = '\0'; 6232199Sahrens err = spa_open(fsname, &spa, FTAG); 6242199Sahrens *cp = '/'; 6252199Sahrens } else { 6262199Sahrens err = spa_open(fsname, &spa, FTAG); 6272199Sahrens } 6282199Sahrens if (err) 6292199Sahrens return (err); 6302199Sahrens 6312199Sahrens sn.dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 6322199Sahrens sn.snapname = snapname; 6332199Sahrens 6342417Sahrens if (recursive) { 6352417Sahrens err = dmu_objset_find(fsname, 6362417Sahrens dmu_objset_snapshot_one, &sn, DS_FIND_CHILDREN); 6372417Sahrens } else { 6382199Sahrens err = dmu_objset_snapshot_one(fsname, &sn); 6392417Sahrens } 6402199Sahrens 6412199Sahrens if (err) 6422199Sahrens goto out; 6432199Sahrens 6442199Sahrens err = dsl_sync_task_group_wait(sn.dstg); 6452199Sahrens 6462199Sahrens for (dst = list_head(&sn.dstg->dstg_tasks); dst; 6472199Sahrens dst = list_next(&sn.dstg->dstg_tasks, dst)) { 6482199Sahrens objset_t *os = dst->dst_arg1; 6492199Sahrens if (dst->dst_err) 6502199Sahrens dmu_objset_name(os, sn.failed); 6512199Sahrens zil_resume(dmu_objset_zil(os)); 6522199Sahrens dmu_objset_close(os); 6532199Sahrens } 6542199Sahrens out: 6552199Sahrens if (err) 6562199Sahrens (void) strcpy(fsname, sn.failed); 6572199Sahrens 
dsl_sync_task_group_destroy(sn.dstg); 6582199Sahrens spa_close(spa, FTAG); 6592199Sahrens return (err); 6602199Sahrens } 6612199Sahrens 662789Sahrens static void 663*3547Smaybee dmu_objset_sync_dnodes(list_t *list, dmu_tx_t *tx) 664789Sahrens { 665*3547Smaybee dnode_t *dn; 666789Sahrens 667*3547Smaybee while (dn = list_head(list)) { 668*3547Smaybee ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT); 669*3547Smaybee ASSERT(dn->dn_dbuf->db_data_pending); 670*3547Smaybee /* 671*3547Smaybee * Initialize dn_zio outside dnode_sync() 672*3547Smaybee * to accomodate meta-dnode 673*3547Smaybee */ 674*3547Smaybee dn->dn_zio = dn->dn_dbuf->db_data_pending->dr_zio; 675*3547Smaybee ASSERT(dn->dn_zio); 676789Sahrens 677*3547Smaybee ASSERT3U(dn->dn_nlevels, <=, DN_MAX_LEVELS); 678*3547Smaybee list_remove(list, dn); 679*3547Smaybee dnode_sync(dn, tx); 680*3547Smaybee } 681*3547Smaybee } 6822981Sahrens 683*3547Smaybee /* ARGSUSED */ 684*3547Smaybee static void 685*3547Smaybee ready(zio_t *zio, arc_buf_t *abuf, void *arg) 686*3547Smaybee { 687*3547Smaybee objset_impl_t *os = arg; 688*3547Smaybee blkptr_t *bp = os->os_rootbp; 689*3547Smaybee dnode_phys_t *dnp = &os->os_phys->os_meta_dnode; 690*3547Smaybee int i; 6912981Sahrens 692*3547Smaybee /* 693*3547Smaybee * Update rootbp fill count. 
694*3547Smaybee */ 695*3547Smaybee bp->blk_fill = 1; /* count the meta-dnode */ 696*3547Smaybee for (i = 0; i < dnp->dn_nblkptr; i++) 697*3547Smaybee bp->blk_fill += dnp->dn_blkptr[i].blk_fill; 698789Sahrens } 699789Sahrens 700789Sahrens /* ARGSUSED */ 701789Sahrens static void 702789Sahrens killer(zio_t *zio, arc_buf_t *abuf, void *arg) 703789Sahrens { 704789Sahrens objset_impl_t *os = arg; 705789Sahrens 706789Sahrens ASSERT3U(zio->io_error, ==, 0); 707789Sahrens 708789Sahrens BP_SET_TYPE(zio->io_bp, DMU_OT_OBJSET); 709789Sahrens BP_SET_LEVEL(zio->io_bp, 0); 710789Sahrens 711789Sahrens if (!DVA_EQUAL(BP_IDENTITY(zio->io_bp), 712789Sahrens BP_IDENTITY(&zio->io_bp_orig))) { 713*3547Smaybee if (zio->io_bp_orig.blk_birth == os->os_synctx->tx_txg) 714*3547Smaybee dsl_dataset_block_kill(os->os_dsl_dataset, 715*3547Smaybee &zio->io_bp_orig, NULL, os->os_synctx); 716789Sahrens dsl_dataset_block_born(os->os_dsl_dataset, zio->io_bp, 717789Sahrens os->os_synctx); 718789Sahrens } 719*3547Smaybee arc_release(os->os_phys_buf, &os->os_phys_buf); 720*3547Smaybee 721*3547Smaybee if (os->os_dsl_dataset) 722*3547Smaybee dmu_buf_rele(os->os_dsl_dataset->ds_dbuf, os->os_dsl_dataset); 723789Sahrens } 724789Sahrens 725789Sahrens /* called from dsl */ 726789Sahrens void 727*3547Smaybee dmu_objset_sync(objset_impl_t *os, zio_t *pio, dmu_tx_t *tx) 728789Sahrens { 729789Sahrens int txgoff; 7301544Seschrock zbookmark_t zb; 731*3547Smaybee zio_t *zio; 732*3547Smaybee list_t *list; 733*3547Smaybee dbuf_dirty_record_t *dr; 734*3547Smaybee 735*3547Smaybee dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg); 736789Sahrens 737789Sahrens ASSERT(dmu_tx_is_syncing(tx)); 738789Sahrens /* XXX the write_done callback should really give us the tx... */ 739789Sahrens os->os_synctx = tx; 740789Sahrens 741*3547Smaybee /* 742*3547Smaybee * Create the root block IO 743*3547Smaybee */ 744*3547Smaybee zb.zb_objset = os->os_dsl_dataset ? 
os->os_dsl_dataset->ds_object : 0; 745*3547Smaybee zb.zb_object = 0; 746*3547Smaybee zb.zb_level = -1; 747*3547Smaybee zb.zb_blkid = 0; 748*3547Smaybee if (BP_IS_OLDER(os->os_rootbp, tx->tx_txg)) 749*3547Smaybee dsl_dataset_block_kill(os->os_dsl_dataset, 750*3547Smaybee os->os_rootbp, pio, tx); 751*3547Smaybee zio = arc_write(pio, os->os_spa, os->os_md_checksum, 752*3547Smaybee os->os_md_compress, 753*3547Smaybee dmu_get_replication_level(os->os_spa, &zb, DMU_OT_OBJSET), 754*3547Smaybee tx->tx_txg, os->os_rootbp, os->os_phys_buf, ready, killer, os, 755*3547Smaybee ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb); 756*3547Smaybee 757*3547Smaybee /* 758*3547Smaybee * Sync meta-dnode - the parent IO for the sync is the root block 759*3547Smaybee */ 760*3547Smaybee os->os_meta_dnode->dn_zio = zio; 761*3547Smaybee dnode_sync(os->os_meta_dnode, tx); 762789Sahrens 763789Sahrens txgoff = tx->tx_txg & TXG_MASK; 764789Sahrens 765*3547Smaybee dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], tx); 766*3547Smaybee dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], tx); 767789Sahrens 768*3547Smaybee list = &os->os_meta_dnode->dn_dirty_records[txgoff]; 769*3547Smaybee while (dr = list_head(list)) { 770*3547Smaybee ASSERT(dr->dr_dbuf->db_level == 0); 771*3547Smaybee list_remove(list, dr); 772*3547Smaybee if (dr->dr_zio) 773*3547Smaybee zio_nowait(dr->dr_zio); 774*3547Smaybee } 775789Sahrens /* 776789Sahrens * Free intent log blocks up to this tx. 
777789Sahrens */ 778789Sahrens zil_sync(os->os_zil, tx); 779*3547Smaybee zio_nowait(zio); 780789Sahrens } 781789Sahrens 782789Sahrens void 7832885Sahrens dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp, 7842885Sahrens uint64_t *usedobjsp, uint64_t *availobjsp) 7852885Sahrens { 7862885Sahrens dsl_dataset_space(os->os->os_dsl_dataset, refdbytesp, availbytesp, 7872885Sahrens usedobjsp, availobjsp); 7882885Sahrens } 7892885Sahrens 7902885Sahrens uint64_t 7912885Sahrens dmu_objset_fsid_guid(objset_t *os) 7922885Sahrens { 7932885Sahrens return (dsl_dataset_fsid_guid(os->os->os_dsl_dataset)); 7942885Sahrens } 7952885Sahrens 7962885Sahrens void 7972885Sahrens dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat) 798789Sahrens { 7992885Sahrens stat->dds_type = os->os->os_phys->os_type; 8002885Sahrens if (os->os->os_dsl_dataset) 8012885Sahrens dsl_dataset_fast_stat(os->os->os_dsl_dataset, stat); 8022885Sahrens } 8032885Sahrens 8042885Sahrens void 8052885Sahrens dmu_objset_stats(objset_t *os, nvlist_t *nv) 8062885Sahrens { 8072885Sahrens ASSERT(os->os->os_dsl_dataset || 8082885Sahrens os->os->os_phys->os_type == DMU_OST_META); 8092885Sahrens 8102885Sahrens if (os->os->os_dsl_dataset != NULL) 8112885Sahrens dsl_dataset_stats(os->os->os_dsl_dataset, nv); 8122885Sahrens 8132885Sahrens dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_TYPE, 8142885Sahrens os->os->os_phys->os_type); 815789Sahrens } 816789Sahrens 817789Sahrens int 818789Sahrens dmu_objset_is_snapshot(objset_t *os) 819789Sahrens { 820789Sahrens if (os->os->os_dsl_dataset != NULL) 821789Sahrens return (dsl_dataset_is_snapshot(os->os->os_dsl_dataset)); 822789Sahrens else 823789Sahrens return (B_FALSE); 824789Sahrens } 825789Sahrens 826789Sahrens int 827789Sahrens dmu_snapshot_list_next(objset_t *os, int namelen, char *name, 828885Sahrens uint64_t *idp, uint64_t *offp) 829789Sahrens { 830789Sahrens dsl_dataset_t *ds = os->os->os_dsl_dataset; 831789Sahrens zap_cursor_t cursor; 832789Sahrens 
zap_attribute_t attr; 833789Sahrens 834789Sahrens if (ds->ds_phys->ds_snapnames_zapobj == 0) 835789Sahrens return (ENOENT); 836789Sahrens 837789Sahrens zap_cursor_init_serialized(&cursor, 838789Sahrens ds->ds_dir->dd_pool->dp_meta_objset, 839789Sahrens ds->ds_phys->ds_snapnames_zapobj, *offp); 840789Sahrens 841885Sahrens if (zap_cursor_retrieve(&cursor, &attr) != 0) { 842885Sahrens zap_cursor_fini(&cursor); 843885Sahrens return (ENOENT); 844885Sahrens } 845885Sahrens 846885Sahrens if (strlen(attr.za_name) + 1 > namelen) { 847885Sahrens zap_cursor_fini(&cursor); 848885Sahrens return (ENAMETOOLONG); 849885Sahrens } 850885Sahrens 851885Sahrens (void) strcpy(name, attr.za_name); 852885Sahrens if (idp) 853885Sahrens *idp = attr.za_first_integer; 854885Sahrens zap_cursor_advance(&cursor); 855885Sahrens *offp = zap_cursor_serialize(&cursor); 856885Sahrens zap_cursor_fini(&cursor); 857885Sahrens 858885Sahrens return (0); 859885Sahrens } 860885Sahrens 861885Sahrens int 862885Sahrens dmu_dir_list_next(objset_t *os, int namelen, char *name, 863885Sahrens uint64_t *idp, uint64_t *offp) 864885Sahrens { 865885Sahrens dsl_dir_t *dd = os->os->os_dsl_dataset->ds_dir; 866885Sahrens zap_cursor_t cursor; 867885Sahrens zap_attribute_t attr; 868885Sahrens 869885Sahrens /* there is no next dir on a snapshot! 
*/ 870885Sahrens if (os->os->os_dsl_dataset->ds_object != 871885Sahrens dd->dd_phys->dd_head_dataset_obj) 872885Sahrens return (ENOENT); 873885Sahrens 874885Sahrens zap_cursor_init_serialized(&cursor, 875885Sahrens dd->dd_pool->dp_meta_objset, 876885Sahrens dd->dd_phys->dd_child_dir_zapobj, *offp); 877885Sahrens 878885Sahrens if (zap_cursor_retrieve(&cursor, &attr) != 0) { 879885Sahrens zap_cursor_fini(&cursor); 880885Sahrens return (ENOENT); 881885Sahrens } 882885Sahrens 883885Sahrens if (strlen(attr.za_name) + 1 > namelen) { 884885Sahrens zap_cursor_fini(&cursor); 885789Sahrens return (ENAMETOOLONG); 886885Sahrens } 887789Sahrens 888789Sahrens (void) strcpy(name, attr.za_name); 889885Sahrens if (idp) 890885Sahrens *idp = attr.za_first_integer; 891789Sahrens zap_cursor_advance(&cursor); 892789Sahrens *offp = zap_cursor_serialize(&cursor); 893885Sahrens zap_cursor_fini(&cursor); 894789Sahrens 895789Sahrens return (0); 896789Sahrens } 897789Sahrens 898789Sahrens /* 899789Sahrens * Find all objsets under name, and for each, call 'func(child_name, arg)'. 900789Sahrens */ 9012199Sahrens int 9022199Sahrens dmu_objset_find(char *name, int func(char *, void *), void *arg, int flags) 903789Sahrens { 904789Sahrens dsl_dir_t *dd; 905789Sahrens objset_t *os; 906789Sahrens uint64_t snapobj; 907789Sahrens zap_cursor_t zc; 908789Sahrens zap_attribute_t attr; 909789Sahrens char *child; 9101544Seschrock int do_self, err; 911789Sahrens 9121544Seschrock err = dsl_dir_open(name, FTAG, &dd, NULL); 9131544Seschrock if (err) 9142199Sahrens return (err); 915789Sahrens 9162199Sahrens /* NB: the $MOS dir doesn't have a head dataset */ 917789Sahrens do_self = (dd->dd_phys->dd_head_dataset_obj != 0); 918789Sahrens 919789Sahrens /* 920789Sahrens * Iterate over all children. 
921789Sahrens */ 9222417Sahrens if (flags & DS_FIND_CHILDREN) { 9232417Sahrens for (zap_cursor_init(&zc, dd->dd_pool->dp_meta_objset, 9242417Sahrens dd->dd_phys->dd_child_dir_zapobj); 9252417Sahrens zap_cursor_retrieve(&zc, &attr) == 0; 9262417Sahrens (void) zap_cursor_advance(&zc)) { 9272417Sahrens ASSERT(attr.za_integer_length == sizeof (uint64_t)); 9282417Sahrens ASSERT(attr.za_num_integers == 1); 929789Sahrens 9302417Sahrens /* 9312417Sahrens * No separating '/' because parent's name ends in /. 9322417Sahrens */ 9332417Sahrens child = kmem_alloc(MAXPATHLEN, KM_SLEEP); 9342417Sahrens /* XXX could probably just use name here */ 9352417Sahrens dsl_dir_name(dd, child); 9362417Sahrens (void) strcat(child, "/"); 9372417Sahrens (void) strcat(child, attr.za_name); 9382417Sahrens err = dmu_objset_find(child, func, arg, flags); 9392417Sahrens kmem_free(child, MAXPATHLEN); 9402417Sahrens if (err) 9412417Sahrens break; 9422417Sahrens } 9432417Sahrens zap_cursor_fini(&zc); 9442199Sahrens 9452417Sahrens if (err) { 9462417Sahrens dsl_dir_close(dd, FTAG); 9472417Sahrens return (err); 9482417Sahrens } 949789Sahrens } 950789Sahrens 951789Sahrens /* 952789Sahrens * Iterate over all snapshots. 
953789Sahrens */ 954789Sahrens if ((flags & DS_FIND_SNAPSHOTS) && 955789Sahrens dmu_objset_open(name, DMU_OST_ANY, 956789Sahrens DS_MODE_STANDARD | DS_MODE_READONLY, &os) == 0) { 957789Sahrens 958789Sahrens snapobj = os->os->os_dsl_dataset->ds_phys->ds_snapnames_zapobj; 959789Sahrens dmu_objset_close(os); 960789Sahrens 961789Sahrens for (zap_cursor_init(&zc, dd->dd_pool->dp_meta_objset, snapobj); 962789Sahrens zap_cursor_retrieve(&zc, &attr) == 0; 963789Sahrens (void) zap_cursor_advance(&zc)) { 964789Sahrens ASSERT(attr.za_integer_length == sizeof (uint64_t)); 965789Sahrens ASSERT(attr.za_num_integers == 1); 966789Sahrens 967789Sahrens child = kmem_alloc(MAXPATHLEN, KM_SLEEP); 968789Sahrens /* XXX could probably just use name here */ 969789Sahrens dsl_dir_name(dd, child); 970789Sahrens (void) strcat(child, "@"); 971789Sahrens (void) strcat(child, attr.za_name); 9722199Sahrens err = func(child, arg); 973789Sahrens kmem_free(child, MAXPATHLEN); 9742199Sahrens if (err) 9752199Sahrens break; 976789Sahrens } 977885Sahrens zap_cursor_fini(&zc); 978789Sahrens } 979789Sahrens 980789Sahrens dsl_dir_close(dd, FTAG); 981789Sahrens 9822199Sahrens if (err) 9832199Sahrens return (err); 9842199Sahrens 985789Sahrens /* 986789Sahrens * Apply to self if appropriate. 987789Sahrens */ 988789Sahrens if (do_self) 9892199Sahrens err = func(name, arg); 9902199Sahrens return (err); 991789Sahrens } 992