1789Sahrens /* 2789Sahrens * CDDL HEADER START 3789Sahrens * 4789Sahrens * The contents of this file are subject to the terms of the 51544Seschrock * Common Development and Distribution License (the "License"). 61544Seschrock * You may not use this file except in compliance with the License. 7789Sahrens * 8789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9789Sahrens * or http://www.opensolaris.org/os/licensing. 10789Sahrens * See the License for the specific language governing permissions 11789Sahrens * and limitations under the License. 12789Sahrens * 13789Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15789Sahrens * If applicable, add the following below this CDDL HEADER, with the 16789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18789Sahrens * 19789Sahrens * CDDL HEADER END 20789Sahrens */ 21789Sahrens /* 221544Seschrock * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23789Sahrens * Use is subject to license terms. 24789Sahrens */ 25789Sahrens 26789Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 27789Sahrens 28789Sahrens #include <sys/zfs_context.h> 29789Sahrens #include <sys/dmu_objset.h> 30789Sahrens #include <sys/dsl_dir.h> 31789Sahrens #include <sys/dsl_dataset.h> 32789Sahrens #include <sys/dsl_prop.h> 33789Sahrens #include <sys/dsl_pool.h> 34*2199Sahrens #include <sys/dsl_synctask.h> 35789Sahrens #include <sys/dnode.h> 36789Sahrens #include <sys/dbuf.h> 37789Sahrens #include <sys/dmu_tx.h> 38789Sahrens #include <sys/zio_checksum.h> 39789Sahrens #include <sys/zap.h> 40789Sahrens #include <sys/zil.h> 41789Sahrens #include <sys/dmu_impl.h> 42789Sahrens 43789Sahrens 44789Sahrens spa_t * 45789Sahrens dmu_objset_spa(objset_t *os) 46789Sahrens { 47789Sahrens return (os->os->os_spa); 48789Sahrens } 49789Sahrens 50789Sahrens zilog_t * 51789Sahrens dmu_objset_zil(objset_t *os) 52789Sahrens { 53789Sahrens return (os->os->os_zil); 54789Sahrens } 55789Sahrens 56789Sahrens dsl_pool_t * 57789Sahrens dmu_objset_pool(objset_t *os) 58789Sahrens { 59789Sahrens dsl_dataset_t *ds; 60789Sahrens 61789Sahrens if ((ds = os->os->os_dsl_dataset) != NULL && ds->ds_dir) 62789Sahrens return (ds->ds_dir->dd_pool); 63789Sahrens else 64789Sahrens return (spa_get_dsl(os->os->os_spa)); 65789Sahrens } 66789Sahrens 67789Sahrens dsl_dataset_t * 68789Sahrens dmu_objset_ds(objset_t *os) 69789Sahrens { 70789Sahrens return (os->os->os_dsl_dataset); 71789Sahrens } 72789Sahrens 73789Sahrens dmu_objset_type_t 74789Sahrens dmu_objset_type(objset_t *os) 75789Sahrens { 76789Sahrens return (os->os->os_phys->os_type); 77789Sahrens } 78789Sahrens 79789Sahrens void 80789Sahrens dmu_objset_name(objset_t *os, char *buf) 81789Sahrens { 82789Sahrens dsl_dataset_name(os->os->os_dsl_dataset, buf); 83789Sahrens } 84789Sahrens 85789Sahrens uint64_t 86789Sahrens dmu_objset_id(objset_t *os) 87789Sahrens { 88789Sahrens dsl_dataset_t *ds = os->os->os_dsl_dataset; 89789Sahrens 90789Sahrens return (ds ? ds->ds_object : 0); 91789Sahrens } 92789Sahrens 93789Sahrens static void 94789Sahrens checksum_changed_cb(void *arg, uint64_t newval) 95789Sahrens { 96789Sahrens objset_impl_t *osi = arg; 97789Sahrens 98789Sahrens /* 99789Sahrens * Inheritance should have been done by now. 100789Sahrens */ 101789Sahrens ASSERT(newval != ZIO_CHECKSUM_INHERIT); 102789Sahrens 103789Sahrens osi->os_checksum = zio_checksum_select(newval, ZIO_CHECKSUM_ON_VALUE); 104789Sahrens } 105789Sahrens 106789Sahrens static void 107789Sahrens compression_changed_cb(void *arg, uint64_t newval) 108789Sahrens { 109789Sahrens objset_impl_t *osi = arg; 110789Sahrens 111789Sahrens /* 112789Sahrens * Inheritance and range checking should have been done by now. 113789Sahrens */ 114789Sahrens ASSERT(newval != ZIO_COMPRESS_INHERIT); 115789Sahrens 116789Sahrens osi->os_compress = zio_compress_select(newval, ZIO_COMPRESS_ON_VALUE); 117789Sahrens } 118789Sahrens 119789Sahrens void 120789Sahrens dmu_objset_byteswap(void *buf, size_t size) 121789Sahrens { 122789Sahrens objset_phys_t *osp = buf; 123789Sahrens 124789Sahrens ASSERT(size == sizeof (objset_phys_t)); 125789Sahrens dnode_byteswap(&osp->os_meta_dnode); 126789Sahrens byteswap_uint64_array(&osp->os_zil_header, sizeof (zil_header_t)); 127789Sahrens osp->os_type = BSWAP_64(osp->os_type); 128789Sahrens } 129789Sahrens 1301544Seschrock int 1311544Seschrock dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, 1321544Seschrock objset_impl_t **osip) 133789Sahrens { 134789Sahrens objset_impl_t *winner, *osi; 135789Sahrens int i, err, checksum; 136789Sahrens 137789Sahrens osi = kmem_zalloc(sizeof (objset_impl_t), KM_SLEEP); 138789Sahrens osi->os.os = osi; 139789Sahrens osi->os_dsl_dataset = ds; 140789Sahrens osi->os_spa = spa; 141789Sahrens if (bp) 142789Sahrens osi->os_rootbp = *bp; 143789Sahrens osi->os_phys = zio_buf_alloc(sizeof (objset_phys_t)); 144789Sahrens if (!BP_IS_HOLE(&osi->os_rootbp)) { 1451544Seschrock zbookmark_t zb; 1461544Seschrock zb.zb_objset = ds ? ds->ds_object : 0; 1471544Seschrock zb.zb_object = 0; 1481544Seschrock zb.zb_level = -1; 1491544Seschrock zb.zb_blkid = 0; 1501544Seschrock 151789Sahrens dprintf_bp(&osi->os_rootbp, "reading %s", ""); 1521544Seschrock err = arc_read(NULL, spa, &osi->os_rootbp, 153789Sahrens dmu_ot[DMU_OT_OBJSET].ot_byteswap, 154789Sahrens arc_bcopy_func, osi->os_phys, 1551544Seschrock ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, ARC_WAIT, &zb); 1561544Seschrock if (err) { 1571544Seschrock zio_buf_free(osi->os_phys, sizeof (objset_phys_t)); 1581544Seschrock kmem_free(osi, sizeof (objset_impl_t)); 1591544Seschrock return (err); 1601544Seschrock } 161789Sahrens } else { 162789Sahrens bzero(osi->os_phys, sizeof (objset_phys_t)); 163789Sahrens } 164789Sahrens 165789Sahrens /* 166789Sahrens * Note: the changed_cb will be called once before the register 167789Sahrens * func returns, thus changing the checksum/compression from the 1682082Seschrock * default (fletcher2/off). Snapshots don't need to know, and 1692082Seschrock * registering would complicate clone promotion. 170789Sahrens */ 1712082Seschrock if (ds && ds->ds_phys->ds_num_children == 0) { 172789Sahrens err = dsl_prop_register(ds, "checksum", 173789Sahrens checksum_changed_cb, osi); 1741544Seschrock if (err == 0) 1751544Seschrock err = dsl_prop_register(ds, "compression", 1761544Seschrock compression_changed_cb, osi); 1771544Seschrock if (err) { 1781544Seschrock zio_buf_free(osi->os_phys, sizeof (objset_phys_t)); 1791544Seschrock kmem_free(osi, sizeof (objset_impl_t)); 1801544Seschrock return (err); 1811544Seschrock } 1822082Seschrock } else if (ds == NULL) { 183789Sahrens /* It's the meta-objset. */ 184789Sahrens osi->os_checksum = ZIO_CHECKSUM_FLETCHER_4; 1851544Seschrock osi->os_compress = ZIO_COMPRESS_LZJB; 186789Sahrens } 187789Sahrens 1881544Seschrock osi->os_zil = zil_alloc(&osi->os, &osi->os_phys->os_zil_header); 1891544Seschrock 190789Sahrens /* 191789Sahrens * Metadata always gets compressed and checksummed. 192789Sahrens * If the data checksum is multi-bit correctable, and it's not 193789Sahrens * a ZBT-style checksum, then it's suitable for metadata as well. 194789Sahrens * Otherwise, the metadata checksum defaults to fletcher4. 195789Sahrens */ 196789Sahrens checksum = osi->os_checksum; 197789Sahrens 198789Sahrens if (zio_checksum_table[checksum].ci_correctable && 199789Sahrens !zio_checksum_table[checksum].ci_zbt) 200789Sahrens osi->os_md_checksum = checksum; 201789Sahrens else 202789Sahrens osi->os_md_checksum = ZIO_CHECKSUM_FLETCHER_4; 2031544Seschrock osi->os_md_compress = ZIO_COMPRESS_LZJB; 204789Sahrens 205789Sahrens for (i = 0; i < TXG_SIZE; i++) { 206789Sahrens list_create(&osi->os_dirty_dnodes[i], sizeof (dnode_t), 207789Sahrens offsetof(dnode_t, dn_dirty_link[i])); 208789Sahrens list_create(&osi->os_free_dnodes[i], sizeof (dnode_t), 209789Sahrens offsetof(dnode_t, dn_dirty_link[i])); 210789Sahrens } 211789Sahrens list_create(&osi->os_dnodes, sizeof (dnode_t), 212789Sahrens offsetof(dnode_t, dn_link)); 213789Sahrens list_create(&osi->os_downgraded_dbufs, sizeof (dmu_buf_impl_t), 214789Sahrens offsetof(dmu_buf_impl_t, db_link)); 215789Sahrens 216789Sahrens osi->os_meta_dnode = dnode_special_open(osi, 217789Sahrens &osi->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT); 218789Sahrens 219789Sahrens if (ds != NULL) { 220789Sahrens winner = dsl_dataset_set_user_ptr(ds, osi, dmu_objset_evict); 221789Sahrens if (winner) { 222789Sahrens dmu_objset_evict(ds, osi); 223789Sahrens osi = winner; 224789Sahrens } 225789Sahrens } 226789Sahrens 2271544Seschrock *osip = osi; 2281544Seschrock return (0); 229789Sahrens } 230789Sahrens 231789Sahrens /* called from zpl */ 232789Sahrens int 233789Sahrens dmu_objset_open(const char *name, dmu_objset_type_t type, int mode, 234789Sahrens objset_t **osp) 235789Sahrens { 236789Sahrens dsl_dataset_t *ds; 237789Sahrens int err; 238789Sahrens objset_t *os; 239789Sahrens objset_impl_t *osi; 240789Sahrens 241789Sahrens os = kmem_alloc(sizeof (objset_t), KM_SLEEP); 242789Sahrens err = dsl_dataset_open(name, mode, os, &ds); 243789Sahrens if (err) { 244789Sahrens kmem_free(os, sizeof (objset_t)); 245789Sahrens return (err); 246789Sahrens } 247789Sahrens 248789Sahrens osi = dsl_dataset_get_user_ptr(ds); 249789Sahrens if (osi == NULL) { 250789Sahrens blkptr_t bp; 251789Sahrens 252789Sahrens dsl_dataset_get_blkptr(ds, &bp); 2531544Seschrock err = dmu_objset_open_impl(dsl_dataset_get_spa(ds), 2541544Seschrock ds, &bp, &osi); 2551544Seschrock if (err) { 2561544Seschrock dsl_dataset_close(ds, mode, os); 2571544Seschrock kmem_free(os, sizeof (objset_t)); 2581544Seschrock return (err); 2591544Seschrock } 260789Sahrens } 261789Sahrens 262789Sahrens os->os = osi; 263789Sahrens os->os_mode = mode; 264789Sahrens 265789Sahrens if (type != DMU_OST_ANY && type != os->os->os_phys->os_type) { 266789Sahrens dmu_objset_close(os); 267789Sahrens return (EINVAL); 268789Sahrens } 269789Sahrens *osp = os; 270789Sahrens return (0); 271789Sahrens } 272789Sahrens 273789Sahrens void 274789Sahrens dmu_objset_close(objset_t *os) 275789Sahrens { 276789Sahrens dsl_dataset_close(os->os->os_dsl_dataset, os->os_mode, os); 277789Sahrens kmem_free(os, sizeof (objset_t)); 278789Sahrens } 279789Sahrens 2801646Sperrin int 2811646Sperrin dmu_objset_evict_dbufs(objset_t *os, int try) 2821544Seschrock { 2831544Seschrock objset_impl_t *osi = os->os; 2841544Seschrock dnode_t *dn; 2851596Sahrens 2861596Sahrens mutex_enter(&osi->os_lock); 2871596Sahrens 2881596Sahrens /* process the mdn last, since the other dnodes have holds on it */ 2891596Sahrens list_remove(&osi->os_dnodes, osi->os_meta_dnode); 2901596Sahrens list_insert_tail(&osi->os_dnodes, osi->os_meta_dnode); 2911544Seschrock 2921544Seschrock /* 2931596Sahrens * Find the first dnode with holds. We have to do this dance 2941596Sahrens * because dnode_add_ref() only works if you already have a 2951596Sahrens * hold. If there are no holds then it has no dbufs so OK to 2961596Sahrens * skip. 2971544Seschrock */ 2981596Sahrens for (dn = list_head(&osi->os_dnodes); 2991596Sahrens dn && refcount_is_zero(&dn->dn_holds); 3001596Sahrens dn = list_next(&osi->os_dnodes, dn)) 3011596Sahrens continue; 3021596Sahrens if (dn) 3031596Sahrens dnode_add_ref(dn, FTAG); 3041596Sahrens 3051596Sahrens while (dn) { 3061596Sahrens dnode_t *next_dn = dn; 3071596Sahrens 3081596Sahrens do { 3091596Sahrens next_dn = list_next(&osi->os_dnodes, next_dn); 3101596Sahrens } while (next_dn && refcount_is_zero(&next_dn->dn_holds)); 3111596Sahrens if (next_dn) 3121596Sahrens dnode_add_ref(next_dn, FTAG); 3131596Sahrens 3141596Sahrens mutex_exit(&osi->os_lock); 3151646Sperrin if (dnode_evict_dbufs(dn, try)) { 3161646Sperrin dnode_rele(dn, FTAG); 3171646Sperrin if (next_dn) 3181646Sperrin dnode_rele(next_dn, FTAG); 3191646Sperrin return (1); 3201646Sperrin } 3211596Sahrens dnode_rele(dn, FTAG); 3221596Sahrens mutex_enter(&osi->os_lock); 3231596Sahrens dn = next_dn; 3241544Seschrock } 3251544Seschrock mutex_exit(&osi->os_lock); 3261646Sperrin return (0); 3271544Seschrock } 3281544Seschrock 3291544Seschrock void 330789Sahrens dmu_objset_evict(dsl_dataset_t *ds, void *arg) 331789Sahrens { 332789Sahrens objset_impl_t *osi = arg; 3331544Seschrock objset_t os; 3342082Seschrock int i; 335789Sahrens 336789Sahrens for (i = 0; i < TXG_SIZE; i++) { 337789Sahrens ASSERT(list_head(&osi->os_dirty_dnodes[i]) == NULL); 338789Sahrens ASSERT(list_head(&osi->os_free_dnodes[i]) == NULL); 339789Sahrens } 340789Sahrens 3412082Seschrock if (ds && ds->ds_phys->ds_num_children == 0) { 3422082Seschrock VERIFY(0 == dsl_prop_unregister(ds, "checksum", 3432082Seschrock checksum_changed_cb, osi)); 3442082Seschrock VERIFY(0 == dsl_prop_unregister(ds, "compression", 3452082Seschrock compression_changed_cb, osi)); 346789Sahrens } 347789Sahrens 3481544Seschrock /* 3491544Seschrock * We should need only a single pass over the dnode list, since 3501544Seschrock * nothing can be added to the list at this point. 3511544Seschrock */ 3521544Seschrock os.os = osi; 3531646Sperrin (void) dmu_objset_evict_dbufs(&os, 0); 3541544Seschrock 355789Sahrens ASSERT3P(list_head(&osi->os_dnodes), ==, osi->os_meta_dnode); 356789Sahrens ASSERT3P(list_tail(&osi->os_dnodes), ==, osi->os_meta_dnode); 357789Sahrens ASSERT3P(list_head(&osi->os_meta_dnode->dn_dbufs), ==, NULL); 358789Sahrens 359789Sahrens dnode_special_close(osi->os_meta_dnode); 360789Sahrens zil_free(osi->os_zil); 361789Sahrens 362789Sahrens zio_buf_free(osi->os_phys, sizeof (objset_phys_t)); 363789Sahrens kmem_free(osi, sizeof (objset_impl_t)); 364789Sahrens } 365789Sahrens 366789Sahrens /* called from dsl for meta-objset */ 367789Sahrens objset_impl_t * 368789Sahrens dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, dmu_objset_type_t type, 369789Sahrens dmu_tx_t *tx) 370789Sahrens { 371789Sahrens objset_impl_t *osi; 372789Sahrens dnode_t *mdn; 373789Sahrens 374789Sahrens ASSERT(dmu_tx_is_syncing(tx)); 3751544Seschrock VERIFY(0 == dmu_objset_open_impl(spa, ds, NULL, &osi)); 376789Sahrens mdn = osi->os_meta_dnode; 377789Sahrens 378789Sahrens dnode_allocate(mdn, DMU_OT_DNODE, 1 << DNODE_BLOCK_SHIFT, 379789Sahrens DN_MAX_INDBLKSHIFT, DMU_OT_NONE, 0, tx); 380789Sahrens 381789Sahrens /* 382789Sahrens * We don't want to have to increase the meta-dnode's nlevels 383789Sahrens * later, because then we could do it in quescing context while 384789Sahrens * we are also accessing it in open context. 385789Sahrens * 386789Sahrens * This precaution is not necessary for the MOS (ds == NULL), 387789Sahrens * because the MOS is only updated in syncing context. 388789Sahrens * This is most fortunate: the MOS is the only objset that 389789Sahrens * needs to be synced multiple times as spa_sync() iterates 390789Sahrens * to convergence, so minimizing its dn_nlevels matters. 391789Sahrens */ 3921544Seschrock if (ds != NULL) { 3931544Seschrock int levels = 1; 3941544Seschrock 3951544Seschrock /* 3961544Seschrock * Determine the number of levels necessary for the meta-dnode 3971544Seschrock * to contain DN_MAX_OBJECT dnodes. 3981544Seschrock */ 3991544Seschrock while ((uint64_t)mdn->dn_nblkptr << (mdn->dn_datablkshift + 4001544Seschrock (levels - 1) * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) < 4011544Seschrock DN_MAX_OBJECT * sizeof (dnode_phys_t)) 4021544Seschrock levels++; 4031544Seschrock 404789Sahrens mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] = 4051544Seschrock mdn->dn_nlevels = levels; 4061544Seschrock } 407789Sahrens 408789Sahrens ASSERT(type != DMU_OST_NONE); 409789Sahrens ASSERT(type != DMU_OST_ANY); 410789Sahrens ASSERT(type < DMU_OST_NUMTYPES); 411789Sahrens osi->os_phys->os_type = type; 412789Sahrens 413789Sahrens dsl_dataset_dirty(ds, tx); 414789Sahrens 415789Sahrens return (osi); 416789Sahrens } 417789Sahrens 418789Sahrens struct oscarg { 419789Sahrens void (*userfunc)(objset_t *os, void *arg, dmu_tx_t *tx); 420789Sahrens void *userarg; 421789Sahrens dsl_dataset_t *clone_parent; 422789Sahrens const char *lastname; 423789Sahrens dmu_objset_type_t type; 424789Sahrens }; 425789Sahrens 426*2199Sahrens /* ARGSUSED */ 427789Sahrens static int 428*2199Sahrens dmu_objset_create_check(void *arg1, void *arg2, dmu_tx_t *tx) 429789Sahrens { 430*2199Sahrens dsl_dir_t *dd = arg1; 431*2199Sahrens struct oscarg *oa = arg2; 432*2199Sahrens objset_t *mos = dd->dd_pool->dp_meta_objset; 433*2199Sahrens int err; 434*2199Sahrens uint64_t ddobj; 435*2199Sahrens 436*2199Sahrens err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj, 437*2199Sahrens oa->lastname, sizeof (uint64_t), 1, &ddobj); 438*2199Sahrens if (err != ENOENT) 439*2199Sahrens return (err ? err : EEXIST); 440*2199Sahrens 441*2199Sahrens if (oa->clone_parent != NULL) { 442*2199Sahrens /* 443*2199Sahrens * You can't clone across pools. 444*2199Sahrens */ 445*2199Sahrens if (oa->clone_parent->ds_dir->dd_pool != dd->dd_pool) 446*2199Sahrens return (EXDEV); 447*2199Sahrens 448*2199Sahrens /* 449*2199Sahrens * You can only clone snapshots, not the head datasets. 450*2199Sahrens */ 451*2199Sahrens if (oa->clone_parent->ds_phys->ds_num_children == 0) 452*2199Sahrens return (EINVAL); 453*2199Sahrens } 454*2199Sahrens return (0); 455*2199Sahrens } 456*2199Sahrens 457*2199Sahrens static void 458*2199Sahrens dmu_objset_create_sync(void *arg1, void *arg2, dmu_tx_t *tx) 459*2199Sahrens { 460*2199Sahrens dsl_dir_t *dd = arg1; 461*2199Sahrens struct oscarg *oa = arg2; 462789Sahrens dsl_dataset_t *ds; 463789Sahrens blkptr_t bp; 464*2199Sahrens uint64_t dsobj; 465789Sahrens 466789Sahrens ASSERT(dmu_tx_is_syncing(tx)); 467789Sahrens 468*2199Sahrens dsobj = dsl_dataset_create_sync(dd, oa->lastname, 469789Sahrens oa->clone_parent, tx); 470789Sahrens 471*2199Sahrens VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, dsobj, NULL, 4721544Seschrock DS_MODE_STANDARD | DS_MODE_READONLY, FTAG, &ds)); 473789Sahrens dsl_dataset_get_blkptr(ds, &bp); 474789Sahrens if (BP_IS_HOLE(&bp)) { 475789Sahrens objset_impl_t *osi; 476789Sahrens 477789Sahrens /* This is an empty dmu_objset; not a clone. */ 478789Sahrens osi = dmu_objset_create_impl(dsl_dataset_get_spa(ds), 479789Sahrens ds, oa->type, tx); 480789Sahrens 481789Sahrens if (oa->userfunc) 482789Sahrens oa->userfunc(&osi->os, oa->userarg, tx); 483789Sahrens } 484789Sahrens dsl_dataset_close(ds, DS_MODE_STANDARD | DS_MODE_READONLY, FTAG); 485789Sahrens } 486789Sahrens 487789Sahrens int 488789Sahrens dmu_objset_create(const char *name, dmu_objset_type_t type, 489789Sahrens objset_t *clone_parent, 490789Sahrens void (*func)(objset_t *os, void *arg, dmu_tx_t *tx), void *arg) 491789Sahrens { 492*2199Sahrens dsl_dir_t *pdd; 493789Sahrens const char *tail; 494789Sahrens int err = 0; 495*2199Sahrens struct oscarg oa = { 0 }; 496789Sahrens 497*2199Sahrens ASSERT(strchr(name, '@') == NULL); 498*2199Sahrens err = dsl_dir_open(name, FTAG, &pdd, &tail); 4991544Seschrock if (err) 5001544Seschrock return (err); 501789Sahrens if (tail == NULL) { 502*2199Sahrens dsl_dir_close(pdd, FTAG); 503789Sahrens return (EEXIST); 504789Sahrens } 505789Sahrens 506789Sahrens dprintf("name=%s\n", name); 507789Sahrens 508*2199Sahrens oa.userfunc = func; 509*2199Sahrens oa.userarg = arg; 510*2199Sahrens oa.lastname = tail; 511*2199Sahrens oa.type = type; 512*2199Sahrens if (clone_parent != NULL) { 513789Sahrens /* 514*2199Sahrens * You can't clone to a different type. 515789Sahrens */ 516*2199Sahrens if (clone_parent->os->os_phys->os_type != type) { 517*2199Sahrens dsl_dir_close(pdd, FTAG); 518*2199Sahrens return (EINVAL); 519789Sahrens } 520*2199Sahrens oa.clone_parent = clone_parent->os->os_dsl_dataset; 521789Sahrens } 522*2199Sahrens err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check, 523*2199Sahrens dmu_objset_create_sync, pdd, &oa, 5); 524*2199Sahrens dsl_dir_close(pdd, FTAG); 525789Sahrens return (err); 526789Sahrens } 527789Sahrens 528789Sahrens int 529789Sahrens dmu_objset_destroy(const char *name) 530789Sahrens { 531789Sahrens objset_t *os; 532789Sahrens int error; 533789Sahrens 534789Sahrens /* 535789Sahrens * If it looks like we'll be able to destroy it, and there's 536789Sahrens * an unplayed replay log sitting around, destroy the log. 537789Sahrens * It would be nicer to do this in dsl_dataset_destroy_sync(), 538789Sahrens * but the replay log objset is modified in open context. 539789Sahrens */ 540789Sahrens error = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_EXCLUSIVE, &os); 541789Sahrens if (error == 0) { 5421807Sbonwick zil_destroy(dmu_objset_zil(os), B_FALSE); 543789Sahrens dmu_objset_close(os); 544789Sahrens } 545789Sahrens 546789Sahrens return (dsl_dataset_destroy(name)); 547789Sahrens } 548789Sahrens 549789Sahrens int 550789Sahrens dmu_objset_rollback(const char *name) 551789Sahrens { 552789Sahrens int err; 553789Sahrens objset_t *os; 554789Sahrens 555*2199Sahrens err = dmu_objset_open(name, DMU_OST_ANY, 556*2199Sahrens DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, &os); 557789Sahrens if (err == 0) { 558789Sahrens err = zil_suspend(dmu_objset_zil(os)); 559789Sahrens if (err == 0) 560789Sahrens zil_resume(dmu_objset_zil(os)); 561789Sahrens if (err == 0) { 562789Sahrens /* XXX uncache everything? */ 563*2199Sahrens err = dsl_dataset_rollback(os->os->os_dsl_dataset); 564789Sahrens } 565*2199Sahrens dmu_objset_close(os); 566789Sahrens } 567789Sahrens return (err); 568789Sahrens } 569789Sahrens 570*2199Sahrens struct snaparg { 571*2199Sahrens dsl_sync_task_group_t *dstg; 572*2199Sahrens char *snapname; 573*2199Sahrens char failed[MAXPATHLEN]; 574*2199Sahrens }; 575*2199Sahrens 576*2199Sahrens static int 577*2199Sahrens dmu_objset_snapshot_one(char *name, void *arg) 578*2199Sahrens { 579*2199Sahrens struct snaparg *sn = arg; 580*2199Sahrens objset_t *os; 581*2199Sahrens int err; 582*2199Sahrens 583*2199Sahrens (void) strcpy(sn->failed, name); 584*2199Sahrens 585*2199Sahrens err = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_STANDARD, &os); 586*2199Sahrens if (err != 0) 587*2199Sahrens return (err); 588*2199Sahrens 589*2199Sahrens /* 590*2199Sahrens * NB: we need to wait for all in-flight changes to get to disk, 591*2199Sahrens * so that we snapshot those changes. zil_suspend does this as 592*2199Sahrens * a side effect. 593*2199Sahrens */ 594*2199Sahrens err = zil_suspend(dmu_objset_zil(os)); 595*2199Sahrens if (err == 0) { 596*2199Sahrens dsl_sync_task_create(sn->dstg, dsl_dataset_snapshot_check, 597*2199Sahrens dsl_dataset_snapshot_sync, os, sn->snapname, 3); 598*2199Sahrens } 599*2199Sahrens return (err); 600*2199Sahrens } 601*2199Sahrens 602*2199Sahrens int 603*2199Sahrens dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive) 604*2199Sahrens { 605*2199Sahrens dsl_sync_task_t *dst; 606*2199Sahrens struct snaparg sn = { 0 }; 607*2199Sahrens char *cp; 608*2199Sahrens spa_t *spa; 609*2199Sahrens int err; 610*2199Sahrens 611*2199Sahrens (void) strcpy(sn.failed, fsname); 612*2199Sahrens 613*2199Sahrens cp = strchr(fsname, '/'); 614*2199Sahrens if (cp) { 615*2199Sahrens *cp = '\0'; 616*2199Sahrens err = spa_open(fsname, &spa, FTAG); 617*2199Sahrens *cp = '/'; 618*2199Sahrens } else { 619*2199Sahrens err = spa_open(fsname, &spa, FTAG); 620*2199Sahrens } 621*2199Sahrens if (err) 622*2199Sahrens return (err); 623*2199Sahrens 624*2199Sahrens sn.dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 625*2199Sahrens sn.snapname = snapname; 626*2199Sahrens 627*2199Sahrens if (recursive) 628*2199Sahrens err = dmu_objset_find(fsname, dmu_objset_snapshot_one, &sn, 0); 629*2199Sahrens else 630*2199Sahrens err = dmu_objset_snapshot_one(fsname, &sn); 631*2199Sahrens 632*2199Sahrens if (err) 633*2199Sahrens goto out; 634*2199Sahrens 635*2199Sahrens err = dsl_sync_task_group_wait(sn.dstg); 636*2199Sahrens 637*2199Sahrens for (dst = list_head(&sn.dstg->dstg_tasks); dst; 638*2199Sahrens dst = list_next(&sn.dstg->dstg_tasks, dst)) { 639*2199Sahrens objset_t *os = dst->dst_arg1; 640*2199Sahrens if (dst->dst_err) 641*2199Sahrens dmu_objset_name(os, sn.failed); 642*2199Sahrens zil_resume(dmu_objset_zil(os)); 643*2199Sahrens dmu_objset_close(os); 644*2199Sahrens } 645*2199Sahrens out: 646*2199Sahrens if (err) 647*2199Sahrens (void) strcpy(fsname, sn.failed); 648*2199Sahrens dsl_sync_task_group_destroy(sn.dstg); 649*2199Sahrens spa_close(spa, FTAG); 650*2199Sahrens return (err); 651*2199Sahrens } 652*2199Sahrens 653789Sahrens static void 654789Sahrens dmu_objset_sync_dnodes(objset_impl_t *os, list_t *list, dmu_tx_t *tx) 655789Sahrens { 656789Sahrens dnode_t *dn = list_head(list); 657789Sahrens int level, err; 658789Sahrens 659789Sahrens for (level = 0; dn = list_head(list); level++) { 660789Sahrens zio_t *zio; 661789Sahrens zio = zio_root(os->os_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); 662789Sahrens 663789Sahrens ASSERT3U(level, <=, DN_MAX_LEVELS); 664789Sahrens 665789Sahrens while (dn) { 666789Sahrens dnode_t *next = list_next(list, dn); 667789Sahrens 668789Sahrens list_remove(list, dn); 669789Sahrens if (dnode_sync(dn, level, zio, tx) == 0) { 670789Sahrens /* 671789Sahrens * This dnode requires syncing at higher 672789Sahrens * levels; put it back onto the list. 673789Sahrens */ 674789Sahrens if (next) 675789Sahrens list_insert_before(list, next, dn); 676789Sahrens else 677789Sahrens list_insert_tail(list, dn); 678789Sahrens } 679789Sahrens dn = next; 680789Sahrens } 681789Sahrens err = zio_wait(zio); 682789Sahrens ASSERT(err == 0); 683789Sahrens } 684789Sahrens } 685789Sahrens 686789Sahrens /* ARGSUSED */ 687789Sahrens static void 688789Sahrens killer(zio_t *zio, arc_buf_t *abuf, void *arg) 689789Sahrens { 690789Sahrens objset_impl_t *os = arg; 691789Sahrens objset_phys_t *osphys = zio->io_data; 692789Sahrens dnode_phys_t *dnp = &osphys->os_meta_dnode; 693789Sahrens int i; 694789Sahrens 695789Sahrens ASSERT3U(zio->io_error, ==, 0); 696789Sahrens 697789Sahrens /* 698789Sahrens * Update rootbp fill count. 699789Sahrens */ 700789Sahrens os->os_rootbp.blk_fill = 1; /* count the meta-dnode */ 701789Sahrens for (i = 0; i < dnp->dn_nblkptr; i++) 702789Sahrens os->os_rootbp.blk_fill += dnp->dn_blkptr[i].blk_fill; 703789Sahrens 704789Sahrens BP_SET_TYPE(zio->io_bp, DMU_OT_OBJSET); 705789Sahrens BP_SET_LEVEL(zio->io_bp, 0); 706789Sahrens 707789Sahrens if (!DVA_EQUAL(BP_IDENTITY(zio->io_bp), 708789Sahrens BP_IDENTITY(&zio->io_bp_orig))) { 709789Sahrens dsl_dataset_block_kill(os->os_dsl_dataset, &zio->io_bp_orig, 710789Sahrens os->os_synctx); 711789Sahrens dsl_dataset_block_born(os->os_dsl_dataset, zio->io_bp, 712789Sahrens os->os_synctx); 713789Sahrens } 714789Sahrens } 715789Sahrens 716789Sahrens 717789Sahrens /* called from dsl */ 718789Sahrens void 719789Sahrens dmu_objset_sync(objset_impl_t *os, dmu_tx_t *tx) 720789Sahrens { 721789Sahrens extern taskq_t *dbuf_tq; 722789Sahrens int txgoff; 723789Sahrens list_t *dirty_list; 724789Sahrens int err; 7251544Seschrock zbookmark_t zb; 726789Sahrens arc_buf_t *abuf = 727789Sahrens arc_buf_alloc(os->os_spa, sizeof (objset_phys_t), FTAG); 728789Sahrens 729789Sahrens ASSERT(dmu_tx_is_syncing(tx)); 730789Sahrens ASSERT(os->os_synctx == NULL); 731789Sahrens /* XXX the write_done callback should really give us the tx... */ 732789Sahrens os->os_synctx = tx; 733789Sahrens 734789Sahrens dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg); 735789Sahrens 736789Sahrens txgoff = tx->tx_txg & TXG_MASK; 737789Sahrens 738789Sahrens dmu_objset_sync_dnodes(os, &os->os_free_dnodes[txgoff], tx); 739789Sahrens dmu_objset_sync_dnodes(os, &os->os_dirty_dnodes[txgoff], tx); 740789Sahrens 741789Sahrens /* 742789Sahrens * Free intent log blocks up to this tx. 743789Sahrens */ 744789Sahrens zil_sync(os->os_zil, tx); 745789Sahrens 746789Sahrens /* 747789Sahrens * Sync meta-dnode 748789Sahrens */ 749789Sahrens dirty_list = &os->os_dirty_dnodes[txgoff]; 750789Sahrens ASSERT(list_head(dirty_list) == NULL); 751789Sahrens list_insert_tail(dirty_list, os->os_meta_dnode); 752789Sahrens dmu_objset_sync_dnodes(os, dirty_list, tx); 753789Sahrens 754789Sahrens /* 755789Sahrens * Sync the root block. 756789Sahrens */ 757789Sahrens bcopy(os->os_phys, abuf->b_data, sizeof (objset_phys_t)); 7581544Seschrock zb.zb_objset = os->os_dsl_dataset ? os->os_dsl_dataset->ds_object : 0; 7591544Seschrock zb.zb_object = 0; 7601544Seschrock zb.zb_level = -1; 7611544Seschrock zb.zb_blkid = 0; 762789Sahrens err = arc_write(NULL, os->os_spa, os->os_md_checksum, 7631775Sbillm os->os_md_compress, 7641775Sbillm dmu_get_replication_level(os->os_spa, &zb, DMU_OT_OBJSET), 7651775Sbillm tx->tx_txg, &os->os_rootbp, abuf, killer, os, 7661544Seschrock ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, ARC_WAIT, &zb); 767789Sahrens ASSERT(err == 0); 7681544Seschrock VERIFY(arc_buf_remove_ref(abuf, FTAG) == 1); 769789Sahrens 770789Sahrens dsl_dataset_set_blkptr(os->os_dsl_dataset, &os->os_rootbp, tx); 771789Sahrens 772789Sahrens ASSERT3P(os->os_synctx, ==, tx); 773789Sahrens taskq_wait(dbuf_tq); 774789Sahrens os->os_synctx = NULL; 775789Sahrens } 776789Sahrens 777789Sahrens void 778789Sahrens dmu_objset_stats(objset_t *os, dmu_objset_stats_t *dds) 779789Sahrens { 780789Sahrens if (os->os->os_dsl_dataset != NULL) { 781789Sahrens dsl_dataset_stats(os->os->os_dsl_dataset, dds); 782789Sahrens } else { 783789Sahrens ASSERT(os->os->os_phys->os_type == DMU_OST_META); 784789Sahrens bzero(dds, sizeof (*dds)); 785789Sahrens } 786789Sahrens dds->dds_type = os->os->os_phys->os_type; 787789Sahrens } 788789Sahrens 789789Sahrens int 790789Sahrens dmu_objset_is_snapshot(objset_t *os) 791789Sahrens { 792789Sahrens if (os->os->os_dsl_dataset != NULL) 793789Sahrens return (dsl_dataset_is_snapshot(os->os->os_dsl_dataset)); 794789Sahrens else 795789Sahrens return (B_FALSE); 796789Sahrens } 797789Sahrens 798789Sahrens int 799789Sahrens dmu_snapshot_list_next(objset_t *os, int namelen, char *name, 800885Sahrens uint64_t *idp, uint64_t *offp) 801789Sahrens { 802789Sahrens dsl_dataset_t *ds = os->os->os_dsl_dataset; 803789Sahrens zap_cursor_t cursor; 804789Sahrens zap_attribute_t attr; 805789Sahrens 806789Sahrens if (ds->ds_phys->ds_snapnames_zapobj == 0) 807789Sahrens return (ENOENT); 808789Sahrens 809789Sahrens zap_cursor_init_serialized(&cursor, 810789Sahrens ds->ds_dir->dd_pool->dp_meta_objset, 811789Sahrens ds->ds_phys->ds_snapnames_zapobj, *offp); 812789Sahrens 813885Sahrens if (zap_cursor_retrieve(&cursor, &attr) != 0) { 814885Sahrens zap_cursor_fini(&cursor); 815885Sahrens return (ENOENT); 816885Sahrens } 817885Sahrens 818885Sahrens if (strlen(attr.za_name) + 1 > namelen) { 819885Sahrens zap_cursor_fini(&cursor); 820885Sahrens return (ENAMETOOLONG); 821885Sahrens } 822885Sahrens 823885Sahrens (void) strcpy(name, attr.za_name); 824885Sahrens if (idp) 825885Sahrens *idp = attr.za_first_integer; 826885Sahrens zap_cursor_advance(&cursor); 827885Sahrens *offp = zap_cursor_serialize(&cursor); 828885Sahrens zap_cursor_fini(&cursor); 829885Sahrens 830885Sahrens return (0); 831885Sahrens } 832885Sahrens 833885Sahrens int 834885Sahrens dmu_dir_list_next(objset_t *os, int namelen, char *name, 835885Sahrens uint64_t *idp, uint64_t *offp) 836885Sahrens { 837885Sahrens dsl_dir_t *dd = os->os->os_dsl_dataset->ds_dir; 838885Sahrens zap_cursor_t cursor; 839885Sahrens zap_attribute_t attr; 840885Sahrens 841885Sahrens /* there is no next dir on a snapshot! */ 842885Sahrens if (os->os->os_dsl_dataset->ds_object != 843885Sahrens dd->dd_phys->dd_head_dataset_obj) 844885Sahrens return (ENOENT); 845885Sahrens 846885Sahrens zap_cursor_init_serialized(&cursor, 847885Sahrens dd->dd_pool->dp_meta_objset, 848885Sahrens dd->dd_phys->dd_child_dir_zapobj, *offp); 849885Sahrens 850885Sahrens if (zap_cursor_retrieve(&cursor, &attr) != 0) { 851885Sahrens zap_cursor_fini(&cursor); 852885Sahrens return (ENOENT); 853885Sahrens } 854885Sahrens 855885Sahrens if (strlen(attr.za_name) + 1 > namelen) { 856885Sahrens zap_cursor_fini(&cursor); 857789Sahrens return (ENAMETOOLONG); 858885Sahrens } 859789Sahrens 860789Sahrens (void) strcpy(name, attr.za_name); 861885Sahrens if (idp) 862885Sahrens *idp = attr.za_first_integer; 863789Sahrens zap_cursor_advance(&cursor); 864789Sahrens *offp = zap_cursor_serialize(&cursor); 865885Sahrens zap_cursor_fini(&cursor); 866789Sahrens 867789Sahrens return (0); 868789Sahrens } 869789Sahrens 870789Sahrens /* 871789Sahrens * Find all objsets under name, and for each, call 'func(child_name, arg)'. 872789Sahrens */ 873*2199Sahrens int 874*2199Sahrens dmu_objset_find(char *name, int func(char *, void *), void *arg, int flags) 875789Sahrens { 876789Sahrens dsl_dir_t *dd; 877789Sahrens objset_t *os; 878789Sahrens uint64_t snapobj; 879789Sahrens zap_cursor_t zc; 880789Sahrens zap_attribute_t attr; 881789Sahrens char *child; 8821544Seschrock int do_self, err; 883789Sahrens 8841544Seschrock err = dsl_dir_open(name, FTAG, &dd, NULL); 8851544Seschrock if (err) 886*2199Sahrens return (err); 887789Sahrens 888*2199Sahrens /* NB: the $MOS dir doesn't have a head dataset */ 889789Sahrens do_self = (dd->dd_phys->dd_head_dataset_obj != 0); 890789Sahrens 891789Sahrens /* 892789Sahrens * Iterate over all children. 893789Sahrens */ 894*2199Sahrens for (zap_cursor_init(&zc, dd->dd_pool->dp_meta_objset, 895*2199Sahrens dd->dd_phys->dd_child_dir_zapobj); 896*2199Sahrens zap_cursor_retrieve(&zc, &attr) == 0; 897*2199Sahrens (void) zap_cursor_advance(&zc)) { 898*2199Sahrens ASSERT(attr.za_integer_length == sizeof (uint64_t)); 899*2199Sahrens ASSERT(attr.za_num_integers == 1); 900789Sahrens 901*2199Sahrens /* 902*2199Sahrens * No separating '/' because parent's name ends in /. 903*2199Sahrens */ 904*2199Sahrens child = kmem_alloc(MAXPATHLEN, KM_SLEEP); 905*2199Sahrens /* XXX could probably just use name here */ 906*2199Sahrens dsl_dir_name(dd, child); 907*2199Sahrens (void) strcat(child, "/"); 908*2199Sahrens (void) strcat(child, attr.za_name); 909*2199Sahrens err = dmu_objset_find(child, func, arg, flags); 910*2199Sahrens kmem_free(child, MAXPATHLEN); 911*2199Sahrens if (err) 912*2199Sahrens break; 913*2199Sahrens } 914*2199Sahrens zap_cursor_fini(&zc); 915*2199Sahrens 916*2199Sahrens if (err) { 917*2199Sahrens dsl_dir_close(dd, FTAG); 918*2199Sahrens return (err); 919789Sahrens } 920789Sahrens 921789Sahrens /* 922789Sahrens * Iterate over all snapshots. 923789Sahrens */ 924789Sahrens if ((flags & DS_FIND_SNAPSHOTS) && 925789Sahrens dmu_objset_open(name, DMU_OST_ANY, 926789Sahrens DS_MODE_STANDARD | DS_MODE_READONLY, &os) == 0) { 927789Sahrens 928789Sahrens snapobj = os->os->os_dsl_dataset->ds_phys->ds_snapnames_zapobj; 929789Sahrens dmu_objset_close(os); 930789Sahrens 931789Sahrens for (zap_cursor_init(&zc, dd->dd_pool->dp_meta_objset, snapobj); 932789Sahrens zap_cursor_retrieve(&zc, &attr) == 0; 933789Sahrens (void) zap_cursor_advance(&zc)) { 934789Sahrens ASSERT(attr.za_integer_length == sizeof (uint64_t)); 935789Sahrens ASSERT(attr.za_num_integers == 1); 936789Sahrens 937789Sahrens child = kmem_alloc(MAXPATHLEN, KM_SLEEP); 938789Sahrens /* XXX could probably just use name here */ 939789Sahrens dsl_dir_name(dd, child); 940789Sahrens (void) strcat(child, "@"); 941789Sahrens (void) strcat(child, attr.za_name); 942*2199Sahrens err = func(child, arg); 943789Sahrens kmem_free(child, MAXPATHLEN); 944*2199Sahrens if (err) 945*2199Sahrens break; 946789Sahrens } 947885Sahrens zap_cursor_fini(&zc); 948789Sahrens } 949789Sahrens 950789Sahrens dsl_dir_close(dd, FTAG); 951789Sahrens 952*2199Sahrens if (err) 953*2199Sahrens return (err); 954*2199Sahrens 955789Sahrens /* 956789Sahrens * Apply to self if appropriate. 957789Sahrens */ 958789Sahrens if (do_self) 959*2199Sahrens err = func(name, arg); 960*2199Sahrens return (err); 961789Sahrens } 962