12743Sahrens /* 22743Sahrens * CDDL HEADER START 32743Sahrens * 42743Sahrens * The contents of this file are subject to the terms of the 52743Sahrens * Common Development and Distribution License (the "License"). 62743Sahrens * You may not use this file except in compliance with the License. 72743Sahrens * 82743Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 92743Sahrens * or http://www.opensolaris.org/os/licensing. 102743Sahrens * See the License for the specific language governing permissions 112743Sahrens * and limitations under the License. 122743Sahrens * 132743Sahrens * When distributing Covered Code, include this CDDL HEADER in each 142743Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 152743Sahrens * If applicable, add the following below this CDDL HEADER, with the 162743Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 172743Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 182743Sahrens * 192743Sahrens * CDDL HEADER END 202743Sahrens */ 212743Sahrens /* 228644SMark.Maybee@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 232743Sahrens * Use is subject to license terms. 242743Sahrens */ 252743Sahrens 262743Sahrens #include <sys/dmu.h> 272743Sahrens #include <sys/dmu_impl.h> 282743Sahrens #include <sys/dmu_tx.h> 292743Sahrens #include <sys/dbuf.h> 302743Sahrens #include <sys/dnode.h> 312743Sahrens #include <sys/zfs_context.h> 322743Sahrens #include <sys/dmu_objset.h> 332743Sahrens #include <sys/dmu_traverse.h> 342743Sahrens #include <sys/dsl_dataset.h> 352743Sahrens #include <sys/dsl_dir.h> 36*11022STom.Erickson@Sun.COM #include <sys/dsl_prop.h> 372743Sahrens #include <sys/dsl_pool.h> 382743Sahrens #include <sys/dsl_synctask.h> 392743Sahrens #include <sys/zfs_ioctl.h> 402743Sahrens #include <sys/zap.h> 412743Sahrens #include <sys/zio_checksum.h> 4211007SLori.Alt@Sun.COM #include <sys/avl.h> 432743Sahrens 445367Sahrens static char *dmu_recv_tag = "dmu_recv_tag"; 455367Sahrens 4611007SLori.Alt@Sun.COM /* 4711007SLori.Alt@Sun.COM * The list of data whose inclusion in a send stream can be pending from 4811007SLori.Alt@Sun.COM * one call to backup_cb to another. Multiple calls to dump_free() and 4911007SLori.Alt@Sun.COM * dump_freeobjects() can be aggregated into a single DRR_FREE or 5011007SLori.Alt@Sun.COM * DRR_FREEOBJECTS replay record. 5111007SLori.Alt@Sun.COM */ 5211007SLori.Alt@Sun.COM typedef enum { 5311007SLori.Alt@Sun.COM PENDING_NONE, 5411007SLori.Alt@Sun.COM PENDING_FREE, 5511007SLori.Alt@Sun.COM PENDING_FREEOBJECTS 5611007SLori.Alt@Sun.COM } pendop_t; 5711007SLori.Alt@Sun.COM 582743Sahrens struct backuparg { 592743Sahrens dmu_replay_record_t *drr; 602743Sahrens vnode_t *vp; 615367Sahrens offset_t *off; 622743Sahrens objset_t *os; 632743Sahrens zio_cksum_t zc; 6411007SLori.Alt@Sun.COM uint64_t toguid; 652743Sahrens int err; 6611007SLori.Alt@Sun.COM pendop_t pending_op; 672743Sahrens }; 682743Sahrens 692743Sahrens static int 702743Sahrens dump_bytes(struct backuparg *ba, void *buf, int len) 712743Sahrens { 722743Sahrens ssize_t resid; /* have to get resid to get detailed errno */ 732743Sahrens ASSERT3U(len % 8, ==, 0); 742743Sahrens 752743Sahrens fletcher_4_incremental_native(buf, len, &ba->zc); 762743Sahrens ba->err = vn_rdwr(UIO_WRITE, ba->vp, 772743Sahrens (caddr_t)buf, len, 782743Sahrens 0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid); 795367Sahrens *ba->off += len; 802743Sahrens return (ba->err); 812743Sahrens } 822743Sahrens 832743Sahrens static int 842743Sahrens dump_free(struct backuparg *ba, uint64_t object, uint64_t offset, 852743Sahrens uint64_t length) 862743Sahrens { 8711007SLori.Alt@Sun.COM struct drr_free *drrf = &(ba->drr->drr_u.drr_free); 8811007SLori.Alt@Sun.COM 8911007SLori.Alt@Sun.COM /* 9011007SLori.Alt@Sun.COM * If there is a pending op, but it's not PENDING_FREE, push it out, 9111007SLori.Alt@Sun.COM * since free block aggregation can only be done for blocks of the 9211007SLori.Alt@Sun.COM * same type (i.e., DRR_FREE records can only be aggregated with 9311007SLori.Alt@Sun.COM * other DRR_FREE records. DRR_FREEOBJECTS records can only be 9411007SLori.Alt@Sun.COM * aggregated with other DRR_FREEOBJECTS records. 9511007SLori.Alt@Sun.COM */ 9611007SLori.Alt@Sun.COM if (ba->pending_op != PENDING_NONE && ba->pending_op != PENDING_FREE) { 9711007SLori.Alt@Sun.COM if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) 9811007SLori.Alt@Sun.COM return (EINTR); 9911007SLori.Alt@Sun.COM ba->pending_op = PENDING_NONE; 10011007SLori.Alt@Sun.COM } 10111007SLori.Alt@Sun.COM 10211007SLori.Alt@Sun.COM if (ba->pending_op == PENDING_FREE) { 10311007SLori.Alt@Sun.COM /* 10411007SLori.Alt@Sun.COM * There should never be a PENDING_FREE if length is -1 10511007SLori.Alt@Sun.COM * (because dump_dnode is the only place where this 10611007SLori.Alt@Sun.COM * function is called with a -1, and only after flushing 10711007SLori.Alt@Sun.COM * any pending record). 10811007SLori.Alt@Sun.COM */ 10911007SLori.Alt@Sun.COM ASSERT(length != -1ULL); 11011007SLori.Alt@Sun.COM /* 11111007SLori.Alt@Sun.COM * Check to see whether this free block can be aggregated 11211007SLori.Alt@Sun.COM * with pending one. 11311007SLori.Alt@Sun.COM */ 11411007SLori.Alt@Sun.COM if (drrf->drr_object == object && drrf->drr_offset + 11511007SLori.Alt@Sun.COM drrf->drr_length == offset) { 11611007SLori.Alt@Sun.COM drrf->drr_length += length; 11711007SLori.Alt@Sun.COM return (0); 11811007SLori.Alt@Sun.COM } else { 11911007SLori.Alt@Sun.COM /* not a continuation. Push out pending record */ 12011007SLori.Alt@Sun.COM if (dump_bytes(ba, ba->drr, 12111007SLori.Alt@Sun.COM sizeof (dmu_replay_record_t)) != 0) 12211007SLori.Alt@Sun.COM return (EINTR); 12311007SLori.Alt@Sun.COM ba->pending_op = PENDING_NONE; 12411007SLori.Alt@Sun.COM } 12511007SLori.Alt@Sun.COM } 12611007SLori.Alt@Sun.COM /* create a FREE record and make it pending */ 1272743Sahrens bzero(ba->drr, sizeof (dmu_replay_record_t)); 1282743Sahrens ba->drr->drr_type = DRR_FREE; 12911007SLori.Alt@Sun.COM drrf->drr_object = object; 13011007SLori.Alt@Sun.COM drrf->drr_offset = offset; 13111007SLori.Alt@Sun.COM drrf->drr_length = length; 13211007SLori.Alt@Sun.COM drrf->drr_toguid = ba->toguid; 13311007SLori.Alt@Sun.COM if (length == -1ULL) { 13411007SLori.Alt@Sun.COM if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) 13511007SLori.Alt@Sun.COM return (EINTR); 13611007SLori.Alt@Sun.COM } else { 13711007SLori.Alt@Sun.COM ba->pending_op = PENDING_FREE; 13811007SLori.Alt@Sun.COM } 1392743Sahrens 1402743Sahrens return (0); 1412743Sahrens } 1422743Sahrens 1432743Sahrens static int 1442743Sahrens dump_data(struct backuparg *ba, dmu_object_type_t type, 1452743Sahrens uint64_t object, uint64_t offset, int blksz, void *data) 1462743Sahrens { 14711007SLori.Alt@Sun.COM struct drr_write *drrw = &(ba->drr->drr_u.drr_write); 14811007SLori.Alt@Sun.COM 14911007SLori.Alt@Sun.COM /* 15011007SLori.Alt@Sun.COM * If there is any kind of pending aggregation (currently either 15111007SLori.Alt@Sun.COM * a grouping of free objects or free blocks), push it out to 15211007SLori.Alt@Sun.COM * the stream, since aggregation can't be done across operations 15311007SLori.Alt@Sun.COM * of different types. 15411007SLori.Alt@Sun.COM */ 15511007SLori.Alt@Sun.COM if (ba->pending_op != PENDING_NONE) { 15611007SLori.Alt@Sun.COM if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) 15711007SLori.Alt@Sun.COM return (EINTR); 15811007SLori.Alt@Sun.COM ba->pending_op = PENDING_NONE; 15911007SLori.Alt@Sun.COM } 1602743Sahrens /* write a DATA record */ 1612743Sahrens bzero(ba->drr, sizeof (dmu_replay_record_t)); 1622743Sahrens ba->drr->drr_type = DRR_WRITE; 16311007SLori.Alt@Sun.COM drrw->drr_object = object; 16411007SLori.Alt@Sun.COM drrw->drr_type = type; 16511007SLori.Alt@Sun.COM drrw->drr_offset = offset; 16611007SLori.Alt@Sun.COM drrw->drr_length = blksz; 16711007SLori.Alt@Sun.COM drrw->drr_toguid = ba->toguid; 1682743Sahrens 16911007SLori.Alt@Sun.COM if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) 1702743Sahrens return (EINTR); 17111007SLori.Alt@Sun.COM if (dump_bytes(ba, data, blksz) != 0) 1722743Sahrens return (EINTR); 1732743Sahrens return (0); 1742743Sahrens } 1752743Sahrens 1762743Sahrens static int 1772743Sahrens dump_freeobjects(struct backuparg *ba, uint64_t firstobj, uint64_t numobjs) 1782743Sahrens { 17911007SLori.Alt@Sun.COM struct drr_freeobjects *drrfo = &(ba->drr->drr_u.drr_freeobjects); 18011007SLori.Alt@Sun.COM 18111007SLori.Alt@Sun.COM /* 18211007SLori.Alt@Sun.COM * If there is a pending op, but it's not PENDING_FREEOBJECTS, 18311007SLori.Alt@Sun.COM * push it out, since free block aggregation can only be done for 18411007SLori.Alt@Sun.COM * blocks of the same type (i.e., DRR_FREE records can only be 18511007SLori.Alt@Sun.COM * aggregated with other DRR_FREE records. DRR_FREEOBJECTS records 18611007SLori.Alt@Sun.COM * can only be aggregated with other DRR_FREEOBJECTS records. 18711007SLori.Alt@Sun.COM */ 18811007SLori.Alt@Sun.COM if (ba->pending_op != PENDING_NONE && 18911007SLori.Alt@Sun.COM ba->pending_op != PENDING_FREEOBJECTS) { 19011007SLori.Alt@Sun.COM if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) 19111007SLori.Alt@Sun.COM return (EINTR); 19211007SLori.Alt@Sun.COM ba->pending_op = PENDING_NONE; 19311007SLori.Alt@Sun.COM } 19411007SLori.Alt@Sun.COM if (ba->pending_op == PENDING_FREEOBJECTS) { 19511007SLori.Alt@Sun.COM /* 19611007SLori.Alt@Sun.COM * See whether this free object array can be aggregated 19711007SLori.Alt@Sun.COM * with pending one 19811007SLori.Alt@Sun.COM */ 19911007SLori.Alt@Sun.COM if (drrfo->drr_firstobj + drrfo->drr_numobjs == firstobj) { 20011007SLori.Alt@Sun.COM drrfo->drr_numobjs += numobjs; 20111007SLori.Alt@Sun.COM return (0); 20211007SLori.Alt@Sun.COM } else { 20311007SLori.Alt@Sun.COM /* can't be aggregated. Push out pending record */ 20411007SLori.Alt@Sun.COM if (dump_bytes(ba, ba->drr, 20511007SLori.Alt@Sun.COM sizeof (dmu_replay_record_t)) != 0) 20611007SLori.Alt@Sun.COM return (EINTR); 20711007SLori.Alt@Sun.COM ba->pending_op = PENDING_NONE; 20811007SLori.Alt@Sun.COM } 20911007SLori.Alt@Sun.COM } 21011007SLori.Alt@Sun.COM 2112743Sahrens /* write a FREEOBJECTS record */ 2122743Sahrens bzero(ba->drr, sizeof (dmu_replay_record_t)); 2132743Sahrens ba->drr->drr_type = DRR_FREEOBJECTS; 21411007SLori.Alt@Sun.COM drrfo->drr_firstobj = firstobj; 21511007SLori.Alt@Sun.COM drrfo->drr_numobjs = numobjs; 21611007SLori.Alt@Sun.COM drrfo->drr_toguid = ba->toguid; 2172743Sahrens 21811007SLori.Alt@Sun.COM ba->pending_op = PENDING_FREEOBJECTS; 21911007SLori.Alt@Sun.COM 2202743Sahrens return (0); 2212743Sahrens } 2222743Sahrens 2232743Sahrens static int 2242743Sahrens dump_dnode(struct backuparg *ba, uint64_t object, dnode_phys_t *dnp) 2252743Sahrens { 22611007SLori.Alt@Sun.COM struct drr_object *drro = &(ba->drr->drr_u.drr_object); 22711007SLori.Alt@Sun.COM 2282743Sahrens if (dnp == NULL || dnp->dn_type == DMU_OT_NONE) 2292743Sahrens return (dump_freeobjects(ba, object, 1)); 2302743Sahrens 23111007SLori.Alt@Sun.COM if (ba->pending_op != PENDING_NONE) { 23211007SLori.Alt@Sun.COM if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) 23311007SLori.Alt@Sun.COM return (EINTR); 23411007SLori.Alt@Sun.COM ba->pending_op = PENDING_NONE; 23511007SLori.Alt@Sun.COM } 23611007SLori.Alt@Sun.COM 2372743Sahrens /* write an OBJECT record */ 2382743Sahrens bzero(ba->drr, sizeof (dmu_replay_record_t)); 2392743Sahrens ba->drr->drr_type = DRR_OBJECT; 24011007SLori.Alt@Sun.COM drro->drr_object = object; 24111007SLori.Alt@Sun.COM drro->drr_type = dnp->dn_type; 24211007SLori.Alt@Sun.COM drro->drr_bonustype = dnp->dn_bonustype; 24311007SLori.Alt@Sun.COM drro->drr_blksz = dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT; 24411007SLori.Alt@Sun.COM drro->drr_bonuslen = dnp->dn_bonuslen; 24511007SLori.Alt@Sun.COM drro->drr_checksumtype = dnp->dn_checksum; 24611007SLori.Alt@Sun.COM drro->drr_compress = dnp->dn_compress; 24711007SLori.Alt@Sun.COM drro->drr_toguid = ba->toguid; 2482743Sahrens 24911007SLori.Alt@Sun.COM if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) 2502743Sahrens return (EINTR); 2512743Sahrens 25211007SLori.Alt@Sun.COM if (dump_bytes(ba, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0) 2532743Sahrens return (EINTR); 2542743Sahrens 2552743Sahrens /* free anything past the end of the file */ 2562743Sahrens if (dump_free(ba, object, (dnp->dn_maxblkid + 1) * 2572743Sahrens (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL)) 2582743Sahrens return (EINTR); 2592743Sahrens if (ba->err) 2602743Sahrens return (EINTR); 2612743Sahrens return (0); 2622743Sahrens } 2632743Sahrens 2642743Sahrens #define BP_SPAN(dnp, level) \ 2652743Sahrens (((uint64_t)dnp->dn_datablkszsec) << (SPA_MINBLOCKSHIFT + \ 2662743Sahrens (level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) 2672743Sahrens 26810922SJeff.Bonwick@Sun.COM /* ARGSUSED */ 2692743Sahrens static int 27010922SJeff.Bonwick@Sun.COM backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, 27110922SJeff.Bonwick@Sun.COM const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) 2722743Sahrens { 2732743Sahrens struct backuparg *ba = arg; 2742743Sahrens dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE; 2752743Sahrens int err = 0; 2762743Sahrens 2772743Sahrens if (issig(JUSTLOOKING) && issig(FORREAL)) 2782743Sahrens return (EINTR); 2792743Sahrens 28010922SJeff.Bonwick@Sun.COM if (zb->zb_object != DMU_META_DNODE_OBJECT && 28110922SJeff.Bonwick@Sun.COM DMU_OBJECT_IS_SPECIAL(zb->zb_object)) { 2829396SMatthew.Ahrens@Sun.COM return (0); 28310922SJeff.Bonwick@Sun.COM } else if (bp == NULL && zb->zb_object == DMU_META_DNODE_OBJECT) { 2847837SMatthew.Ahrens@Sun.COM uint64_t span = BP_SPAN(dnp, zb->zb_level); 2857837SMatthew.Ahrens@Sun.COM uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT; 2862743Sahrens err = dump_freeobjects(ba, dnobj, span >> DNODE_SHIFT); 2872743Sahrens } else if (bp == NULL) { 2887837SMatthew.Ahrens@Sun.COM uint64_t span = BP_SPAN(dnp, zb->zb_level); 2897837SMatthew.Ahrens@Sun.COM err = dump_free(ba, zb->zb_object, zb->zb_blkid * span, span); 2907837SMatthew.Ahrens@Sun.COM } else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) { 2917837SMatthew.Ahrens@Sun.COM return (0); 2927837SMatthew.Ahrens@Sun.COM } else if (type == DMU_OT_DNODE) { 2937837SMatthew.Ahrens@Sun.COM dnode_phys_t *blk; 2942743Sahrens int i; 2952743Sahrens int blksz = BP_GET_LSIZE(bp); 2967837SMatthew.Ahrens@Sun.COM uint32_t aflags = ARC_WAIT; 2977837SMatthew.Ahrens@Sun.COM arc_buf_t *abuf; 2982743Sahrens 2997837SMatthew.Ahrens@Sun.COM if (arc_read_nolock(NULL, spa, bp, 3007837SMatthew.Ahrens@Sun.COM arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, 3017837SMatthew.Ahrens@Sun.COM ZIO_FLAG_CANFAIL, &aflags, zb) != 0) 3027837SMatthew.Ahrens@Sun.COM return (EIO); 3037837SMatthew.Ahrens@Sun.COM 3047837SMatthew.Ahrens@Sun.COM blk = abuf->b_data; 3052743Sahrens for (i = 0; i < blksz >> DNODE_SHIFT; i++) { 3067837SMatthew.Ahrens@Sun.COM uint64_t dnobj = (zb->zb_blkid << 3077837SMatthew.Ahrens@Sun.COM (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i; 3082743Sahrens err = dump_dnode(ba, dnobj, blk+i); 3092743Sahrens if (err) 3102743Sahrens break; 3112743Sahrens } 3127837SMatthew.Ahrens@Sun.COM (void) arc_buf_remove_ref(abuf, &abuf); 3137837SMatthew.Ahrens@Sun.COM } else { /* it's a level-0 block of a regular object */ 3147837SMatthew.Ahrens@Sun.COM uint32_t aflags = ARC_WAIT; 3157837SMatthew.Ahrens@Sun.COM arc_buf_t *abuf; 3162743Sahrens int blksz = BP_GET_LSIZE(bp); 3172743Sahrens 3187837SMatthew.Ahrens@Sun.COM if (arc_read_nolock(NULL, spa, bp, 3197837SMatthew.Ahrens@Sun.COM arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, 3207837SMatthew.Ahrens@Sun.COM ZIO_FLAG_CANFAIL, &aflags, zb) != 0) 3217837SMatthew.Ahrens@Sun.COM return (EIO); 3222743Sahrens 3237837SMatthew.Ahrens@Sun.COM err = dump_data(ba, type, zb->zb_object, zb->zb_blkid * blksz, 3247837SMatthew.Ahrens@Sun.COM blksz, abuf->b_data); 3257837SMatthew.Ahrens@Sun.COM (void) arc_buf_remove_ref(abuf, &abuf); 3262743Sahrens } 3272743Sahrens 3282743Sahrens ASSERT(err == 0 || err == EINTR); 3292743Sahrens return (err); 3302743Sahrens } 3312743Sahrens 3322743Sahrens int 3335367Sahrens dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, 3345367Sahrens vnode_t *vp, offset_t *off) 3352743Sahrens { 33610298SMatthew.Ahrens@Sun.COM dsl_dataset_t *ds = tosnap->os_dsl_dataset; 33710298SMatthew.Ahrens@Sun.COM dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL; 3382743Sahrens dmu_replay_record_t *drr; 3392743Sahrens struct backuparg ba; 3402743Sahrens int err; 3415367Sahrens uint64_t fromtxg = 0; 3422743Sahrens 3432743Sahrens /* tosnap must be a snapshot */ 3442743Sahrens if (ds->ds_phys->ds_next_snap_obj == 0) 3452743Sahrens return (EINVAL); 3462743Sahrens 3472743Sahrens /* fromsnap must be an earlier snapshot from the same fs as tosnap */ 3482743Sahrens if (fromds && (ds->ds_dir != fromds->ds_dir || 3495367Sahrens fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg)) 3502743Sahrens return (EXDEV); 3512743Sahrens 3525367Sahrens if (fromorigin) { 3537046Sahrens dsl_pool_t *dp = ds->ds_dir->dd_pool; 3547046Sahrens 3555367Sahrens if (fromsnap) 3565367Sahrens return (EINVAL); 3575367Sahrens 3587046Sahrens if (dsl_dir_is_clone(ds->ds_dir)) { 3595367Sahrens rw_enter(&dp->dp_config_rwlock, RW_READER); 3606689Smaybee err = dsl_dataset_hold_obj(dp, 3616689Smaybee ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &fromds); 3625367Sahrens rw_exit(&dp->dp_config_rwlock); 3635367Sahrens if (err) 3645367Sahrens return (err); 3655367Sahrens } else { 3665367Sahrens fromorigin = B_FALSE; 3675367Sahrens } 3685367Sahrens } 3695367Sahrens 3705367Sahrens 3712743Sahrens drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); 3722743Sahrens drr->drr_type = DRR_BEGIN; 3732743Sahrens drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; 37411007SLori.Alt@Sun.COM DMU_SET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo, 37511007SLori.Alt@Sun.COM DMU_SUBSTREAM); 3762743Sahrens drr->drr_u.drr_begin.drr_creation_time = 3772743Sahrens ds->ds_phys->ds_creation_time; 37810298SMatthew.Ahrens@Sun.COM drr->drr_u.drr_begin.drr_type = tosnap->os_phys->os_type; 3795367Sahrens if (fromorigin) 3805367Sahrens drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE; 3812743Sahrens drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid; 3826492Stimh if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) 3836492Stimh drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA; 3846492Stimh 3852743Sahrens if (fromds) 3862743Sahrens drr->drr_u.drr_begin.drr_fromguid = fromds->ds_phys->ds_guid; 3872743Sahrens dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname); 3882743Sahrens 3895367Sahrens if (fromds) 3905367Sahrens fromtxg = fromds->ds_phys->ds_creation_txg; 3915367Sahrens if (fromorigin) 3926689Smaybee dsl_dataset_rele(fromds, FTAG); 3935367Sahrens 3942743Sahrens ba.drr = drr; 3952743Sahrens ba.vp = vp; 3962743Sahrens ba.os = tosnap; 3975367Sahrens ba.off = off; 39811007SLori.Alt@Sun.COM ba.toguid = ds->ds_phys->ds_guid; 3992743Sahrens ZIO_SET_CHECKSUM(&ba.zc, 0, 0, 0, 0); 40011007SLori.Alt@Sun.COM ba.pending_op = PENDING_NONE; 4012743Sahrens 40211007SLori.Alt@Sun.COM if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t)) != 0) { 4032743Sahrens kmem_free(drr, sizeof (dmu_replay_record_t)); 4042743Sahrens return (ba.err); 4052743Sahrens } 4062743Sahrens 4077837SMatthew.Ahrens@Sun.COM err = traverse_dataset(ds, fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH, 4082743Sahrens backup_cb, &ba); 4092743Sahrens 41011007SLori.Alt@Sun.COM if (ba.pending_op != PENDING_NONE) 41111007SLori.Alt@Sun.COM if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t)) != 0) 41211007SLori.Alt@Sun.COM err = EINTR; 41311007SLori.Alt@Sun.COM 4142743Sahrens if (err) { 4152743Sahrens if (err == EINTR && ba.err) 4162743Sahrens err = ba.err; 4173655Sgw25295 kmem_free(drr, sizeof (dmu_replay_record_t)); 4182743Sahrens return (err); 4192743Sahrens } 4202743Sahrens 4212743Sahrens bzero(drr, sizeof (dmu_replay_record_t)); 4222743Sahrens drr->drr_type = DRR_END; 4232743Sahrens drr->drr_u.drr_end.drr_checksum = ba.zc; 42411007SLori.Alt@Sun.COM drr->drr_u.drr_end.drr_toguid = ba.toguid; 4252743Sahrens 42611007SLori.Alt@Sun.COM if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t)) != 0) { 4273655Sgw25295 kmem_free(drr, sizeof (dmu_replay_record_t)); 4282743Sahrens return (ba.err); 4293655Sgw25295 } 4302743Sahrens 4312743Sahrens kmem_free(drr, sizeof (dmu_replay_record_t)); 4322743Sahrens 4332743Sahrens return (0); 4342743Sahrens } 4352743Sahrens 4365367Sahrens struct recvbeginsyncarg { 4375367Sahrens const char *tofs; 4385367Sahrens const char *tosnap; 4395367Sahrens dsl_dataset_t *origin; 4405367Sahrens uint64_t fromguid; 4415367Sahrens dmu_objset_type_t type; 4425367Sahrens void *tag; 4435367Sahrens boolean_t force; 4446492Stimh uint64_t dsflags; 4455367Sahrens char clonelastname[MAXNAMELEN]; 4465367Sahrens dsl_dataset_t *ds; /* the ds to recv into; returned from the syncfunc */ 4472743Sahrens }; 4482743Sahrens 4495367Sahrens /* ARGSUSED */ 4502743Sahrens static int 45110272SMatthew.Ahrens@Sun.COM recv_new_check(void *arg1, void *arg2, dmu_tx_t *tx) 4525367Sahrens { 4535367Sahrens dsl_dir_t *dd = arg1; 4545367Sahrens struct recvbeginsyncarg *rbsa = arg2; 4555367Sahrens objset_t *mos = dd->dd_pool->dp_meta_objset; 4565367Sahrens uint64_t val; 4575367Sahrens int err; 4585367Sahrens 4595367Sahrens err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj, 4605367Sahrens strrchr(rbsa->tofs, '/') + 1, sizeof (uint64_t), 1, &val); 4615367Sahrens 4625367Sahrens if (err != ENOENT) 4635367Sahrens return (err ? err : EEXIST); 4645367Sahrens 4655367Sahrens if (rbsa->origin) { 4665367Sahrens /* make sure it's a snap in the same pool */ 4675367Sahrens if (rbsa->origin->ds_dir->dd_pool != dd->dd_pool) 4685367Sahrens return (EXDEV); 46910272SMatthew.Ahrens@Sun.COM if (!dsl_dataset_is_snapshot(rbsa->origin)) 4705367Sahrens return (EINVAL); 4715367Sahrens if (rbsa->origin->ds_phys->ds_guid != rbsa->fromguid) 4725367Sahrens return (ENODEV); 4735367Sahrens } 4745367Sahrens 4755367Sahrens return (0); 4765367Sahrens } 4775367Sahrens 4785367Sahrens static void 47910272SMatthew.Ahrens@Sun.COM recv_new_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 4805367Sahrens { 4815367Sahrens dsl_dir_t *dd = arg1; 4825367Sahrens struct recvbeginsyncarg *rbsa = arg2; 4836689Smaybee uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags; 4845367Sahrens uint64_t dsobj; 4855367Sahrens 48610272SMatthew.Ahrens@Sun.COM /* Create and open new dataset. */ 4875367Sahrens dsobj = dsl_dataset_create_sync(dd, strrchr(rbsa->tofs, '/') + 1, 4886492Stimh rbsa->origin, flags, cr, tx); 48910272SMatthew.Ahrens@Sun.COM VERIFY(0 == dsl_dataset_own_obj(dd->dd_pool, dsobj, 49010298SMatthew.Ahrens@Sun.COM B_TRUE, dmu_recv_tag, &rbsa->ds)); 4915367Sahrens 49210272SMatthew.Ahrens@Sun.COM if (rbsa->origin == NULL) { 49310272SMatthew.Ahrens@Sun.COM (void) dmu_objset_create_impl(dd->dd_pool->dp_spa, 49410272SMatthew.Ahrens@Sun.COM rbsa->ds, &rbsa->ds->ds_phys->ds_bp, rbsa->type, tx); 4955367Sahrens } 4965367Sahrens 49710272SMatthew.Ahrens@Sun.COM spa_history_internal_log(LOG_DS_REPLAY_FULL_SYNC, 49810272SMatthew.Ahrens@Sun.COM dd->dd_pool->dp_spa, tx, cr, "dataset = %lld", dsobj); 4995367Sahrens } 5005367Sahrens 5015367Sahrens /* ARGSUSED */ 5025367Sahrens static int 50310272SMatthew.Ahrens@Sun.COM recv_existing_check(void *arg1, void *arg2, dmu_tx_t *tx) 5045367Sahrens { 5055367Sahrens dsl_dataset_t *ds = arg1; 5065367Sahrens struct recvbeginsyncarg *rbsa = arg2; 5072743Sahrens int err; 5082743Sahrens uint64_t val; 5092743Sahrens 5105367Sahrens /* must not have any changes since most recent snapshot */ 5115367Sahrens if (!rbsa->force && dsl_dataset_modified_since_lastsnap(ds)) 5125367Sahrens return (ETXTBSY); 5135367Sahrens 51410272SMatthew.Ahrens@Sun.COM if (rbsa->fromguid) { 51510272SMatthew.Ahrens@Sun.COM /* if incremental, most recent snapshot must match fromguid */ 51610272SMatthew.Ahrens@Sun.COM if (ds->ds_prev == NULL) 51710272SMatthew.Ahrens@Sun.COM return (ENODEV); 518*11022STom.Erickson@Sun.COM 519*11022STom.Erickson@Sun.COM /* 520*11022STom.Erickson@Sun.COM * most recent snapshot must match fromguid, or there are no 521*11022STom.Erickson@Sun.COM * changes since the fromguid one 522*11022STom.Erickson@Sun.COM */ 523*11022STom.Erickson@Sun.COM if (ds->ds_prev->ds_phys->ds_guid != rbsa->fromguid) { 524*11022STom.Erickson@Sun.COM uint64_t birth = ds->ds_prev->ds_phys->ds_bp.blk_birth; 525*11022STom.Erickson@Sun.COM uint64_t obj = ds->ds_prev->ds_phys->ds_prev_snap_obj; 526*11022STom.Erickson@Sun.COM while (obj != 0) { 527*11022STom.Erickson@Sun.COM dsl_dataset_t *snap; 528*11022STom.Erickson@Sun.COM err = dsl_dataset_hold_obj(ds->ds_dir->dd_pool, 529*11022STom.Erickson@Sun.COM obj, FTAG, &snap); 530*11022STom.Erickson@Sun.COM if (err) 531*11022STom.Erickson@Sun.COM return (ENODEV); 532*11022STom.Erickson@Sun.COM if (snap->ds_phys->ds_creation_txg < birth) { 533*11022STom.Erickson@Sun.COM dsl_dataset_rele(snap, FTAG); 534*11022STom.Erickson@Sun.COM return (ENODEV); 535*11022STom.Erickson@Sun.COM } 536*11022STom.Erickson@Sun.COM if (snap->ds_phys->ds_guid == rbsa->fromguid) { 537*11022STom.Erickson@Sun.COM dsl_dataset_rele(snap, FTAG); 538*11022STom.Erickson@Sun.COM break; /* it's ok */ 539*11022STom.Erickson@Sun.COM } 540*11022STom.Erickson@Sun.COM obj = snap->ds_phys->ds_prev_snap_obj; 541*11022STom.Erickson@Sun.COM dsl_dataset_rele(snap, FTAG); 542*11022STom.Erickson@Sun.COM } 543*11022STom.Erickson@Sun.COM if (obj == 0) 544*11022STom.Erickson@Sun.COM return (ENODEV); 545*11022STom.Erickson@Sun.COM } 54610272SMatthew.Ahrens@Sun.COM } else { 54710272SMatthew.Ahrens@Sun.COM /* if full, most recent snapshot must be $ORIGIN */ 54810272SMatthew.Ahrens@Sun.COM if (ds->ds_phys->ds_prev_snap_txg >= TXG_INITIAL) 54910272SMatthew.Ahrens@Sun.COM return (ENODEV); 55010272SMatthew.Ahrens@Sun.COM } 5512743Sahrens 5526083Sek110237 /* temporary clone name must not exist */ 5536083Sek110237 err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset, 5546083Sek110237 ds->ds_dir->dd_phys->dd_child_dir_zapobj, 5556083Sek110237 rbsa->clonelastname, 8, 1, &val); 5566083Sek110237 if (err == 0) 5576083Sek110237 return (EEXIST); 5586083Sek110237 if (err != ENOENT) 5596083Sek110237 return (err); 5606083Sek110237 5612743Sahrens /* new snapshot name must not exist */ 5625367Sahrens err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset, 5635367Sahrens ds->ds_phys->ds_snapnames_zapobj, rbsa->tosnap, 8, 1, &val); 5645367Sahrens if (err == 0) 5652743Sahrens return (EEXIST); 5662743Sahrens if (err != ENOENT) 5675367Sahrens return (err); 5682743Sahrens return (0); 5692743Sahrens } 5702743Sahrens 5712743Sahrens /* ARGSUSED */ 5725367Sahrens static void 57310272SMatthew.Ahrens@Sun.COM recv_existing_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 5745326Sek110237 { 5755367Sahrens dsl_dataset_t *ohds = arg1; 5765367Sahrens struct recvbeginsyncarg *rbsa = arg2; 5775367Sahrens dsl_pool_t *dp = ohds->ds_dir->dd_pool; 57810272SMatthew.Ahrens@Sun.COM dsl_dataset_t *cds; 5796689Smaybee uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags; 5805367Sahrens uint64_t dsobj; 5815326Sek110237 58210272SMatthew.Ahrens@Sun.COM /* create and open the temporary clone */ 58310272SMatthew.Ahrens@Sun.COM dsobj = dsl_dataset_create_sync(ohds->ds_dir, rbsa->clonelastname, 58410272SMatthew.Ahrens@Sun.COM ohds->ds_prev, flags, cr, tx); 58510298SMatthew.Ahrens@Sun.COM VERIFY(0 == dsl_dataset_own_obj(dp, dsobj, B_TRUE, dmu_recv_tag, &cds)); 5865367Sahrens 58710272SMatthew.Ahrens@Sun.COM /* 58810272SMatthew.Ahrens@Sun.COM * If we actually created a non-clone, we need to create the 58910272SMatthew.Ahrens@Sun.COM * objset in our new dataset. 59010272SMatthew.Ahrens@Sun.COM */ 59110272SMatthew.Ahrens@Sun.COM if (BP_IS_HOLE(dsl_dataset_get_blkptr(cds))) { 59210272SMatthew.Ahrens@Sun.COM (void) dmu_objset_create_impl(dp->dp_spa, 59310272SMatthew.Ahrens@Sun.COM cds, dsl_dataset_get_blkptr(cds), rbsa->type, tx); 59410272SMatthew.Ahrens@Sun.COM } 59510272SMatthew.Ahrens@Sun.COM 5965367Sahrens rbsa->ds = cds; 5975367Sahrens 5985367Sahrens spa_history_internal_log(LOG_DS_REPLAY_INC_SYNC, 5996689Smaybee dp->dp_spa, tx, cr, "dataset = %lld", dsobj); 6005326Sek110237 } 6015326Sek110237 6025367Sahrens /* 6035367Sahrens * NB: callers *MUST* call dmu_recv_stream() if dmu_recv_begin() 6045367Sahrens * succeeds; otherwise we will leak the holds on the datasets. 6055367Sahrens */ 6065367Sahrens int 60711007SLori.Alt@Sun.COM dmu_recv_begin(char *tofs, char *tosnap, char *top_ds, struct drr_begin *drrb, 60810204SMatthew.Ahrens@Sun.COM boolean_t force, objset_t *origin, dmu_recv_cookie_t *drc) 6092743Sahrens { 6105367Sahrens int err = 0; 6115367Sahrens boolean_t byteswap; 61210272SMatthew.Ahrens@Sun.COM struct recvbeginsyncarg rbsa = { 0 }; 61311007SLori.Alt@Sun.COM uint64_t versioninfo; 6145367Sahrens int flags; 6155367Sahrens dsl_dataset_t *ds; 6165367Sahrens 6175367Sahrens if (drrb->drr_magic == DMU_BACKUP_MAGIC) 6185367Sahrens byteswap = FALSE; 6195367Sahrens else if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) 6205367Sahrens byteswap = TRUE; 6215367Sahrens else 6225367Sahrens return (EINVAL); 6235367Sahrens 6245367Sahrens rbsa.tofs = tofs; 6255367Sahrens rbsa.tosnap = tosnap; 62610298SMatthew.Ahrens@Sun.COM rbsa.origin = origin ? origin->os_dsl_dataset : NULL; 6275367Sahrens rbsa.fromguid = drrb->drr_fromguid; 6285367Sahrens rbsa.type = drrb->drr_type; 6295367Sahrens rbsa.tag = FTAG; 6306492Stimh rbsa.dsflags = 0; 63111007SLori.Alt@Sun.COM versioninfo = drrb->drr_versioninfo; 6325367Sahrens flags = drrb->drr_flags; 6335367Sahrens 6345367Sahrens if (byteswap) { 6355367Sahrens rbsa.type = BSWAP_32(rbsa.type); 6365367Sahrens rbsa.fromguid = BSWAP_64(rbsa.fromguid); 63711007SLori.Alt@Sun.COM versioninfo = BSWAP_64(versioninfo); 6385367Sahrens flags = BSWAP_32(flags); 6395367Sahrens } 6405367Sahrens 64111007SLori.Alt@Sun.COM if (DMU_GET_STREAM_HDRTYPE(versioninfo) == DMU_COMPOUNDSTREAM || 6425367Sahrens rbsa.type >= DMU_OST_NUMTYPES || 6435367Sahrens ((flags & DRR_FLAG_CLONE) && origin == NULL)) 6445367Sahrens return (EINVAL); 6455367Sahrens 6466492Stimh if (flags & DRR_FLAG_CI_DATA) 6476492Stimh rbsa.dsflags = DS_FLAG_CI_DATASET; 6486492Stimh 6495367Sahrens bzero(drc, sizeof (dmu_recv_cookie_t)); 6505367Sahrens drc->drc_drrb = drrb; 6515367Sahrens drc->drc_tosnap = tosnap; 65211007SLori.Alt@Sun.COM drc->drc_top_ds = top_ds; 6535367Sahrens drc->drc_force = force; 6545367Sahrens 6555367Sahrens /* 6565367Sahrens * Process the begin in syncing context. 6575367Sahrens */ 65810272SMatthew.Ahrens@Sun.COM 65910272SMatthew.Ahrens@Sun.COM /* open the dataset we are logically receiving into */ 66010272SMatthew.Ahrens@Sun.COM err = dsl_dataset_hold(tofs, dmu_recv_tag, &ds); 66110272SMatthew.Ahrens@Sun.COM if (err == 0) { 66210272SMatthew.Ahrens@Sun.COM /* target fs already exists; recv into temp clone */ 6635367Sahrens 66410272SMatthew.Ahrens@Sun.COM /* Can't recv a clone into an existing fs */ 66510272SMatthew.Ahrens@Sun.COM if (flags & DRR_FLAG_CLONE) { 66610272SMatthew.Ahrens@Sun.COM dsl_dataset_rele(ds, dmu_recv_tag); 66710272SMatthew.Ahrens@Sun.COM return (EINVAL); 66810272SMatthew.Ahrens@Sun.COM } 6692743Sahrens 67010204SMatthew.Ahrens@Sun.COM /* must not have an incremental recv already in progress */ 67110204SMatthew.Ahrens@Sun.COM if (!mutex_tryenter(&ds->ds_recvlock)) { 67210204SMatthew.Ahrens@Sun.COM dsl_dataset_rele(ds, dmu_recv_tag); 67310204SMatthew.Ahrens@Sun.COM return (EBUSY); 67410204SMatthew.Ahrens@Sun.COM } 67510204SMatthew.Ahrens@Sun.COM 67610272SMatthew.Ahrens@Sun.COM /* tmp clone name is: tofs/%tosnap" */ 67710272SMatthew.Ahrens@Sun.COM (void) snprintf(rbsa.clonelastname, sizeof (rbsa.clonelastname), 67810272SMatthew.Ahrens@Sun.COM "%%%s", tosnap); 6795367Sahrens rbsa.force = force; 6805367Sahrens err = dsl_sync_task_do(ds->ds_dir->dd_pool, 68110272SMatthew.Ahrens@Sun.COM recv_existing_check, recv_existing_sync, ds, &rbsa, 5); 6825367Sahrens if (err) { 68310204SMatthew.Ahrens@Sun.COM mutex_exit(&ds->ds_recvlock); 6846689Smaybee dsl_dataset_rele(ds, dmu_recv_tag); 6855367Sahrens return (err); 6865367Sahrens } 6875367Sahrens drc->drc_logical_ds = ds; 6885367Sahrens drc->drc_real_ds = rbsa.ds; 68910272SMatthew.Ahrens@Sun.COM } else if (err == ENOENT) { 69010272SMatthew.Ahrens@Sun.COM /* target fs does not exist; must be a full backup or clone */ 69110272SMatthew.Ahrens@Sun.COM char *cp; 6925367Sahrens 69310272SMatthew.Ahrens@Sun.COM /* 69410272SMatthew.Ahrens@Sun.COM * If it's a non-clone incremental, we are missing the 69510272SMatthew.Ahrens@Sun.COM * target fs, so fail the recv. 69610272SMatthew.Ahrens@Sun.COM */ 69710272SMatthew.Ahrens@Sun.COM if (rbsa.fromguid && !(flags & DRR_FLAG_CLONE)) 69810272SMatthew.Ahrens@Sun.COM return (ENOENT); 69910272SMatthew.Ahrens@Sun.COM 70010272SMatthew.Ahrens@Sun.COM /* Open the parent of tofs */ 70110272SMatthew.Ahrens@Sun.COM cp = strrchr(tofs, '/'); 70210272SMatthew.Ahrens@Sun.COM *cp = '\0'; 70310819SChris.Kirby@sun.com err = dsl_dataset_hold(tofs, FTAG, &ds); 70410272SMatthew.Ahrens@Sun.COM *cp = '/'; 7055367Sahrens if (err) 7065367Sahrens return (err); 7075367Sahrens 70810272SMatthew.Ahrens@Sun.COM err = dsl_sync_task_do(ds->ds_dir->dd_pool, 70910272SMatthew.Ahrens@Sun.COM recv_new_check, recv_new_sync, ds->ds_dir, &rbsa, 5); 71010819SChris.Kirby@sun.com dsl_dataset_rele(ds, FTAG); 7115367Sahrens if (err) 7125367Sahrens return (err); 7135367Sahrens drc->drc_logical_ds = drc->drc_real_ds = rbsa.ds; 7145367Sahrens drc->drc_newfs = B_TRUE; 7155367Sahrens } 7165367Sahrens 71710272SMatthew.Ahrens@Sun.COM return (err); 7182743Sahrens } 7192743Sahrens 7205367Sahrens struct restorearg { 7215367Sahrens int err; 7225367Sahrens int byteswap; 7235367Sahrens vnode_t *vp; 7245367Sahrens char *buf; 7255367Sahrens uint64_t voff; 7265367Sahrens int bufsize; /* amount of memory allocated for buf */ 7275367Sahrens zio_cksum_t cksum; 72811007SLori.Alt@Sun.COM avl_tree_t guid_to_ds_map; 7295326Sek110237 }; 7305326Sek110237 73111007SLori.Alt@Sun.COM typedef struct guid_map_entry { 73211007SLori.Alt@Sun.COM uint64_t guid; 73311007SLori.Alt@Sun.COM dsl_dataset_t *gme_ds; 73411007SLori.Alt@Sun.COM avl_node_t avlnode; 73511007SLori.Alt@Sun.COM } guid_map_entry_t; 73611007SLori.Alt@Sun.COM 73711007SLori.Alt@Sun.COM static int 73811007SLori.Alt@Sun.COM guid_compare(const void *arg1, const void *arg2) 73911007SLori.Alt@Sun.COM { 74011007SLori.Alt@Sun.COM const guid_map_entry_t *gmep1 = arg1; 74111007SLori.Alt@Sun.COM const guid_map_entry_t *gmep2 = arg2; 74211007SLori.Alt@Sun.COM 74311007SLori.Alt@Sun.COM if (gmep1->guid < gmep2->guid) 74411007SLori.Alt@Sun.COM return (-1); 74511007SLori.Alt@Sun.COM else if (gmep1->guid > gmep2->guid) 74611007SLori.Alt@Sun.COM return (1); 74711007SLori.Alt@Sun.COM return (0); 74811007SLori.Alt@Sun.COM } 74911007SLori.Alt@Sun.COM 75011007SLori.Alt@Sun.COM /* 75111007SLori.Alt@Sun.COM * This function is a callback used by dmu_objset_find() (which 75211007SLori.Alt@Sun.COM * enumerates the object sets) to build an avl tree that maps guids 75311007SLori.Alt@Sun.COM * to datasets. The resulting table is used when processing DRR_WRITE_BYREF 75411007SLori.Alt@Sun.COM * send stream records. These records, which are used in dedup'ed 75511007SLori.Alt@Sun.COM * streams, do not contain data themselves, but refer to a copy 75611007SLori.Alt@Sun.COM * of the data block that has already been written because it was 75711007SLori.Alt@Sun.COM * earlier in the stream. That previous copy is identified by the 75811007SLori.Alt@Sun.COM * guid of the dataset with the referenced data. 75911007SLori.Alt@Sun.COM */ 76011007SLori.Alt@Sun.COM int 76111007SLori.Alt@Sun.COM find_ds_by_guid(char *name, void *arg) 76211007SLori.Alt@Sun.COM { 76311007SLori.Alt@Sun.COM dsl_dataset_t *ds, *snapds; 76411007SLori.Alt@Sun.COM avl_tree_t *guid_map = arg; 76511007SLori.Alt@Sun.COM guid_map_entry_t *gmep; 76611007SLori.Alt@Sun.COM dsl_pool_t *dp; 76711007SLori.Alt@Sun.COM int err; 76811007SLori.Alt@Sun.COM uint64_t lastobj, firstobj; 76911007SLori.Alt@Sun.COM 77011007SLori.Alt@Sun.COM if (dsl_dataset_hold(name, FTAG, &ds) != 0) 77111007SLori.Alt@Sun.COM return (0); 77211007SLori.Alt@Sun.COM 77311007SLori.Alt@Sun.COM dp = ds->ds_dir->dd_pool; 77411007SLori.Alt@Sun.COM rw_enter(&dp->dp_config_rwlock, RW_READER); 77511007SLori.Alt@Sun.COM firstobj = ds->ds_dir->dd_phys->dd_origin_obj; 77611007SLori.Alt@Sun.COM lastobj = ds->ds_phys->ds_prev_snap_obj; 77711007SLori.Alt@Sun.COM 77811007SLori.Alt@Sun.COM while (lastobj != firstobj) { 77911007SLori.Alt@Sun.COM err = dsl_dataset_hold_obj(dp, lastobj, guid_map, &snapds); 78011007SLori.Alt@Sun.COM if (err) { 78111007SLori.Alt@Sun.COM /* 78211007SLori.Alt@Sun.COM * Skip this snapshot and move on. It's not 78311007SLori.Alt@Sun.COM * clear why this would ever happen, but the 78411007SLori.Alt@Sun.COM * remainder of the snapshot streadm can be 78511007SLori.Alt@Sun.COM * processed. 78611007SLori.Alt@Sun.COM */ 78711007SLori.Alt@Sun.COM rw_exit(&dp->dp_config_rwlock); 78811007SLori.Alt@Sun.COM dsl_dataset_rele(ds, FTAG); 78911007SLori.Alt@Sun.COM return (0); 79011007SLori.Alt@Sun.COM } 79111007SLori.Alt@Sun.COM 79211007SLori.Alt@Sun.COM gmep = kmem_alloc(sizeof (guid_map_entry_t), KM_SLEEP); 79311007SLori.Alt@Sun.COM gmep->guid = snapds->ds_phys->ds_guid; 79411007SLori.Alt@Sun.COM gmep->gme_ds = snapds; 79511007SLori.Alt@Sun.COM avl_add(guid_map, gmep); 79611007SLori.Alt@Sun.COM lastobj = snapds->ds_phys->ds_prev_snap_obj; 79711007SLori.Alt@Sun.COM } 79811007SLori.Alt@Sun.COM 79911007SLori.Alt@Sun.COM rw_exit(&dp->dp_config_rwlock); 80011007SLori.Alt@Sun.COM dsl_dataset_rele(ds, FTAG); 80111007SLori.Alt@Sun.COM 80211007SLori.Alt@Sun.COM return (0); 80311007SLori.Alt@Sun.COM } 80411007SLori.Alt@Sun.COM 8052743Sahrens static void * 8062743Sahrens restore_read(struct restorearg *ra, int len) 8072743Sahrens { 8082743Sahrens void *rv; 8095367Sahrens int done = 0; 8102743Sahrens 8112743Sahrens /* some things will require 8-byte alignment, so everything must */ 8122743Sahrens ASSERT3U(len % 8, ==, 0); 8132743Sahrens 8145367Sahrens while (done < len) { 8152743Sahrens ssize_t resid; 8162743Sahrens 8172743Sahrens ra->err = vn_rdwr(UIO_READ, ra->vp, 8185367Sahrens (caddr_t)ra->buf + done, len - done, 8192743Sahrens ra->voff, UIO_SYSSPACE, FAPPEND, 8202743Sahrens RLIM64_INFINITY, CRED(), &resid); 8212743Sahrens 8225367Sahrens if (resid == len - done) 8232743Sahrens ra->err = EINVAL; 8245367Sahrens ra->voff += len - done - resid; 8255367Sahrens done = len - resid; 8262743Sahrens if (ra->err) 8272743Sahrens return (NULL); 8282743Sahrens } 8292743Sahrens 8305367Sahrens ASSERT3U(done, ==, len); 8315367Sahrens rv = ra->buf; 8322743Sahrens if (ra->byteswap) 8335367Sahrens fletcher_4_incremental_byteswap(rv, len, &ra->cksum); 8342743Sahrens else 8355367Sahrens fletcher_4_incremental_native(rv, len, &ra->cksum); 8362743Sahrens return (rv); 8372743Sahrens } 8382743Sahrens 8392743Sahrens static void 8402743Sahrens backup_byteswap(dmu_replay_record_t *drr) 8412743Sahrens { 8422743Sahrens #define DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X)) 8432743Sahrens #define DO32(X) (drr->drr_u.X = BSWAP_32(drr->drr_u.X)) 8442743Sahrens drr->drr_type = BSWAP_32(drr->drr_type); 8455367Sahrens drr->drr_payloadlen = BSWAP_32(drr->drr_payloadlen); 8462743Sahrens switch (drr->drr_type) { 8472743Sahrens case DRR_BEGIN: 8482743Sahrens DO64(drr_begin.drr_magic); 84911007SLori.Alt@Sun.COM DO64(drr_begin.drr_versioninfo); 8502743Sahrens DO64(drr_begin.drr_creation_time); 8512743Sahrens DO32(drr_begin.drr_type); 8525367Sahrens DO32(drr_begin.drr_flags); 8532743Sahrens DO64(drr_begin.drr_toguid); 8542743Sahrens DO64(drr_begin.drr_fromguid); 8552743Sahrens break; 8562743Sahrens case DRR_OBJECT: 8572743Sahrens DO64(drr_object.drr_object); 8582743Sahrens /* DO64(drr_object.drr_allocation_txg); */ 8592743Sahrens DO32(drr_object.drr_type); 8602743Sahrens DO32(drr_object.drr_bonustype); 8612743Sahrens DO32(drr_object.drr_blksz); 8622743Sahrens DO32(drr_object.drr_bonuslen); 86311007SLori.Alt@Sun.COM DO64(drr_object.drr_toguid); 8642743Sahrens break; 8652743Sahrens case DRR_FREEOBJECTS: 8662743Sahrens DO64(drr_freeobjects.drr_firstobj); 8672743Sahrens DO64(drr_freeobjects.drr_numobjs); 86811007SLori.Alt@Sun.COM DO64(drr_freeobjects.drr_toguid); 8692743Sahrens break; 8702743Sahrens case DRR_WRITE: 8712743Sahrens DO64(drr_write.drr_object); 8722743Sahrens DO32(drr_write.drr_type); 8732743Sahrens DO64(drr_write.drr_offset); 8742743Sahrens DO64(drr_write.drr_length); 87511007SLori.Alt@Sun.COM DO64(drr_write.drr_toguid); 87611007SLori.Alt@Sun.COM DO64(drr_write.drr_blkcksum.zc_word[0]); 87711007SLori.Alt@Sun.COM DO64(drr_write.drr_blkcksum.zc_word[1]); 87811007SLori.Alt@Sun.COM DO64(drr_write.drr_blkcksum.zc_word[2]); 87911007SLori.Alt@Sun.COM DO64(drr_write.drr_blkcksum.zc_word[3]); 88011007SLori.Alt@Sun.COM break; 88111007SLori.Alt@Sun.COM case DRR_WRITE_BYREF: 88211007SLori.Alt@Sun.COM DO64(drr_write_byref.drr_object); 88311007SLori.Alt@Sun.COM DO64(drr_write_byref.drr_offset); 88411007SLori.Alt@Sun.COM DO64(drr_write_byref.drr_length); 88511007SLori.Alt@Sun.COM DO64(drr_write_byref.drr_toguid); 88611007SLori.Alt@Sun.COM DO64(drr_write_byref.drr_refguid); 88711007SLori.Alt@Sun.COM DO64(drr_write_byref.drr_refobject); 88811007SLori.Alt@Sun.COM DO64(drr_write_byref.drr_refoffset); 88911007SLori.Alt@Sun.COM DO64(drr_write_byref.drr_blkcksum.zc_word[0]); 89011007SLori.Alt@Sun.COM DO64(drr_write_byref.drr_blkcksum.zc_word[1]); 89111007SLori.Alt@Sun.COM DO64(drr_write_byref.drr_blkcksum.zc_word[2]); 89211007SLori.Alt@Sun.COM DO64(drr_write_byref.drr_blkcksum.zc_word[3]); 8932743Sahrens break; 8942743Sahrens case DRR_FREE: 8952743Sahrens DO64(drr_free.drr_object); 8962743Sahrens DO64(drr_free.drr_offset); 8972743Sahrens DO64(drr_free.drr_length); 89811007SLori.Alt@Sun.COM DO64(drr_free.drr_toguid); 8992743Sahrens break; 9002743Sahrens case DRR_END: 9012743Sahrens DO64(drr_end.drr_checksum.zc_word[0]); 9022743Sahrens DO64(drr_end.drr_checksum.zc_word[1]); 9032743Sahrens DO64(drr_end.drr_checksum.zc_word[2]); 9042743Sahrens DO64(drr_end.drr_checksum.zc_word[3]); 90511007SLori.Alt@Sun.COM DO64(drr_end.drr_toguid); 9062743Sahrens break; 9072743Sahrens } 9082743Sahrens #undef DO64 9092743Sahrens #undef DO32 9102743Sahrens } 9112743Sahrens 9122743Sahrens static int 9132743Sahrens restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) 9142743Sahrens { 9152743Sahrens int err; 9162743Sahrens dmu_tx_t *tx; 9177994STim.Haley@Sun.COM void *data = NULL; 9182743Sahrens 9192743Sahrens if (drro->drr_type == DMU_OT_NONE || 9202743Sahrens drro->drr_type >= DMU_OT_NUMTYPES || 9212743Sahrens drro->drr_bonustype >= DMU_OT_NUMTYPES || 92211007SLori.Alt@Sun.COM drro->drr_checksumtype >= ZIO_CHECKSUM_FUNCTIONS || 9232743Sahrens drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS || 9242743Sahrens P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) || 9252743Sahrens drro->drr_blksz < SPA_MINBLOCKSIZE || 9262743Sahrens drro->drr_blksz > SPA_MAXBLOCKSIZE || 9272743Sahrens drro->drr_bonuslen > DN_MAX_BONUSLEN) { 9282743Sahrens return (EINVAL); 9292743Sahrens } 9302743Sahrens 9318986SMark.Maybee@Sun.COM err = dmu_object_info(os, drro->drr_object, NULL); 9328986SMark.Maybee@Sun.COM 9338986SMark.Maybee@Sun.COM if (err != 0 && err != ENOENT) 9348986SMark.Maybee@Sun.COM return (EINVAL); 9358986SMark.Maybee@Sun.COM 9367994STim.Haley@Sun.COM if (drro->drr_bonuslen) { 9377994STim.Haley@Sun.COM data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8)); 9387994STim.Haley@Sun.COM if (ra->err) 9397994STim.Haley@Sun.COM return (ra->err); 9407994STim.Haley@Sun.COM } 9417994STim.Haley@Sun.COM 9422743Sahrens if (err == ENOENT) { 9432743Sahrens /* currently free, want to be allocated */ 9448986SMark.Maybee@Sun.COM tx = dmu_tx_create(os); 9452743Sahrens dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 9462743Sahrens err = dmu_tx_assign(tx, TXG_WAIT); 9472743Sahrens if (err) { 9482743Sahrens dmu_tx_abort(tx); 9492743Sahrens return (err); 9502743Sahrens } 9512743Sahrens err = dmu_object_claim(os, drro->drr_object, 9522743Sahrens drro->drr_type, drro->drr_blksz, 9532743Sahrens drro->drr_bonustype, drro->drr_bonuslen, tx); 9548986SMark.Maybee@Sun.COM dmu_tx_commit(tx); 9552743Sahrens } else { 9562743Sahrens /* currently allocated, want to be allocated */ 9572743Sahrens err = dmu_object_reclaim(os, drro->drr_object, 9582743Sahrens drro->drr_type, drro->drr_blksz, 9598986SMark.Maybee@Sun.COM drro->drr_bonustype, drro->drr_bonuslen); 9602743Sahrens } 9618986SMark.Maybee@Sun.COM if (err) 9628986SMark.Maybee@Sun.COM return (EINVAL); 9638986SMark.Maybee@Sun.COM 9648986SMark.Maybee@Sun.COM tx = dmu_tx_create(os); 9658986SMark.Maybee@Sun.COM dmu_tx_hold_bonus(tx, drro->drr_object); 9668986SMark.Maybee@Sun.COM err = dmu_tx_assign(tx, TXG_WAIT); 9672743Sahrens if (err) { 9688986SMark.Maybee@Sun.COM dmu_tx_abort(tx); 9698986SMark.Maybee@Sun.COM return (err); 9702743Sahrens } 9712743Sahrens 97211007SLori.Alt@Sun.COM dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksumtype, 97311007SLori.Alt@Sun.COM tx); 9742743Sahrens dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx); 9752743Sahrens 9767994STim.Haley@Sun.COM if (data != NULL) { 9772743Sahrens dmu_buf_t *db; 9787994STim.Haley@Sun.COM 9792743Sahrens VERIFY(0 == dmu_bonus_hold(os, drro->drr_object, FTAG, &db)); 9802743Sahrens dmu_buf_will_dirty(db, tx); 9812743Sahrens 9824944Smaybee ASSERT3U(db->db_size, >=, drro->drr_bonuslen); 9834944Smaybee bcopy(data, db->db_data, drro->drr_bonuslen); 9842743Sahrens if (ra->byteswap) { 9852743Sahrens dmu_ot[drro->drr_bonustype].ot_byteswap(db->db_data, 9862743Sahrens drro->drr_bonuslen); 9872743Sahrens } 9882743Sahrens dmu_buf_rele(db, FTAG); 9892743Sahrens } 9902743Sahrens dmu_tx_commit(tx); 9912743Sahrens return (0); 9922743Sahrens } 9932743Sahrens 9942743Sahrens /* ARGSUSED */ 9952743Sahrens static int 9962743Sahrens restore_freeobjects(struct restorearg *ra, objset_t *os, 9972743Sahrens struct drr_freeobjects *drrfo) 9982743Sahrens { 9992743Sahrens uint64_t obj; 10002743Sahrens 10012743Sahrens if (drrfo->drr_firstobj + drrfo->drr_numobjs < drrfo->drr_firstobj) 10022743Sahrens return (EINVAL); 10032743Sahrens 10042743Sahrens for (obj = drrfo->drr_firstobj; 10053087Sahrens obj < drrfo->drr_firstobj + drrfo->drr_numobjs; 10063087Sahrens (void) dmu_object_next(os, &obj, FALSE, 0)) { 10072743Sahrens int err; 10082743Sahrens 10092743Sahrens if (dmu_object_info(os, obj, NULL) != 0) 10102743Sahrens continue; 10112743Sahrens 10126992Smaybee err = dmu_free_object(os, obj); 10136992Smaybee if (err) 10142743Sahrens return (err); 10152743Sahrens } 10162743Sahrens return (0); 10172743Sahrens } 10182743Sahrens 10192743Sahrens static int 10202743Sahrens restore_write(struct restorearg *ra, objset_t *os, 10212743Sahrens struct drr_write *drrw) 10222743Sahrens { 10232743Sahrens dmu_tx_t *tx; 10242743Sahrens void *data; 10252743Sahrens int err; 10262743Sahrens 10272743Sahrens if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset || 10282743Sahrens drrw->drr_type >= DMU_OT_NUMTYPES) 10292743Sahrens return (EINVAL); 10302743Sahrens 10312743Sahrens data = restore_read(ra, drrw->drr_length); 10322743Sahrens if (data == NULL) 10332743Sahrens return (ra->err); 10342743Sahrens 10352743Sahrens if (dmu_object_info(os, drrw->drr_object, NULL) != 0) 10362743Sahrens return (EINVAL); 10372743Sahrens 10382743Sahrens tx = dmu_tx_create(os); 10392743Sahrens 10402743Sahrens dmu_tx_hold_write(tx, drrw->drr_object, 10412743Sahrens drrw->drr_offset, drrw->drr_length); 10422743Sahrens err = dmu_tx_assign(tx, TXG_WAIT); 10432743Sahrens if (err) { 10442743Sahrens dmu_tx_abort(tx); 10452743Sahrens return (err); 10462743Sahrens } 10472743Sahrens if (ra->byteswap) 10482743Sahrens dmu_ot[drrw->drr_type].ot_byteswap(data, drrw->drr_length); 10492743Sahrens dmu_write(os, drrw->drr_object, 10502743Sahrens drrw->drr_offset, drrw->drr_length, data, tx); 10512743Sahrens dmu_tx_commit(tx); 10522743Sahrens return (0); 10532743Sahrens } 10542743Sahrens 105511007SLori.Alt@Sun.COM /* 105611007SLori.Alt@Sun.COM * Handle a DRR_WRITE_BYREF record. This record is used in dedup'ed 105711007SLori.Alt@Sun.COM * streams to refer to a copy of the data that is already on the 105811007SLori.Alt@Sun.COM * system because it came in earlier in the stream. This function 105911007SLori.Alt@Sun.COM * finds the earlier copy of the data, and uses that copy instead of 106011007SLori.Alt@Sun.COM * data from the stream to fulfill this write. 106111007SLori.Alt@Sun.COM */ 106211007SLori.Alt@Sun.COM static int 106311007SLori.Alt@Sun.COM restore_write_byref(struct restorearg *ra, objset_t *os, 106411007SLori.Alt@Sun.COM struct drr_write_byref *drrwbr) 106511007SLori.Alt@Sun.COM { 106611007SLori.Alt@Sun.COM dmu_tx_t *tx; 106711007SLori.Alt@Sun.COM int err; 106811007SLori.Alt@Sun.COM guid_map_entry_t gmesrch; 106911007SLori.Alt@Sun.COM guid_map_entry_t *gmep; 107011007SLori.Alt@Sun.COM avl_index_t where; 107111007SLori.Alt@Sun.COM objset_t *ref_os = NULL; 107211007SLori.Alt@Sun.COM dmu_buf_t *dbp; 107311007SLori.Alt@Sun.COM 107411007SLori.Alt@Sun.COM if (drrwbr->drr_offset + drrwbr->drr_length < drrwbr->drr_offset) 107511007SLori.Alt@Sun.COM return (EINVAL); 107611007SLori.Alt@Sun.COM 107711007SLori.Alt@Sun.COM /* 107811007SLori.Alt@Sun.COM * If the GUID of the referenced dataset is different from the 107911007SLori.Alt@Sun.COM * GUID of the target dataset, find the referenced dataset. 108011007SLori.Alt@Sun.COM */ 108111007SLori.Alt@Sun.COM if (drrwbr->drr_toguid != drrwbr->drr_refguid) { 108211007SLori.Alt@Sun.COM gmesrch.guid = drrwbr->drr_refguid; 108311007SLori.Alt@Sun.COM if ((gmep = avl_find(&ra->guid_to_ds_map, &gmesrch, 108411007SLori.Alt@Sun.COM &where)) == NULL) { 108511007SLori.Alt@Sun.COM return (EINVAL); 108611007SLori.Alt@Sun.COM } 108711007SLori.Alt@Sun.COM if (dmu_objset_from_ds(gmep->gme_ds, &ref_os)) 108811007SLori.Alt@Sun.COM return (EINVAL); 108911007SLori.Alt@Sun.COM } else { 109011007SLori.Alt@Sun.COM ref_os = os; 109111007SLori.Alt@Sun.COM } 109211007SLori.Alt@Sun.COM 109311007SLori.Alt@Sun.COM if (err = dmu_buf_hold(ref_os, drrwbr->drr_refobject, 109411007SLori.Alt@Sun.COM drrwbr->drr_refoffset, FTAG, &dbp)) 109511007SLori.Alt@Sun.COM return (err); 109611007SLori.Alt@Sun.COM 109711007SLori.Alt@Sun.COM tx = dmu_tx_create(os); 109811007SLori.Alt@Sun.COM 109911007SLori.Alt@Sun.COM dmu_tx_hold_write(tx, drrwbr->drr_object, 110011007SLori.Alt@Sun.COM drrwbr->drr_offset, drrwbr->drr_length); 110111007SLori.Alt@Sun.COM err = dmu_tx_assign(tx, TXG_WAIT); 110211007SLori.Alt@Sun.COM if (err) { 110311007SLori.Alt@Sun.COM dmu_tx_abort(tx); 110411007SLori.Alt@Sun.COM return (err); 110511007SLori.Alt@Sun.COM } 110611007SLori.Alt@Sun.COM dmu_write(os, drrwbr->drr_object, 110711007SLori.Alt@Sun.COM drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx); 110811007SLori.Alt@Sun.COM dmu_buf_rele(dbp, FTAG); 110911007SLori.Alt@Sun.COM dmu_tx_commit(tx); 111011007SLori.Alt@Sun.COM return (0); 111111007SLori.Alt@Sun.COM } 111211007SLori.Alt@Sun.COM 11132743Sahrens /* ARGSUSED */ 11142743Sahrens static int 11152743Sahrens restore_free(struct restorearg *ra, objset_t *os, 11162743Sahrens struct drr_free *drrf) 11172743Sahrens { 11182743Sahrens int err; 11192743Sahrens 11202743Sahrens if (drrf->drr_length != -1ULL && 11212743Sahrens drrf->drr_offset + drrf->drr_length < drrf->drr_offset) 11222743Sahrens return (EINVAL); 11232743Sahrens 11242743Sahrens if (dmu_object_info(os, drrf->drr_object, NULL) != 0) 11252743Sahrens return (EINVAL); 11262743Sahrens 11276992Smaybee err = dmu_free_long_range(os, drrf->drr_object, 11282743Sahrens drrf->drr_offset, drrf->drr_length); 11292743Sahrens return (err); 11302743Sahrens } 11312743Sahrens 11325367Sahrens /* 11335367Sahrens * NB: callers *must* call dmu_recv_end() if this succeeds. 11345367Sahrens */ 11355367Sahrens int 11365367Sahrens dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp) 11375367Sahrens { 11385367Sahrens struct restorearg ra = { 0 }; 11395367Sahrens dmu_replay_record_t *drr; 11405367Sahrens objset_t *os; 11415367Sahrens zio_cksum_t pcksum; 114211007SLori.Alt@Sun.COM guid_map_entry_t *gmep; 114311007SLori.Alt@Sun.COM int featureflags; 11445367Sahrens 11455367Sahrens if (drc->drc_drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) 11465367Sahrens ra.byteswap = TRUE; 11472743Sahrens 11485367Sahrens { 11495367Sahrens /* compute checksum of drr_begin record */ 11505367Sahrens dmu_replay_record_t *drr; 11515367Sahrens drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); 11525367Sahrens 11535367Sahrens drr->drr_type = DRR_BEGIN; 11545367Sahrens drr->drr_u.drr_begin = *drc->drc_drrb; 11555367Sahrens if (ra.byteswap) { 11565367Sahrens fletcher_4_incremental_byteswap(drr, 11575367Sahrens sizeof (dmu_replay_record_t), &ra.cksum); 11585367Sahrens } else { 11595367Sahrens fletcher_4_incremental_native(drr, 11605367Sahrens sizeof (dmu_replay_record_t), &ra.cksum); 11615367Sahrens } 11625367Sahrens kmem_free(drr, sizeof (dmu_replay_record_t)); 11632743Sahrens } 11642743Sahrens 11652743Sahrens if (ra.byteswap) { 11665367Sahrens struct drr_begin *drrb = drc->drc_drrb; 11672743Sahrens drrb->drr_magic = BSWAP_64(drrb->drr_magic); 116811007SLori.Alt@Sun.COM drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo); 11692743Sahrens drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time); 11702743Sahrens drrb->drr_type = BSWAP_32(drrb->drr_type); 11712743Sahrens drrb->drr_toguid = BSWAP_64(drrb->drr_toguid); 11722743Sahrens drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid); 11732743Sahrens } 11742743Sahrens 11755367Sahrens ra.vp = vp; 11765367Sahrens ra.voff = *voffp; 11775367Sahrens ra.bufsize = 1<<20; 11785367Sahrens ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP); 11795326Sek110237 11805367Sahrens /* these were verified in dmu_recv_begin */ 118111007SLori.Alt@Sun.COM ASSERT(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo) == 118211007SLori.Alt@Sun.COM DMU_SUBSTREAM); 11835367Sahrens ASSERT(drc->drc_drrb->drr_type < DMU_OST_NUMTYPES); 11842743Sahrens 11852743Sahrens /* 11862743Sahrens * Open the objset we are modifying. 11872743Sahrens */ 118810298SMatthew.Ahrens@Sun.COM VERIFY(dmu_objset_from_ds(drc->drc_real_ds, &os) == 0); 11892743Sahrens 11905367Sahrens ASSERT(drc->drc_real_ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT); 11912743Sahrens 119211007SLori.Alt@Sun.COM featureflags = DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo); 119311007SLori.Alt@Sun.COM 119411007SLori.Alt@Sun.COM /* if this stream is dedup'ed, set up the avl tree for guid mapping */ 119511007SLori.Alt@Sun.COM if (featureflags & DMU_BACKUP_FEATURE_DEDUP) { 119611007SLori.Alt@Sun.COM avl_create(&ra.guid_to_ds_map, guid_compare, 119711007SLori.Alt@Sun.COM sizeof (guid_map_entry_t), 119811007SLori.Alt@Sun.COM offsetof(guid_map_entry_t, avlnode)); 119911007SLori.Alt@Sun.COM (void) dmu_objset_find(drc->drc_top_ds, find_ds_by_guid, 120011007SLori.Alt@Sun.COM (void *)&ra.guid_to_ds_map, 120111007SLori.Alt@Sun.COM DS_FIND_CHILDREN); 120211007SLori.Alt@Sun.COM } 120311007SLori.Alt@Sun.COM 12042743Sahrens /* 12052743Sahrens * Read records and process them. 12062743Sahrens */ 12075367Sahrens pcksum = ra.cksum; 12082743Sahrens while (ra.err == 0 && 12092743Sahrens NULL != (drr = restore_read(&ra, sizeof (*drr)))) { 12102743Sahrens if (issig(JUSTLOOKING) && issig(FORREAL)) { 12112743Sahrens ra.err = EINTR; 12122743Sahrens goto out; 12132743Sahrens } 12142743Sahrens 12152743Sahrens if (ra.byteswap) 12162743Sahrens backup_byteswap(drr); 12172743Sahrens 12182743Sahrens switch (drr->drr_type) { 12192743Sahrens case DRR_OBJECT: 12202743Sahrens { 12212743Sahrens /* 12222743Sahrens * We need to make a copy of the record header, 12232743Sahrens * because restore_{object,write} may need to 12242743Sahrens * restore_read(), which will invalidate drr. 12252743Sahrens */ 12262743Sahrens struct drr_object drro = drr->drr_u.drr_object; 12272743Sahrens ra.err = restore_object(&ra, os, &drro); 12282743Sahrens break; 12292743Sahrens } 12302743Sahrens case DRR_FREEOBJECTS: 12312743Sahrens { 12322743Sahrens struct drr_freeobjects drrfo = 12332743Sahrens drr->drr_u.drr_freeobjects; 12342743Sahrens ra.err = restore_freeobjects(&ra, os, &drrfo); 12352743Sahrens break; 12362743Sahrens } 12372743Sahrens case DRR_WRITE: 12382743Sahrens { 12392743Sahrens struct drr_write drrw = drr->drr_u.drr_write; 12402743Sahrens ra.err = restore_write(&ra, os, &drrw); 12412743Sahrens break; 12422743Sahrens } 124311007SLori.Alt@Sun.COM case DRR_WRITE_BYREF: 124411007SLori.Alt@Sun.COM { 124511007SLori.Alt@Sun.COM struct drr_write_byref drrwbr = 124611007SLori.Alt@Sun.COM drr->drr_u.drr_write_byref; 124711007SLori.Alt@Sun.COM ra.err = restore_write_byref(&ra, os, &drrwbr); 124811007SLori.Alt@Sun.COM break; 124911007SLori.Alt@Sun.COM } 12502743Sahrens case DRR_FREE: 12512743Sahrens { 12522743Sahrens struct drr_free drrf = drr->drr_u.drr_free; 12532743Sahrens ra.err = restore_free(&ra, os, &drrf); 12542743Sahrens break; 12552743Sahrens } 12562743Sahrens case DRR_END: 12572743Sahrens { 12582743Sahrens struct drr_end drre = drr->drr_u.drr_end; 12592743Sahrens /* 12602743Sahrens * We compare against the *previous* checksum 12612743Sahrens * value, because the stored checksum is of 12622743Sahrens * everything before the DRR_END record. 12632743Sahrens */ 12646479Sahrens if (!ZIO_CHECKSUM_EQUAL(drre.drr_checksum, pcksum)) 12652743Sahrens ra.err = ECKSUM; 12662743Sahrens goto out; 12672743Sahrens } 12682743Sahrens default: 12692743Sahrens ra.err = EINVAL; 12702743Sahrens goto out; 12712743Sahrens } 12725367Sahrens pcksum = ra.cksum; 12732743Sahrens } 12746479Sahrens ASSERT(ra.err != 0); 12752743Sahrens 12762743Sahrens out: 12775367Sahrens if (ra.err != 0) { 12782743Sahrens /* 127910204SMatthew.Ahrens@Sun.COM * destroy what we created, so we don't leave it in the 128010204SMatthew.Ahrens@Sun.COM * inconsistent restoring state. 12812743Sahrens */ 12825367Sahrens txg_wait_synced(drc->drc_real_ds->ds_dir->dd_pool, 0); 128310204SMatthew.Ahrens@Sun.COM 128410242Schris.kirby@sun.com (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, 128510242Schris.kirby@sun.com B_FALSE); 128610204SMatthew.Ahrens@Sun.COM if (drc->drc_real_ds != drc->drc_logical_ds) { 128710204SMatthew.Ahrens@Sun.COM mutex_exit(&drc->drc_logical_ds->ds_recvlock); 128810204SMatthew.Ahrens@Sun.COM dsl_dataset_rele(drc->drc_logical_ds, dmu_recv_tag); 128910204SMatthew.Ahrens@Sun.COM } 12902743Sahrens } 12912743Sahrens 129211007SLori.Alt@Sun.COM if (featureflags & DMU_BACKUP_FEATURE_DEDUP) { 129311007SLori.Alt@Sun.COM void *cookie = NULL; 129411007SLori.Alt@Sun.COM 129511007SLori.Alt@Sun.COM while (gmep = avl_destroy_nodes(&ra.guid_to_ds_map, &cookie)) { 129611007SLori.Alt@Sun.COM dsl_dataset_rele(gmep->gme_ds, &ra.guid_to_ds_map); 129711007SLori.Alt@Sun.COM kmem_free(gmep, sizeof (guid_map_entry_t)); 129811007SLori.Alt@Sun.COM } 129911007SLori.Alt@Sun.COM avl_destroy(&ra.guid_to_ds_map); 130011007SLori.Alt@Sun.COM } 130111007SLori.Alt@Sun.COM 13022743Sahrens kmem_free(ra.buf, ra.bufsize); 13035367Sahrens *voffp = ra.voff; 13042743Sahrens return (ra.err); 13052743Sahrens } 13065326Sek110237 13075367Sahrens struct recvendsyncarg { 13085367Sahrens char *tosnap; 13095367Sahrens uint64_t creation_time; 13105367Sahrens uint64_t toguid; 13115367Sahrens }; 13125367Sahrens 13135367Sahrens static int 13145367Sahrens recv_end_check(void *arg1, void *arg2, dmu_tx_t *tx) 13155367Sahrens { 13165367Sahrens dsl_dataset_t *ds = arg1; 13175367Sahrens struct recvendsyncarg *resa = arg2; 13185367Sahrens 13195367Sahrens return (dsl_dataset_snapshot_check(ds, resa->tosnap, tx)); 13205367Sahrens } 13215367Sahrens 13225367Sahrens static void 13235367Sahrens recv_end_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 13245326Sek110237 { 13255367Sahrens dsl_dataset_t *ds = arg1; 13265367Sahrens struct recvendsyncarg *resa = arg2; 13275367Sahrens 13285367Sahrens dsl_dataset_snapshot_sync(ds, resa->tosnap, cr, tx); 13295367Sahrens 13305367Sahrens /* set snapshot's creation time and guid */ 13315367Sahrens dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 13325367Sahrens ds->ds_prev->ds_phys->ds_creation_time = resa->creation_time; 13335367Sahrens ds->ds_prev->ds_phys->ds_guid = resa->toguid; 13345367Sahrens ds->ds_prev->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; 13355367Sahrens 13365367Sahrens dmu_buf_will_dirty(ds->ds_dbuf, tx); 13375367Sahrens ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; 13385367Sahrens } 13395367Sahrens 134010272SMatthew.Ahrens@Sun.COM static int 134110272SMatthew.Ahrens@Sun.COM dmu_recv_existing_end(dmu_recv_cookie_t *drc) 13425367Sahrens { 13436689Smaybee struct recvendsyncarg resa; 13446689Smaybee dsl_dataset_t *ds = drc->drc_logical_ds; 13456689Smaybee int err; 13465367Sahrens 13475367Sahrens /* 134810272SMatthew.Ahrens@Sun.COM * XXX hack; seems the ds is still dirty and dsl_pool_zil_clean() 134910272SMatthew.Ahrens@Sun.COM * expects it to have a ds_user_ptr (and zil), but clone_swap() 135010272SMatthew.Ahrens@Sun.COM * can close it. 13515367Sahrens */ 13526689Smaybee txg_wait_synced(ds->ds_dir->dd_pool, 0); 13535326Sek110237 135410272SMatthew.Ahrens@Sun.COM if (dsl_dataset_tryown(ds, FALSE, dmu_recv_tag)) { 135510272SMatthew.Ahrens@Sun.COM err = dsl_dataset_clone_swap(drc->drc_real_ds, ds, 135610272SMatthew.Ahrens@Sun.COM drc->drc_force); 135710272SMatthew.Ahrens@Sun.COM if (err) 135810272SMatthew.Ahrens@Sun.COM goto out; 135910272SMatthew.Ahrens@Sun.COM } else { 136010272SMatthew.Ahrens@Sun.COM mutex_exit(&ds->ds_recvlock); 136110272SMatthew.Ahrens@Sun.COM dsl_dataset_rele(ds, dmu_recv_tag); 136210242Schris.kirby@sun.com (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, 136310242Schris.kirby@sun.com B_FALSE); 136410272SMatthew.Ahrens@Sun.COM return (EBUSY); 13655367Sahrens } 13665367Sahrens 13676689Smaybee resa.creation_time = drc->drc_drrb->drr_creation_time; 13686689Smaybee resa.toguid = drc->drc_drrb->drr_toguid; 13696689Smaybee resa.tosnap = drc->drc_tosnap; 13706689Smaybee 13716689Smaybee err = dsl_sync_task_do(ds->ds_dir->dd_pool, 13726689Smaybee recv_end_check, recv_end_sync, ds, &resa, 3); 13736689Smaybee if (err) { 137410272SMatthew.Ahrens@Sun.COM /* swap back */ 137510272SMatthew.Ahrens@Sun.COM (void) dsl_dataset_clone_swap(drc->drc_real_ds, ds, B_TRUE); 13765367Sahrens } 13775367Sahrens 137810272SMatthew.Ahrens@Sun.COM out: 137910272SMatthew.Ahrens@Sun.COM mutex_exit(&ds->ds_recvlock); 13806689Smaybee dsl_dataset_disown(ds, dmu_recv_tag); 138110272SMatthew.Ahrens@Sun.COM (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, B_FALSE); 13825326Sek110237 return (err); 13835326Sek110237 } 138410272SMatthew.Ahrens@Sun.COM 138510272SMatthew.Ahrens@Sun.COM static int 138610272SMatthew.Ahrens@Sun.COM dmu_recv_new_end(dmu_recv_cookie_t *drc) 138710272SMatthew.Ahrens@Sun.COM { 138810272SMatthew.Ahrens@Sun.COM struct recvendsyncarg resa; 138910272SMatthew.Ahrens@Sun.COM dsl_dataset_t *ds = drc->drc_logical_ds; 139010272SMatthew.Ahrens@Sun.COM int err; 139110272SMatthew.Ahrens@Sun.COM 139210272SMatthew.Ahrens@Sun.COM /* 139310272SMatthew.Ahrens@Sun.COM * XXX hack; seems the ds is still dirty and dsl_pool_zil_clean() 139410272SMatthew.Ahrens@Sun.COM * expects it to have a ds_user_ptr (and zil), but clone_swap() 139510272SMatthew.Ahrens@Sun.COM * can close it. 139610272SMatthew.Ahrens@Sun.COM */ 139710272SMatthew.Ahrens@Sun.COM txg_wait_synced(ds->ds_dir->dd_pool, 0); 139810272SMatthew.Ahrens@Sun.COM 139910272SMatthew.Ahrens@Sun.COM resa.creation_time = drc->drc_drrb->drr_creation_time; 140010272SMatthew.Ahrens@Sun.COM resa.toguid = drc->drc_drrb->drr_toguid; 140110272SMatthew.Ahrens@Sun.COM resa.tosnap = drc->drc_tosnap; 140210272SMatthew.Ahrens@Sun.COM 140310272SMatthew.Ahrens@Sun.COM err = dsl_sync_task_do(ds->ds_dir->dd_pool, 140410272SMatthew.Ahrens@Sun.COM recv_end_check, recv_end_sync, ds, &resa, 3); 140510272SMatthew.Ahrens@Sun.COM if (err) { 140610272SMatthew.Ahrens@Sun.COM /* clean up the fs we just recv'd into */ 140710272SMatthew.Ahrens@Sun.COM (void) dsl_dataset_destroy(ds, dmu_recv_tag, B_FALSE); 140810272SMatthew.Ahrens@Sun.COM } else { 140910272SMatthew.Ahrens@Sun.COM /* release the hold from dmu_recv_begin */ 141010272SMatthew.Ahrens@Sun.COM dsl_dataset_disown(ds, dmu_recv_tag); 141110272SMatthew.Ahrens@Sun.COM } 141210272SMatthew.Ahrens@Sun.COM return (err); 141310272SMatthew.Ahrens@Sun.COM } 141410272SMatthew.Ahrens@Sun.COM 141510272SMatthew.Ahrens@Sun.COM int 141610272SMatthew.Ahrens@Sun.COM dmu_recv_end(dmu_recv_cookie_t *drc) 141710272SMatthew.Ahrens@Sun.COM { 141810272SMatthew.Ahrens@Sun.COM if (drc->drc_logical_ds != drc->drc_real_ds) 141910272SMatthew.Ahrens@Sun.COM return (dmu_recv_existing_end(drc)); 142010272SMatthew.Ahrens@Sun.COM else 142110272SMatthew.Ahrens@Sun.COM return (dmu_recv_new_end(drc)); 142210272SMatthew.Ahrens@Sun.COM } 1423