1*eda14cbcSMatt Macy /* 2*eda14cbcSMatt Macy * CDDL HEADER START 3*eda14cbcSMatt Macy * 4*eda14cbcSMatt Macy * The contents of this file are subject to the terms of the 5*eda14cbcSMatt Macy * Common Development and Distribution License (the "License"). 6*eda14cbcSMatt Macy * You may not use this file except in compliance with the License. 7*eda14cbcSMatt Macy * 8*eda14cbcSMatt Macy * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*eda14cbcSMatt Macy * or http://www.opensolaris.org/os/licensing. 10*eda14cbcSMatt Macy * See the License for the specific language governing permissions 11*eda14cbcSMatt Macy * and limitations under the License. 12*eda14cbcSMatt Macy * 13*eda14cbcSMatt Macy * When distributing Covered Code, include this CDDL HEADER in each 14*eda14cbcSMatt Macy * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*eda14cbcSMatt Macy * If applicable, add the following below this CDDL HEADER, with the 16*eda14cbcSMatt Macy * fields enclosed by brackets "[]" replaced with your own identifying 17*eda14cbcSMatt Macy * information: Portions Copyright [yyyy] [name of copyright owner] 18*eda14cbcSMatt Macy * 19*eda14cbcSMatt Macy * CDDL HEADER END 20*eda14cbcSMatt Macy */ 21*eda14cbcSMatt Macy /* 22*eda14cbcSMatt Macy * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23*eda14cbcSMatt Macy * Copyright (c) 2012 Cyril Plisko. All rights reserved. 24*eda14cbcSMatt Macy * Copyright (c) 2013, 2017 by Delphix. All rights reserved. 
25*eda14cbcSMatt Macy */ 26*eda14cbcSMatt Macy 27*eda14cbcSMatt Macy #include <sys/types.h> 28*eda14cbcSMatt Macy #include <sys/param.h> 29*eda14cbcSMatt Macy #include <sys/sysmacros.h> 30*eda14cbcSMatt Macy #include <sys/cmn_err.h> 31*eda14cbcSMatt Macy #include <sys/kmem.h> 32*eda14cbcSMatt Macy #include <sys/thread.h> 33*eda14cbcSMatt Macy #include <sys/file.h> 34*eda14cbcSMatt Macy #include <sys/fcntl.h> 35*eda14cbcSMatt Macy #include <sys/vfs.h> 36*eda14cbcSMatt Macy #include <sys/fs/zfs.h> 37*eda14cbcSMatt Macy #include <sys/zfs_znode.h> 38*eda14cbcSMatt Macy #include <sys/zfs_dir.h> 39*eda14cbcSMatt Macy #include <sys/zfs_acl.h> 40*eda14cbcSMatt Macy #include <sys/zfs_fuid.h> 41*eda14cbcSMatt Macy #include <sys/zfs_vnops.h> 42*eda14cbcSMatt Macy #include <sys/spa.h> 43*eda14cbcSMatt Macy #include <sys/zil.h> 44*eda14cbcSMatt Macy #include <sys/byteorder.h> 45*eda14cbcSMatt Macy #include <sys/stat.h> 46*eda14cbcSMatt Macy #include <sys/acl.h> 47*eda14cbcSMatt Macy #include <sys/atomic.h> 48*eda14cbcSMatt Macy #include <sys/cred.h> 49*eda14cbcSMatt Macy #include <sys/zpl.h> 50*eda14cbcSMatt Macy 51*eda14cbcSMatt Macy /* 52*eda14cbcSMatt Macy * NB: FreeBSD expects to be able to do vnode locking in lookup and 53*eda14cbcSMatt Macy * hold the locks across all subsequent VOPs until vput is called. 54*eda14cbcSMatt Macy * This means that its zfs vnops routines can't do any internal locking. 55*eda14cbcSMatt Macy * In order to have the same contract as the Linux vnops there would 56*eda14cbcSMatt Macy * need to be duplicate locked vnops. If the vnops were used more widely 57*eda14cbcSMatt Macy * in common code this would likely be preferable. However, currently 58*eda14cbcSMatt Macy * this is the only file where this is the case. 
59*eda14cbcSMatt Macy */ 60*eda14cbcSMatt Macy 61*eda14cbcSMatt Macy /* 62*eda14cbcSMatt Macy * Functions to replay ZFS intent log (ZIL) records 63*eda14cbcSMatt Macy * The functions are called through a function vector (zfs_replay_vector) 64*eda14cbcSMatt Macy * which is indexed by the transaction type. 65*eda14cbcSMatt Macy */ 66*eda14cbcSMatt Macy 67*eda14cbcSMatt Macy static void 68*eda14cbcSMatt Macy zfs_init_vattr(vattr_t *vap, uint64_t mask, uint64_t mode, 69*eda14cbcSMatt Macy uint64_t uid, uint64_t gid, uint64_t rdev, uint64_t nodeid) 70*eda14cbcSMatt Macy { 71*eda14cbcSMatt Macy bzero(vap, sizeof (*vap)); 72*eda14cbcSMatt Macy vap->va_mask = (uint_t)mask; 73*eda14cbcSMatt Macy vap->va_mode = mode; 74*eda14cbcSMatt Macy #ifdef __FreeBSD__ 75*eda14cbcSMatt Macy vap->va_type = IFTOVT(mode); 76*eda14cbcSMatt Macy #endif 77*eda14cbcSMatt Macy vap->va_uid = (uid_t)(IS_EPHEMERAL(uid)) ? -1 : uid; 78*eda14cbcSMatt Macy vap->va_gid = (gid_t)(IS_EPHEMERAL(gid)) ? -1 : gid; 79*eda14cbcSMatt Macy vap->va_rdev = zfs_cmpldev(rdev); 80*eda14cbcSMatt Macy vap->va_nodeid = nodeid; 81*eda14cbcSMatt Macy } 82*eda14cbcSMatt Macy 83*eda14cbcSMatt Macy /* ARGSUSED */ 84*eda14cbcSMatt Macy static int 85*eda14cbcSMatt Macy zfs_replay_error(void *arg1, void *arg2, boolean_t byteswap) 86*eda14cbcSMatt Macy { 87*eda14cbcSMatt Macy return (SET_ERROR(ENOTSUP)); 88*eda14cbcSMatt Macy } 89*eda14cbcSMatt Macy 90*eda14cbcSMatt Macy static void 91*eda14cbcSMatt Macy zfs_replay_xvattr(lr_attr_t *lrattr, xvattr_t *xvap) 92*eda14cbcSMatt Macy { 93*eda14cbcSMatt Macy xoptattr_t *xoap = NULL; 94*eda14cbcSMatt Macy uint64_t *attrs; 95*eda14cbcSMatt Macy uint64_t *crtime; 96*eda14cbcSMatt Macy uint32_t *bitmap; 97*eda14cbcSMatt Macy void *scanstamp; 98*eda14cbcSMatt Macy int i; 99*eda14cbcSMatt Macy 100*eda14cbcSMatt Macy xvap->xva_vattr.va_mask |= ATTR_XVATTR; 101*eda14cbcSMatt Macy if ((xoap = xva_getxoptattr(xvap)) == NULL) { 102*eda14cbcSMatt Macy xvap->xva_vattr.va_mask &= ~ATTR_XVATTR; /* 
shouldn't happen */ 103*eda14cbcSMatt Macy return; 104*eda14cbcSMatt Macy } 105*eda14cbcSMatt Macy 106*eda14cbcSMatt Macy ASSERT(lrattr->lr_attr_masksize == xvap->xva_mapsize); 107*eda14cbcSMatt Macy 108*eda14cbcSMatt Macy bitmap = &lrattr->lr_attr_bitmap; 109*eda14cbcSMatt Macy for (i = 0; i != lrattr->lr_attr_masksize; i++, bitmap++) 110*eda14cbcSMatt Macy xvap->xva_reqattrmap[i] = *bitmap; 111*eda14cbcSMatt Macy 112*eda14cbcSMatt Macy attrs = (uint64_t *)(lrattr + lrattr->lr_attr_masksize - 1); 113*eda14cbcSMatt Macy crtime = attrs + 1; 114*eda14cbcSMatt Macy scanstamp = (caddr_t)(crtime + 2); 115*eda14cbcSMatt Macy 116*eda14cbcSMatt Macy if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) 117*eda14cbcSMatt Macy xoap->xoa_hidden = ((*attrs & XAT0_HIDDEN) != 0); 118*eda14cbcSMatt Macy if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) 119*eda14cbcSMatt Macy xoap->xoa_system = ((*attrs & XAT0_SYSTEM) != 0); 120*eda14cbcSMatt Macy if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) 121*eda14cbcSMatt Macy xoap->xoa_archive = ((*attrs & XAT0_ARCHIVE) != 0); 122*eda14cbcSMatt Macy if (XVA_ISSET_REQ(xvap, XAT_READONLY)) 123*eda14cbcSMatt Macy xoap->xoa_readonly = ((*attrs & XAT0_READONLY) != 0); 124*eda14cbcSMatt Macy if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) 125*eda14cbcSMatt Macy xoap->xoa_immutable = ((*attrs & XAT0_IMMUTABLE) != 0); 126*eda14cbcSMatt Macy if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) 127*eda14cbcSMatt Macy xoap->xoa_nounlink = ((*attrs & XAT0_NOUNLINK) != 0); 128*eda14cbcSMatt Macy if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) 129*eda14cbcSMatt Macy xoap->xoa_appendonly = ((*attrs & XAT0_APPENDONLY) != 0); 130*eda14cbcSMatt Macy if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) 131*eda14cbcSMatt Macy xoap->xoa_nodump = ((*attrs & XAT0_NODUMP) != 0); 132*eda14cbcSMatt Macy if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) 133*eda14cbcSMatt Macy xoap->xoa_opaque = ((*attrs & XAT0_OPAQUE) != 0); 134*eda14cbcSMatt Macy if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) 135*eda14cbcSMatt Macy xoap->xoa_av_modified = ((*attrs & 
XAT0_AV_MODIFIED) != 0); 136*eda14cbcSMatt Macy if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) 137*eda14cbcSMatt Macy xoap->xoa_av_quarantined = 138*eda14cbcSMatt Macy ((*attrs & XAT0_AV_QUARANTINED) != 0); 139*eda14cbcSMatt Macy if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) 140*eda14cbcSMatt Macy ZFS_TIME_DECODE(&xoap->xoa_createtime, crtime); 141*eda14cbcSMatt Macy if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) { 142*eda14cbcSMatt Macy ASSERT(!XVA_ISSET_REQ(xvap, XAT_PROJID)); 143*eda14cbcSMatt Macy 144*eda14cbcSMatt Macy bcopy(scanstamp, xoap->xoa_av_scanstamp, AV_SCANSTAMP_SZ); 145*eda14cbcSMatt Macy } else if (XVA_ISSET_REQ(xvap, XAT_PROJID)) { 146*eda14cbcSMatt Macy /* 147*eda14cbcSMatt Macy * XAT_PROJID and XAT_AV_SCANSTAMP will never be valid 148*eda14cbcSMatt Macy * at the same time, so we can share the same space. 149*eda14cbcSMatt Macy */ 150*eda14cbcSMatt Macy bcopy(scanstamp, &xoap->xoa_projid, sizeof (uint64_t)); 151*eda14cbcSMatt Macy } 152*eda14cbcSMatt Macy if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) 153*eda14cbcSMatt Macy xoap->xoa_reparse = ((*attrs & XAT0_REPARSE) != 0); 154*eda14cbcSMatt Macy if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) 155*eda14cbcSMatt Macy xoap->xoa_offline = ((*attrs & XAT0_OFFLINE) != 0); 156*eda14cbcSMatt Macy if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) 157*eda14cbcSMatt Macy xoap->xoa_sparse = ((*attrs & XAT0_SPARSE) != 0); 158*eda14cbcSMatt Macy if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) 159*eda14cbcSMatt Macy xoap->xoa_projinherit = ((*attrs & XAT0_PROJINHERIT) != 0); 160*eda14cbcSMatt Macy } 161*eda14cbcSMatt Macy 162*eda14cbcSMatt Macy static int 163*eda14cbcSMatt Macy zfs_replay_domain_cnt(uint64_t uid, uint64_t gid) 164*eda14cbcSMatt Macy { 165*eda14cbcSMatt Macy uint64_t uid_idx; 166*eda14cbcSMatt Macy uint64_t gid_idx; 167*eda14cbcSMatt Macy int domcnt = 0; 168*eda14cbcSMatt Macy 169*eda14cbcSMatt Macy uid_idx = FUID_INDEX(uid); 170*eda14cbcSMatt Macy gid_idx = FUID_INDEX(gid); 171*eda14cbcSMatt Macy if (uid_idx) 172*eda14cbcSMatt Macy 
domcnt++; 173*eda14cbcSMatt Macy if (gid_idx > 0 && gid_idx != uid_idx) 174*eda14cbcSMatt Macy domcnt++; 175*eda14cbcSMatt Macy 176*eda14cbcSMatt Macy return (domcnt); 177*eda14cbcSMatt Macy } 178*eda14cbcSMatt Macy 179*eda14cbcSMatt Macy static void * 180*eda14cbcSMatt Macy zfs_replay_fuid_domain_common(zfs_fuid_info_t *fuid_infop, void *start, 181*eda14cbcSMatt Macy int domcnt) 182*eda14cbcSMatt Macy { 183*eda14cbcSMatt Macy int i; 184*eda14cbcSMatt Macy 185*eda14cbcSMatt Macy for (i = 0; i != domcnt; i++) { 186*eda14cbcSMatt Macy fuid_infop->z_domain_table[i] = start; 187*eda14cbcSMatt Macy start = (caddr_t)start + strlen(start) + 1; 188*eda14cbcSMatt Macy } 189*eda14cbcSMatt Macy 190*eda14cbcSMatt Macy return (start); 191*eda14cbcSMatt Macy } 192*eda14cbcSMatt Macy 193*eda14cbcSMatt Macy /* 194*eda14cbcSMatt Macy * Set the uid/gid in the fuid_info structure. 195*eda14cbcSMatt Macy */ 196*eda14cbcSMatt Macy static void 197*eda14cbcSMatt Macy zfs_replay_fuid_ugid(zfs_fuid_info_t *fuid_infop, uint64_t uid, uint64_t gid) 198*eda14cbcSMatt Macy { 199*eda14cbcSMatt Macy /* 200*eda14cbcSMatt Macy * If owner or group are log specific FUIDs then slurp up 201*eda14cbcSMatt Macy * domain information and build zfs_fuid_info_t 202*eda14cbcSMatt Macy */ 203*eda14cbcSMatt Macy if (IS_EPHEMERAL(uid)) 204*eda14cbcSMatt Macy fuid_infop->z_fuid_owner = uid; 205*eda14cbcSMatt Macy 206*eda14cbcSMatt Macy if (IS_EPHEMERAL(gid)) 207*eda14cbcSMatt Macy fuid_infop->z_fuid_group = gid; 208*eda14cbcSMatt Macy } 209*eda14cbcSMatt Macy 210*eda14cbcSMatt Macy /* 211*eda14cbcSMatt Macy * Load fuid domains into fuid_info_t 212*eda14cbcSMatt Macy */ 213*eda14cbcSMatt Macy static zfs_fuid_info_t * 214*eda14cbcSMatt Macy zfs_replay_fuid_domain(void *buf, void **end, uint64_t uid, uint64_t gid) 215*eda14cbcSMatt Macy { 216*eda14cbcSMatt Macy int domcnt; 217*eda14cbcSMatt Macy 218*eda14cbcSMatt Macy zfs_fuid_info_t *fuid_infop; 219*eda14cbcSMatt Macy 220*eda14cbcSMatt Macy fuid_infop = 
zfs_fuid_info_alloc(); 221*eda14cbcSMatt Macy 222*eda14cbcSMatt Macy domcnt = zfs_replay_domain_cnt(uid, gid); 223*eda14cbcSMatt Macy 224*eda14cbcSMatt Macy if (domcnt == 0) 225*eda14cbcSMatt Macy return (fuid_infop); 226*eda14cbcSMatt Macy 227*eda14cbcSMatt Macy fuid_infop->z_domain_table = 228*eda14cbcSMatt Macy kmem_zalloc(domcnt * sizeof (char *), KM_SLEEP); 229*eda14cbcSMatt Macy 230*eda14cbcSMatt Macy zfs_replay_fuid_ugid(fuid_infop, uid, gid); 231*eda14cbcSMatt Macy 232*eda14cbcSMatt Macy fuid_infop->z_domain_cnt = domcnt; 233*eda14cbcSMatt Macy *end = zfs_replay_fuid_domain_common(fuid_infop, buf, domcnt); 234*eda14cbcSMatt Macy return (fuid_infop); 235*eda14cbcSMatt Macy } 236*eda14cbcSMatt Macy 237*eda14cbcSMatt Macy /* 238*eda14cbcSMatt Macy * load zfs_fuid_t's and fuid_domains into fuid_info_t 239*eda14cbcSMatt Macy */ 240*eda14cbcSMatt Macy static zfs_fuid_info_t * 241*eda14cbcSMatt Macy zfs_replay_fuids(void *start, void **end, int idcnt, int domcnt, uint64_t uid, 242*eda14cbcSMatt Macy uint64_t gid) 243*eda14cbcSMatt Macy { 244*eda14cbcSMatt Macy uint64_t *log_fuid = (uint64_t *)start; 245*eda14cbcSMatt Macy zfs_fuid_info_t *fuid_infop; 246*eda14cbcSMatt Macy int i; 247*eda14cbcSMatt Macy 248*eda14cbcSMatt Macy fuid_infop = zfs_fuid_info_alloc(); 249*eda14cbcSMatt Macy fuid_infop->z_domain_cnt = domcnt; 250*eda14cbcSMatt Macy 251*eda14cbcSMatt Macy fuid_infop->z_domain_table = 252*eda14cbcSMatt Macy kmem_zalloc(domcnt * sizeof (char *), KM_SLEEP); 253*eda14cbcSMatt Macy 254*eda14cbcSMatt Macy for (i = 0; i != idcnt; i++) { 255*eda14cbcSMatt Macy zfs_fuid_t *zfuid; 256*eda14cbcSMatt Macy 257*eda14cbcSMatt Macy zfuid = kmem_alloc(sizeof (zfs_fuid_t), KM_SLEEP); 258*eda14cbcSMatt Macy zfuid->z_logfuid = *log_fuid; 259*eda14cbcSMatt Macy zfuid->z_id = -1; 260*eda14cbcSMatt Macy zfuid->z_domidx = 0; 261*eda14cbcSMatt Macy list_insert_tail(&fuid_infop->z_fuids, zfuid); 262*eda14cbcSMatt Macy log_fuid++; 263*eda14cbcSMatt Macy } 264*eda14cbcSMatt Macy 
265*eda14cbcSMatt Macy zfs_replay_fuid_ugid(fuid_infop, uid, gid); 266*eda14cbcSMatt Macy 267*eda14cbcSMatt Macy *end = zfs_replay_fuid_domain_common(fuid_infop, log_fuid, domcnt); 268*eda14cbcSMatt Macy return (fuid_infop); 269*eda14cbcSMatt Macy } 270*eda14cbcSMatt Macy 271*eda14cbcSMatt Macy static void 272*eda14cbcSMatt Macy zfs_replay_swap_attrs(lr_attr_t *lrattr) 273*eda14cbcSMatt Macy { 274*eda14cbcSMatt Macy /* swap the lr_attr structure */ 275*eda14cbcSMatt Macy byteswap_uint32_array(lrattr, sizeof (*lrattr)); 276*eda14cbcSMatt Macy /* swap the bitmap */ 277*eda14cbcSMatt Macy byteswap_uint32_array(lrattr + 1, (lrattr->lr_attr_masksize - 1) * 278*eda14cbcSMatt Macy sizeof (uint32_t)); 279*eda14cbcSMatt Macy /* swap the attributes, create time + 64 bit word for attributes */ 280*eda14cbcSMatt Macy byteswap_uint64_array((caddr_t)(lrattr + 1) + (sizeof (uint32_t) * 281*eda14cbcSMatt Macy (lrattr->lr_attr_masksize - 1)), 3 * sizeof (uint64_t)); 282*eda14cbcSMatt Macy } 283*eda14cbcSMatt Macy 284*eda14cbcSMatt Macy /* 285*eda14cbcSMatt Macy * Replay file create with optional ACL, xvattr information as well 286*eda14cbcSMatt Macy * as option FUID information. 
287*eda14cbcSMatt Macy */ 288*eda14cbcSMatt Macy static int 289*eda14cbcSMatt Macy zfs_replay_create_acl(void *arg1, void *arg2, boolean_t byteswap) 290*eda14cbcSMatt Macy { 291*eda14cbcSMatt Macy zfsvfs_t *zfsvfs = arg1; 292*eda14cbcSMatt Macy lr_acl_create_t *lracl = arg2; 293*eda14cbcSMatt Macy char *name = NULL; /* location determined later */ 294*eda14cbcSMatt Macy lr_create_t *lr = (lr_create_t *)lracl; 295*eda14cbcSMatt Macy znode_t *dzp; 296*eda14cbcSMatt Macy znode_t *zp; 297*eda14cbcSMatt Macy xvattr_t xva; 298*eda14cbcSMatt Macy int vflg = 0; 299*eda14cbcSMatt Macy vsecattr_t vsec = { 0 }; 300*eda14cbcSMatt Macy lr_attr_t *lrattr; 301*eda14cbcSMatt Macy void *aclstart; 302*eda14cbcSMatt Macy void *fuidstart; 303*eda14cbcSMatt Macy size_t xvatlen = 0; 304*eda14cbcSMatt Macy uint64_t txtype; 305*eda14cbcSMatt Macy uint64_t objid; 306*eda14cbcSMatt Macy uint64_t dnodesize; 307*eda14cbcSMatt Macy int error; 308*eda14cbcSMatt Macy 309*eda14cbcSMatt Macy txtype = (lr->lr_common.lrc_txtype & ~TX_CI); 310*eda14cbcSMatt Macy if (byteswap) { 311*eda14cbcSMatt Macy byteswap_uint64_array(lracl, sizeof (*lracl)); 312*eda14cbcSMatt Macy if (txtype == TX_CREATE_ACL_ATTR || 313*eda14cbcSMatt Macy txtype == TX_MKDIR_ACL_ATTR) { 314*eda14cbcSMatt Macy lrattr = (lr_attr_t *)(caddr_t)(lracl + 1); 315*eda14cbcSMatt Macy zfs_replay_swap_attrs(lrattr); 316*eda14cbcSMatt Macy xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize); 317*eda14cbcSMatt Macy } 318*eda14cbcSMatt Macy 319*eda14cbcSMatt Macy aclstart = (caddr_t)(lracl + 1) + xvatlen; 320*eda14cbcSMatt Macy zfs_ace_byteswap(aclstart, lracl->lr_acl_bytes, B_FALSE); 321*eda14cbcSMatt Macy /* swap fuids */ 322*eda14cbcSMatt Macy if (lracl->lr_fuidcnt) { 323*eda14cbcSMatt Macy byteswap_uint64_array((caddr_t)aclstart + 324*eda14cbcSMatt Macy ZIL_ACE_LENGTH(lracl->lr_acl_bytes), 325*eda14cbcSMatt Macy lracl->lr_fuidcnt * sizeof (uint64_t)); 326*eda14cbcSMatt Macy } 327*eda14cbcSMatt Macy } 328*eda14cbcSMatt Macy 329*eda14cbcSMatt 
Macy if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0) 330*eda14cbcSMatt Macy return (error); 331*eda14cbcSMatt Macy 332*eda14cbcSMatt Macy objid = LR_FOID_GET_OBJ(lr->lr_foid); 333*eda14cbcSMatt Macy dnodesize = LR_FOID_GET_SLOTS(lr->lr_foid) << DNODE_SHIFT; 334*eda14cbcSMatt Macy 335*eda14cbcSMatt Macy xva_init(&xva); 336*eda14cbcSMatt Macy zfs_init_vattr(&xva.xva_vattr, ATTR_MODE | ATTR_UID | ATTR_GID, 337*eda14cbcSMatt Macy lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, objid); 338*eda14cbcSMatt Macy 339*eda14cbcSMatt Macy /* 340*eda14cbcSMatt Macy * All forms of zfs create (create, mkdir, mkxattrdir, symlink) 341*eda14cbcSMatt Macy * eventually end up in zfs_mknode(), which assigns the object's 342*eda14cbcSMatt Macy * creation time, generation number, and dnode size. The generic 343*eda14cbcSMatt Macy * zfs_create() has no concept of these attributes, so we smuggle 344*eda14cbcSMatt Macy * the values inside the vattr's otherwise unused va_ctime, 345*eda14cbcSMatt Macy * va_nblocks, and va_fsid fields. 
346*eda14cbcSMatt Macy */ 347*eda14cbcSMatt Macy ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_crtime); 348*eda14cbcSMatt Macy xva.xva_vattr.va_nblocks = lr->lr_gen; 349*eda14cbcSMatt Macy xva.xva_vattr.va_fsid = dnodesize; 350*eda14cbcSMatt Macy 351*eda14cbcSMatt Macy error = dnode_try_claim(zfsvfs->z_os, objid, dnodesize >> DNODE_SHIFT); 352*eda14cbcSMatt Macy if (error) 353*eda14cbcSMatt Macy goto bail; 354*eda14cbcSMatt Macy 355*eda14cbcSMatt Macy if (lr->lr_common.lrc_txtype & TX_CI) 356*eda14cbcSMatt Macy vflg |= FIGNORECASE; 357*eda14cbcSMatt Macy switch (txtype) { 358*eda14cbcSMatt Macy case TX_CREATE_ACL: 359*eda14cbcSMatt Macy aclstart = (caddr_t)(lracl + 1); 360*eda14cbcSMatt Macy fuidstart = (caddr_t)aclstart + 361*eda14cbcSMatt Macy ZIL_ACE_LENGTH(lracl->lr_acl_bytes); 362*eda14cbcSMatt Macy zfsvfs->z_fuid_replay = zfs_replay_fuids(fuidstart, 363*eda14cbcSMatt Macy (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt, 364*eda14cbcSMatt Macy lr->lr_uid, lr->lr_gid); 365*eda14cbcSMatt Macy /*FALLTHROUGH*/ 366*eda14cbcSMatt Macy case TX_CREATE_ACL_ATTR: 367*eda14cbcSMatt Macy if (name == NULL) { 368*eda14cbcSMatt Macy lrattr = (lr_attr_t *)(caddr_t)(lracl + 1); 369*eda14cbcSMatt Macy xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize); 370*eda14cbcSMatt Macy xva.xva_vattr.va_mask |= ATTR_XVATTR; 371*eda14cbcSMatt Macy zfs_replay_xvattr(lrattr, &xva); 372*eda14cbcSMatt Macy } 373*eda14cbcSMatt Macy vsec.vsa_mask = VSA_ACE | VSA_ACE_ACLFLAGS; 374*eda14cbcSMatt Macy vsec.vsa_aclentp = (caddr_t)(lracl + 1) + xvatlen; 375*eda14cbcSMatt Macy vsec.vsa_aclcnt = lracl->lr_aclcnt; 376*eda14cbcSMatt Macy vsec.vsa_aclentsz = lracl->lr_acl_bytes; 377*eda14cbcSMatt Macy vsec.vsa_aclflags = lracl->lr_acl_flags; 378*eda14cbcSMatt Macy if (zfsvfs->z_fuid_replay == NULL) { 379*eda14cbcSMatt Macy fuidstart = (caddr_t)(lracl + 1) + xvatlen + 380*eda14cbcSMatt Macy ZIL_ACE_LENGTH(lracl->lr_acl_bytes); 381*eda14cbcSMatt Macy zfsvfs->z_fuid_replay = 382*eda14cbcSMatt Macy 
zfs_replay_fuids(fuidstart, 383*eda14cbcSMatt Macy (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt, 384*eda14cbcSMatt Macy lr->lr_uid, lr->lr_gid); 385*eda14cbcSMatt Macy } 386*eda14cbcSMatt Macy 387*eda14cbcSMatt Macy error = zfs_create(dzp, name, &xva.xva_vattr, 388*eda14cbcSMatt Macy 0, 0, &zp, kcred, vflg, &vsec); 389*eda14cbcSMatt Macy break; 390*eda14cbcSMatt Macy case TX_MKDIR_ACL: 391*eda14cbcSMatt Macy aclstart = (caddr_t)(lracl + 1); 392*eda14cbcSMatt Macy fuidstart = (caddr_t)aclstart + 393*eda14cbcSMatt Macy ZIL_ACE_LENGTH(lracl->lr_acl_bytes); 394*eda14cbcSMatt Macy zfsvfs->z_fuid_replay = zfs_replay_fuids(fuidstart, 395*eda14cbcSMatt Macy (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt, 396*eda14cbcSMatt Macy lr->lr_uid, lr->lr_gid); 397*eda14cbcSMatt Macy /*FALLTHROUGH*/ 398*eda14cbcSMatt Macy case TX_MKDIR_ACL_ATTR: 399*eda14cbcSMatt Macy if (name == NULL) { 400*eda14cbcSMatt Macy lrattr = (lr_attr_t *)(caddr_t)(lracl + 1); 401*eda14cbcSMatt Macy xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize); 402*eda14cbcSMatt Macy zfs_replay_xvattr(lrattr, &xva); 403*eda14cbcSMatt Macy } 404*eda14cbcSMatt Macy vsec.vsa_mask = VSA_ACE | VSA_ACE_ACLFLAGS; 405*eda14cbcSMatt Macy vsec.vsa_aclentp = (caddr_t)(lracl + 1) + xvatlen; 406*eda14cbcSMatt Macy vsec.vsa_aclcnt = lracl->lr_aclcnt; 407*eda14cbcSMatt Macy vsec.vsa_aclentsz = lracl->lr_acl_bytes; 408*eda14cbcSMatt Macy vsec.vsa_aclflags = lracl->lr_acl_flags; 409*eda14cbcSMatt Macy if (zfsvfs->z_fuid_replay == NULL) { 410*eda14cbcSMatt Macy fuidstart = (caddr_t)(lracl + 1) + xvatlen + 411*eda14cbcSMatt Macy ZIL_ACE_LENGTH(lracl->lr_acl_bytes); 412*eda14cbcSMatt Macy zfsvfs->z_fuid_replay = 413*eda14cbcSMatt Macy zfs_replay_fuids(fuidstart, 414*eda14cbcSMatt Macy (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt, 415*eda14cbcSMatt Macy lr->lr_uid, lr->lr_gid); 416*eda14cbcSMatt Macy } 417*eda14cbcSMatt Macy error = zfs_mkdir(dzp, name, &xva.xva_vattr, 418*eda14cbcSMatt Macy &zp, kcred, vflg, &vsec); 
419*eda14cbcSMatt Macy break; 420*eda14cbcSMatt Macy default: 421*eda14cbcSMatt Macy error = SET_ERROR(ENOTSUP); 422*eda14cbcSMatt Macy } 423*eda14cbcSMatt Macy 424*eda14cbcSMatt Macy bail: 425*eda14cbcSMatt Macy if (error == 0 && zp != NULL) { 426*eda14cbcSMatt Macy #ifdef __FreeBSD__ 427*eda14cbcSMatt Macy VOP_UNLOCK1(ZTOV(zp)); 428*eda14cbcSMatt Macy #endif 429*eda14cbcSMatt Macy zrele(zp); 430*eda14cbcSMatt Macy } 431*eda14cbcSMatt Macy zrele(dzp); 432*eda14cbcSMatt Macy 433*eda14cbcSMatt Macy if (zfsvfs->z_fuid_replay) 434*eda14cbcSMatt Macy zfs_fuid_info_free(zfsvfs->z_fuid_replay); 435*eda14cbcSMatt Macy zfsvfs->z_fuid_replay = NULL; 436*eda14cbcSMatt Macy 437*eda14cbcSMatt Macy return (error); 438*eda14cbcSMatt Macy } 439*eda14cbcSMatt Macy 440*eda14cbcSMatt Macy static int 441*eda14cbcSMatt Macy zfs_replay_create(void *arg1, void *arg2, boolean_t byteswap) 442*eda14cbcSMatt Macy { 443*eda14cbcSMatt Macy zfsvfs_t *zfsvfs = arg1; 444*eda14cbcSMatt Macy lr_create_t *lr = arg2; 445*eda14cbcSMatt Macy char *name = NULL; /* location determined later */ 446*eda14cbcSMatt Macy char *link; /* symlink content follows name */ 447*eda14cbcSMatt Macy znode_t *dzp; 448*eda14cbcSMatt Macy znode_t *zp = NULL; 449*eda14cbcSMatt Macy xvattr_t xva; 450*eda14cbcSMatt Macy int vflg = 0; 451*eda14cbcSMatt Macy size_t lrsize = sizeof (lr_create_t); 452*eda14cbcSMatt Macy lr_attr_t *lrattr; 453*eda14cbcSMatt Macy void *start; 454*eda14cbcSMatt Macy size_t xvatlen; 455*eda14cbcSMatt Macy uint64_t txtype; 456*eda14cbcSMatt Macy uint64_t objid; 457*eda14cbcSMatt Macy uint64_t dnodesize; 458*eda14cbcSMatt Macy int error; 459*eda14cbcSMatt Macy 460*eda14cbcSMatt Macy txtype = (lr->lr_common.lrc_txtype & ~TX_CI); 461*eda14cbcSMatt Macy if (byteswap) { 462*eda14cbcSMatt Macy byteswap_uint64_array(lr, sizeof (*lr)); 463*eda14cbcSMatt Macy if (txtype == TX_CREATE_ATTR || txtype == TX_MKDIR_ATTR) 464*eda14cbcSMatt Macy zfs_replay_swap_attrs((lr_attr_t *)(lr + 1)); 465*eda14cbcSMatt Macy } 
466*eda14cbcSMatt Macy 467*eda14cbcSMatt Macy 468*eda14cbcSMatt Macy if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0) 469*eda14cbcSMatt Macy return (error); 470*eda14cbcSMatt Macy 471*eda14cbcSMatt Macy objid = LR_FOID_GET_OBJ(lr->lr_foid); 472*eda14cbcSMatt Macy dnodesize = LR_FOID_GET_SLOTS(lr->lr_foid) << DNODE_SHIFT; 473*eda14cbcSMatt Macy 474*eda14cbcSMatt Macy xva_init(&xva); 475*eda14cbcSMatt Macy zfs_init_vattr(&xva.xva_vattr, ATTR_MODE | ATTR_UID | ATTR_GID, 476*eda14cbcSMatt Macy lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, objid); 477*eda14cbcSMatt Macy 478*eda14cbcSMatt Macy /* 479*eda14cbcSMatt Macy * All forms of zfs create (create, mkdir, mkxattrdir, symlink) 480*eda14cbcSMatt Macy * eventually end up in zfs_mknode(), which assigns the object's 481*eda14cbcSMatt Macy * creation time, generation number, and dnode slot count. The 482*eda14cbcSMatt Macy * generic zfs_create() has no concept of these attributes, so 483*eda14cbcSMatt Macy * we smuggle the values inside the vattr's otherwise unused 484*eda14cbcSMatt Macy * va_ctime, va_nblocks, and va_fsid fields. 485*eda14cbcSMatt Macy */ 486*eda14cbcSMatt Macy ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_crtime); 487*eda14cbcSMatt Macy xva.xva_vattr.va_nblocks = lr->lr_gen; 488*eda14cbcSMatt Macy xva.xva_vattr.va_fsid = dnodesize; 489*eda14cbcSMatt Macy 490*eda14cbcSMatt Macy error = dnode_try_claim(zfsvfs->z_os, objid, dnodesize >> DNODE_SHIFT); 491*eda14cbcSMatt Macy if (error) 492*eda14cbcSMatt Macy goto out; 493*eda14cbcSMatt Macy 494*eda14cbcSMatt Macy if (lr->lr_common.lrc_txtype & TX_CI) 495*eda14cbcSMatt Macy vflg |= FIGNORECASE; 496*eda14cbcSMatt Macy 497*eda14cbcSMatt Macy /* 498*eda14cbcSMatt Macy * Symlinks don't have fuid info, and CIFS never creates 499*eda14cbcSMatt Macy * symlinks. 500*eda14cbcSMatt Macy * 501*eda14cbcSMatt Macy * The _ATTR versions will grab the fuid info in their subcases. 
502*eda14cbcSMatt Macy */ 503*eda14cbcSMatt Macy if ((int)lr->lr_common.lrc_txtype != TX_SYMLINK && 504*eda14cbcSMatt Macy (int)lr->lr_common.lrc_txtype != TX_MKDIR_ATTR && 505*eda14cbcSMatt Macy (int)lr->lr_common.lrc_txtype != TX_CREATE_ATTR) { 506*eda14cbcSMatt Macy start = (lr + 1); 507*eda14cbcSMatt Macy zfsvfs->z_fuid_replay = 508*eda14cbcSMatt Macy zfs_replay_fuid_domain(start, &start, 509*eda14cbcSMatt Macy lr->lr_uid, lr->lr_gid); 510*eda14cbcSMatt Macy } 511*eda14cbcSMatt Macy 512*eda14cbcSMatt Macy switch (txtype) { 513*eda14cbcSMatt Macy case TX_CREATE_ATTR: 514*eda14cbcSMatt Macy lrattr = (lr_attr_t *)(caddr_t)(lr + 1); 515*eda14cbcSMatt Macy xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize); 516*eda14cbcSMatt Macy zfs_replay_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), &xva); 517*eda14cbcSMatt Macy start = (caddr_t)(lr + 1) + xvatlen; 518*eda14cbcSMatt Macy zfsvfs->z_fuid_replay = 519*eda14cbcSMatt Macy zfs_replay_fuid_domain(start, &start, 520*eda14cbcSMatt Macy lr->lr_uid, lr->lr_gid); 521*eda14cbcSMatt Macy name = (char *)start; 522*eda14cbcSMatt Macy 523*eda14cbcSMatt Macy /*FALLTHROUGH*/ 524*eda14cbcSMatt Macy case TX_CREATE: 525*eda14cbcSMatt Macy if (name == NULL) 526*eda14cbcSMatt Macy name = (char *)start; 527*eda14cbcSMatt Macy 528*eda14cbcSMatt Macy error = zfs_create(dzp, name, &xva.xva_vattr, 529*eda14cbcSMatt Macy 0, 0, &zp, kcred, vflg, NULL); 530*eda14cbcSMatt Macy break; 531*eda14cbcSMatt Macy case TX_MKDIR_ATTR: 532*eda14cbcSMatt Macy lrattr = (lr_attr_t *)(caddr_t)(lr + 1); 533*eda14cbcSMatt Macy xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize); 534*eda14cbcSMatt Macy zfs_replay_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), &xva); 535*eda14cbcSMatt Macy start = (caddr_t)(lr + 1) + xvatlen; 536*eda14cbcSMatt Macy zfsvfs->z_fuid_replay = 537*eda14cbcSMatt Macy zfs_replay_fuid_domain(start, &start, 538*eda14cbcSMatt Macy lr->lr_uid, lr->lr_gid); 539*eda14cbcSMatt Macy name = (char *)start; 540*eda14cbcSMatt Macy 541*eda14cbcSMatt Macy 
/*FALLTHROUGH*/ 542*eda14cbcSMatt Macy case TX_MKDIR: 543*eda14cbcSMatt Macy if (name == NULL) 544*eda14cbcSMatt Macy name = (char *)(lr + 1); 545*eda14cbcSMatt Macy 546*eda14cbcSMatt Macy error = zfs_mkdir(dzp, name, &xva.xva_vattr, 547*eda14cbcSMatt Macy &zp, kcred, vflg, NULL); 548*eda14cbcSMatt Macy break; 549*eda14cbcSMatt Macy case TX_MKXATTR: 550*eda14cbcSMatt Macy error = zfs_make_xattrdir(dzp, &xva.xva_vattr, &zp, kcred); 551*eda14cbcSMatt Macy break; 552*eda14cbcSMatt Macy case TX_SYMLINK: 553*eda14cbcSMatt Macy name = (char *)(lr + 1); 554*eda14cbcSMatt Macy link = name + strlen(name) + 1; 555*eda14cbcSMatt Macy error = zfs_symlink(dzp, name, &xva.xva_vattr, 556*eda14cbcSMatt Macy link, &zp, kcred, vflg); 557*eda14cbcSMatt Macy break; 558*eda14cbcSMatt Macy default: 559*eda14cbcSMatt Macy error = SET_ERROR(ENOTSUP); 560*eda14cbcSMatt Macy } 561*eda14cbcSMatt Macy 562*eda14cbcSMatt Macy out: 563*eda14cbcSMatt Macy if (error == 0 && zp != NULL) { 564*eda14cbcSMatt Macy #ifdef __FreeBSD__ 565*eda14cbcSMatt Macy VOP_UNLOCK1(ZTOV(zp)); 566*eda14cbcSMatt Macy #endif 567*eda14cbcSMatt Macy zrele(zp); 568*eda14cbcSMatt Macy } 569*eda14cbcSMatt Macy zrele(dzp); 570*eda14cbcSMatt Macy 571*eda14cbcSMatt Macy if (zfsvfs->z_fuid_replay) 572*eda14cbcSMatt Macy zfs_fuid_info_free(zfsvfs->z_fuid_replay); 573*eda14cbcSMatt Macy zfsvfs->z_fuid_replay = NULL; 574*eda14cbcSMatt Macy return (error); 575*eda14cbcSMatt Macy } 576*eda14cbcSMatt Macy 577*eda14cbcSMatt Macy static int 578*eda14cbcSMatt Macy zfs_replay_remove(void *arg1, void *arg2, boolean_t byteswap) 579*eda14cbcSMatt Macy { 580*eda14cbcSMatt Macy zfsvfs_t *zfsvfs = arg1; 581*eda14cbcSMatt Macy lr_remove_t *lr = arg2; 582*eda14cbcSMatt Macy char *name = (char *)(lr + 1); /* name follows lr_remove_t */ 583*eda14cbcSMatt Macy znode_t *dzp; 584*eda14cbcSMatt Macy int error; 585*eda14cbcSMatt Macy int vflg = 0; 586*eda14cbcSMatt Macy 587*eda14cbcSMatt Macy if (byteswap) 588*eda14cbcSMatt Macy 
byteswap_uint64_array(lr, sizeof (*lr)); 589*eda14cbcSMatt Macy 590*eda14cbcSMatt Macy if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0) 591*eda14cbcSMatt Macy return (error); 592*eda14cbcSMatt Macy 593*eda14cbcSMatt Macy if (lr->lr_common.lrc_txtype & TX_CI) 594*eda14cbcSMatt Macy vflg |= FIGNORECASE; 595*eda14cbcSMatt Macy 596*eda14cbcSMatt Macy switch ((int)lr->lr_common.lrc_txtype) { 597*eda14cbcSMatt Macy case TX_REMOVE: 598*eda14cbcSMatt Macy error = zfs_remove(dzp, name, kcred, vflg); 599*eda14cbcSMatt Macy break; 600*eda14cbcSMatt Macy case TX_RMDIR: 601*eda14cbcSMatt Macy error = zfs_rmdir(dzp, name, NULL, kcred, vflg); 602*eda14cbcSMatt Macy break; 603*eda14cbcSMatt Macy default: 604*eda14cbcSMatt Macy error = SET_ERROR(ENOTSUP); 605*eda14cbcSMatt Macy } 606*eda14cbcSMatt Macy 607*eda14cbcSMatt Macy zrele(dzp); 608*eda14cbcSMatt Macy 609*eda14cbcSMatt Macy return (error); 610*eda14cbcSMatt Macy } 611*eda14cbcSMatt Macy 612*eda14cbcSMatt Macy static int 613*eda14cbcSMatt Macy zfs_replay_link(void *arg1, void *arg2, boolean_t byteswap) 614*eda14cbcSMatt Macy { 615*eda14cbcSMatt Macy zfsvfs_t *zfsvfs = arg1; 616*eda14cbcSMatt Macy lr_link_t *lr = arg2; 617*eda14cbcSMatt Macy char *name = (char *)(lr + 1); /* name follows lr_link_t */ 618*eda14cbcSMatt Macy znode_t *dzp, *zp; 619*eda14cbcSMatt Macy int error; 620*eda14cbcSMatt Macy int vflg = 0; 621*eda14cbcSMatt Macy 622*eda14cbcSMatt Macy if (byteswap) 623*eda14cbcSMatt Macy byteswap_uint64_array(lr, sizeof (*lr)); 624*eda14cbcSMatt Macy 625*eda14cbcSMatt Macy if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0) 626*eda14cbcSMatt Macy return (error); 627*eda14cbcSMatt Macy 628*eda14cbcSMatt Macy if ((error = zfs_zget(zfsvfs, lr->lr_link_obj, &zp)) != 0) { 629*eda14cbcSMatt Macy zrele(dzp); 630*eda14cbcSMatt Macy return (error); 631*eda14cbcSMatt Macy } 632*eda14cbcSMatt Macy 633*eda14cbcSMatt Macy if (lr->lr_common.lrc_txtype & TX_CI) 634*eda14cbcSMatt Macy vflg |= FIGNORECASE; 635*eda14cbcSMatt 
Macy 636*eda14cbcSMatt Macy error = zfs_link(dzp, zp, name, kcred, vflg); 637*eda14cbcSMatt Macy zrele(zp); 638*eda14cbcSMatt Macy zrele(dzp); 639*eda14cbcSMatt Macy 640*eda14cbcSMatt Macy return (error); 641*eda14cbcSMatt Macy } 642*eda14cbcSMatt Macy 643*eda14cbcSMatt Macy static int 644*eda14cbcSMatt Macy zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap) 645*eda14cbcSMatt Macy { 646*eda14cbcSMatt Macy zfsvfs_t *zfsvfs = arg1; 647*eda14cbcSMatt Macy lr_rename_t *lr = arg2; 648*eda14cbcSMatt Macy char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */ 649*eda14cbcSMatt Macy char *tname = sname + strlen(sname) + 1; 650*eda14cbcSMatt Macy znode_t *sdzp, *tdzp; 651*eda14cbcSMatt Macy int error; 652*eda14cbcSMatt Macy int vflg = 0; 653*eda14cbcSMatt Macy 654*eda14cbcSMatt Macy if (byteswap) 655*eda14cbcSMatt Macy byteswap_uint64_array(lr, sizeof (*lr)); 656*eda14cbcSMatt Macy 657*eda14cbcSMatt Macy if ((error = zfs_zget(zfsvfs, lr->lr_sdoid, &sdzp)) != 0) 658*eda14cbcSMatt Macy return (error); 659*eda14cbcSMatt Macy 660*eda14cbcSMatt Macy if ((error = zfs_zget(zfsvfs, lr->lr_tdoid, &tdzp)) != 0) { 661*eda14cbcSMatt Macy zrele(sdzp); 662*eda14cbcSMatt Macy return (error); 663*eda14cbcSMatt Macy } 664*eda14cbcSMatt Macy 665*eda14cbcSMatt Macy if (lr->lr_common.lrc_txtype & TX_CI) 666*eda14cbcSMatt Macy vflg |= FIGNORECASE; 667*eda14cbcSMatt Macy 668*eda14cbcSMatt Macy error = zfs_rename(sdzp, sname, tdzp, tname, kcred, vflg); 669*eda14cbcSMatt Macy 670*eda14cbcSMatt Macy zrele(tdzp); 671*eda14cbcSMatt Macy zrele(sdzp); 672*eda14cbcSMatt Macy return (error); 673*eda14cbcSMatt Macy } 674*eda14cbcSMatt Macy 675*eda14cbcSMatt Macy static int 676*eda14cbcSMatt Macy zfs_replay_write(void *arg1, void *arg2, boolean_t byteswap) 677*eda14cbcSMatt Macy { 678*eda14cbcSMatt Macy zfsvfs_t *zfsvfs = arg1; 679*eda14cbcSMatt Macy lr_write_t *lr = arg2; 680*eda14cbcSMatt Macy char *data = (char *)(lr + 1); /* data follows lr_write_t */ 681*eda14cbcSMatt Macy 
znode_t *zp; 682*eda14cbcSMatt Macy int error; 683*eda14cbcSMatt Macy uint64_t eod, offset, length; 684*eda14cbcSMatt Macy 685*eda14cbcSMatt Macy if (byteswap) 686*eda14cbcSMatt Macy byteswap_uint64_array(lr, sizeof (*lr)); 687*eda14cbcSMatt Macy 688*eda14cbcSMatt Macy if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) { 689*eda14cbcSMatt Macy /* 690*eda14cbcSMatt Macy * As we can log writes out of order, it's possible the 691*eda14cbcSMatt Macy * file has been removed. In this case just drop the write 692*eda14cbcSMatt Macy * and return success. 693*eda14cbcSMatt Macy */ 694*eda14cbcSMatt Macy if (error == ENOENT) 695*eda14cbcSMatt Macy error = 0; 696*eda14cbcSMatt Macy return (error); 697*eda14cbcSMatt Macy } 698*eda14cbcSMatt Macy 699*eda14cbcSMatt Macy offset = lr->lr_offset; 700*eda14cbcSMatt Macy length = lr->lr_length; 701*eda14cbcSMatt Macy eod = offset + length; /* end of data for this write */ 702*eda14cbcSMatt Macy 703*eda14cbcSMatt Macy /* 704*eda14cbcSMatt Macy * This may be a write from a dmu_sync() for a whole block, 705*eda14cbcSMatt Macy * and may extend beyond the current end of the file. 706*eda14cbcSMatt Macy * We can't just replay what was written for this TX_WRITE as 707*eda14cbcSMatt Macy * a future TX_WRITE2 may extend the eof and the data for that 708*eda14cbcSMatt Macy * write needs to be there. So we write the whole block and 709*eda14cbcSMatt Macy * reduce the eof. This needs to be done within the single dmu 710*eda14cbcSMatt Macy * transaction created within vn_rdwr -> zfs_write. 
So a possible 711*eda14cbcSMatt Macy * new end of file is passed through in zfsvfs->z_replay_eof 712*eda14cbcSMatt Macy */ 713*eda14cbcSMatt Macy 714*eda14cbcSMatt Macy zfsvfs->z_replay_eof = 0; /* 0 means don't change end of file */ 715*eda14cbcSMatt Macy 716*eda14cbcSMatt Macy /* If it's a dmu_sync() block, write the whole block */ 717*eda14cbcSMatt Macy if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { 718*eda14cbcSMatt Macy uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr); 719*eda14cbcSMatt Macy if (length < blocksize) { 720*eda14cbcSMatt Macy offset -= offset % blocksize; 721*eda14cbcSMatt Macy length = blocksize; 722*eda14cbcSMatt Macy } 723*eda14cbcSMatt Macy if (zp->z_size < eod) 724*eda14cbcSMatt Macy zfsvfs->z_replay_eof = eod; 725*eda14cbcSMatt Macy } 726*eda14cbcSMatt Macy error = zfs_write_simple(zp, data, length, offset, NULL); 727*eda14cbcSMatt Macy zrele(zp); 728*eda14cbcSMatt Macy zfsvfs->z_replay_eof = 0; /* safety */ 729*eda14cbcSMatt Macy 730*eda14cbcSMatt Macy return (error); 731*eda14cbcSMatt Macy } 732*eda14cbcSMatt Macy 733*eda14cbcSMatt Macy /* 734*eda14cbcSMatt Macy * TX_WRITE2 are only generated when dmu_sync() returns EALREADY 735*eda14cbcSMatt Macy * meaning the pool block is already being synced. So now that we always write 736*eda14cbcSMatt Macy * out full blocks, all we have to do is expand the eof if 737*eda14cbcSMatt Macy * the file is grown. 
738*eda14cbcSMatt Macy */ 739*eda14cbcSMatt Macy static int 740*eda14cbcSMatt Macy zfs_replay_write2(void *arg1, void *arg2, boolean_t byteswap) 741*eda14cbcSMatt Macy { 742*eda14cbcSMatt Macy zfsvfs_t *zfsvfs = arg1; 743*eda14cbcSMatt Macy lr_write_t *lr = arg2; 744*eda14cbcSMatt Macy znode_t *zp; 745*eda14cbcSMatt Macy int error; 746*eda14cbcSMatt Macy uint64_t end; 747*eda14cbcSMatt Macy 748*eda14cbcSMatt Macy if (byteswap) 749*eda14cbcSMatt Macy byteswap_uint64_array(lr, sizeof (*lr)); 750*eda14cbcSMatt Macy 751*eda14cbcSMatt Macy if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) 752*eda14cbcSMatt Macy return (error); 753*eda14cbcSMatt Macy 754*eda14cbcSMatt Macy top: 755*eda14cbcSMatt Macy end = lr->lr_offset + lr->lr_length; 756*eda14cbcSMatt Macy if (end > zp->z_size) { 757*eda14cbcSMatt Macy dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 758*eda14cbcSMatt Macy 759*eda14cbcSMatt Macy zp->z_size = end; 760*eda14cbcSMatt Macy dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 761*eda14cbcSMatt Macy error = dmu_tx_assign(tx, TXG_WAIT); 762*eda14cbcSMatt Macy if (error) { 763*eda14cbcSMatt Macy zrele(zp); 764*eda14cbcSMatt Macy if (error == ERESTART) { 765*eda14cbcSMatt Macy dmu_tx_wait(tx); 766*eda14cbcSMatt Macy dmu_tx_abort(tx); 767*eda14cbcSMatt Macy goto top; 768*eda14cbcSMatt Macy } 769*eda14cbcSMatt Macy dmu_tx_abort(tx); 770*eda14cbcSMatt Macy return (error); 771*eda14cbcSMatt Macy } 772*eda14cbcSMatt Macy (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 773*eda14cbcSMatt Macy (void *)&zp->z_size, sizeof (uint64_t), tx); 774*eda14cbcSMatt Macy 775*eda14cbcSMatt Macy /* Ensure the replayed seq is updated */ 776*eda14cbcSMatt Macy (void) zil_replaying(zfsvfs->z_log, tx); 777*eda14cbcSMatt Macy 778*eda14cbcSMatt Macy dmu_tx_commit(tx); 779*eda14cbcSMatt Macy } 780*eda14cbcSMatt Macy 781*eda14cbcSMatt Macy zrele(zp); 782*eda14cbcSMatt Macy 783*eda14cbcSMatt Macy return (error); 784*eda14cbcSMatt Macy } 785*eda14cbcSMatt Macy 786*eda14cbcSMatt Macy static int 
787*eda14cbcSMatt Macy zfs_replay_truncate(void *arg1, void *arg2, boolean_t byteswap) 788*eda14cbcSMatt Macy { 789*eda14cbcSMatt Macy zfsvfs_t *zfsvfs = arg1; 790*eda14cbcSMatt Macy lr_truncate_t *lr = arg2; 791*eda14cbcSMatt Macy znode_t *zp; 792*eda14cbcSMatt Macy flock64_t fl; 793*eda14cbcSMatt Macy int error; 794*eda14cbcSMatt Macy 795*eda14cbcSMatt Macy if (byteswap) 796*eda14cbcSMatt Macy byteswap_uint64_array(lr, sizeof (*lr)); 797*eda14cbcSMatt Macy 798*eda14cbcSMatt Macy if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) 799*eda14cbcSMatt Macy return (error); 800*eda14cbcSMatt Macy 801*eda14cbcSMatt Macy bzero(&fl, sizeof (fl)); 802*eda14cbcSMatt Macy fl.l_type = F_WRLCK; 803*eda14cbcSMatt Macy fl.l_whence = SEEK_SET; 804*eda14cbcSMatt Macy fl.l_start = lr->lr_offset; 805*eda14cbcSMatt Macy fl.l_len = lr->lr_length; 806*eda14cbcSMatt Macy 807*eda14cbcSMatt Macy error = zfs_space(zp, F_FREESP, &fl, O_RDWR | O_LARGEFILE, 808*eda14cbcSMatt Macy lr->lr_offset, kcred); 809*eda14cbcSMatt Macy 810*eda14cbcSMatt Macy zrele(zp); 811*eda14cbcSMatt Macy 812*eda14cbcSMatt Macy return (error); 813*eda14cbcSMatt Macy } 814*eda14cbcSMatt Macy 815*eda14cbcSMatt Macy static int 816*eda14cbcSMatt Macy zfs_replay_setattr(void *arg1, void *arg2, boolean_t byteswap) 817*eda14cbcSMatt Macy { 818*eda14cbcSMatt Macy zfsvfs_t *zfsvfs = arg1; 819*eda14cbcSMatt Macy lr_setattr_t *lr = arg2; 820*eda14cbcSMatt Macy znode_t *zp; 821*eda14cbcSMatt Macy xvattr_t xva; 822*eda14cbcSMatt Macy vattr_t *vap = &xva.xva_vattr; 823*eda14cbcSMatt Macy int error; 824*eda14cbcSMatt Macy void *start; 825*eda14cbcSMatt Macy 826*eda14cbcSMatt Macy xva_init(&xva); 827*eda14cbcSMatt Macy if (byteswap) { 828*eda14cbcSMatt Macy byteswap_uint64_array(lr, sizeof (*lr)); 829*eda14cbcSMatt Macy 830*eda14cbcSMatt Macy if ((lr->lr_mask & ATTR_XVATTR) && 831*eda14cbcSMatt Macy zfsvfs->z_version >= ZPL_VERSION_INITIAL) 832*eda14cbcSMatt Macy zfs_replay_swap_attrs((lr_attr_t *)(lr + 1)); 833*eda14cbcSMatt 
Macy } 834*eda14cbcSMatt Macy 835*eda14cbcSMatt Macy if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) 836*eda14cbcSMatt Macy return (error); 837*eda14cbcSMatt Macy 838*eda14cbcSMatt Macy zfs_init_vattr(vap, lr->lr_mask, lr->lr_mode, 839*eda14cbcSMatt Macy lr->lr_uid, lr->lr_gid, 0, lr->lr_foid); 840*eda14cbcSMatt Macy 841*eda14cbcSMatt Macy vap->va_size = lr->lr_size; 842*eda14cbcSMatt Macy ZFS_TIME_DECODE(&vap->va_atime, lr->lr_atime); 843*eda14cbcSMatt Macy ZFS_TIME_DECODE(&vap->va_mtime, lr->lr_mtime); 844*eda14cbcSMatt Macy gethrestime(&vap->va_ctime); 845*eda14cbcSMatt Macy vap->va_mask |= ATTR_CTIME; 846*eda14cbcSMatt Macy 847*eda14cbcSMatt Macy /* 848*eda14cbcSMatt Macy * Fill in xvattr_t portions if necessary. 849*eda14cbcSMatt Macy */ 850*eda14cbcSMatt Macy 851*eda14cbcSMatt Macy start = (lr_setattr_t *)(lr + 1); 852*eda14cbcSMatt Macy if (vap->va_mask & ATTR_XVATTR) { 853*eda14cbcSMatt Macy zfs_replay_xvattr((lr_attr_t *)start, &xva); 854*eda14cbcSMatt Macy start = (caddr_t)start + 855*eda14cbcSMatt Macy ZIL_XVAT_SIZE(((lr_attr_t *)start)->lr_attr_masksize); 856*eda14cbcSMatt Macy } else 857*eda14cbcSMatt Macy xva.xva_vattr.va_mask &= ~ATTR_XVATTR; 858*eda14cbcSMatt Macy 859*eda14cbcSMatt Macy zfsvfs->z_fuid_replay = zfs_replay_fuid_domain(start, &start, 860*eda14cbcSMatt Macy lr->lr_uid, lr->lr_gid); 861*eda14cbcSMatt Macy 862*eda14cbcSMatt Macy error = zfs_setattr(zp, vap, 0, kcred); 863*eda14cbcSMatt Macy 864*eda14cbcSMatt Macy zfs_fuid_info_free(zfsvfs->z_fuid_replay); 865*eda14cbcSMatt Macy zfsvfs->z_fuid_replay = NULL; 866*eda14cbcSMatt Macy zrele(zp); 867*eda14cbcSMatt Macy 868*eda14cbcSMatt Macy return (error); 869*eda14cbcSMatt Macy } 870*eda14cbcSMatt Macy 871*eda14cbcSMatt Macy static int 872*eda14cbcSMatt Macy zfs_replay_acl_v0(void *arg1, void *arg2, boolean_t byteswap) 873*eda14cbcSMatt Macy { 874*eda14cbcSMatt Macy zfsvfs_t *zfsvfs = arg1; 875*eda14cbcSMatt Macy lr_acl_v0_t *lr = arg2; 876*eda14cbcSMatt Macy ace_t *ace = (ace_t *)(lr 
+ 1); /* ace array follows lr_acl_t */ 877*eda14cbcSMatt Macy vsecattr_t vsa; 878*eda14cbcSMatt Macy znode_t *zp; 879*eda14cbcSMatt Macy int error; 880*eda14cbcSMatt Macy 881*eda14cbcSMatt Macy if (byteswap) { 882*eda14cbcSMatt Macy byteswap_uint64_array(lr, sizeof (*lr)); 883*eda14cbcSMatt Macy zfs_oldace_byteswap(ace, lr->lr_aclcnt); 884*eda14cbcSMatt Macy } 885*eda14cbcSMatt Macy 886*eda14cbcSMatt Macy if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) 887*eda14cbcSMatt Macy return (error); 888*eda14cbcSMatt Macy 889*eda14cbcSMatt Macy bzero(&vsa, sizeof (vsa)); 890*eda14cbcSMatt Macy vsa.vsa_mask = VSA_ACE | VSA_ACECNT; 891*eda14cbcSMatt Macy vsa.vsa_aclcnt = lr->lr_aclcnt; 892*eda14cbcSMatt Macy vsa.vsa_aclentsz = sizeof (ace_t) * vsa.vsa_aclcnt; 893*eda14cbcSMatt Macy vsa.vsa_aclflags = 0; 894*eda14cbcSMatt Macy vsa.vsa_aclentp = ace; 895*eda14cbcSMatt Macy 896*eda14cbcSMatt Macy error = zfs_setsecattr(zp, &vsa, 0, kcred); 897*eda14cbcSMatt Macy 898*eda14cbcSMatt Macy zrele(zp); 899*eda14cbcSMatt Macy 900*eda14cbcSMatt Macy return (error); 901*eda14cbcSMatt Macy } 902*eda14cbcSMatt Macy 903*eda14cbcSMatt Macy /* 904*eda14cbcSMatt Macy * Replaying ACLs is complicated by FUID support. 905*eda14cbcSMatt Macy * The log record may contain some optional data 906*eda14cbcSMatt Macy * to be used for replaying FUID's. These pieces 907*eda14cbcSMatt Macy * are the actual FUIDs that were created initially. 908*eda14cbcSMatt Macy * The FUID table index may no longer be valid and 909*eda14cbcSMatt Macy * during zfs_create() a new index may be assigned. 910*eda14cbcSMatt Macy * Because of this the log will contain the original 911*eda14cbcSMatt Macy * domain+rid in order to create a new FUID. 912*eda14cbcSMatt Macy * 913*eda14cbcSMatt Macy * The individual ACEs may contain an ephemeral uid/gid which is no 914*eda14cbcSMatt Macy * longer valid and will need to be replaced with an actual FUID. 
915*eda14cbcSMatt Macy * 916*eda14cbcSMatt Macy */ 917*eda14cbcSMatt Macy static int 918*eda14cbcSMatt Macy zfs_replay_acl(void *arg1, void *arg2, boolean_t byteswap) 919*eda14cbcSMatt Macy { 920*eda14cbcSMatt Macy zfsvfs_t *zfsvfs = arg1; 921*eda14cbcSMatt Macy lr_acl_t *lr = arg2; 922*eda14cbcSMatt Macy ace_t *ace = (ace_t *)(lr + 1); 923*eda14cbcSMatt Macy vsecattr_t vsa; 924*eda14cbcSMatt Macy znode_t *zp; 925*eda14cbcSMatt Macy int error; 926*eda14cbcSMatt Macy 927*eda14cbcSMatt Macy if (byteswap) { 928*eda14cbcSMatt Macy byteswap_uint64_array(lr, sizeof (*lr)); 929*eda14cbcSMatt Macy zfs_ace_byteswap(ace, lr->lr_acl_bytes, B_FALSE); 930*eda14cbcSMatt Macy if (lr->lr_fuidcnt) { 931*eda14cbcSMatt Macy byteswap_uint64_array((caddr_t)ace + 932*eda14cbcSMatt Macy ZIL_ACE_LENGTH(lr->lr_acl_bytes), 933*eda14cbcSMatt Macy lr->lr_fuidcnt * sizeof (uint64_t)); 934*eda14cbcSMatt Macy } 935*eda14cbcSMatt Macy } 936*eda14cbcSMatt Macy 937*eda14cbcSMatt Macy if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) 938*eda14cbcSMatt Macy return (error); 939*eda14cbcSMatt Macy 940*eda14cbcSMatt Macy bzero(&vsa, sizeof (vsa)); 941*eda14cbcSMatt Macy vsa.vsa_mask = VSA_ACE | VSA_ACECNT | VSA_ACE_ACLFLAGS; 942*eda14cbcSMatt Macy vsa.vsa_aclcnt = lr->lr_aclcnt; 943*eda14cbcSMatt Macy vsa.vsa_aclentp = ace; 944*eda14cbcSMatt Macy vsa.vsa_aclentsz = lr->lr_acl_bytes; 945*eda14cbcSMatt Macy vsa.vsa_aclflags = lr->lr_acl_flags; 946*eda14cbcSMatt Macy 947*eda14cbcSMatt Macy if (lr->lr_fuidcnt) { 948*eda14cbcSMatt Macy void *fuidstart = (caddr_t)ace + 949*eda14cbcSMatt Macy ZIL_ACE_LENGTH(lr->lr_acl_bytes); 950*eda14cbcSMatt Macy 951*eda14cbcSMatt Macy zfsvfs->z_fuid_replay = 952*eda14cbcSMatt Macy zfs_replay_fuids(fuidstart, &fuidstart, 953*eda14cbcSMatt Macy lr->lr_fuidcnt, lr->lr_domcnt, 0, 0); 954*eda14cbcSMatt Macy } 955*eda14cbcSMatt Macy 956*eda14cbcSMatt Macy error = zfs_setsecattr(zp, &vsa, 0, kcred); 957*eda14cbcSMatt Macy 958*eda14cbcSMatt Macy if (zfsvfs->z_fuid_replay) 
959*eda14cbcSMatt Macy zfs_fuid_info_free(zfsvfs->z_fuid_replay); 960*eda14cbcSMatt Macy 961*eda14cbcSMatt Macy zfsvfs->z_fuid_replay = NULL; 962*eda14cbcSMatt Macy zrele(zp); 963*eda14cbcSMatt Macy 964*eda14cbcSMatt Macy return (error); 965*eda14cbcSMatt Macy } 966*eda14cbcSMatt Macy 967*eda14cbcSMatt Macy /* 968*eda14cbcSMatt Macy * Callback vectors for replaying records 969*eda14cbcSMatt Macy */ 970*eda14cbcSMatt Macy zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE] = { 971*eda14cbcSMatt Macy zfs_replay_error, /* no such type */ 972*eda14cbcSMatt Macy zfs_replay_create, /* TX_CREATE */ 973*eda14cbcSMatt Macy zfs_replay_create, /* TX_MKDIR */ 974*eda14cbcSMatt Macy zfs_replay_create, /* TX_MKXATTR */ 975*eda14cbcSMatt Macy zfs_replay_create, /* TX_SYMLINK */ 976*eda14cbcSMatt Macy zfs_replay_remove, /* TX_REMOVE */ 977*eda14cbcSMatt Macy zfs_replay_remove, /* TX_RMDIR */ 978*eda14cbcSMatt Macy zfs_replay_link, /* TX_LINK */ 979*eda14cbcSMatt Macy zfs_replay_rename, /* TX_RENAME */ 980*eda14cbcSMatt Macy zfs_replay_write, /* TX_WRITE */ 981*eda14cbcSMatt Macy zfs_replay_truncate, /* TX_TRUNCATE */ 982*eda14cbcSMatt Macy zfs_replay_setattr, /* TX_SETATTR */ 983*eda14cbcSMatt Macy zfs_replay_acl_v0, /* TX_ACL_V0 */ 984*eda14cbcSMatt Macy zfs_replay_acl, /* TX_ACL */ 985*eda14cbcSMatt Macy zfs_replay_create_acl, /* TX_CREATE_ACL */ 986*eda14cbcSMatt Macy zfs_replay_create, /* TX_CREATE_ATTR */ 987*eda14cbcSMatt Macy zfs_replay_create_acl, /* TX_CREATE_ACL_ATTR */ 988*eda14cbcSMatt Macy zfs_replay_create_acl, /* TX_MKDIR_ACL */ 989*eda14cbcSMatt Macy zfs_replay_create, /* TX_MKDIR_ATTR */ 990*eda14cbcSMatt Macy zfs_replay_create_acl, /* TX_MKDIR_ACL_ATTR */ 991*eda14cbcSMatt Macy zfs_replay_write2, /* TX_WRITE2 */ 992*eda14cbcSMatt Macy }; 993