1789Sahrens /* 2789Sahrens * CDDL HEADER START 3789Sahrens * 4789Sahrens * The contents of this file are subject to the terms of the 51460Smarks * Common Development and Distribution License (the "License"). 61460Smarks * You may not use this file except in compliance with the License. 7789Sahrens * 8789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9789Sahrens * or http://www.opensolaris.org/os/licensing. 10789Sahrens * See the License for the specific language governing permissions 11789Sahrens * and limitations under the License. 12789Sahrens * 13789Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15789Sahrens * If applicable, add the following below this CDDL HEADER, with the 16789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18789Sahrens * 19789Sahrens * CDDL HEADER END 20789Sahrens */ 21789Sahrens /* 228636SMark.Maybee@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23789Sahrens * Use is subject to license terms. 24789Sahrens */ 25789Sahrens 264144Speteh /* Portions Copyright 2007 Jeremy Teo */ 274144Speteh 28789Sahrens #include <sys/types.h> 29789Sahrens #include <sys/param.h> 30789Sahrens #include <sys/time.h> 31789Sahrens #include <sys/systm.h> 32789Sahrens #include <sys/sysmacros.h> 33789Sahrens #include <sys/resource.h> 34789Sahrens #include <sys/vfs.h> 353898Srsb #include <sys/vfs_opreg.h> 36789Sahrens #include <sys/vnode.h> 37789Sahrens #include <sys/file.h> 38789Sahrens #include <sys/stat.h> 39789Sahrens #include <sys/kmem.h> 40789Sahrens #include <sys/taskq.h> 41789Sahrens #include <sys/uio.h> 42789Sahrens #include <sys/vmsystm.h> 43789Sahrens #include <sys/atomic.h> 442688Smaybee #include <sys/vm.h> 45789Sahrens #include <vm/seg_vn.h> 46789Sahrens #include <vm/pvn.h> 47789Sahrens #include <vm/as.h> 487315SJonathan.Adams@Sun.COM #include <vm/kpm.h> 497315SJonathan.Adams@Sun.COM #include <vm/seg_kpm.h> 50789Sahrens #include <sys/mman.h> 51789Sahrens #include <sys/pathname.h> 52789Sahrens #include <sys/cmn_err.h> 53789Sahrens #include <sys/errno.h> 54789Sahrens #include <sys/unistd.h> 55789Sahrens #include <sys/zfs_dir.h> 56789Sahrens #include <sys/zfs_acl.h> 57789Sahrens #include <sys/zfs_ioctl.h> 58789Sahrens #include <sys/fs/zfs.h> 59789Sahrens #include <sys/dmu.h> 60789Sahrens #include <sys/spa.h> 61789Sahrens #include <sys/txg.h> 62789Sahrens #include <sys/dbuf.h> 63789Sahrens #include <sys/zap.h> 64789Sahrens #include <sys/dirent.h> 65789Sahrens #include <sys/policy.h> 66789Sahrens #include <sys/sunddi.h> 67789Sahrens #include <sys/filio.h> 687847SMark.Shellenbaum@Sun.COM #include <sys/sid.h> 69789Sahrens #include "fs/fs_subr.h" 70789Sahrens #include <sys/zfs_ctldir.h> 715331Samw #include <sys/zfs_fuid.h> 721484Sek110237 #include <sys/dnlc.h> 731669Sperrin #include <sys/zfs_rlock.h> 745331Samw #include <sys/extdirent.h> 755331Samw #include <sys/kidmap.h> 7611134SCasper.Dik@Sun.COM #include <sys/cred.h> 775663Sck153898 #include <sys/attr.h> 78789Sahrens 79789Sahrens /* 80789Sahrens * Programming rules. 81789Sahrens * 82789Sahrens * Each vnode op performs some logical unit of work. To do this, the ZPL must 83789Sahrens * properly lock its in-core state, create a DMU transaction, do the work, 84789Sahrens * record this work in the intent log (ZIL), commit the DMU transaction, 855331Samw * and wait for the intent log to commit if it is a synchronous operation. 865331Samw * Moreover, the vnode ops must work in both normal and log replay context. 87789Sahrens * The ordering of events is important to avoid deadlocks and references 88789Sahrens * to freed memory. The example below illustrates the following Big Rules: 89789Sahrens * 90789Sahrens * (1) A check must be made in each zfs thread for a mounted file system. 915367Sahrens * This is done avoiding races using ZFS_ENTER(zfsvfs). 925367Sahrens * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes 935367Sahrens * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros 945367Sahrens * can return EIO from the calling function. 95789Sahrens * 96789Sahrens * (2) VN_RELE() should always be the last thing except for zil_commit() 972638Sperrin * (if necessary) and ZFS_EXIT(). This is for 3 reasons: 98789Sahrens * First, if it's the last reference, the vnode/znode 99789Sahrens * can be freed, so the zp may point to freed memory. Second, the last 100789Sahrens * reference will call zfs_zinactive(), which may induce a lot of work -- 1011669Sperrin * pushing cached pages (which acquires range locks) and syncing out 102789Sahrens * cached atime changes. Third, zfs_zinactive() may require a new tx, 103789Sahrens * which could deadlock the system if you were already holding one. 1049321SNeil.Perrin@Sun.COM * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC(). 105789Sahrens * 1061757Sperrin * (3) All range locks must be grabbed before calling dmu_tx_assign(), 1071757Sperrin * as they can span dmu_tx_assign() calls. 1081757Sperrin * 1098227SNeil.Perrin@Sun.COM * (4) Always pass TXG_NOWAIT as the second argument to dmu_tx_assign(). 110789Sahrens * This is critical because we don't want to block while holding locks. 111789Sahrens * Note, in particular, that if a lock is sometimes acquired before 112789Sahrens * the tx assigns, and sometimes after (e.g. z_lock), then failing to 113789Sahrens * use a non-blocking assign can deadlock the system. The scenario: 114789Sahrens * 115789Sahrens * Thread A has grabbed a lock before calling dmu_tx_assign(). 116789Sahrens * Thread B is in an already-assigned tx, and blocks for this lock. 117789Sahrens * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open() 118789Sahrens * forever, because the previous txg can't quiesce until B's tx commits. 119789Sahrens * 120789Sahrens * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT, 1212113Sahrens * then drop all locks, call dmu_tx_wait(), and try again. 122789Sahrens * 1231757Sperrin * (5) If the operation succeeded, generate the intent log entry for it 124789Sahrens * before dropping locks. This ensures that the ordering of events 125789Sahrens * in the intent log matches the order in which they actually occurred. 1268227SNeil.Perrin@Sun.COM * During ZIL replay the zfs_log_* functions will update the sequence 1278227SNeil.Perrin@Sun.COM * number to indicate the zil transaction has replayed. 128789Sahrens * 1291757Sperrin * (6) At the end of each vnode op, the DMU tx must always commit, 130789Sahrens * regardless of whether there were any errors. 131789Sahrens * 1322638Sperrin * (7) After dropping all locks, invoke zil_commit(zilog, seq, foid) 133789Sahrens * to ensure that synchronous semantics are provided when necessary. 134789Sahrens * 135789Sahrens * In general, this is how things should be ordered in each vnode op: 136789Sahrens * 137789Sahrens * ZFS_ENTER(zfsvfs); // exit if unmounted 138789Sahrens * top: 139789Sahrens * zfs_dirent_lock(&dl, ...) // lock directory entry (may VN_HOLD()) 140789Sahrens * rw_enter(...); // grab any other locks you need 141789Sahrens * tx = dmu_tx_create(...); // get DMU tx 142789Sahrens * dmu_tx_hold_*(); // hold each object you might modify 1438227SNeil.Perrin@Sun.COM * error = dmu_tx_assign(tx, TXG_NOWAIT); // try to assign 144789Sahrens * if (error) { 145789Sahrens * rw_exit(...); // drop locks 146789Sahrens * zfs_dirent_unlock(dl); // unlock directory entry 147789Sahrens * VN_RELE(...); // release held vnodes 1488227SNeil.Perrin@Sun.COM * if (error == ERESTART) { 1492113Sahrens * dmu_tx_wait(tx); 1502113Sahrens * dmu_tx_abort(tx); 151789Sahrens * goto top; 152789Sahrens * } 1532113Sahrens * dmu_tx_abort(tx); // abort DMU tx 154789Sahrens * ZFS_EXIT(zfsvfs); // finished in zfs 155789Sahrens * return (error); // really out of space 156789Sahrens * } 157789Sahrens * error = do_real_work(); // do whatever this VOP does 158789Sahrens * if (error == 0) 1592638Sperrin * zfs_log_*(...); // on success, make ZIL entry 160789Sahrens * dmu_tx_commit(tx); // commit DMU tx -- error or not 161789Sahrens * rw_exit(...); // drop locks 162789Sahrens * zfs_dirent_unlock(dl); // unlock directory entry 163789Sahrens * VN_RELE(...); // release held vnodes 1642638Sperrin * zil_commit(zilog, seq, foid); // synchronous when necessary 165789Sahrens * ZFS_EXIT(zfsvfs); // finished in zfs 166789Sahrens * return (error); // done, report error 167789Sahrens */ 1685367Sahrens 169789Sahrens /* ARGSUSED */ 170789Sahrens static int 1715331Samw zfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 172789Sahrens { 1733063Sperrin znode_t *zp = VTOZ(*vpp); 1747844SMark.Shellenbaum@Sun.COM zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1757844SMark.Shellenbaum@Sun.COM 1767844SMark.Shellenbaum@Sun.COM ZFS_ENTER(zfsvfs); 1777844SMark.Shellenbaum@Sun.COM ZFS_VERIFY_ZP(zp); 1783063Sperrin 1795331Samw if ((flag & FWRITE) && (zp->z_phys->zp_flags & ZFS_APPENDONLY) && 1805331Samw ((flag & FAPPEND) == 0)) { 1817844SMark.Shellenbaum@Sun.COM ZFS_EXIT(zfsvfs); 1825331Samw return (EPERM); 1835331Samw } 1845331Samw 1855331Samw if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 1865331Samw ZTOV(zp)->v_type == VREG && 1875331Samw !(zp->z_phys->zp_flags & ZFS_AV_QUARANTINED) && 1887844SMark.Shellenbaum@Sun.COM zp->z_phys->zp_size > 0) { 1897844SMark.Shellenbaum@Sun.COM if (fs_vscan(*vpp, cr, 0) != 0) { 1907844SMark.Shellenbaum@Sun.COM ZFS_EXIT(zfsvfs); 1915331Samw return (EACCES); 1927844SMark.Shellenbaum@Sun.COM } 1937844SMark.Shellenbaum@Sun.COM } 1945331Samw 1953063Sperrin /* Keep a count of the synchronous opens in the znode */ 1963063Sperrin if (flag & (FSYNC | FDSYNC)) 1973063Sperrin atomic_inc_32(&zp->z_sync_cnt); 1985331Samw 1997844SMark.Shellenbaum@Sun.COM ZFS_EXIT(zfsvfs); 200789Sahrens return (0); 201789Sahrens } 202789Sahrens 203789Sahrens /* ARGSUSED */ 204789Sahrens static int 2055331Samw zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 2065331Samw caller_context_t *ct) 207789Sahrens { 2083063Sperrin znode_t *zp = VTOZ(vp); 2097844SMark.Shellenbaum@Sun.COM zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2107844SMark.Shellenbaum@Sun.COM 2119909Schris.kirby@sun.com /* 2129909Schris.kirby@sun.com * Clean up any locks held by this process on the vp. 2139909Schris.kirby@sun.com */ 2149909Schris.kirby@sun.com cleanlocks(vp, ddi_get_pid(), 0); 2159909Schris.kirby@sun.com cleanshares(vp, ddi_get_pid()); 2169909Schris.kirby@sun.com 2177844SMark.Shellenbaum@Sun.COM ZFS_ENTER(zfsvfs); 2187844SMark.Shellenbaum@Sun.COM ZFS_VERIFY_ZP(zp); 2193063Sperrin 2203063Sperrin /* Decrement the synchronous opens in the znode */ 2214339Sperrin if ((flag & (FSYNC | FDSYNC)) && (count == 1)) 2223063Sperrin atomic_dec_32(&zp->z_sync_cnt); 2233063Sperrin 2245331Samw if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 2255331Samw ZTOV(zp)->v_type == VREG && 2265331Samw !(zp->z_phys->zp_flags & ZFS_AV_QUARANTINED) && 2275331Samw zp->z_phys->zp_size > 0) 2285331Samw VERIFY(fs_vscan(vp, cr, 1) == 0); 2295331Samw 2307844SMark.Shellenbaum@Sun.COM ZFS_EXIT(zfsvfs); 231789Sahrens return (0); 232789Sahrens } 233789Sahrens 234789Sahrens /* 235789Sahrens * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and 236789Sahrens * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. 237789Sahrens */ 238789Sahrens static int 239789Sahrens zfs_holey(vnode_t *vp, int cmd, offset_t *off) 240789Sahrens { 241789Sahrens znode_t *zp = VTOZ(vp); 242789Sahrens uint64_t noff = (uint64_t)*off; /* new offset */ 243789Sahrens uint64_t file_sz; 244789Sahrens int error; 245789Sahrens boolean_t hole; 246789Sahrens 247789Sahrens file_sz = zp->z_phys->zp_size; 248789Sahrens if (noff >= file_sz) { 249789Sahrens return (ENXIO); 250789Sahrens } 251789Sahrens 252789Sahrens if (cmd == _FIO_SEEK_HOLE) 253789Sahrens hole = B_TRUE; 254789Sahrens else 255789Sahrens hole = B_FALSE; 256789Sahrens 257789Sahrens error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff); 258789Sahrens 259789Sahrens /* end of file? */ 260789Sahrens if ((error == ESRCH) || (noff > file_sz)) { 261789Sahrens /* 262789Sahrens * Handle the virtual hole at the end of file. 263789Sahrens */ 264789Sahrens if (hole) { 265789Sahrens *off = file_sz; 266789Sahrens return (0); 267789Sahrens } 268789Sahrens return (ENXIO); 269789Sahrens } 270789Sahrens 271789Sahrens if (noff < *off) 272789Sahrens return (error); 273789Sahrens *off = noff; 274789Sahrens return (error); 275789Sahrens } 276789Sahrens 277789Sahrens /* ARGSUSED */ 278789Sahrens static int 279789Sahrens zfs_ioctl(vnode_t *vp, int com, intptr_t data, int flag, cred_t *cred, 2805331Samw int *rvalp, caller_context_t *ct) 281789Sahrens { 282789Sahrens offset_t off; 283789Sahrens int error; 284789Sahrens zfsvfs_t *zfsvfs; 2855326Sek110237 znode_t *zp; 286789Sahrens 287789Sahrens switch (com) { 2884339Sperrin case _FIOFFS: 289789Sahrens return (zfs_sync(vp->v_vfsp, 0, cred)); 290789Sahrens 2911544Seschrock /* 2921544Seschrock * The following two ioctls are used by bfu. Faking out, 2931544Seschrock * necessary to avoid bfu errors. 2941544Seschrock */ 2954339Sperrin case _FIOGDIO: 2964339Sperrin case _FIOSDIO: 2971544Seschrock return (0); 2981544Seschrock 2994339Sperrin case _FIO_SEEK_DATA: 3004339Sperrin case _FIO_SEEK_HOLE: 301789Sahrens if (ddi_copyin((void *)data, &off, sizeof (off), flag)) 302789Sahrens return (EFAULT); 303789Sahrens 3045326Sek110237 zp = VTOZ(vp); 3055326Sek110237 zfsvfs = zp->z_zfsvfs; 3065367Sahrens ZFS_ENTER(zfsvfs); 3075367Sahrens ZFS_VERIFY_ZP(zp); 308789Sahrens 309789Sahrens /* offset parameter is in/out */ 310789Sahrens error = zfs_holey(vp, com, &off); 311789Sahrens ZFS_EXIT(zfsvfs); 312789Sahrens if (error) 313789Sahrens return (error); 314789Sahrens if (ddi_copyout(&off, (void *)data, sizeof (off), flag)) 315789Sahrens return (EFAULT); 316789Sahrens return (0); 317789Sahrens } 318789Sahrens return (ENOTTY); 319789Sahrens } 320789Sahrens 321789Sahrens /* 3227315SJonathan.Adams@Sun.COM * Utility functions to map and unmap a single physical page. These 3237315SJonathan.Adams@Sun.COM * are used to manage the mappable copies of ZFS file data, and therefore 3247315SJonathan.Adams@Sun.COM * do not update ref/mod bits. 3257315SJonathan.Adams@Sun.COM */ 3267315SJonathan.Adams@Sun.COM caddr_t 3277315SJonathan.Adams@Sun.COM zfs_map_page(page_t *pp, enum seg_rw rw) 3287315SJonathan.Adams@Sun.COM { 3297315SJonathan.Adams@Sun.COM if (kpm_enable) 3307315SJonathan.Adams@Sun.COM return (hat_kpm_mapin(pp, 0)); 3317315SJonathan.Adams@Sun.COM ASSERT(rw == S_READ || rw == S_WRITE); 3327315SJonathan.Adams@Sun.COM return (ppmapin(pp, PROT_READ | ((rw == S_WRITE) ? PROT_WRITE : 0), 3337315SJonathan.Adams@Sun.COM (caddr_t)-1)); 3347315SJonathan.Adams@Sun.COM } 3357315SJonathan.Adams@Sun.COM 3367315SJonathan.Adams@Sun.COM void 3377315SJonathan.Adams@Sun.COM zfs_unmap_page(page_t *pp, caddr_t addr) 3387315SJonathan.Adams@Sun.COM { 3397315SJonathan.Adams@Sun.COM if (kpm_enable) { 3407315SJonathan.Adams@Sun.COM hat_kpm_mapout(pp, 0, addr); 3417315SJonathan.Adams@Sun.COM } else { 3427315SJonathan.Adams@Sun.COM ppmapout(addr); 3437315SJonathan.Adams@Sun.COM } 3447315SJonathan.Adams@Sun.COM } 3457315SJonathan.Adams@Sun.COM 3467315SJonathan.Adams@Sun.COM /* 347789Sahrens * When a file is memory mapped, we must keep the IO data synchronized 348789Sahrens * between the DMU cache and the memory mapped pages. What this means: 349789Sahrens * 350789Sahrens * On Write: If we find a memory mapped page, we write to *both* 351789Sahrens * the page and the dmu buffer. 352789Sahrens */ 3538636SMark.Maybee@Sun.COM static void 3548636SMark.Maybee@Sun.COM update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid) 355789Sahrens { 3568636SMark.Maybee@Sun.COM int64_t off; 3578636SMark.Maybee@Sun.COM 358789Sahrens off = start & PAGEOFFSET; 359789Sahrens for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 360789Sahrens page_t *pp; 3618636SMark.Maybee@Sun.COM uint64_t nbytes = MIN(PAGESIZE - off, len); 3628636SMark.Maybee@Sun.COM 363789Sahrens if (pp = page_lookup(vp, start, SE_SHARED)) { 364789Sahrens caddr_t va; 365789Sahrens 3667315SJonathan.Adams@Sun.COM va = zfs_map_page(pp, S_WRITE); 3679512SNeil.Perrin@Sun.COM (void) dmu_read(os, oid, start+off, nbytes, va+off, 3689512SNeil.Perrin@Sun.COM DMU_READ_PREFETCH); 3697315SJonathan.Adams@Sun.COM zfs_unmap_page(pp, va); 370789Sahrens page_unlock(pp); 371789Sahrens } 3728636SMark.Maybee@Sun.COM len -= nbytes; 373789Sahrens off = 0; 374789Sahrens } 375789Sahrens } 376789Sahrens 377789Sahrens /* 378789Sahrens * When a file is memory mapped, we must keep the IO data synchronized 379789Sahrens * between the DMU cache and the memory mapped pages. What this means: 380789Sahrens * 381789Sahrens * On Read: We "read" preferentially from memory mapped pages, 382789Sahrens * else we default from the dmu buffer. 383789Sahrens * 384789Sahrens * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when 385789Sahrens * the file is memory mapped. 386789Sahrens */ 387789Sahrens static int 3883638Sbillm mappedread(vnode_t *vp, int nbytes, uio_t *uio) 389789Sahrens { 3903638Sbillm znode_t *zp = VTOZ(vp); 3913638Sbillm objset_t *os = zp->z_zfsvfs->z_os; 3923638Sbillm int64_t start, off; 393789Sahrens int len = nbytes; 394789Sahrens int error = 0; 395789Sahrens 396789Sahrens start = uio->uio_loffset; 397789Sahrens off = start & PAGEOFFSET; 398789Sahrens for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 399789Sahrens page_t *pp; 4003638Sbillm uint64_t bytes = MIN(PAGESIZE - off, len); 4013638Sbillm 402789Sahrens if (pp = page_lookup(vp, start, SE_SHARED)) { 403789Sahrens caddr_t va; 404789Sahrens 4057315SJonathan.Adams@Sun.COM va = zfs_map_page(pp, S_READ); 406789Sahrens error = uiomove(va + off, bytes, UIO_READ, uio); 4077315SJonathan.Adams@Sun.COM zfs_unmap_page(pp, va); 408789Sahrens page_unlock(pp); 409789Sahrens } else { 4103638Sbillm error = dmu_read_uio(os, zp->z_id, uio, bytes); 411789Sahrens } 412789Sahrens len -= bytes; 413789Sahrens off = 0; 414789Sahrens if (error) 415789Sahrens break; 416789Sahrens } 417789Sahrens return (error); 418789Sahrens } 419789Sahrens 4203638Sbillm offset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */ 421789Sahrens 422789Sahrens /* 423789Sahrens * Read bytes from specified file into supplied buffer. 424789Sahrens * 425789Sahrens * IN: vp - vnode of file to be read from. 426789Sahrens * uio - structure supplying read location, range info, 427789Sahrens * and return buffer. 428789Sahrens * ioflag - SYNC flags; used to provide FRSYNC semantics. 429789Sahrens * cr - credentials of caller. 4305331Samw * ct - caller context 431789Sahrens * 432789Sahrens * OUT: uio - updated offset and range, buffer filled. 433789Sahrens * 434789Sahrens * RETURN: 0 if success 435789Sahrens * error code if failure 436789Sahrens * 437789Sahrens * Side Effects: 438789Sahrens * vp - atime updated if byte count > 0 439789Sahrens */ 440789Sahrens /* ARGSUSED */ 441789Sahrens static int 442789Sahrens zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 443789Sahrens { 444789Sahrens znode_t *zp = VTOZ(vp); 445789Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4465326Sek110237 objset_t *os; 4473638Sbillm ssize_t n, nbytes; 4483638Sbillm int error; 4491669Sperrin rl_t *rl; 450789Sahrens 4515367Sahrens ZFS_ENTER(zfsvfs); 4525367Sahrens ZFS_VERIFY_ZP(zp); 4535326Sek110237 os = zfsvfs->z_os; 454789Sahrens 4555929Smarks if (zp->z_phys->zp_flags & ZFS_AV_QUARANTINED) { 4565929Smarks ZFS_EXIT(zfsvfs); 4575929Smarks return (EACCES); 4585929Smarks } 4595929Smarks 460789Sahrens /* 461789Sahrens * Validate file offset 462789Sahrens */ 463789Sahrens if (uio->uio_loffset < (offset_t)0) { 464789Sahrens ZFS_EXIT(zfsvfs); 465789Sahrens return (EINVAL); 466789Sahrens } 467789Sahrens 468789Sahrens /* 469789Sahrens * Fasttrack empty reads 470789Sahrens */ 471789Sahrens if (uio->uio_resid == 0) { 472789Sahrens ZFS_EXIT(zfsvfs); 473789Sahrens return (0); 474789Sahrens } 475789Sahrens 476789Sahrens /* 4771669Sperrin * Check for mandatory locks 478789Sahrens */ 479789Sahrens if (MANDMODE((mode_t)zp->z_phys->zp_mode)) { 480789Sahrens if (error = chklock(vp, FREAD, 481789Sahrens uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) { 482789Sahrens ZFS_EXIT(zfsvfs); 483789Sahrens return (error); 484789Sahrens } 485789Sahrens } 486789Sahrens 487789Sahrens /* 488789Sahrens * If we're in FRSYNC mode, sync out this znode before reading it. 489789Sahrens */ 4902638Sperrin if (ioflag & FRSYNC) 4912638Sperrin zil_commit(zfsvfs->z_log, zp->z_last_itx, zp->z_id); 492789Sahrens 493789Sahrens /* 4941669Sperrin * Lock the range against changes. 495789Sahrens */ 4961669Sperrin rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER); 4971669Sperrin 498789Sahrens /* 499789Sahrens * If we are reading past end-of-file we can skip 500789Sahrens * to the end; but we might still need to set atime. 501789Sahrens */ 502789Sahrens if (uio->uio_loffset >= zp->z_phys->zp_size) { 503789Sahrens error = 0; 504789Sahrens goto out; 505789Sahrens } 506789Sahrens 5073638Sbillm ASSERT(uio->uio_loffset < zp->z_phys->zp_size); 5083638Sbillm n = MIN(uio->uio_resid, zp->z_phys->zp_size - uio->uio_loffset); 5093638Sbillm 5103638Sbillm while (n > 0) { 5113638Sbillm nbytes = MIN(n, zfs_read_chunk_size - 5123638Sbillm P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); 5133638Sbillm 5143638Sbillm if (vn_has_cached_data(vp)) 5153638Sbillm error = mappedread(vp, nbytes, uio); 5163638Sbillm else 5173638Sbillm error = dmu_read_uio(os, zp->z_id, uio, nbytes); 5187294Sperrin if (error) { 5197294Sperrin /* convert checksum errors into IO errors */ 5207294Sperrin if (error == ECKSUM) 5217294Sperrin error = EIO; 5223638Sbillm break; 5237294Sperrin } 5243638Sbillm 5253638Sbillm n -= nbytes; 526789Sahrens } 5273638Sbillm 528789Sahrens out: 5292237Smaybee zfs_range_unlock(rl); 530789Sahrens 531789Sahrens ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 532789Sahrens ZFS_EXIT(zfsvfs); 533789Sahrens return (error); 534789Sahrens } 535789Sahrens 536789Sahrens /* 537789Sahrens * Write the bytes to a file. 538789Sahrens * 539789Sahrens * IN: vp - vnode of file to be written to. 540789Sahrens * uio - structure supplying write location, range info, 541789Sahrens * and data buffer. 542789Sahrens * ioflag - FAPPEND flag set if in append mode. 543789Sahrens * cr - credentials of caller. 5445331Samw * ct - caller context (NFS/CIFS fem monitor only) 545789Sahrens * 546789Sahrens * OUT: uio - updated offset and range. 547789Sahrens * 548789Sahrens * RETURN: 0 if success 549789Sahrens * error code if failure 550789Sahrens * 551789Sahrens * Timestamps: 552789Sahrens * vp - ctime|mtime updated if byte count > 0 553789Sahrens */ 554789Sahrens /* ARGSUSED */ 555789Sahrens static int 556789Sahrens zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 557789Sahrens { 558789Sahrens znode_t *zp = VTOZ(vp); 559789Sahrens rlim64_t limit = uio->uio_llimit; 560789Sahrens ssize_t start_resid = uio->uio_resid; 561789Sahrens ssize_t tx_bytes; 562789Sahrens uint64_t end_size; 563789Sahrens dmu_tx_t *tx; 564789Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5655326Sek110237 zilog_t *zilog; 566789Sahrens offset_t woff; 567789Sahrens ssize_t n, nbytes; 5681669Sperrin rl_t *rl; 569789Sahrens int max_blksz = zfsvfs->z_max_blksz; 5706743Smarks uint64_t pflags; 5711669Sperrin int error; 5729412SAleksandr.Guzovskiy@Sun.COM arc_buf_t *abuf; 573789Sahrens 574789Sahrens /* 575789Sahrens * Fasttrack empty write 576789Sahrens */ 5771669Sperrin n = start_resid; 578789Sahrens if (n == 0) 579789Sahrens return (0); 580789Sahrens 5811669Sperrin if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) 5821669Sperrin limit = MAXOFFSET_T; 5831669Sperrin 5845367Sahrens ZFS_ENTER(zfsvfs); 5855367Sahrens ZFS_VERIFY_ZP(zp); 5866743Smarks 5876743Smarks /* 5886743Smarks * If immutable or not appending then return EPERM 5896743Smarks */ 5906743Smarks pflags = zp->z_phys->zp_flags; 5916743Smarks if ((pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) || 5926743Smarks ((pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && 5936743Smarks (uio->uio_loffset < zp->z_phys->zp_size))) { 5946743Smarks ZFS_EXIT(zfsvfs); 5956743Smarks return (EPERM); 5966743Smarks } 5976743Smarks 5985326Sek110237 zilog = zfsvfs->z_log; 599789Sahrens 600789Sahrens /* 60111083Swilliam.gorrell@sun.com * Validate file offset 60211083Swilliam.gorrell@sun.com */ 60311083Swilliam.gorrell@sun.com woff = ioflag & FAPPEND ? zp->z_phys->zp_size : uio->uio_loffset; 60411083Swilliam.gorrell@sun.com if (woff < 0) { 60511083Swilliam.gorrell@sun.com ZFS_EXIT(zfsvfs); 60611083Swilliam.gorrell@sun.com return (EINVAL); 60711083Swilliam.gorrell@sun.com } 60811083Swilliam.gorrell@sun.com 60911083Swilliam.gorrell@sun.com /* 61011083Swilliam.gorrell@sun.com * Check for mandatory locks before calling zfs_range_lock() 61111083Swilliam.gorrell@sun.com * in order to prevent a deadlock with locks set via fcntl(). 61211083Swilliam.gorrell@sun.com */ 61311083Swilliam.gorrell@sun.com if (MANDMODE((mode_t)zp->z_phys->zp_mode) && 61411083Swilliam.gorrell@sun.com (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) { 61511083Swilliam.gorrell@sun.com ZFS_EXIT(zfsvfs); 61611083Swilliam.gorrell@sun.com return (error); 61711083Swilliam.gorrell@sun.com } 61811083Swilliam.gorrell@sun.com 61911083Swilliam.gorrell@sun.com /* 6202237Smaybee * Pre-fault the pages to ensure slow (eg NFS) pages 6211669Sperrin * don't hold up txg. 622789Sahrens */ 6238059SDonghai.Qiao@Sun.COM uio_prefaultpages(n, uio); 624789Sahrens 625789Sahrens /* 626789Sahrens * If in append mode, set the io offset pointer to eof. 627789Sahrens */ 6281669Sperrin if (ioflag & FAPPEND) { 6291669Sperrin /* 63011083Swilliam.gorrell@sun.com * Obtain an appending range lock to guarantee file append 63111083Swilliam.gorrell@sun.com * semantics. We reset the write offset once we have the lock. 6321669Sperrin */ 6331669Sperrin rl = zfs_range_lock(zp, 0, n, RL_APPEND); 63411083Swilliam.gorrell@sun.com woff = rl->r_off; 6351669Sperrin if (rl->r_len == UINT64_MAX) { 63611083Swilliam.gorrell@sun.com /* 63711083Swilliam.gorrell@sun.com * We overlocked the file because this write will cause 63811083Swilliam.gorrell@sun.com * the file block size to increase. 63911083Swilliam.gorrell@sun.com * Note that zp_size cannot change with this lock held. 64011083Swilliam.gorrell@sun.com */ 64111083Swilliam.gorrell@sun.com woff = zp->z_phys->zp_size; 6421669Sperrin } 64311083Swilliam.gorrell@sun.com uio->uio_loffset = woff; 644789Sahrens } else { 645789Sahrens /* 64611083Swilliam.gorrell@sun.com * Note that if the file block size will change as a result of 64711083Swilliam.gorrell@sun.com * this write, then this range lock will lock the entire file 64811083Swilliam.gorrell@sun.com * so that we can re-write the block safely. 649789Sahrens */ 6501669Sperrin rl = zfs_range_lock(zp, woff, n, RL_WRITER); 651789Sahrens } 652789Sahrens 653789Sahrens if (woff >= limit) { 6543638Sbillm zfs_range_unlock(rl); 6553638Sbillm ZFS_EXIT(zfsvfs); 6563638Sbillm return (EFBIG); 657789Sahrens } 658789Sahrens 659789Sahrens if ((woff + n) > limit || woff > (limit - n)) 660789Sahrens n = limit - woff; 661789Sahrens 6621669Sperrin end_size = MAX(zp->z_phys->zp_size, woff + n); 663789Sahrens 6641669Sperrin /* 6653638Sbillm * Write the file in reasonable size chunks. Each chunk is written 6663638Sbillm * in a separate transaction; this keeps the intent log records small 6673638Sbillm * and allows us to do more fine-grained space accounting. 668789Sahrens */ 669789Sahrens while (n > 0) { 6709412SAleksandr.Guzovskiy@Sun.COM abuf = NULL; 6719412SAleksandr.Guzovskiy@Sun.COM woff = uio->uio_loffset; 6729412SAleksandr.Guzovskiy@Sun.COM 6739412SAleksandr.Guzovskiy@Sun.COM again: 6749396SMatthew.Ahrens@Sun.COM if (zfs_usergroup_overquota(zfsvfs, 6759396SMatthew.Ahrens@Sun.COM B_FALSE, zp->z_phys->zp_uid) || 6769396SMatthew.Ahrens@Sun.COM zfs_usergroup_overquota(zfsvfs, 6779396SMatthew.Ahrens@Sun.COM B_TRUE, zp->z_phys->zp_gid)) { 6789412SAleksandr.Guzovskiy@Sun.COM if (abuf != NULL) 6799412SAleksandr.Guzovskiy@Sun.COM dmu_return_arcbuf(abuf); 6809396SMatthew.Ahrens@Sun.COM error = EDQUOT; 6819396SMatthew.Ahrens@Sun.COM break; 6829396SMatthew.Ahrens@Sun.COM } 6839412SAleksandr.Guzovskiy@Sun.COM 6849412SAleksandr.Guzovskiy@Sun.COM /* 6859412SAleksandr.Guzovskiy@Sun.COM * If dmu_assign_arcbuf() is expected to execute with minimum 6869412SAleksandr.Guzovskiy@Sun.COM * overhead loan an arc buffer and copy user data to it before 6879412SAleksandr.Guzovskiy@Sun.COM * we enter a txg. This avoids holding a txg forever while we 6889412SAleksandr.Guzovskiy@Sun.COM * pagefault on a hanging NFS server mapping. 6899412SAleksandr.Guzovskiy@Sun.COM */ 6909412SAleksandr.Guzovskiy@Sun.COM if (abuf == NULL && n >= max_blksz && 6919412SAleksandr.Guzovskiy@Sun.COM woff >= zp->z_phys->zp_size && 6929412SAleksandr.Guzovskiy@Sun.COM P2PHASE(woff, max_blksz) == 0 && 6939412SAleksandr.Guzovskiy@Sun.COM zp->z_blksz == max_blksz) { 6949412SAleksandr.Guzovskiy@Sun.COM size_t cbytes; 6959412SAleksandr.Guzovskiy@Sun.COM 6969412SAleksandr.Guzovskiy@Sun.COM abuf = dmu_request_arcbuf(zp->z_dbuf, max_blksz); 6979412SAleksandr.Guzovskiy@Sun.COM ASSERT(abuf != NULL); 6989412SAleksandr.Guzovskiy@Sun.COM ASSERT(arc_buf_size(abuf) == max_blksz); 6999412SAleksandr.Guzovskiy@Sun.COM if (error = uiocopy(abuf->b_data, max_blksz, 7009412SAleksandr.Guzovskiy@Sun.COM UIO_WRITE, uio, &cbytes)) { 7019412SAleksandr.Guzovskiy@Sun.COM dmu_return_arcbuf(abuf); 7029412SAleksandr.Guzovskiy@Sun.COM break; 7039412SAleksandr.Guzovskiy@Sun.COM } 7049412SAleksandr.Guzovskiy@Sun.COM ASSERT(cbytes == max_blksz); 7059412SAleksandr.Guzovskiy@Sun.COM } 7069412SAleksandr.Guzovskiy@Sun.COM 7079412SAleksandr.Guzovskiy@Sun.COM /* 7089412SAleksandr.Guzovskiy@Sun.COM * Start a transaction. 7099412SAleksandr.Guzovskiy@Sun.COM */ 710789Sahrens tx = dmu_tx_create(zfsvfs->z_os); 711789Sahrens dmu_tx_hold_bonus(tx, zp->z_id); 712789Sahrens dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); 7138227SNeil.Perrin@Sun.COM error = dmu_tx_assign(tx, TXG_NOWAIT); 714789Sahrens if (error) { 7158227SNeil.Perrin@Sun.COM if (error == ERESTART) { 7162113Sahrens dmu_tx_wait(tx); 7172113Sahrens dmu_tx_abort(tx); 7189412SAleksandr.Guzovskiy@Sun.COM goto again; 719789Sahrens } 7202113Sahrens dmu_tx_abort(tx); 7219412SAleksandr.Guzovskiy@Sun.COM if (abuf != NULL) 7229412SAleksandr.Guzovskiy@Sun.COM dmu_return_arcbuf(abuf); 7233638Sbillm break; 7243638Sbillm } 7253638Sbillm 7263638Sbillm /* 7273638Sbillm * If zfs_range_lock() over-locked we grow the blocksize 7283638Sbillm * and then reduce the lock range. This will only happen 7293638Sbillm * on the first iteration since zfs_range_reduce() will 7303638Sbillm * shrink down r_len to the appropriate size. 7313638Sbillm */ 7323638Sbillm if (rl->r_len == UINT64_MAX) { 7333638Sbillm uint64_t new_blksz; 7343638Sbillm 7353638Sbillm if (zp->z_blksz > max_blksz) { 7363638Sbillm ASSERT(!ISP2(zp->z_blksz)); 7373638Sbillm new_blksz = MIN(end_size, SPA_MAXBLOCKSIZE); 7383638Sbillm } else { 7393638Sbillm new_blksz = MIN(end_size, max_blksz); 7403638Sbillm } 7413638Sbillm zfs_grow_blocksize(zp, new_blksz, tx); 7423638Sbillm zfs_range_reduce(rl, woff, n); 7433638Sbillm } 7443638Sbillm 7453638Sbillm /* 7463638Sbillm * XXX - should we really limit each write to z_max_blksz? 7473638Sbillm * Perhaps we should use SPA_MAXBLOCKSIZE chunks? 7483638Sbillm */ 7493638Sbillm nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); 7503638Sbillm 7519412SAleksandr.Guzovskiy@Sun.COM if (abuf == NULL) { 7529412SAleksandr.Guzovskiy@Sun.COM tx_bytes = uio->uio_resid; 7539412SAleksandr.Guzovskiy@Sun.COM error = dmu_write_uio(zfsvfs->z_os, zp->z_id, uio, 7549412SAleksandr.Guzovskiy@Sun.COM nbytes, tx); 7559412SAleksandr.Guzovskiy@Sun.COM tx_bytes -= uio->uio_resid; 7569412SAleksandr.Guzovskiy@Sun.COM } else { 7579412SAleksandr.Guzovskiy@Sun.COM tx_bytes = nbytes; 7589412SAleksandr.Guzovskiy@Sun.COM ASSERT(tx_bytes == max_blksz); 7599412SAleksandr.Guzovskiy@Sun.COM dmu_assign_arcbuf(zp->z_dbuf, woff, abuf, tx); 7609412SAleksandr.Guzovskiy@Sun.COM ASSERT(tx_bytes <= uio->uio_resid); 7619412SAleksandr.Guzovskiy@Sun.COM uioskip(uio, tx_bytes); 7629412SAleksandr.Guzovskiy@Sun.COM } 7639412SAleksandr.Guzovskiy@Sun.COM if (tx_bytes && vn_has_cached_data(vp)) { 7648636SMark.Maybee@Sun.COM update_pages(vp, woff, 7658636SMark.Maybee@Sun.COM tx_bytes, zfsvfs->z_os, zp->z_id); 7669412SAleksandr.Guzovskiy@Sun.COM } 7673638Sbillm 7683638Sbillm /* 7693638Sbillm * If we made no progress, we're done. If we made even 7703638Sbillm * partial progress, update the znode and ZIL accordingly. 7713638Sbillm */ 7723638Sbillm if (tx_bytes == 0) { 7733897Smaybee dmu_tx_commit(tx); 7743638Sbillm ASSERT(error != 0); 7753638Sbillm break; 7763638Sbillm } 7773638Sbillm 778789Sahrens /* 7793638Sbillm * Clear Set-UID/Set-GID bits on successful write if not 7803638Sbillm * privileged and at least one of the excute bits is set. 7813638Sbillm * 7823638Sbillm * It would be nice to to this after all writes have 7833638Sbillm * been done, but that would still expose the ISUID/ISGID 7843638Sbillm * to another app after the partial write is committed. 7855331Samw * 7865331Samw * Note: we don't call zfs_fuid_map_id() here because 7875331Samw * user 0 is not an ephemeral uid. 788789Sahrens */ 7893638Sbillm mutex_enter(&zp->z_acl_lock); 7903638Sbillm if ((zp->z_phys->zp_mode & (S_IXUSR | (S_IXUSR >> 3) | 7913638Sbillm (S_IXUSR >> 6))) != 0 && 7923638Sbillm (zp->z_phys->zp_mode & (S_ISUID | S_ISGID)) != 0 && 7933638Sbillm secpolicy_vnode_setid_retain(cr, 7943638Sbillm (zp->z_phys->zp_mode & S_ISUID) != 0 && 7953638Sbillm zp->z_phys->zp_uid == 0) != 0) { 7964339Sperrin zp->z_phys->zp_mode &= ~(S_ISUID | S_ISGID); 7973638Sbillm } 7983638Sbillm mutex_exit(&zp->z_acl_lock); 7993638Sbillm 8003638Sbillm /* 8013638Sbillm * Update time stamp. NOTE: This marks the bonus buffer as 8023638Sbillm * dirty, so we don't have to do it again for zp_size. 8033638Sbillm */ 8043638Sbillm zfs_time_stamper(zp, CONTENT_MODIFIED, tx); 8053638Sbillm 8063638Sbillm /* 8073638Sbillm * Update the file size (zp_size) if it has changed; 8083638Sbillm * account for possible concurrent updates. 8093638Sbillm */ 8103638Sbillm while ((end_size = zp->z_phys->zp_size) < uio->uio_loffset) 811789Sahrens (void) atomic_cas_64(&zp->z_phys->zp_size, end_size, 812789Sahrens uio->uio_loffset); 8133638Sbillm zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); 8143638Sbillm dmu_tx_commit(tx); 8153638Sbillm 8163638Sbillm if (error != 0) 8173638Sbillm break; 8183638Sbillm ASSERT(tx_bytes == nbytes); 8193638Sbillm n -= nbytes; 820789Sahrens } 821789Sahrens 8222237Smaybee zfs_range_unlock(rl); 823789Sahrens 824789Sahrens /* 825789Sahrens * If we're in replay mode, or we made no progress, return error. 826789Sahrens * Otherwise, it's at least a partial write, so it's successful. 827789Sahrens */ 8288227SNeil.Perrin@Sun.COM if (zfsvfs->z_replay || uio->uio_resid == start_resid) { 829789Sahrens ZFS_EXIT(zfsvfs); 830789Sahrens return (error); 831789Sahrens } 832789Sahrens 8332638Sperrin if (ioflag & (FSYNC | FDSYNC)) 8342638Sperrin zil_commit(zilog, zp->z_last_itx, zp->z_id); 835789Sahrens 836789Sahrens ZFS_EXIT(zfsvfs); 837789Sahrens return (0); 838789Sahrens } 839789Sahrens 8402237Smaybee void 84110922SJeff.Bonwick@Sun.COM zfs_get_done(zgd_t *zgd, int error) 8422237Smaybee { 84310922SJeff.Bonwick@Sun.COM znode_t *zp = zgd->zgd_private; 84410922SJeff.Bonwick@Sun.COM objset_t *os = zp->z_zfsvfs->z_os; 84510922SJeff.Bonwick@Sun.COM 84610922SJeff.Bonwick@Sun.COM if (zgd->zgd_db) 84710922SJeff.Bonwick@Sun.COM dmu_buf_rele(zgd->zgd_db, zgd); 84810922SJeff.Bonwick@Sun.COM 84910922SJeff.Bonwick@Sun.COM zfs_range_unlock(zgd->zgd_rl); 85010922SJeff.Bonwick@Sun.COM 8519321SNeil.Perrin@Sun.COM /* 8529321SNeil.Perrin@Sun.COM * Release the vnode asynchronously as we currently have the 8539321SNeil.Perrin@Sun.COM * txg stopped from syncing. 8549321SNeil.Perrin@Sun.COM */ 85510922SJeff.Bonwick@Sun.COM VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 85610922SJeff.Bonwick@Sun.COM 85710922SJeff.Bonwick@Sun.COM if (error == 0 && zgd->zgd_bp) 85810922SJeff.Bonwick@Sun.COM zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); 85910922SJeff.Bonwick@Sun.COM 8603063Sperrin kmem_free(zgd, sizeof (zgd_t)); 8612237Smaybee } 8622237Smaybee 86310209SMark.Musante@Sun.COM #ifdef DEBUG 86410209SMark.Musante@Sun.COM static int zil_fault_io = 0; 86510209SMark.Musante@Sun.COM #endif 86610209SMark.Musante@Sun.COM 867789Sahrens /* 868789Sahrens * Get data to generate a TX_WRITE intent log record. 869789Sahrens */ 870789Sahrens int 8712237Smaybee zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) 872789Sahrens { 873789Sahrens zfsvfs_t *zfsvfs = arg; 874789Sahrens objset_t *os = zfsvfs->z_os; 875789Sahrens znode_t *zp; 87610922SJeff.Bonwick@Sun.COM uint64_t object = lr->lr_foid; 87710922SJeff.Bonwick@Sun.COM uint64_t offset = lr->lr_offset; 87810922SJeff.Bonwick@Sun.COM uint64_t size = lr->lr_length; 87910922SJeff.Bonwick@Sun.COM blkptr_t *bp = &lr->lr_blkptr; 8802237Smaybee dmu_buf_t *db; 8813063Sperrin zgd_t *zgd; 882789Sahrens int error = 0; 883789Sahrens 88410922SJeff.Bonwick@Sun.COM ASSERT(zio != NULL); 88510922SJeff.Bonwick@Sun.COM ASSERT(size != 0); 886789Sahrens 887789Sahrens /* 8881669Sperrin * Nothing to do if the file has been removed 889789Sahrens */ 89010922SJeff.Bonwick@Sun.COM if (zfs_zget(zfsvfs, object, &zp) != 0) 891789Sahrens return (ENOENT); 8923461Sahrens if (zp->z_unlinked) { 8939321SNeil.Perrin@Sun.COM /* 8949321SNeil.Perrin@Sun.COM * Release the vnode asynchronously as we currently have the 8959321SNeil.Perrin@Sun.COM * txg stopped from syncing. 8969321SNeil.Perrin@Sun.COM */ 8979321SNeil.Perrin@Sun.COM VN_RELE_ASYNC(ZTOV(zp), 8989321SNeil.Perrin@Sun.COM dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 899789Sahrens return (ENOENT); 900789Sahrens } 901789Sahrens 90210922SJeff.Bonwick@Sun.COM zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); 90310922SJeff.Bonwick@Sun.COM zgd->zgd_zilog = zfsvfs->z_log; 90410922SJeff.Bonwick@Sun.COM zgd->zgd_private = zp; 90510922SJeff.Bonwick@Sun.COM 906789Sahrens /* 907789Sahrens * Write records come in two flavors: immediate and indirect. 908789Sahrens * For small writes it's cheaper to store the data with the 909789Sahrens * log record (immediate); for large writes it's cheaper to 910789Sahrens * sync the data and get a pointer to it (indirect) so that 911789Sahrens * we don't have to write the data twice. 912789Sahrens */ 9131669Sperrin if (buf != NULL) { /* immediate write */ 91410922SJeff.Bonwick@Sun.COM zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER); 9151669Sperrin /* test for truncation needs to be done while range locked */ 91610922SJeff.Bonwick@Sun.COM if (offset >= zp->z_phys->zp_size) { 9171669Sperrin error = ENOENT; 91810922SJeff.Bonwick@Sun.COM } else { 91910922SJeff.Bonwick@Sun.COM error = dmu_read(os, object, offset, size, buf, 92010922SJeff.Bonwick@Sun.COM DMU_READ_NO_PREFETCH); 9211669Sperrin } 92210922SJeff.Bonwick@Sun.COM ASSERT(error == 0 || error == ENOENT); 9231669Sperrin } else { /* indirect write */ 924789Sahrens /* 9251669Sperrin * Have to lock the whole block to ensure when it's 9261669Sperrin * written out and it's checksum is being calculated 9271669Sperrin * that no one can change the data. We need to re-check 9281669Sperrin * blocksize after we get the lock in case it's changed! 929789Sahrens */ 9301669Sperrin for (;;) { 93110922SJeff.Bonwick@Sun.COM uint64_t blkoff; 93210922SJeff.Bonwick@Sun.COM size = zp->z_blksz; 93310945SJeff.Bonwick@Sun.COM blkoff = ISP2(size) ? P2PHASE(offset, size) : offset; 93410922SJeff.Bonwick@Sun.COM offset -= blkoff; 93510922SJeff.Bonwick@Sun.COM zgd->zgd_rl = zfs_range_lock(zp, offset, size, 93610922SJeff.Bonwick@Sun.COM RL_READER); 93710922SJeff.Bonwick@Sun.COM if (zp->z_blksz == size) 9381669Sperrin break; 93910922SJeff.Bonwick@Sun.COM offset += blkoff; 94010922SJeff.Bonwick@Sun.COM zfs_range_unlock(zgd->zgd_rl); 9411669Sperrin } 9421669Sperrin /* test for truncation needs to be done while range locked */ 94310945SJeff.Bonwick@Sun.COM if (lr->lr_offset >= zp->z_phys->zp_size) 9441669Sperrin error = ENOENT; 94510209SMark.Musante@Sun.COM #ifdef DEBUG 94610209SMark.Musante@Sun.COM if (zil_fault_io) { 94710209SMark.Musante@Sun.COM error = EIO; 94810209SMark.Musante@Sun.COM zil_fault_io = 0; 94910209SMark.Musante@Sun.COM } 95010209SMark.Musante@Sun.COM #endif 95110922SJeff.Bonwick@Sun.COM if (error == 0) 95210922SJeff.Bonwick@Sun.COM error = dmu_buf_hold(os, object, offset, zgd, &db); 95310922SJeff.Bonwick@Sun.COM 95410800SNeil.Perrin@Sun.COM if (error == 0) { 95510922SJeff.Bonwick@Sun.COM zgd->zgd_db = db; 95610922SJeff.Bonwick@Sun.COM zgd->zgd_bp = bp; 95710922SJeff.Bonwick@Sun.COM 95810922SJeff.Bonwick@Sun.COM ASSERT(db->db_offset == offset); 95910922SJeff.Bonwick@Sun.COM ASSERT(db->db_size == size); 96010922SJeff.Bonwick@Sun.COM 96110922SJeff.Bonwick@Sun.COM error = dmu_sync(zio, lr->lr_common.lrc_txg, 96210922SJeff.Bonwick@Sun.COM zfs_get_done, zgd); 96310922SJeff.Bonwick@Sun.COM ASSERT(error || lr->lr_length <= zp->z_blksz); 96410922SJeff.Bonwick@Sun.COM 96510800SNeil.Perrin@Sun.COM /* 96610922SJeff.Bonwick@Sun.COM * On success, we need to wait for the write I/O 96710922SJeff.Bonwick@Sun.COM * initiated by dmu_sync() to complete before we can 96810922SJeff.Bonwick@Sun.COM * release this dbuf. We will finish everything up 96910922SJeff.Bonwick@Sun.COM * in the zfs_get_done() callback. 97010800SNeil.Perrin@Sun.COM */ 97110922SJeff.Bonwick@Sun.COM if (error == 0) 97210922SJeff.Bonwick@Sun.COM return (0); 97310922SJeff.Bonwick@Sun.COM 97410922SJeff.Bonwick@Sun.COM if (error == EALREADY) { 97510922SJeff.Bonwick@Sun.COM lr->lr_common.lrc_txtype = TX_WRITE2; 97610922SJeff.Bonwick@Sun.COM error = 0; 97710922SJeff.Bonwick@Sun.COM } 97810800SNeil.Perrin@Sun.COM } 979789Sahrens } 98010922SJeff.Bonwick@Sun.COM 98110922SJeff.Bonwick@Sun.COM zfs_get_done(zgd, error); 98210922SJeff.Bonwick@Sun.COM 983789Sahrens return (error); 984789Sahrens } 985789Sahrens 986789Sahrens /*ARGSUSED*/ 987789Sahrens static int 9885331Samw zfs_access(vnode_t *vp, int mode, int flag, cred_t *cr, 9895331Samw caller_context_t *ct) 990789Sahrens { 991789Sahrens znode_t *zp = VTOZ(vp); 992789Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 993789Sahrens int error; 994789Sahrens 9955367Sahrens ZFS_ENTER(zfsvfs); 9965367Sahrens ZFS_VERIFY_ZP(zp); 9975331Samw 9985331Samw if (flag & V_ACE_MASK) 9995331Samw error = zfs_zaccess(zp, mode, flag, B_FALSE, cr); 10005331Samw else 10015331Samw error = zfs_zaccess_rwx(zp, mode, flag, cr); 10025331Samw 1003789Sahrens ZFS_EXIT(zfsvfs); 1004789Sahrens return (error); 1005789Sahrens } 1006789Sahrens 1007789Sahrens /* 10089981STim.Haley@Sun.COM * If vnode is for a device return a specfs vnode instead. 10099981STim.Haley@Sun.COM */ 10109981STim.Haley@Sun.COM static int 10119981STim.Haley@Sun.COM specvp_check(vnode_t **vpp, cred_t *cr) 10129981STim.Haley@Sun.COM { 10139981STim.Haley@Sun.COM int error = 0; 10149981STim.Haley@Sun.COM 10159981STim.Haley@Sun.COM if (IS_DEVVP(*vpp)) { 10169981STim.Haley@Sun.COM struct vnode *svp; 10179981STim.Haley@Sun.COM 10189981STim.Haley@Sun.COM svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); 10199981STim.Haley@Sun.COM VN_RELE(*vpp); 10209981STim.Haley@Sun.COM if (svp == NULL) 10219981STim.Haley@Sun.COM error = ENOSYS; 10229981STim.Haley@Sun.COM *vpp = svp; 10239981STim.Haley@Sun.COM } 10249981STim.Haley@Sun.COM return (error); 10259981STim.Haley@Sun.COM } 10269981STim.Haley@Sun.COM 10279981STim.Haley@Sun.COM 10289981STim.Haley@Sun.COM /* 1029789Sahrens * Lookup an entry in a directory, or an extended attribute directory. 1030789Sahrens * If it exists, return a held vnode reference for it. 1031789Sahrens * 1032789Sahrens * IN: dvp - vnode of directory to search. 1033789Sahrens * nm - name of entry to lookup. 1034789Sahrens * pnp - full pathname to lookup [UNUSED]. 1035789Sahrens * flags - LOOKUP_XATTR set if looking for an attribute. 1036789Sahrens * rdir - root directory vnode [UNUSED]. 1037789Sahrens * cr - credentials of caller. 10385331Samw * ct - caller context 10395331Samw * direntflags - directory lookup flags 10405331Samw * realpnp - returned pathname. 1041789Sahrens * 1042789Sahrens * OUT: vpp - vnode of located entry, NULL if not found. 1043789Sahrens * 1044789Sahrens * RETURN: 0 if success 1045789Sahrens * error code if failure 1046789Sahrens * 1047789Sahrens * Timestamps: 1048789Sahrens * NA 1049789Sahrens */ 1050789Sahrens /* ARGSUSED */ 1051789Sahrens static int 1052789Sahrens zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp, 10535331Samw int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct, 10545331Samw int *direntflags, pathname_t *realpnp) 1055789Sahrens { 1056789Sahrens znode_t *zdp = VTOZ(dvp); 1057789Sahrens zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 10589981STim.Haley@Sun.COM int error = 0; 10599981STim.Haley@Sun.COM 10609981STim.Haley@Sun.COM /* fast path */ 10619981STim.Haley@Sun.COM if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) { 10629981STim.Haley@Sun.COM 10639981STim.Haley@Sun.COM if (dvp->v_type != VDIR) { 10649981STim.Haley@Sun.COM return (ENOTDIR); 10659981STim.Haley@Sun.COM } else if (zdp->z_dbuf == NULL) { 10669981STim.Haley@Sun.COM return (EIO); 10679981STim.Haley@Sun.COM } 10689981STim.Haley@Sun.COM 10699981STim.Haley@Sun.COM if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) { 10709981STim.Haley@Sun.COM error = zfs_fastaccesschk_execute(zdp, cr); 10719981STim.Haley@Sun.COM if (!error) { 10729981STim.Haley@Sun.COM *vpp = dvp; 10739981STim.Haley@Sun.COM VN_HOLD(*vpp); 10749981STim.Haley@Sun.COM return (0); 10759981STim.Haley@Sun.COM } 10769981STim.Haley@Sun.COM return (error); 10779981STim.Haley@Sun.COM } else { 10789981STim.Haley@Sun.COM vnode_t *tvp = dnlc_lookup(dvp, nm); 10799981STim.Haley@Sun.COM 10809981STim.Haley@Sun.COM if (tvp) { 10819981STim.Haley@Sun.COM error = zfs_fastaccesschk_execute(zdp, cr); 10829981STim.Haley@Sun.COM if (error) { 10839981STim.Haley@Sun.COM VN_RELE(tvp); 10849981STim.Haley@Sun.COM return (error); 10859981STim.Haley@Sun.COM } 10869981STim.Haley@Sun.COM if (tvp == DNLC_NO_VNODE) { 10879981STim.Haley@Sun.COM VN_RELE(tvp); 10889981STim.Haley@Sun.COM return (ENOENT); 10899981STim.Haley@Sun.COM } else { 10909981STim.Haley@Sun.COM *vpp = tvp; 10919981STim.Haley@Sun.COM return (specvp_check(vpp, cr)); 10929981STim.Haley@Sun.COM } 10939981STim.Haley@Sun.COM } 10949981STim.Haley@Sun.COM } 10959981STim.Haley@Sun.COM } 10969981STim.Haley@Sun.COM 10979981STim.Haley@Sun.COM DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm); 1098789Sahrens 10995367Sahrens ZFS_ENTER(zfsvfs); 11005367Sahrens ZFS_VERIFY_ZP(zdp); 1101789Sahrens 1102789Sahrens *vpp = NULL; 1103789Sahrens 1104789Sahrens if (flags & LOOKUP_XATTR) { 1105789Sahrens /* 11063234Sck153898 * If the xattr property is off, refuse the lookup request. 11073234Sck153898 */ 11083234Sck153898 if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { 11093234Sck153898 ZFS_EXIT(zfsvfs); 11103234Sck153898 return (EINVAL); 11113234Sck153898 } 11123234Sck153898 11133234Sck153898 /* 1114789Sahrens * We don't allow recursive attributes.. 1115789Sahrens * Maybe someday we will. 1116789Sahrens */ 1117789Sahrens if (zdp->z_phys->zp_flags & ZFS_XATTR) { 1118789Sahrens ZFS_EXIT(zfsvfs); 1119789Sahrens return (EINVAL); 1120789Sahrens } 1121789Sahrens 11223280Sck153898 if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { 1123789Sahrens ZFS_EXIT(zfsvfs); 1124789Sahrens return (error); 1125789Sahrens } 1126789Sahrens 1127789Sahrens /* 1128789Sahrens * Do we have permission to get into attribute directory? 1129789Sahrens */ 1130789Sahrens 11315331Samw if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0, 11325331Samw B_FALSE, cr)) { 1133789Sahrens VN_RELE(*vpp); 11345331Samw *vpp = NULL; 1135789Sahrens } 1136789Sahrens 1137789Sahrens ZFS_EXIT(zfsvfs); 1138789Sahrens return (error); 1139789Sahrens } 1140789Sahrens 11411512Sek110237 if (dvp->v_type != VDIR) { 11421512Sek110237 ZFS_EXIT(zfsvfs); 11431460Smarks return (ENOTDIR); 11441512Sek110237 } 11451460Smarks 1146789Sahrens /* 1147789Sahrens * Check accessibility of directory. 1148789Sahrens */ 1149789Sahrens 11505331Samw if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) { 1151789Sahrens ZFS_EXIT(zfsvfs); 1152789Sahrens return (error); 1153789Sahrens } 1154789Sahrens 11555498Stimh if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), 11565331Samw NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 11575331Samw ZFS_EXIT(zfsvfs); 11585331Samw return (EILSEQ); 11595331Samw } 11605331Samw 11615331Samw error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp); 11629981STim.Haley@Sun.COM if (error == 0) 11639981STim.Haley@Sun.COM error = specvp_check(vpp, cr); 1164789Sahrens 1165789Sahrens ZFS_EXIT(zfsvfs); 1166789Sahrens return (error); 1167789Sahrens } 1168789Sahrens 1169789Sahrens /* 1170789Sahrens * Attempt to create a new entry in a directory. If the entry 1171789Sahrens * already exists, truncate the file if permissible, else return 1172789Sahrens * an error. Return the vp of the created or trunc'd file. 1173789Sahrens * 1174789Sahrens * IN: dvp - vnode of directory to put new file entry in. 1175789Sahrens * name - name of new file entry. 1176789Sahrens * vap - attributes of new file. 1177789Sahrens * excl - flag indicating exclusive or non-exclusive mode. 1178789Sahrens * mode - mode to open file with. 1179789Sahrens * cr - credentials of caller. 1180789Sahrens * flag - large file flag [UNUSED]. 11815331Samw * ct - caller context 11825331Samw * vsecp - ACL to be set 1183789Sahrens * 1184789Sahrens * OUT: vpp - vnode of created or trunc'd entry. 1185789Sahrens * 1186789Sahrens * RETURN: 0 if success 1187789Sahrens * error code if failure 1188789Sahrens * 1189789Sahrens * Timestamps: 1190789Sahrens * dvp - ctime|mtime updated if new entry created 1191789Sahrens * vp - ctime|mtime always, atime if new 1192789Sahrens */ 11935331Samw 1194789Sahrens /* ARGSUSED */ 1195789Sahrens static int 1196789Sahrens zfs_create(vnode_t *dvp, char *name, vattr_t *vap, vcexcl_t excl, 11975331Samw int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct, 11985331Samw vsecattr_t *vsecp) 1199789Sahrens { 1200789Sahrens znode_t *zp, *dzp = VTOZ(dvp); 1201789Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 12025326Sek110237 zilog_t *zilog; 12035326Sek110237 objset_t *os; 1204789Sahrens zfs_dirlock_t *dl; 1205789Sahrens dmu_tx_t *tx; 1206789Sahrens int error; 12077847SMark.Shellenbaum@Sun.COM ksid_t *ksid; 12087847SMark.Shellenbaum@Sun.COM uid_t uid; 12097847SMark.Shellenbaum@Sun.COM gid_t gid = crgetgid(cr); 12109179SMark.Shellenbaum@Sun.COM zfs_acl_ids_t acl_ids; 12119179SMark.Shellenbaum@Sun.COM boolean_t fuid_dirtied; 12125331Samw 12135331Samw /* 12145331Samw * If we have an ephemeral id, ACL, or XVATTR then 12155331Samw * make sure file system is at proper version 12165331Samw */ 12175331Samw 12187847SMark.Shellenbaum@Sun.COM ksid = crgetsid(cr, KSID_OWNER); 12197847SMark.Shellenbaum@Sun.COM if (ksid) 12207847SMark.Shellenbaum@Sun.COM uid = ksid_getid(ksid); 12217847SMark.Shellenbaum@Sun.COM else 12227847SMark.Shellenbaum@Sun.COM uid = crgetuid(cr); 12237847SMark.Shellenbaum@Sun.COM 12245331Samw if (zfsvfs->z_use_fuids == B_FALSE && 12255331Samw (vsecp || (vap->va_mask & AT_XVATTR) || 12267847SMark.Shellenbaum@Sun.COM IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 12275331Samw return (EINVAL); 1228789Sahrens 12295367Sahrens ZFS_ENTER(zfsvfs); 12305367Sahrens ZFS_VERIFY_ZP(dzp); 12315326Sek110237 os = zfsvfs->z_os; 12325326Sek110237 zilog = zfsvfs->z_log; 1233789Sahrens 12345498Stimh if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 12355331Samw NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 12365331Samw ZFS_EXIT(zfsvfs); 12375331Samw return (EILSEQ); 12385331Samw } 12395331Samw 12405331Samw if (vap->va_mask & AT_XVATTR) { 12415331Samw if ((error = secpolicy_xvattr((xvattr_t *)vap, 12425331Samw crgetuid(cr), cr, vap->va_type)) != 0) { 12435331Samw ZFS_EXIT(zfsvfs); 12445331Samw return (error); 12455331Samw } 12465331Samw } 1247789Sahrens top: 1248789Sahrens *vpp = NULL; 1249789Sahrens 1250789Sahrens if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr)) 1251789Sahrens vap->va_mode &= ~VSVTX; 1252789Sahrens 1253789Sahrens if (*name == '\0') { 1254789Sahrens /* 1255789Sahrens * Null component name refers to the directory itself. 1256789Sahrens */ 1257789Sahrens VN_HOLD(dvp); 1258789Sahrens zp = dzp; 1259789Sahrens dl = NULL; 1260789Sahrens error = 0; 1261789Sahrens } else { 1262789Sahrens /* possible VN_HOLD(zp) */ 12635331Samw int zflg = 0; 12645331Samw 12655331Samw if (flag & FIGNORECASE) 12665331Samw zflg |= ZCILOOK; 12675331Samw 12685331Samw error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 12695331Samw NULL, NULL); 12705331Samw if (error) { 1271789Sahrens if (strcmp(name, "..") == 0) 1272789Sahrens error = EISDIR; 1273789Sahrens ZFS_EXIT(zfsvfs); 1274789Sahrens return (error); 1275789Sahrens } 1276789Sahrens } 1277789Sahrens if (zp == NULL) { 12785331Samw uint64_t txtype; 12795331Samw 1280789Sahrens /* 1281789Sahrens * Create a new file object and update the directory 1282789Sahrens * to reference it. 1283789Sahrens */ 12845331Samw if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 1285789Sahrens goto out; 1286789Sahrens } 1287789Sahrens 1288789Sahrens /* 1289789Sahrens * We only support the creation of regular files in 1290789Sahrens * extended attribute directories. 1291789Sahrens */ 1292789Sahrens if ((dzp->z_phys->zp_flags & ZFS_XATTR) && 1293789Sahrens (vap->va_type != VREG)) { 1294789Sahrens error = EINVAL; 1295789Sahrens goto out; 1296789Sahrens } 1297789Sahrens 12989179SMark.Shellenbaum@Sun.COM if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, vsecp, 12999179SMark.Shellenbaum@Sun.COM &acl_ids)) != 0) 13009179SMark.Shellenbaum@Sun.COM goto out; 13019396SMatthew.Ahrens@Sun.COM if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 130210143STim.Haley@Sun.COM zfs_acl_ids_free(&acl_ids); 13039396SMatthew.Ahrens@Sun.COM error = EDQUOT; 13049396SMatthew.Ahrens@Sun.COM goto out; 13059396SMatthew.Ahrens@Sun.COM } 13069179SMark.Shellenbaum@Sun.COM 1307789Sahrens tx = dmu_tx_create(os); 1308789Sahrens dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 13099179SMark.Shellenbaum@Sun.COM fuid_dirtied = zfsvfs->z_fuid_dirty; 13109396SMatthew.Ahrens@Sun.COM if (fuid_dirtied) 13119396SMatthew.Ahrens@Sun.COM zfs_fuid_txhold(zfsvfs, tx); 1312789Sahrens dmu_tx_hold_bonus(tx, dzp->z_id); 13131544Seschrock dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 13149179SMark.Shellenbaum@Sun.COM if (acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 1315789Sahrens dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 1316789Sahrens 0, SPA_MAXBLOCKSIZE); 13175331Samw } 13188227SNeil.Perrin@Sun.COM error = dmu_tx_assign(tx, TXG_NOWAIT); 1319789Sahrens if (error) { 13209179SMark.Shellenbaum@Sun.COM zfs_acl_ids_free(&acl_ids); 1321789Sahrens zfs_dirent_unlock(dl); 13228227SNeil.Perrin@Sun.COM if (error == ERESTART) { 13232113Sahrens dmu_tx_wait(tx); 13242113Sahrens dmu_tx_abort(tx); 1325789Sahrens goto top; 1326789Sahrens } 13272113Sahrens dmu_tx_abort(tx); 1328789Sahrens ZFS_EXIT(zfsvfs); 1329789Sahrens return (error); 1330789Sahrens } 13319179SMark.Shellenbaum@Sun.COM zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, &acl_ids); 13329179SMark.Shellenbaum@Sun.COM 13339179SMark.Shellenbaum@Sun.COM if (fuid_dirtied) 13349179SMark.Shellenbaum@Sun.COM zfs_fuid_sync(zfsvfs, tx); 13359179SMark.Shellenbaum@Sun.COM 1336789Sahrens (void) zfs_link_create(dl, zp, tx, ZNEW); 13379179SMark.Shellenbaum@Sun.COM 13385331Samw txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); 13395331Samw if (flag & FIGNORECASE) 13405331Samw txtype |= TX_CI; 13415331Samw zfs_log_create(zilog, tx, txtype, dzp, zp, name, 13429179SMark.Shellenbaum@Sun.COM vsecp, acl_ids.z_fuidp, vap); 13439179SMark.Shellenbaum@Sun.COM zfs_acl_ids_free(&acl_ids); 1344789Sahrens dmu_tx_commit(tx); 1345789Sahrens } else { 13465331Samw int aflags = (flag & FAPPEND) ? V_APPEND : 0; 13475331Samw 1348789Sahrens /* 1349789Sahrens * A directory entry already exists for this name. 1350789Sahrens */ 1351789Sahrens /* 1352789Sahrens * Can't truncate an existing file if in exclusive mode. 1353789Sahrens */ 1354789Sahrens if (excl == EXCL) { 1355789Sahrens error = EEXIST; 1356789Sahrens goto out; 1357789Sahrens } 1358789Sahrens /* 1359789Sahrens * Can't open a directory for writing. 1360789Sahrens */ 1361789Sahrens if ((ZTOV(zp)->v_type == VDIR) && (mode & S_IWRITE)) { 1362789Sahrens error = EISDIR; 1363789Sahrens goto out; 1364789Sahrens } 1365789Sahrens /* 1366789Sahrens * Verify requested access to file. 1367789Sahrens */ 13685331Samw if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) { 1369789Sahrens goto out; 1370789Sahrens } 1371789Sahrens 1372789Sahrens mutex_enter(&dzp->z_lock); 1373789Sahrens dzp->z_seq++; 1374789Sahrens mutex_exit(&dzp->z_lock); 1375789Sahrens 13761878Smaybee /* 13771878Smaybee * Truncate regular files if requested. 13781878Smaybee */ 13791878Smaybee if ((ZTOV(zp)->v_type == VREG) && 1380789Sahrens (vap->va_mask & AT_SIZE) && (vap->va_size == 0)) { 13816992Smaybee /* we can't hold any locks when calling zfs_freesp() */ 13826992Smaybee zfs_dirent_unlock(dl); 13836992Smaybee dl = NULL; 13841878Smaybee error = zfs_freesp(zp, 0, 0, mode, TRUE); 13854863Spraks if (error == 0) { 13865331Samw vnevent_create(ZTOV(zp), ct); 13874863Spraks } 1388789Sahrens } 1389789Sahrens } 1390789Sahrens out: 1391789Sahrens 1392789Sahrens if (dl) 1393789Sahrens zfs_dirent_unlock(dl); 1394789Sahrens 1395789Sahrens if (error) { 1396789Sahrens if (zp) 1397789Sahrens VN_RELE(ZTOV(zp)); 1398789Sahrens } else { 1399789Sahrens *vpp = ZTOV(zp); 14009981STim.Haley@Sun.COM error = specvp_check(vpp, cr); 1401789Sahrens } 1402789Sahrens 1403789Sahrens ZFS_EXIT(zfsvfs); 1404789Sahrens return (error); 1405789Sahrens } 1406789Sahrens 1407789Sahrens /* 1408789Sahrens * Remove an entry from a directory. 1409789Sahrens * 1410789Sahrens * IN: dvp - vnode of directory to remove entry from. 1411789Sahrens * name - name of entry to remove. 1412789Sahrens * cr - credentials of caller. 14135331Samw * ct - caller context 14145331Samw * flags - case flags 1415789Sahrens * 1416789Sahrens * RETURN: 0 if success 1417789Sahrens * error code if failure 1418789Sahrens * 1419789Sahrens * Timestamps: 1420789Sahrens * dvp - ctime|mtime 1421789Sahrens * vp - ctime (if nlink > 0) 1422789Sahrens */ 14235331Samw /*ARGSUSED*/ 1424789Sahrens static int 14255331Samw zfs_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct, 14265331Samw int flags) 1427789Sahrens { 1428789Sahrens znode_t *zp, *dzp = VTOZ(dvp); 1429789Sahrens znode_t *xzp = NULL; 1430789Sahrens vnode_t *vp; 1431789Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 14325326Sek110237 zilog_t *zilog; 1433789Sahrens uint64_t acl_obj, xattr_obj; 1434789Sahrens zfs_dirlock_t *dl; 1435789Sahrens dmu_tx_t *tx; 14363461Sahrens boolean_t may_delete_now, delete_now = FALSE; 14376992Smaybee boolean_t unlinked, toobig = FALSE; 14385331Samw uint64_t txtype; 14395331Samw pathname_t *realnmp = NULL; 14405331Samw pathname_t realnm; 1441789Sahrens int error; 14425331Samw int zflg = ZEXISTS; 1443789Sahrens 14445367Sahrens ZFS_ENTER(zfsvfs); 14455367Sahrens ZFS_VERIFY_ZP(dzp); 14465326Sek110237 zilog = zfsvfs->z_log; 1447789Sahrens 14485331Samw if (flags & FIGNORECASE) { 14495331Samw zflg |= ZCILOOK; 14505331Samw pn_alloc(&realnm); 14515331Samw realnmp = &realnm; 14525331Samw } 14535331Samw 1454789Sahrens top: 1455789Sahrens /* 1456789Sahrens * Attempt to lock directory; fail if entry doesn't exist. 1457789Sahrens */ 14585331Samw if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 14595331Samw NULL, realnmp)) { 14605331Samw if (realnmp) 14615331Samw pn_free(realnmp); 1462789Sahrens ZFS_EXIT(zfsvfs); 1463789Sahrens return (error); 1464789Sahrens } 1465789Sahrens 1466789Sahrens vp = ZTOV(zp); 1467789Sahrens 1468789Sahrens if (error = zfs_zaccess_delete(dzp, zp, cr)) { 1469789Sahrens goto out; 1470789Sahrens } 1471789Sahrens 1472789Sahrens /* 1473789Sahrens * Need to use rmdir for removing directories. 1474789Sahrens */ 1475789Sahrens if (vp->v_type == VDIR) { 1476789Sahrens error = EPERM; 1477789Sahrens goto out; 1478789Sahrens } 1479789Sahrens 14805331Samw vnevent_remove(vp, dvp, name, ct); 14815331Samw 14825331Samw if (realnmp) 14836492Stimh dnlc_remove(dvp, realnmp->pn_buf); 14845331Samw else 14855331Samw dnlc_remove(dvp, name); 14861484Sek110237 1487789Sahrens mutex_enter(&vp->v_lock); 1488789Sahrens may_delete_now = vp->v_count == 1 && !vn_has_cached_data(vp); 1489789Sahrens mutex_exit(&vp->v_lock); 1490789Sahrens 1491789Sahrens /* 14923461Sahrens * We may delete the znode now, or we may put it in the unlinked set; 1493789Sahrens * it depends on whether we're the last link, and on whether there are 1494789Sahrens * other holds on the vnode. So we dmu_tx_hold() the right things to 1495789Sahrens * allow for either case. 1496789Sahrens */ 1497789Sahrens tx = dmu_tx_create(zfsvfs->z_os); 14981544Seschrock dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 1499789Sahrens dmu_tx_hold_bonus(tx, zp->z_id); 15006992Smaybee if (may_delete_now) { 15016992Smaybee toobig = 15026992Smaybee zp->z_phys->zp_size > zp->z_blksz * DMU_MAX_DELETEBLKCNT; 15036992Smaybee /* if the file is too big, only hold_free a token amount */ 15046992Smaybee dmu_tx_hold_free(tx, zp->z_id, 0, 15056992Smaybee (toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END)); 15066992Smaybee } 1507789Sahrens 1508789Sahrens /* are there any extended attributes? */ 1509789Sahrens if ((xattr_obj = zp->z_phys->zp_xattr) != 0) { 1510789Sahrens /* XXX - do we need this if we are deleting? */ 1511789Sahrens dmu_tx_hold_bonus(tx, xattr_obj); 1512789Sahrens } 1513789Sahrens 1514789Sahrens /* are there any additional acls */ 1515789Sahrens if ((acl_obj = zp->z_phys->zp_acl.z_acl_extern_obj) != 0 && 1516789Sahrens may_delete_now) 1517789Sahrens dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); 1518789Sahrens 1519789Sahrens /* charge as an update -- would be nice not to charge at all */ 15203461Sahrens dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 1521789Sahrens 15228227SNeil.Perrin@Sun.COM error = dmu_tx_assign(tx, TXG_NOWAIT); 1523789Sahrens if (error) { 1524789Sahrens zfs_dirent_unlock(dl); 1525789Sahrens VN_RELE(vp); 15268227SNeil.Perrin@Sun.COM if (error == ERESTART) { 15272113Sahrens dmu_tx_wait(tx); 15282113Sahrens dmu_tx_abort(tx); 1529789Sahrens goto top; 1530789Sahrens } 15315331Samw if (realnmp) 15325331Samw pn_free(realnmp); 15332113Sahrens dmu_tx_abort(tx); 1534789Sahrens ZFS_EXIT(zfsvfs); 1535789Sahrens return (error); 1536789Sahrens } 1537789Sahrens 1538789Sahrens /* 1539789Sahrens * Remove the directory entry. 1540789Sahrens */ 15415331Samw error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked); 1542789Sahrens 1543789Sahrens if (error) { 1544789Sahrens dmu_tx_commit(tx); 1545789Sahrens goto out; 1546789Sahrens } 1547789Sahrens 15483461Sahrens if (unlinked) { 1549789Sahrens mutex_enter(&vp->v_lock); 15506992Smaybee delete_now = may_delete_now && !toobig && 1551789Sahrens vp->v_count == 1 && !vn_has_cached_data(vp) && 1552789Sahrens zp->z_phys->zp_xattr == xattr_obj && 1553789Sahrens zp->z_phys->zp_acl.z_acl_extern_obj == acl_obj; 1554789Sahrens mutex_exit(&vp->v_lock); 1555789Sahrens } 1556789Sahrens 1557789Sahrens if (delete_now) { 1558789Sahrens if (zp->z_phys->zp_xattr) { 1559789Sahrens error = zfs_zget(zfsvfs, zp->z_phys->zp_xattr, &xzp); 1560789Sahrens ASSERT3U(error, ==, 0); 1561789Sahrens ASSERT3U(xzp->z_phys->zp_links, ==, 2); 1562789Sahrens dmu_buf_will_dirty(xzp->z_dbuf, tx); 1563789Sahrens mutex_enter(&xzp->z_lock); 15643461Sahrens xzp->z_unlinked = 1; 1565789Sahrens xzp->z_phys->zp_links = 0; 1566789Sahrens mutex_exit(&xzp->z_lock); 15673461Sahrens zfs_unlinked_add(xzp, tx); 1568789Sahrens zp->z_phys->zp_xattr = 0; /* probably unnecessary */ 1569789Sahrens } 1570789Sahrens mutex_enter(&zp->z_lock); 1571789Sahrens mutex_enter(&vp->v_lock); 1572789Sahrens vp->v_count--; 1573789Sahrens ASSERT3U(vp->v_count, ==, 0); 1574789Sahrens mutex_exit(&vp->v_lock); 1575789Sahrens mutex_exit(&zp->z_lock); 1576789Sahrens zfs_znode_delete(zp, tx); 15773461Sahrens } else if (unlinked) { 15783461Sahrens zfs_unlinked_add(zp, tx); 1579789Sahrens } 1580789Sahrens 15815331Samw txtype = TX_REMOVE; 15825331Samw if (flags & FIGNORECASE) 15835331Samw txtype |= TX_CI; 15845331Samw zfs_log_remove(zilog, tx, txtype, dzp, name); 1585789Sahrens 1586789Sahrens dmu_tx_commit(tx); 1587789Sahrens out: 15885331Samw if (realnmp) 15895331Samw pn_free(realnmp); 15905331Samw 1591789Sahrens zfs_dirent_unlock(dl); 1592789Sahrens 1593789Sahrens if (!delete_now) { 1594789Sahrens VN_RELE(vp); 1595789Sahrens } else if (xzp) { 15966992Smaybee /* this rele is delayed to prevent nesting transactions */ 1597789Sahrens VN_RELE(ZTOV(xzp)); 1598789Sahrens } 1599789Sahrens 1600789Sahrens ZFS_EXIT(zfsvfs); 1601789Sahrens return (error); 1602789Sahrens } 1603789Sahrens 1604789Sahrens /* 1605789Sahrens * Create a new directory and insert it into dvp using the name 1606789Sahrens * provided. Return a pointer to the inserted directory. 1607789Sahrens * 1608789Sahrens * IN: dvp - vnode of directory to add subdir to. 1609789Sahrens * dirname - name of new directory. 1610789Sahrens * vap - attributes of new directory. 1611789Sahrens * cr - credentials of caller. 16125331Samw * ct - caller context 16135331Samw * vsecp - ACL to be set 1614789Sahrens * 1615789Sahrens * OUT: vpp - vnode of created directory. 1616789Sahrens * 1617789Sahrens * RETURN: 0 if success 1618789Sahrens * error code if failure 1619789Sahrens * 1620789Sahrens * Timestamps: 1621789Sahrens * dvp - ctime|mtime updated 1622789Sahrens * vp - ctime|mtime|atime updated 1623789Sahrens */ 16245331Samw /*ARGSUSED*/ 1625789Sahrens static int 16265331Samw zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr, 16275331Samw caller_context_t *ct, int flags, vsecattr_t *vsecp) 1628789Sahrens { 1629789Sahrens znode_t *zp, *dzp = VTOZ(dvp); 1630789Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 16315326Sek110237 zilog_t *zilog; 1632789Sahrens zfs_dirlock_t *dl; 16335331Samw uint64_t txtype; 1634789Sahrens dmu_tx_t *tx; 1635789Sahrens int error; 16365331Samw int zf = ZNEW; 16377847SMark.Shellenbaum@Sun.COM ksid_t *ksid; 16387847SMark.Shellenbaum@Sun.COM uid_t uid; 16397847SMark.Shellenbaum@Sun.COM gid_t gid = crgetgid(cr); 16409179SMark.Shellenbaum@Sun.COM zfs_acl_ids_t acl_ids; 16419179SMark.Shellenbaum@Sun.COM boolean_t fuid_dirtied; 1642789Sahrens 1643789Sahrens ASSERT(vap->va_type == VDIR); 1644789Sahrens 16455331Samw /* 16465331Samw * If we have an ephemeral id, ACL, or XVATTR then 16475331Samw * make sure file system is at proper version 16485331Samw */ 16495331Samw 16507847SMark.Shellenbaum@Sun.COM ksid = crgetsid(cr, KSID_OWNER); 16517847SMark.Shellenbaum@Sun.COM if (ksid) 16527847SMark.Shellenbaum@Sun.COM uid = ksid_getid(ksid); 16537847SMark.Shellenbaum@Sun.COM else 16547847SMark.Shellenbaum@Sun.COM uid = crgetuid(cr); 16555331Samw if (zfsvfs->z_use_fuids == B_FALSE && 16567847SMark.Shellenbaum@Sun.COM (vsecp || (vap->va_mask & AT_XVATTR) || 16577876SMark.Shellenbaum@Sun.COM IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 16585331Samw return (EINVAL); 16595331Samw 16605367Sahrens ZFS_ENTER(zfsvfs); 16615367Sahrens ZFS_VERIFY_ZP(dzp); 16625326Sek110237 zilog = zfsvfs->z_log; 1663789Sahrens 1664789Sahrens if (dzp->z_phys->zp_flags & ZFS_XATTR) { 1665789Sahrens ZFS_EXIT(zfsvfs); 1666789Sahrens return (EINVAL); 1667789Sahrens } 16685331Samw 16695498Stimh if (zfsvfs->z_utf8 && u8_validate(dirname, 16705331Samw strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 16715331Samw ZFS_EXIT(zfsvfs); 16725331Samw return (EILSEQ); 16735331Samw } 16745331Samw if (flags & FIGNORECASE) 16755331Samw zf |= ZCILOOK; 16765331Samw 16775331Samw if (vap->va_mask & AT_XVATTR) 16785331Samw if ((error = secpolicy_xvattr((xvattr_t *)vap, 16795331Samw crgetuid(cr), cr, vap->va_type)) != 0) { 16805331Samw ZFS_EXIT(zfsvfs); 16815331Samw return (error); 16825331Samw } 1683789Sahrens 1684789Sahrens /* 1685789Sahrens * First make sure the new directory doesn't exist. 1686789Sahrens */ 16875331Samw top: 16885331Samw *vpp = NULL; 16895331Samw 16905331Samw if (error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf, 16915331Samw NULL, NULL)) { 1692789Sahrens ZFS_EXIT(zfsvfs); 1693789Sahrens return (error); 1694789Sahrens } 1695789Sahrens 16965331Samw if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) { 16971231Smarks zfs_dirent_unlock(dl); 16981231Smarks ZFS_EXIT(zfsvfs); 16991231Smarks return (error); 17001231Smarks } 17011231Smarks 17029179SMark.Shellenbaum@Sun.COM if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, vsecp, 17039179SMark.Shellenbaum@Sun.COM &acl_ids)) != 0) { 17049179SMark.Shellenbaum@Sun.COM zfs_dirent_unlock(dl); 17059179SMark.Shellenbaum@Sun.COM ZFS_EXIT(zfsvfs); 17069179SMark.Shellenbaum@Sun.COM return (error); 17075331Samw } 17089396SMatthew.Ahrens@Sun.COM if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 170910143STim.Haley@Sun.COM zfs_acl_ids_free(&acl_ids); 17109396SMatthew.Ahrens@Sun.COM zfs_dirent_unlock(dl); 17119396SMatthew.Ahrens@Sun.COM ZFS_EXIT(zfsvfs); 17129396SMatthew.Ahrens@Sun.COM return (EDQUOT); 17139396SMatthew.Ahrens@Sun.COM } 17149179SMark.Shellenbaum@Sun.COM 1715789Sahrens /* 1716789Sahrens * Add a new entry to the directory. 1717789Sahrens */ 1718789Sahrens tx = dmu_tx_create(zfsvfs->z_os); 17191544Seschrock dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); 17201544Seschrock dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 17219179SMark.Shellenbaum@Sun.COM fuid_dirtied = zfsvfs->z_fuid_dirty; 17229396SMatthew.Ahrens@Sun.COM if (fuid_dirtied) 17239396SMatthew.Ahrens@Sun.COM zfs_fuid_txhold(zfsvfs, tx); 17249179SMark.Shellenbaum@Sun.COM if (acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) 1725789Sahrens dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 1726789Sahrens 0, SPA_MAXBLOCKSIZE); 17278227SNeil.Perrin@Sun.COM error = dmu_tx_assign(tx, TXG_NOWAIT); 1728789Sahrens if (error) { 17299179SMark.Shellenbaum@Sun.COM zfs_acl_ids_free(&acl_ids); 1730789Sahrens zfs_dirent_unlock(dl); 17318227SNeil.Perrin@Sun.COM if (error == ERESTART) { 17322113Sahrens dmu_tx_wait(tx); 17332113Sahrens dmu_tx_abort(tx); 1734789Sahrens goto top; 1735789Sahrens } 17362113Sahrens dmu_tx_abort(tx); 1737789Sahrens ZFS_EXIT(zfsvfs); 1738789Sahrens return (error); 1739789Sahrens } 1740789Sahrens 1741789Sahrens /* 1742789Sahrens * Create new node. 1743789Sahrens */ 17449179SMark.Shellenbaum@Sun.COM zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, &acl_ids); 17459179SMark.Shellenbaum@Sun.COM 17469179SMark.Shellenbaum@Sun.COM if (fuid_dirtied) 17479179SMark.Shellenbaum@Sun.COM zfs_fuid_sync(zfsvfs, tx); 1748789Sahrens /* 1749789Sahrens * Now put new name in parent dir. 1750789Sahrens */ 1751789Sahrens (void) zfs_link_create(dl, zp, tx, ZNEW); 1752789Sahrens 1753789Sahrens *vpp = ZTOV(zp); 1754789Sahrens 17555331Samw txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap); 17565331Samw if (flags & FIGNORECASE) 17575331Samw txtype |= TX_CI; 17589179SMark.Shellenbaum@Sun.COM zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp, 17599179SMark.Shellenbaum@Sun.COM acl_ids.z_fuidp, vap); 17609179SMark.Shellenbaum@Sun.COM 17619179SMark.Shellenbaum@Sun.COM zfs_acl_ids_free(&acl_ids); 1762789Sahrens dmu_tx_commit(tx); 1763789Sahrens 1764789Sahrens zfs_dirent_unlock(dl); 1765789Sahrens 1766789Sahrens ZFS_EXIT(zfsvfs); 1767789Sahrens return (0); 1768789Sahrens } 1769789Sahrens 1770789Sahrens /* 1771789Sahrens * Remove a directory subdir entry. If the current working 1772789Sahrens * directory is the same as the subdir to be removed, the 1773789Sahrens * remove will fail. 1774789Sahrens * 1775789Sahrens * IN: dvp - vnode of directory to remove from. 1776789Sahrens * name - name of directory to be removed. 1777789Sahrens * cwd - vnode of current working directory. 1778789Sahrens * cr - credentials of caller. 17795331Samw * ct - caller context 17805331Samw * flags - case flags 1781789Sahrens * 1782789Sahrens * RETURN: 0 if success 1783789Sahrens * error code if failure 1784789Sahrens * 1785789Sahrens * Timestamps: 1786789Sahrens * dvp - ctime|mtime updated 1787789Sahrens */ 17885331Samw /*ARGSUSED*/ 1789789Sahrens static int 17905331Samw zfs_rmdir(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr, 17915331Samw caller_context_t *ct, int flags) 1792789Sahrens { 1793789Sahrens znode_t *dzp = VTOZ(dvp); 1794789Sahrens znode_t *zp; 1795789Sahrens vnode_t *vp; 1796789Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 17975326Sek110237 zilog_t *zilog; 1798789Sahrens zfs_dirlock_t *dl; 1799789Sahrens dmu_tx_t *tx; 1800789Sahrens int error; 18015331Samw int zflg = ZEXISTS; 1802789Sahrens 18035367Sahrens ZFS_ENTER(zfsvfs); 18045367Sahrens ZFS_VERIFY_ZP(dzp); 18055326Sek110237 zilog = zfsvfs->z_log; 1806789Sahrens 18075331Samw if (flags & FIGNORECASE) 18085331Samw zflg |= ZCILOOK; 1809789Sahrens top: 1810789Sahrens zp = NULL; 1811789Sahrens 1812789Sahrens /* 1813789Sahrens * Attempt to lock directory; fail if entry doesn't exist. 1814789Sahrens */ 18155331Samw if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 18165331Samw NULL, NULL)) { 1817789Sahrens ZFS_EXIT(zfsvfs); 1818789Sahrens return (error); 1819789Sahrens } 1820789Sahrens 1821789Sahrens vp = ZTOV(zp); 1822789Sahrens 1823789Sahrens if (error = zfs_zaccess_delete(dzp, zp, cr)) { 1824789Sahrens goto out; 1825789Sahrens } 1826789Sahrens 1827789Sahrens if (vp->v_type != VDIR) { 1828789Sahrens error = ENOTDIR; 1829789Sahrens goto out; 1830789Sahrens } 1831789Sahrens 1832789Sahrens if (vp == cwd) { 1833789Sahrens error = EINVAL; 1834789Sahrens goto out; 1835789Sahrens } 1836789Sahrens 18375331Samw vnevent_rmdir(vp, dvp, name, ct); 1838789Sahrens 1839789Sahrens /* 18403897Smaybee * Grab a lock on the directory to make sure that noone is 18413897Smaybee * trying to add (or lookup) entries while we are removing it. 18423897Smaybee */ 18433897Smaybee rw_enter(&zp->z_name_lock, RW_WRITER); 18443897Smaybee 18453897Smaybee /* 18463897Smaybee * Grab a lock on the parent pointer to make sure we play well 1847789Sahrens * with the treewalk and directory rename code. 1848789Sahrens */ 1849789Sahrens rw_enter(&zp->z_parent_lock, RW_WRITER); 1850789Sahrens 1851789Sahrens tx = dmu_tx_create(zfsvfs->z_os); 18521544Seschrock dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 1853789Sahrens dmu_tx_hold_bonus(tx, zp->z_id); 18543461Sahrens dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 18558227SNeil.Perrin@Sun.COM error = dmu_tx_assign(tx, TXG_NOWAIT); 1856789Sahrens if (error) { 1857789Sahrens rw_exit(&zp->z_parent_lock); 18583897Smaybee rw_exit(&zp->z_name_lock); 1859789Sahrens zfs_dirent_unlock(dl); 1860789Sahrens VN_RELE(vp); 18618227SNeil.Perrin@Sun.COM if (error == ERESTART) { 18622113Sahrens dmu_tx_wait(tx); 18632113Sahrens dmu_tx_abort(tx); 1864789Sahrens goto top; 1865789Sahrens } 18662113Sahrens dmu_tx_abort(tx); 1867789Sahrens ZFS_EXIT(zfsvfs); 1868789Sahrens return (error); 1869789Sahrens } 1870789Sahrens 18715331Samw error = zfs_link_destroy(dl, zp, tx, zflg, NULL); 18725331Samw 18735331Samw if (error == 0) { 18745331Samw uint64_t txtype = TX_RMDIR; 18755331Samw if (flags & FIGNORECASE) 18765331Samw txtype |= TX_CI; 18775331Samw zfs_log_remove(zilog, tx, txtype, dzp, name); 18785331Samw } 1879789Sahrens 1880789Sahrens dmu_tx_commit(tx); 1881789Sahrens 1882789Sahrens rw_exit(&zp->z_parent_lock); 18833897Smaybee rw_exit(&zp->z_name_lock); 1884789Sahrens out: 1885789Sahrens zfs_dirent_unlock(dl); 1886789Sahrens 1887789Sahrens VN_RELE(vp); 1888789Sahrens 1889789Sahrens ZFS_EXIT(zfsvfs); 1890789Sahrens return (error); 1891789Sahrens } 1892789Sahrens 1893789Sahrens /* 1894789Sahrens * Read as many directory entries as will fit into the provided 1895789Sahrens * buffer from the given directory cursor position (specified in 1896789Sahrens * the uio structure. 1897789Sahrens * 1898789Sahrens * IN: vp - vnode of directory to read. 1899789Sahrens * uio - structure supplying read location, range info, 1900789Sahrens * and return buffer. 1901789Sahrens * cr - credentials of caller. 19025331Samw * ct - caller context 19035331Samw * flags - case flags 1904789Sahrens * 1905789Sahrens * OUT: uio - updated offset and range, buffer filled. 1906789Sahrens * eofp - set to true if end-of-file detected. 1907789Sahrens * 1908789Sahrens * RETURN: 0 if success 1909789Sahrens * error code if failure 1910789Sahrens * 1911789Sahrens * Timestamps: 1912789Sahrens * vp - atime updated 1913789Sahrens * 1914789Sahrens * Note that the low 4 bits of the cookie returned by zap is always zero. 1915789Sahrens * This allows us to use the low range for "special" directory entries: 1916789Sahrens * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, 1917789Sahrens * we use the offset 2 for the '.zfs' directory. 1918789Sahrens */ 1919789Sahrens /* ARGSUSED */ 1920789Sahrens static int 19215331Samw zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, 19225331Samw caller_context_t *ct, int flags) 1923789Sahrens { 1924789Sahrens znode_t *zp = VTOZ(vp); 1925789Sahrens iovec_t *iovp; 19265331Samw edirent_t *eodp; 1927789Sahrens dirent64_t *odp; 1928789Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1929869Sperrin objset_t *os; 1930789Sahrens caddr_t outbuf; 1931789Sahrens size_t bufsize; 1932789Sahrens zap_cursor_t zc; 1933789Sahrens zap_attribute_t zap; 1934789Sahrens uint_t bytes_wanted; 1935789Sahrens uint64_t offset; /* must be unsigned; checks for < 1 */ 1936789Sahrens int local_eof; 1937869Sperrin int outcount; 1938869Sperrin int error; 1939869Sperrin uint8_t prefetch; 19405663Sck153898 boolean_t check_sysattrs; 1941789Sahrens 19425367Sahrens ZFS_ENTER(zfsvfs); 19435367Sahrens ZFS_VERIFY_ZP(zp); 1944789Sahrens 1945789Sahrens /* 1946789Sahrens * If we are not given an eof variable, 1947789Sahrens * use a local one. 1948789Sahrens */ 1949789Sahrens if (eofp == NULL) 1950789Sahrens eofp = &local_eof; 1951789Sahrens 1952789Sahrens /* 1953789Sahrens * Check for valid iov_len. 1954789Sahrens */ 1955789Sahrens if (uio->uio_iov->iov_len <= 0) { 1956789Sahrens ZFS_EXIT(zfsvfs); 1957789Sahrens return (EINVAL); 1958789Sahrens } 1959789Sahrens 1960789Sahrens /* 1961789Sahrens * Quit if directory has been removed (posix) 1962789Sahrens */ 19633461Sahrens if ((*eofp = zp->z_unlinked) != 0) { 1964789Sahrens ZFS_EXIT(zfsvfs); 1965789Sahrens return (0); 1966789Sahrens } 1967789Sahrens 1968869Sperrin error = 0; 1969869Sperrin os = zfsvfs->z_os; 1970869Sperrin offset = uio->uio_loffset; 1971869Sperrin prefetch = zp->z_zn_prefetch; 1972869Sperrin 1973789Sahrens /* 1974789Sahrens * Initialize the iterator cursor. 1975789Sahrens */ 1976789Sahrens if (offset <= 3) { 1977789Sahrens /* 1978789Sahrens * Start iteration from the beginning of the directory. 1979789Sahrens */ 1980869Sperrin zap_cursor_init(&zc, os, zp->z_id); 1981789Sahrens } else { 1982789Sahrens /* 1983789Sahrens * The offset is a serialized cursor. 1984789Sahrens */ 1985869Sperrin zap_cursor_init_serialized(&zc, os, zp->z_id, offset); 1986789Sahrens } 1987789Sahrens 1988789Sahrens /* 1989789Sahrens * Get space to change directory entries into fs independent format. 1990789Sahrens */ 1991789Sahrens iovp = uio->uio_iov; 1992789Sahrens bytes_wanted = iovp->iov_len; 1993789Sahrens if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) { 1994789Sahrens bufsize = bytes_wanted; 1995789Sahrens outbuf = kmem_alloc(bufsize, KM_SLEEP); 1996789Sahrens odp = (struct dirent64 *)outbuf; 1997789Sahrens } else { 1998789Sahrens bufsize = bytes_wanted; 1999789Sahrens odp = (struct dirent64 *)iovp->iov_base; 2000789Sahrens } 20015331Samw eodp = (struct edirent *)odp; 2002789Sahrens 2003789Sahrens /* 20047757SJanice.Chang@Sun.COM * If this VFS supports the system attribute view interface; and 20057757SJanice.Chang@Sun.COM * we're looking at an extended attribute directory; and we care 20067757SJanice.Chang@Sun.COM * about normalization conflicts on this vfs; then we must check 20077757SJanice.Chang@Sun.COM * for normalization conflicts with the sysattr name space. 20085663Sck153898 */ 20097757SJanice.Chang@Sun.COM check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 20105663Sck153898 (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && 20115663Sck153898 (flags & V_RDDIR_ENTFLAGS); 20125663Sck153898 20135663Sck153898 /* 2014789Sahrens * Transform to file-system independent format 2015789Sahrens */ 2016789Sahrens outcount = 0; 2017789Sahrens while (outcount < bytes_wanted) { 20183912Slling ino64_t objnum; 20193912Slling ushort_t reclen; 20203912Slling off64_t *next; 20213912Slling 2022789Sahrens /* 2023789Sahrens * Special case `.', `..', and `.zfs'. 2024789Sahrens */ 2025789Sahrens if (offset == 0) { 2026789Sahrens (void) strcpy(zap.za_name, "."); 20275331Samw zap.za_normalization_conflict = 0; 20283912Slling objnum = zp->z_id; 2029789Sahrens } else if (offset == 1) { 2030789Sahrens (void) strcpy(zap.za_name, ".."); 20315331Samw zap.za_normalization_conflict = 0; 20323912Slling objnum = zp->z_phys->zp_parent; 2033789Sahrens } else if (offset == 2 && zfs_show_ctldir(zp)) { 2034789Sahrens (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); 20355331Samw zap.za_normalization_conflict = 0; 20363912Slling objnum = ZFSCTL_INO_ROOT; 2037789Sahrens } else { 2038789Sahrens /* 2039789Sahrens * Grab next entry. 2040789Sahrens */ 2041789Sahrens if (error = zap_cursor_retrieve(&zc, &zap)) { 2042789Sahrens if ((*eofp = (error == ENOENT)) != 0) 2043789Sahrens break; 2044789Sahrens else 2045789Sahrens goto update; 2046789Sahrens } 2047789Sahrens 2048789Sahrens if (zap.za_integer_length != 8 || 2049789Sahrens zap.za_num_integers != 1) { 2050789Sahrens cmn_err(CE_WARN, "zap_readdir: bad directory " 2051789Sahrens "entry, obj = %lld, offset = %lld\n", 2052789Sahrens (u_longlong_t)zp->z_id, 2053789Sahrens (u_longlong_t)offset); 2054789Sahrens error = ENXIO; 2055789Sahrens goto update; 2056789Sahrens } 20573912Slling 20583912Slling objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); 20593912Slling /* 20603912Slling * MacOS X can extract the object type here such as: 20613912Slling * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); 20623912Slling */ 20635663Sck153898 20645663Sck153898 if (check_sysattrs && !zap.za_normalization_conflict) { 20655663Sck153898 zap.za_normalization_conflict = 20665663Sck153898 xattr_sysattr_casechk(zap.za_name); 20675663Sck153898 } 2068789Sahrens } 20695331Samw 20709749STim.Haley@Sun.COM if (flags & V_RDDIR_ACCFILTER) { 20719749STim.Haley@Sun.COM /* 20729749STim.Haley@Sun.COM * If we have no access at all, don't include 20739749STim.Haley@Sun.COM * this entry in the returned information 20749749STim.Haley@Sun.COM */ 20759749STim.Haley@Sun.COM znode_t *ezp; 20769749STim.Haley@Sun.COM if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0) 20779749STim.Haley@Sun.COM goto skip_entry; 20789749STim.Haley@Sun.COM if (!zfs_has_access(ezp, cr)) { 20799749STim.Haley@Sun.COM VN_RELE(ZTOV(ezp)); 20809749STim.Haley@Sun.COM goto skip_entry; 20819749STim.Haley@Sun.COM } 20829749STim.Haley@Sun.COM VN_RELE(ZTOV(ezp)); 20839749STim.Haley@Sun.COM } 20849749STim.Haley@Sun.COM 20855331Samw if (flags & V_RDDIR_ENTFLAGS) 20865331Samw reclen = EDIRENT_RECLEN(strlen(zap.za_name)); 20875331Samw else 20885331Samw reclen = DIRENT64_RECLEN(strlen(zap.za_name)); 2089789Sahrens 2090789Sahrens /* 2091789Sahrens * Will this entry fit in the buffer? 2092789Sahrens */ 20933912Slling if (outcount + reclen > bufsize) { 2094789Sahrens /* 2095789Sahrens * Did we manage to fit anything in the buffer? 2096789Sahrens */ 2097789Sahrens if (!outcount) { 2098789Sahrens error = EINVAL; 2099789Sahrens goto update; 2100789Sahrens } 2101789Sahrens break; 2102789Sahrens } 21035331Samw if (flags & V_RDDIR_ENTFLAGS) { 21045331Samw /* 21055331Samw * Add extended flag entry: 21065331Samw */ 21075331Samw eodp->ed_ino = objnum; 21085331Samw eodp->ed_reclen = reclen; 21095331Samw /* NOTE: ed_off is the offset for the *next* entry */ 21105331Samw next = &(eodp->ed_off); 21115331Samw eodp->ed_eflags = zap.za_normalization_conflict ? 21125331Samw ED_CASE_CONFLICT : 0; 21135331Samw (void) strncpy(eodp->ed_name, zap.za_name, 21145331Samw EDIRENT_NAMELEN(reclen)); 21155331Samw eodp = (edirent_t *)((intptr_t)eodp + reclen); 21165331Samw } else { 21175331Samw /* 21185331Samw * Add normal entry: 21195331Samw */ 21205331Samw odp->d_ino = objnum; 21215331Samw odp->d_reclen = reclen; 21225331Samw /* NOTE: d_off is the offset for the *next* entry */ 21235331Samw next = &(odp->d_off); 21245331Samw (void) strncpy(odp->d_name, zap.za_name, 21255331Samw DIRENT64_NAMELEN(reclen)); 21265331Samw odp = (dirent64_t *)((intptr_t)odp + reclen); 21275331Samw } 21283912Slling outcount += reclen; 2129789Sahrens 2130789Sahrens ASSERT(outcount <= bufsize); 2131789Sahrens 2132789Sahrens /* Prefetch znode */ 2133869Sperrin if (prefetch) 21343912Slling dmu_prefetch(os, objnum, 0, 0); 2135789Sahrens 21369749STim.Haley@Sun.COM skip_entry: 2137789Sahrens /* 2138789Sahrens * Move to the next entry, fill in the previous offset. 2139789Sahrens */ 2140789Sahrens if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { 2141789Sahrens zap_cursor_advance(&zc); 2142789Sahrens offset = zap_cursor_serialize(&zc); 2143789Sahrens } else { 2144789Sahrens offset += 1; 2145789Sahrens } 2146789Sahrens *next = offset; 2147789Sahrens } 2148869Sperrin zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ 2149789Sahrens 2150789Sahrens if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) { 2151789Sahrens iovp->iov_base += outcount; 2152789Sahrens iovp->iov_len -= outcount; 2153789Sahrens uio->uio_resid -= outcount; 2154789Sahrens } else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) { 2155789Sahrens /* 2156789Sahrens * Reset the pointer. 2157789Sahrens */ 2158789Sahrens offset = uio->uio_loffset; 2159789Sahrens } 2160789Sahrens 2161789Sahrens update: 2162885Sahrens zap_cursor_fini(&zc); 2163789Sahrens if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) 2164789Sahrens kmem_free(outbuf, bufsize); 2165789Sahrens 2166789Sahrens if (error == ENOENT) 2167789Sahrens error = 0; 2168789Sahrens 2169789Sahrens ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 2170789Sahrens 2171789Sahrens uio->uio_loffset = offset; 2172789Sahrens ZFS_EXIT(zfsvfs); 2173789Sahrens return (error); 2174789Sahrens } 2175789Sahrens 21764720Sfr157268 ulong_t zfs_fsync_sync_cnt = 4; 21774720Sfr157268 2178789Sahrens static int 21795331Samw zfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) 2180789Sahrens { 2181789Sahrens znode_t *zp = VTOZ(vp); 2182789Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2183789Sahrens 21841773Seschrock /* 21851773Seschrock * Regardless of whether this is required for standards conformance, 21861773Seschrock * this is the logical behavior when fsync() is called on a file with 21871773Seschrock * dirty pages. We use B_ASYNC since the ZIL transactions are already 21881773Seschrock * going to be pushed out as part of the zil_commit(). 21891773Seschrock */ 21901773Seschrock if (vn_has_cached_data(vp) && !(syncflag & FNODSYNC) && 21911773Seschrock (vp->v_type == VREG) && !(IS_SWAPVP(vp))) 21925331Samw (void) VOP_PUTPAGE(vp, (offset_t)0, (size_t)0, B_ASYNC, cr, ct); 21931773Seschrock 21944720Sfr157268 (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); 21954720Sfr157268 21965367Sahrens ZFS_ENTER(zfsvfs); 21975367Sahrens ZFS_VERIFY_ZP(zp); 21982638Sperrin zil_commit(zfsvfs->z_log, zp->z_last_itx, zp->z_id); 2199789Sahrens ZFS_EXIT(zfsvfs); 2200789Sahrens return (0); 2201789Sahrens } 2202789Sahrens 22035331Samw 2204789Sahrens /* 2205789Sahrens * Get the requested file attributes and place them in the provided 2206789Sahrens * vattr structure. 2207789Sahrens * 2208789Sahrens * IN: vp - vnode of file. 2209789Sahrens * vap - va_mask identifies requested attributes. 22105331Samw * If AT_XVATTR set, then optional attrs are requested 22115331Samw * flags - ATTR_NOACLCHECK (CIFS server context) 2212789Sahrens * cr - credentials of caller. 22135331Samw * ct - caller context 2214789Sahrens * 2215789Sahrens * OUT: vap - attribute values. 2216789Sahrens * 2217789Sahrens * RETURN: 0 (always succeeds) 2218789Sahrens */ 2219789Sahrens /* ARGSUSED */ 2220789Sahrens static int 22215331Samw zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 22225331Samw caller_context_t *ct) 2223789Sahrens { 2224789Sahrens znode_t *zp = VTOZ(vp); 2225789Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 22265326Sek110237 znode_phys_t *pzp; 22275331Samw int error = 0; 22284543Smarks uint64_t links; 22295331Samw xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 22305331Samw xoptattr_t *xoap = NULL; 22315331Samw boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 2232789Sahrens 22335367Sahrens ZFS_ENTER(zfsvfs); 22345367Sahrens ZFS_VERIFY_ZP(zp); 22355326Sek110237 pzp = zp->z_phys; 2236789Sahrens 22375331Samw /* 22385331Samw * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. 22395331Samw * Also, if we are the owner don't bother, since owner should 22405331Samw * always be allowed to read basic attributes of file. 22415331Samw */ 22425331Samw if (!(pzp->zp_flags & ZFS_ACL_TRIVIAL) && 22435331Samw (pzp->zp_uid != crgetuid(cr))) { 22445331Samw if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, 22455331Samw skipaclchk, cr)) { 22465331Samw ZFS_EXIT(zfsvfs); 22475331Samw return (error); 22485331Samw } 22495331Samw } 22505331Samw 2251789Sahrens /* 2252789Sahrens * Return all attributes. It's cheaper to provide the answer 2253789Sahrens * than to determine whether we were asked the question. 2254789Sahrens */ 2255789Sahrens 22569774SRay.Hassan@Sun.COM mutex_enter(&zp->z_lock); 2257789Sahrens vap->va_type = vp->v_type; 2258789Sahrens vap->va_mode = pzp->zp_mode & MODEMASK; 22595771Sjp151216 zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); 2260789Sahrens vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev; 2261789Sahrens vap->va_nodeid = zp->z_id; 22624543Smarks if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp)) 22634543Smarks links = pzp->zp_links + 1; 22644543Smarks else 22654543Smarks links = pzp->zp_links; 22664543Smarks vap->va_nlink = MIN(links, UINT32_MAX); /* nlink_t limit! */ 2267789Sahrens vap->va_size = pzp->zp_size; 22681816Smarks vap->va_rdev = vp->v_rdev; 2269789Sahrens vap->va_seq = zp->z_seq; 2270789Sahrens 22715331Samw /* 22725331Samw * Add in any requested optional attributes and the create time. 22735331Samw * Also set the corresponding bits in the returned attribute bitmap. 22745331Samw */ 22755331Samw if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { 22765331Samw if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 22775331Samw xoap->xoa_archive = 22785331Samw ((pzp->zp_flags & ZFS_ARCHIVE) != 0); 22795331Samw XVA_SET_RTN(xvap, XAT_ARCHIVE); 22805331Samw } 22815331Samw 22825331Samw if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 22835331Samw xoap->xoa_readonly = 22845331Samw ((pzp->zp_flags & ZFS_READONLY) != 0); 22855331Samw XVA_SET_RTN(xvap, XAT_READONLY); 22865331Samw } 22875331Samw 22885331Samw if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 22895331Samw xoap->xoa_system = 22905331Samw ((pzp->zp_flags & ZFS_SYSTEM) != 0); 22915331Samw XVA_SET_RTN(xvap, XAT_SYSTEM); 22925331Samw } 22935331Samw 22945331Samw if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 22955331Samw xoap->xoa_hidden = 22965331Samw ((pzp->zp_flags & ZFS_HIDDEN) != 0); 22975331Samw XVA_SET_RTN(xvap, XAT_HIDDEN); 22985331Samw } 22995331Samw 23005331Samw if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 23015331Samw xoap->xoa_nounlink = 23025331Samw ((pzp->zp_flags & ZFS_NOUNLINK) != 0); 23035331Samw XVA_SET_RTN(xvap, XAT_NOUNLINK); 23045331Samw } 23055331Samw 23065331Samw if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 23075331Samw xoap->xoa_immutable = 23085331Samw ((pzp->zp_flags & ZFS_IMMUTABLE) != 0); 23095331Samw XVA_SET_RTN(xvap, XAT_IMMUTABLE); 23105331Samw } 23115331Samw 23125331Samw if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 23135331Samw xoap->xoa_appendonly = 23145331Samw ((pzp->zp_flags & ZFS_APPENDONLY) != 0); 23155331Samw XVA_SET_RTN(xvap, XAT_APPENDONLY); 23165331Samw } 23175331Samw 23185331Samw if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 23195331Samw xoap->xoa_nodump = 23205331Samw ((pzp->zp_flags & ZFS_NODUMP) != 0); 23215331Samw XVA_SET_RTN(xvap, XAT_NODUMP); 23225331Samw } 23235331Samw 23245331Samw if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 23255331Samw xoap->xoa_opaque = 23265331Samw ((pzp->zp_flags & ZFS_OPAQUE) != 0); 23275331Samw XVA_SET_RTN(xvap, XAT_OPAQUE); 23285331Samw } 23295331Samw 23305331Samw if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 23315331Samw xoap->xoa_av_quarantined = 23325331Samw ((pzp->zp_flags & ZFS_AV_QUARANTINED) != 0); 23335331Samw XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 23345331Samw } 23355331Samw 23365331Samw if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 23375331Samw xoap->xoa_av_modified = 23385331Samw ((pzp->zp_flags & ZFS_AV_MODIFIED) != 0); 23395331Samw XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 23405331Samw } 23415331Samw 23425331Samw if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && 23435331Samw vp->v_type == VREG && 23445331Samw (pzp->zp_flags & ZFS_BONUS_SCANSTAMP)) { 23455331Samw size_t len; 23465331Samw dmu_object_info_t doi; 23475331Samw 23485331Samw /* 23495331Samw * Only VREG files have anti-virus scanstamps, so we 23505331Samw * won't conflict with symlinks in the bonus buffer. 23515331Samw */ 23525331Samw dmu_object_info_from_db(zp->z_dbuf, &doi); 23535331Samw len = sizeof (xoap->xoa_av_scanstamp) + 23545331Samw sizeof (znode_phys_t); 23555331Samw if (len <= doi.doi_bonus_size) { 23565331Samw /* 23575331Samw * pzp points to the start of the 23585331Samw * znode_phys_t. pzp + 1 points to the 23595331Samw * first byte after the znode_phys_t. 23605331Samw */ 23615331Samw (void) memcpy(xoap->xoa_av_scanstamp, 23625331Samw pzp + 1, 23635331Samw sizeof (xoap->xoa_av_scanstamp)); 23645331Samw XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP); 23655331Samw } 23665331Samw } 23675331Samw 23685331Samw if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { 23695331Samw ZFS_TIME_DECODE(&xoap->xoa_createtime, pzp->zp_crtime); 23705331Samw XVA_SET_RTN(xvap, XAT_CREATETIME); 23715331Samw } 237210793Sdai.ngo@sun.com 237310793Sdai.ngo@sun.com if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 237410793Sdai.ngo@sun.com xoap->xoa_reparse = 237510793Sdai.ngo@sun.com ((pzp->zp_flags & ZFS_REPARSE) != 0); 237610793Sdai.ngo@sun.com XVA_SET_RTN(xvap, XAT_REPARSE); 237710793Sdai.ngo@sun.com } 23785331Samw } 23795331Samw 2380789Sahrens ZFS_TIME_DECODE(&vap->va_atime, pzp->zp_atime); 2381789Sahrens ZFS_TIME_DECODE(&vap->va_mtime, pzp->zp_mtime); 2382789Sahrens ZFS_TIME_DECODE(&vap->va_ctime, pzp->zp_ctime); 2383789Sahrens 2384789Sahrens mutex_exit(&zp->z_lock); 2385789Sahrens 2386789Sahrens dmu_object_size_from_db(zp->z_dbuf, &vap->va_blksize, &vap->va_nblocks); 2387789Sahrens 2388789Sahrens if (zp->z_blksz == 0) { 2389789Sahrens /* 2390789Sahrens * Block size hasn't been set; suggest maximal I/O transfers. 2391789Sahrens */ 2392789Sahrens vap->va_blksize = zfsvfs->z_max_blksz; 2393789Sahrens } 2394789Sahrens 2395789Sahrens ZFS_EXIT(zfsvfs); 2396789Sahrens return (0); 2397789Sahrens } 2398789Sahrens 2399789Sahrens /* 2400789Sahrens * Set the file attributes to the values contained in the 2401789Sahrens * vattr structure. 2402789Sahrens * 2403789Sahrens * IN: vp - vnode of file to be modified. 2404789Sahrens * vap - new attribute values. 24055331Samw * If AT_XVATTR set, then optional attrs are being set 2406789Sahrens * flags - ATTR_UTIME set if non-default time values provided. 24075331Samw * - ATTR_NOACLCHECK (CIFS context only). 2408789Sahrens * cr - credentials of caller. 24095331Samw * ct - caller context 2410789Sahrens * 2411789Sahrens * RETURN: 0 if success 2412789Sahrens * error code if failure 2413789Sahrens * 2414789Sahrens * Timestamps: 2415789Sahrens * vp - ctime updated, mtime updated if size changed. 2416789Sahrens */ 2417789Sahrens /* ARGSUSED */ 2418789Sahrens static int 2419789Sahrens zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2420789Sahrens caller_context_t *ct) 2421789Sahrens { 24225326Sek110237 znode_t *zp = VTOZ(vp); 24235326Sek110237 znode_phys_t *pzp; 2424789Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 24255326Sek110237 zilog_t *zilog; 2426789Sahrens dmu_tx_t *tx; 24271878Smaybee vattr_t oldva; 24288190SMark.Shellenbaum@Sun.COM xvattr_t tmpxvattr; 2429789Sahrens uint_t mask = vap->va_mask; 24301878Smaybee uint_t saved_mask; 24312796Smarks int trim_mask = 0; 2432789Sahrens uint64_t new_mode; 24339179SMark.Shellenbaum@Sun.COM uint64_t new_uid, new_gid; 24341231Smarks znode_t *attrzp; 2435789Sahrens int need_policy = FALSE; 2436789Sahrens int err; 24375331Samw zfs_fuid_info_t *fuidp = NULL; 24385331Samw xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 24395331Samw xoptattr_t *xoap; 24405824Smarks zfs_acl_t *aclp = NULL; 24415331Samw boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 24429179SMark.Shellenbaum@Sun.COM boolean_t fuid_dirtied = B_FALSE; 2443789Sahrens 2444789Sahrens if (mask == 0) 2445789Sahrens return (0); 2446789Sahrens 2447789Sahrens if (mask & AT_NOSET) 2448789Sahrens return (EINVAL); 2449789Sahrens 24505367Sahrens ZFS_ENTER(zfsvfs); 24515367Sahrens ZFS_VERIFY_ZP(zp); 24525331Samw 24535331Samw pzp = zp->z_phys; 24545331Samw zilog = zfsvfs->z_log; 24555331Samw 24565331Samw /* 24575331Samw * Make sure that if we have ephemeral uid/gid or xvattr specified 24585331Samw * that file system is at proper version level 24595331Samw */ 24605331Samw 24615331Samw if (zfsvfs->z_use_fuids == B_FALSE && 24625331Samw (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || 24635331Samw ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || 24645386Stimh (mask & AT_XVATTR))) { 24655386Stimh ZFS_EXIT(zfsvfs); 24665331Samw return (EINVAL); 24675386Stimh } 24685386Stimh 24695386Stimh if (mask & AT_SIZE && vp->v_type == VDIR) { 24705386Stimh ZFS_EXIT(zfsvfs); 2471789Sahrens return (EISDIR); 24725386Stimh } 24735386Stimh 24745386Stimh if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { 24755386Stimh ZFS_EXIT(zfsvfs); 24761308Smarks return (EINVAL); 24775386Stimh } 24781308Smarks 24795331Samw /* 24805331Samw * If this is an xvattr_t, then get a pointer to the structure of 24815331Samw * optional attributes. If this is NULL, then we have a vattr_t. 24825331Samw */ 24835331Samw xoap = xva_getxoptattr(xvap); 24845331Samw 24858190SMark.Shellenbaum@Sun.COM xva_init(&tmpxvattr); 24868190SMark.Shellenbaum@Sun.COM 24875331Samw /* 24885331Samw * Immutable files can only alter immutable bit and atime 24895331Samw */ 24905331Samw if ((pzp->zp_flags & ZFS_IMMUTABLE) && 24915331Samw ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || 24925386Stimh ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { 24935386Stimh ZFS_EXIT(zfsvfs); 24945331Samw return (EPERM); 24955386Stimh } 24965386Stimh 24975386Stimh if ((mask & AT_SIZE) && (pzp->zp_flags & ZFS_READONLY)) { 24985386Stimh ZFS_EXIT(zfsvfs); 24995331Samw return (EPERM); 25005386Stimh } 2501789Sahrens 25026064Smarks /* 25036064Smarks * Verify timestamps doesn't overflow 32 bits. 25046064Smarks * ZFS can handle large timestamps, but 32bit syscalls can't 25056064Smarks * handle times greater than 2039. This check should be removed 25066064Smarks * once large timestamps are fully supported. 25076064Smarks */ 25086064Smarks if (mask & (AT_ATIME | AT_MTIME)) { 25096064Smarks if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || 25106064Smarks ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { 25116064Smarks ZFS_EXIT(zfsvfs); 25126064Smarks return (EOVERFLOW); 25136064Smarks } 25146064Smarks } 25156064Smarks 2516789Sahrens top: 25171231Smarks attrzp = NULL; 2518789Sahrens 25199981STim.Haley@Sun.COM /* Can this be moved to before the top label? */ 2520789Sahrens if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 2521789Sahrens ZFS_EXIT(zfsvfs); 2522789Sahrens return (EROFS); 2523789Sahrens } 2524789Sahrens 2525789Sahrens /* 2526789Sahrens * First validate permissions 2527789Sahrens */ 2528789Sahrens 2529789Sahrens if (mask & AT_SIZE) { 25305331Samw err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, skipaclchk, cr); 2531789Sahrens if (err) { 2532789Sahrens ZFS_EXIT(zfsvfs); 2533789Sahrens return (err); 2534789Sahrens } 25351878Smaybee /* 25361878Smaybee * XXX - Note, we are not providing any open 25371878Smaybee * mode flags here (like FNDELAY), so we may 25381878Smaybee * block if there are locks present... this 25391878Smaybee * should be addressed in openat(). 25401878Smaybee */ 25416992Smaybee /* XXX - would it be OK to generate a log record here? */ 25426992Smaybee err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); 25431878Smaybee if (err) { 25441878Smaybee ZFS_EXIT(zfsvfs); 25451878Smaybee return (err); 25461878Smaybee } 2547789Sahrens } 2548789Sahrens 25495331Samw if (mask & (AT_ATIME|AT_MTIME) || 25505331Samw ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || 25515331Samw XVA_ISSET_REQ(xvap, XAT_READONLY) || 25525331Samw XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || 25535331Samw XVA_ISSET_REQ(xvap, XAT_CREATETIME) || 25545331Samw XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) 25555331Samw need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, 25565331Samw skipaclchk, cr); 2557789Sahrens 2558789Sahrens if (mask & (AT_UID|AT_GID)) { 2559789Sahrens int idmask = (mask & (AT_UID|AT_GID)); 2560789Sahrens int take_owner; 2561789Sahrens int take_group; 2562789Sahrens 2563789Sahrens /* 2564913Smarks * NOTE: even if a new mode is being set, 2565913Smarks * we may clear S_ISUID/S_ISGID bits. 2566913Smarks */ 2567913Smarks 2568913Smarks if (!(mask & AT_MODE)) 2569913Smarks vap->va_mode = pzp->zp_mode; 2570913Smarks 2571913Smarks /* 2572789Sahrens * Take ownership or chgrp to group we are a member of 2573789Sahrens */ 2574789Sahrens 2575789Sahrens take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); 25765331Samw take_group = (mask & AT_GID) && 25775331Samw zfs_groupmember(zfsvfs, vap->va_gid, cr); 2578789Sahrens 2579789Sahrens /* 2580789Sahrens * If both AT_UID and AT_GID are set then take_owner and 2581789Sahrens * take_group must both be set in order to allow taking 2582789Sahrens * ownership. 2583789Sahrens * 2584789Sahrens * Otherwise, send the check through secpolicy_vnode_setattr() 2585789Sahrens * 2586789Sahrens */ 2587789Sahrens 2588789Sahrens if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || 2589789Sahrens ((idmask == AT_UID) && take_owner) || 2590789Sahrens ((idmask == AT_GID) && take_group)) { 25915331Samw if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, 25925331Samw skipaclchk, cr) == 0) { 2593789Sahrens /* 2594789Sahrens * Remove setuid/setgid for non-privileged users 2595789Sahrens */ 25961115Smarks secpolicy_setid_clear(vap, cr); 25972796Smarks trim_mask = (mask & (AT_UID|AT_GID)); 2598789Sahrens } else { 2599789Sahrens need_policy = TRUE; 2600789Sahrens } 2601789Sahrens } else { 2602789Sahrens need_policy = TRUE; 2603789Sahrens } 2604789Sahrens } 2605789Sahrens 26062796Smarks mutex_enter(&zp->z_lock); 26072796Smarks oldva.va_mode = pzp->zp_mode; 26085771Sjp151216 zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); 26095331Samw if (mask & AT_XVATTR) { 26108190SMark.Shellenbaum@Sun.COM /* 26118190SMark.Shellenbaum@Sun.COM * Update xvattr mask to include only those attributes 26128190SMark.Shellenbaum@Sun.COM * that are actually changing. 26138190SMark.Shellenbaum@Sun.COM * 26148190SMark.Shellenbaum@Sun.COM * the bits will be restored prior to actually setting 26158190SMark.Shellenbaum@Sun.COM * the attributes so the caller thinks they were set. 26168190SMark.Shellenbaum@Sun.COM */ 26178190SMark.Shellenbaum@Sun.COM if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 26188190SMark.Shellenbaum@Sun.COM if (xoap->xoa_appendonly != 26198190SMark.Shellenbaum@Sun.COM ((pzp->zp_flags & ZFS_APPENDONLY) != 0)) { 26208190SMark.Shellenbaum@Sun.COM need_policy = TRUE; 26218190SMark.Shellenbaum@Sun.COM } else { 26228190SMark.Shellenbaum@Sun.COM XVA_CLR_REQ(xvap, XAT_APPENDONLY); 26238190SMark.Shellenbaum@Sun.COM XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); 26248190SMark.Shellenbaum@Sun.COM } 26258190SMark.Shellenbaum@Sun.COM } 26268190SMark.Shellenbaum@Sun.COM 26278190SMark.Shellenbaum@Sun.COM if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 26288190SMark.Shellenbaum@Sun.COM if (xoap->xoa_nounlink != 26298190SMark.Shellenbaum@Sun.COM ((pzp->zp_flags & ZFS_NOUNLINK) != 0)) { 26308190SMark.Shellenbaum@Sun.COM need_policy = TRUE; 26318190SMark.Shellenbaum@Sun.COM } else { 26328190SMark.Shellenbaum@Sun.COM XVA_CLR_REQ(xvap, XAT_NOUNLINK); 26338190SMark.Shellenbaum@Sun.COM XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); 26348190SMark.Shellenbaum@Sun.COM } 26358190SMark.Shellenbaum@Sun.COM } 26368190SMark.Shellenbaum@Sun.COM 26378190SMark.Shellenbaum@Sun.COM if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 26388190SMark.Shellenbaum@Sun.COM if (xoap->xoa_immutable != 26398190SMark.Shellenbaum@Sun.COM ((pzp->zp_flags & ZFS_IMMUTABLE) != 0)) { 26408190SMark.Shellenbaum@Sun.COM need_policy = TRUE; 26418190SMark.Shellenbaum@Sun.COM } else { 26428190SMark.Shellenbaum@Sun.COM XVA_CLR_REQ(xvap, XAT_IMMUTABLE); 26438190SMark.Shellenbaum@Sun.COM XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE); 26448190SMark.Shellenbaum@Sun.COM } 26458190SMark.Shellenbaum@Sun.COM } 26468190SMark.Shellenbaum@Sun.COM 26478190SMark.Shellenbaum@Sun.COM if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 26488190SMark.Shellenbaum@Sun.COM if (xoap->xoa_nodump != 26498190SMark.Shellenbaum@Sun.COM ((pzp->zp_flags & ZFS_NODUMP) != 0)) { 26508190SMark.Shellenbaum@Sun.COM need_policy = TRUE; 26518190SMark.Shellenbaum@Sun.COM } else { 26528190SMark.Shellenbaum@Sun.COM XVA_CLR_REQ(xvap, XAT_NODUMP); 26538190SMark.Shellenbaum@Sun.COM XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); 26548190SMark.Shellenbaum@Sun.COM } 26558190SMark.Shellenbaum@Sun.COM } 26568190SMark.Shellenbaum@Sun.COM 26578190SMark.Shellenbaum@Sun.COM if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 26588190SMark.Shellenbaum@Sun.COM if (xoap->xoa_av_modified != 26598190SMark.Shellenbaum@Sun.COM ((pzp->zp_flags & ZFS_AV_MODIFIED) != 0)) { 26608190SMark.Shellenbaum@Sun.COM need_policy = TRUE; 26618190SMark.Shellenbaum@Sun.COM } else { 26628190SMark.Shellenbaum@Sun.COM XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); 26638190SMark.Shellenbaum@Sun.COM XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); 26648190SMark.Shellenbaum@Sun.COM } 26658190SMark.Shellenbaum@Sun.COM } 26668190SMark.Shellenbaum@Sun.COM 26678190SMark.Shellenbaum@Sun.COM if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 26688190SMark.Shellenbaum@Sun.COM if ((vp->v_type != VREG && 26698190SMark.Shellenbaum@Sun.COM xoap->xoa_av_quarantined) || 26708190SMark.Shellenbaum@Sun.COM xoap->xoa_av_quarantined != 26718190SMark.Shellenbaum@Sun.COM ((pzp->zp_flags & ZFS_AV_QUARANTINED) != 0)) { 26728190SMark.Shellenbaum@Sun.COM need_policy = TRUE; 26738190SMark.Shellenbaum@Sun.COM } else { 26748190SMark.Shellenbaum@Sun.COM XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); 26758190SMark.Shellenbaum@Sun.COM XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); 26768190SMark.Shellenbaum@Sun.COM } 26778190SMark.Shellenbaum@Sun.COM } 26788190SMark.Shellenbaum@Sun.COM 267910793Sdai.ngo@sun.com if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 268010793Sdai.ngo@sun.com mutex_exit(&zp->z_lock); 268110793Sdai.ngo@sun.com ZFS_EXIT(zfsvfs); 268210793Sdai.ngo@sun.com return (EPERM); 268310793Sdai.ngo@sun.com } 268410793Sdai.ngo@sun.com 26858190SMark.Shellenbaum@Sun.COM if (need_policy == FALSE && 26868190SMark.Shellenbaum@Sun.COM (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || 26878190SMark.Shellenbaum@Sun.COM XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { 26885331Samw need_policy = TRUE; 26895331Samw } 26905331Samw } 26915331Samw 26922796Smarks mutex_exit(&zp->z_lock); 26932796Smarks 26942796Smarks if (mask & AT_MODE) { 26955331Samw if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { 26962796Smarks err = secpolicy_setid_setsticky_clear(vp, vap, 26972796Smarks &oldva, cr); 26982796Smarks if (err) { 26992796Smarks ZFS_EXIT(zfsvfs); 27002796Smarks return (err); 27012796Smarks } 27022796Smarks trim_mask |= AT_MODE; 27032796Smarks } else { 27042796Smarks need_policy = TRUE; 27052796Smarks } 27062796Smarks } 2707789Sahrens 2708789Sahrens if (need_policy) { 27091115Smarks /* 27101115Smarks * If trim_mask is set then take ownership 27112796Smarks * has been granted or write_acl is present and user 27122796Smarks * has the ability to modify mode. In that case remove 27132796Smarks * UID|GID and or MODE from mask so that 27141115Smarks * secpolicy_vnode_setattr() doesn't revoke it. 27151115Smarks */ 27162796Smarks 27172796Smarks if (trim_mask) { 27182796Smarks saved_mask = vap->va_mask; 27192796Smarks vap->va_mask &= ~trim_mask; 27202796Smarks } 2721789Sahrens err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, 27225331Samw (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); 2723789Sahrens if (err) { 2724789Sahrens ZFS_EXIT(zfsvfs); 2725789Sahrens return (err); 2726789Sahrens } 27271115Smarks 27281115Smarks if (trim_mask) 27292796Smarks vap->va_mask |= saved_mask; 2730789Sahrens } 2731789Sahrens 2732789Sahrens /* 2733789Sahrens * secpolicy_vnode_setattr, or take ownership may have 2734789Sahrens * changed va_mask 2735789Sahrens */ 2736789Sahrens mask = vap->va_mask; 2737789Sahrens 2738789Sahrens tx = dmu_tx_create(zfsvfs->z_os); 2739789Sahrens dmu_tx_hold_bonus(tx, zp->z_id); 2740789Sahrens 2741789Sahrens if (mask & AT_MODE) { 27421576Smarks uint64_t pmode = pzp->zp_mode; 27431576Smarks 27441576Smarks new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); 2745789Sahrens 27469396SMatthew.Ahrens@Sun.COM if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)) 27479396SMatthew.Ahrens@Sun.COM goto out; 27485331Samw if (pzp->zp_acl.z_acl_extern_obj) { 27495331Samw /* Are we upgrading ACL from old V0 format to new V1 */ 27505331Samw if (zfsvfs->z_version <= ZPL_VERSION_FUID && 27515331Samw pzp->zp_acl.z_acl_version == 27525331Samw ZFS_ACL_VERSION_INITIAL) { 27535331Samw dmu_tx_hold_free(tx, 27545331Samw pzp->zp_acl.z_acl_extern_obj, 0, 27555331Samw DMU_OBJECT_END); 27565331Samw dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 27575824Smarks 0, aclp->z_acl_bytes); 27585331Samw } else { 27595331Samw dmu_tx_hold_write(tx, 27605331Samw pzp->zp_acl.z_acl_extern_obj, 0, 27615824Smarks aclp->z_acl_bytes); 27625331Samw } 27636180Smarks } else if (aclp->z_acl_bytes > ZFS_ACE_SPACE) { 27646180Smarks dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 27656180Smarks 0, aclp->z_acl_bytes); 27665331Samw } 2767789Sahrens } 2768789Sahrens 27699179SMark.Shellenbaum@Sun.COM if (mask & (AT_UID | AT_GID)) { 27709179SMark.Shellenbaum@Sun.COM if (pzp->zp_xattr) { 27719179SMark.Shellenbaum@Sun.COM err = zfs_zget(zp->z_zfsvfs, pzp->zp_xattr, &attrzp); 27729396SMatthew.Ahrens@Sun.COM if (err) 27739396SMatthew.Ahrens@Sun.COM goto out; 27749179SMark.Shellenbaum@Sun.COM dmu_tx_hold_bonus(tx, attrzp->z_id); 27759179SMark.Shellenbaum@Sun.COM } 27769179SMark.Shellenbaum@Sun.COM if (mask & AT_UID) { 27779179SMark.Shellenbaum@Sun.COM new_uid = zfs_fuid_create(zfsvfs, 27789179SMark.Shellenbaum@Sun.COM (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); 27799396SMatthew.Ahrens@Sun.COM if (new_uid != pzp->zp_uid && 27809396SMatthew.Ahrens@Sun.COM zfs_usergroup_overquota(zfsvfs, B_FALSE, new_uid)) { 27819396SMatthew.Ahrens@Sun.COM err = EDQUOT; 27829396SMatthew.Ahrens@Sun.COM goto out; 27839396SMatthew.Ahrens@Sun.COM } 27841231Smarks } 27859396SMatthew.Ahrens@Sun.COM 27869179SMark.Shellenbaum@Sun.COM if (mask & AT_GID) { 27879179SMark.Shellenbaum@Sun.COM new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, 27889179SMark.Shellenbaum@Sun.COM cr, ZFS_GROUP, &fuidp); 27899396SMatthew.Ahrens@Sun.COM if (new_gid != pzp->zp_gid && 27909396SMatthew.Ahrens@Sun.COM zfs_usergroup_overquota(zfsvfs, B_TRUE, new_gid)) { 27919396SMatthew.Ahrens@Sun.COM err = EDQUOT; 27929396SMatthew.Ahrens@Sun.COM goto out; 27939396SMatthew.Ahrens@Sun.COM } 27949179SMark.Shellenbaum@Sun.COM } 27959179SMark.Shellenbaum@Sun.COM fuid_dirtied = zfsvfs->z_fuid_dirty; 27969179SMark.Shellenbaum@Sun.COM if (fuid_dirtied) { 27979179SMark.Shellenbaum@Sun.COM if (zfsvfs->z_fuid_obj == 0) { 27989179SMark.Shellenbaum@Sun.COM dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 27999179SMark.Shellenbaum@Sun.COM dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 28009179SMark.Shellenbaum@Sun.COM FUID_SIZE_ESTIMATE(zfsvfs)); 28019179SMark.Shellenbaum@Sun.COM dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, 28029179SMark.Shellenbaum@Sun.COM FALSE, NULL); 28039179SMark.Shellenbaum@Sun.COM } else { 28049179SMark.Shellenbaum@Sun.COM dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj); 28059179SMark.Shellenbaum@Sun.COM dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0, 28069179SMark.Shellenbaum@Sun.COM FUID_SIZE_ESTIMATE(zfsvfs)); 28079179SMark.Shellenbaum@Sun.COM } 28089179SMark.Shellenbaum@Sun.COM } 28091231Smarks } 28101231Smarks 28118227SNeil.Perrin@Sun.COM err = dmu_tx_assign(tx, TXG_NOWAIT); 2812789Sahrens if (err) { 28139396SMatthew.Ahrens@Sun.COM if (err == ERESTART) 28142113Sahrens dmu_tx_wait(tx); 28159396SMatthew.Ahrens@Sun.COM goto out; 2816789Sahrens } 2817789Sahrens 2818789Sahrens dmu_buf_will_dirty(zp->z_dbuf, tx); 2819789Sahrens 2820789Sahrens /* 2821789Sahrens * Set each attribute requested. 2822789Sahrens * We group settings according to the locks they need to acquire. 2823789Sahrens * 2824789Sahrens * Note: you cannot set ctime directly, although it will be 2825789Sahrens * updated as a side-effect of calling this function. 2826789Sahrens */ 2827789Sahrens 2828789Sahrens mutex_enter(&zp->z_lock); 2829789Sahrens 2830789Sahrens if (mask & AT_MODE) { 28315824Smarks mutex_enter(&zp->z_acl_lock); 28325824Smarks zp->z_phys->zp_mode = new_mode; 28339179SMark.Shellenbaum@Sun.COM err = zfs_aclset_common(zp, aclp, cr, tx); 2834789Sahrens ASSERT3U(err, ==, 0); 283510143STim.Haley@Sun.COM zp->z_acl_cached = aclp; 283610143STim.Haley@Sun.COM aclp = NULL; 28375824Smarks mutex_exit(&zp->z_acl_lock); 2838789Sahrens } 2839789Sahrens 28401231Smarks if (attrzp) 28411231Smarks mutex_enter(&attrzp->z_lock); 28421231Smarks 28431231Smarks if (mask & AT_UID) { 28449179SMark.Shellenbaum@Sun.COM pzp->zp_uid = new_uid; 28459179SMark.Shellenbaum@Sun.COM if (attrzp) 28469179SMark.Shellenbaum@Sun.COM attrzp->z_phys->zp_uid = new_uid; 28471231Smarks } 28481231Smarks 28491231Smarks if (mask & AT_GID) { 28509179SMark.Shellenbaum@Sun.COM pzp->zp_gid = new_gid; 28511231Smarks if (attrzp) 28529179SMark.Shellenbaum@Sun.COM attrzp->z_phys->zp_gid = new_gid; 28531231Smarks } 28541231Smarks 28551231Smarks if (attrzp) 28561231Smarks mutex_exit(&attrzp->z_lock); 2857789Sahrens 2858789Sahrens if (mask & AT_ATIME) 2859789Sahrens ZFS_TIME_ENCODE(&vap->va_atime, pzp->zp_atime); 2860789Sahrens 2861789Sahrens if (mask & AT_MTIME) 2862789Sahrens ZFS_TIME_ENCODE(&vap->va_mtime, pzp->zp_mtime); 2863789Sahrens 28646992Smaybee /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */ 28651878Smaybee if (mask & AT_SIZE) 2866789Sahrens zfs_time_stamper_locked(zp, CONTENT_MODIFIED, tx); 28671878Smaybee else if (mask != 0) 2868789Sahrens zfs_time_stamper_locked(zp, STATE_CHANGED, tx); 28695331Samw /* 28705331Samw * Do this after setting timestamps to prevent timestamp 28715331Samw * update from toggling bit 28725331Samw */ 28735331Samw 28745331Samw if (xoap && (mask & AT_XVATTR)) { 28758190SMark.Shellenbaum@Sun.COM 28768190SMark.Shellenbaum@Sun.COM /* 28778190SMark.Shellenbaum@Sun.COM * restore trimmed off masks 28788190SMark.Shellenbaum@Sun.COM * so that return masks can be set for caller. 28798190SMark.Shellenbaum@Sun.COM */ 28808190SMark.Shellenbaum@Sun.COM 28818190SMark.Shellenbaum@Sun.COM if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { 28828190SMark.Shellenbaum@Sun.COM XVA_SET_REQ(xvap, XAT_APPENDONLY); 28838190SMark.Shellenbaum@Sun.COM } 28848190SMark.Shellenbaum@Sun.COM if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { 28858190SMark.Shellenbaum@Sun.COM XVA_SET_REQ(xvap, XAT_NOUNLINK); 28868190SMark.Shellenbaum@Sun.COM } 28878190SMark.Shellenbaum@Sun.COM if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { 28888190SMark.Shellenbaum@Sun.COM XVA_SET_REQ(xvap, XAT_IMMUTABLE); 28898190SMark.Shellenbaum@Sun.COM } 28908190SMark.Shellenbaum@Sun.COM if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { 28918190SMark.Shellenbaum@Sun.COM XVA_SET_REQ(xvap, XAT_NODUMP); 28928190SMark.Shellenbaum@Sun.COM } 28938190SMark.Shellenbaum@Sun.COM if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { 28948190SMark.Shellenbaum@Sun.COM XVA_SET_REQ(xvap, XAT_AV_MODIFIED); 28958190SMark.Shellenbaum@Sun.COM } 28968190SMark.Shellenbaum@Sun.COM if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { 28978190SMark.Shellenbaum@Sun.COM XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); 28988190SMark.Shellenbaum@Sun.COM } 28998190SMark.Shellenbaum@Sun.COM 29005331Samw if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) { 29015331Samw size_t len; 29025331Samw dmu_object_info_t doi; 29035331Samw 29045331Samw ASSERT(vp->v_type == VREG); 29055331Samw 29065331Samw /* Grow the bonus buffer if necessary. */ 29075331Samw dmu_object_info_from_db(zp->z_dbuf, &doi); 29085331Samw len = sizeof (xoap->xoa_av_scanstamp) + 29095331Samw sizeof (znode_phys_t); 29105331Samw if (len > doi.doi_bonus_size) 29115331Samw VERIFY(dmu_set_bonus(zp->z_dbuf, len, tx) == 0); 29125331Samw } 29135331Samw zfs_xvattr_set(zp, xvap); 29145331Samw } 2915789Sahrens 29169179SMark.Shellenbaum@Sun.COM if (fuid_dirtied) 29179179SMark.Shellenbaum@Sun.COM zfs_fuid_sync(zfsvfs, tx); 29189179SMark.Shellenbaum@Sun.COM 29191878Smaybee if (mask != 0) 29205331Samw zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); 29215331Samw 2922789Sahrens mutex_exit(&zp->z_lock); 2923789Sahrens 29249396SMatthew.Ahrens@Sun.COM out: 29251231Smarks if (attrzp) 29261231Smarks VN_RELE(ZTOV(attrzp)); 29271231Smarks 292810143STim.Haley@Sun.COM if (aclp) 292910143STim.Haley@Sun.COM zfs_acl_free(aclp); 293010143STim.Haley@Sun.COM 29319396SMatthew.Ahrens@Sun.COM if (fuidp) { 29329396SMatthew.Ahrens@Sun.COM zfs_fuid_info_free(fuidp); 29339396SMatthew.Ahrens@Sun.COM fuidp = NULL; 29349396SMatthew.Ahrens@Sun.COM } 29359396SMatthew.Ahrens@Sun.COM 29369396SMatthew.Ahrens@Sun.COM if (err) 29379396SMatthew.Ahrens@Sun.COM dmu_tx_abort(tx); 29389396SMatthew.Ahrens@Sun.COM else 29399396SMatthew.Ahrens@Sun.COM dmu_tx_commit(tx); 29409396SMatthew.Ahrens@Sun.COM 29419396SMatthew.Ahrens@Sun.COM if (err == ERESTART) 29429396SMatthew.Ahrens@Sun.COM goto top; 2943789Sahrens 2944789Sahrens ZFS_EXIT(zfsvfs); 2945789Sahrens return (err); 2946789Sahrens } 2947789Sahrens 29483271Smaybee typedef struct zfs_zlock { 29493271Smaybee krwlock_t *zl_rwlock; /* lock we acquired */ 29503271Smaybee znode_t *zl_znode; /* znode we held */ 29513271Smaybee struct zfs_zlock *zl_next; /* next in list */ 29523271Smaybee } zfs_zlock_t; 29533271Smaybee 29543271Smaybee /* 29553271Smaybee * Drop locks and release vnodes that were held by zfs_rename_lock(). 29563271Smaybee */ 29573271Smaybee static void 29583271Smaybee zfs_rename_unlock(zfs_zlock_t **zlpp) 29593271Smaybee { 29603271Smaybee zfs_zlock_t *zl; 29613271Smaybee 29623271Smaybee while ((zl = *zlpp) != NULL) { 29633271Smaybee if (zl->zl_znode != NULL) 29643271Smaybee VN_RELE(ZTOV(zl->zl_znode)); 29653271Smaybee rw_exit(zl->zl_rwlock); 29663271Smaybee *zlpp = zl->zl_next; 29673271Smaybee kmem_free(zl, sizeof (*zl)); 29683271Smaybee } 29693271Smaybee } 29703271Smaybee 2971789Sahrens /* 2972789Sahrens * Search back through the directory tree, using the ".." entries. 2973789Sahrens * Lock each directory in the chain to prevent concurrent renames. 2974789Sahrens * Fail any attempt to move a directory into one of its own descendants. 2975789Sahrens * XXX - z_parent_lock can overlap with map or grow locks 2976789Sahrens */ 2977789Sahrens static int 2978789Sahrens zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) 2979789Sahrens { 2980789Sahrens zfs_zlock_t *zl; 29813638Sbillm znode_t *zp = tdzp; 2982789Sahrens uint64_t rootid = zp->z_zfsvfs->z_root; 2983789Sahrens uint64_t *oidp = &zp->z_id; 2984789Sahrens krwlock_t *rwlp = &szp->z_parent_lock; 2985789Sahrens krw_t rw = RW_WRITER; 2986789Sahrens 2987789Sahrens /* 2988789Sahrens * First pass write-locks szp and compares to zp->z_id. 2989789Sahrens * Later passes read-lock zp and compare to zp->z_parent. 2990789Sahrens */ 2991789Sahrens do { 29923271Smaybee if (!rw_tryenter(rwlp, rw)) { 29933271Smaybee /* 29943271Smaybee * Another thread is renaming in this path. 29953271Smaybee * Note that if we are a WRITER, we don't have any 29963271Smaybee * parent_locks held yet. 29973271Smaybee */ 29983271Smaybee if (rw == RW_READER && zp->z_id > szp->z_id) { 29993271Smaybee /* 30003271Smaybee * Drop our locks and restart 30013271Smaybee */ 30023271Smaybee zfs_rename_unlock(&zl); 30033271Smaybee *zlpp = NULL; 30043271Smaybee zp = tdzp; 30053271Smaybee oidp = &zp->z_id; 30063271Smaybee rwlp = &szp->z_parent_lock; 30073271Smaybee rw = RW_WRITER; 30083271Smaybee continue; 30093271Smaybee } else { 30103271Smaybee /* 30113271Smaybee * Wait for other thread to drop its locks 30123271Smaybee */ 30133271Smaybee rw_enter(rwlp, rw); 30143271Smaybee } 30153271Smaybee } 30163271Smaybee 3017789Sahrens zl = kmem_alloc(sizeof (*zl), KM_SLEEP); 3018789Sahrens zl->zl_rwlock = rwlp; 3019789Sahrens zl->zl_znode = NULL; 3020789Sahrens zl->zl_next = *zlpp; 3021789Sahrens *zlpp = zl; 3022789Sahrens 3023789Sahrens if (*oidp == szp->z_id) /* We're a descendant of szp */ 3024789Sahrens return (EINVAL); 3025789Sahrens 3026789Sahrens if (*oidp == rootid) /* We've hit the top */ 3027789Sahrens return (0); 3028789Sahrens 3029789Sahrens if (rw == RW_READER) { /* i.e. not the first pass */ 3030789Sahrens int error = zfs_zget(zp->z_zfsvfs, *oidp, &zp); 3031789Sahrens if (error) 3032789Sahrens return (error); 3033789Sahrens zl->zl_znode = zp; 3034789Sahrens } 3035789Sahrens oidp = &zp->z_phys->zp_parent; 3036789Sahrens rwlp = &zp->z_parent_lock; 3037789Sahrens rw = RW_READER; 3038789Sahrens 3039789Sahrens } while (zp->z_id != sdzp->z_id); 3040789Sahrens 3041789Sahrens return (0); 3042789Sahrens } 3043789Sahrens 3044789Sahrens /* 3045789Sahrens * Move an entry from the provided source directory to the target 3046789Sahrens * directory. Change the entry name as indicated. 3047789Sahrens * 3048789Sahrens * IN: sdvp - Source directory containing the "old entry". 3049789Sahrens * snm - Old entry name. 3050789Sahrens * tdvp - Target directory to contain the "new entry". 3051789Sahrens * tnm - New entry name. 3052789Sahrens * cr - credentials of caller. 30535331Samw * ct - caller context 30545331Samw * flags - case flags 3055789Sahrens * 3056789Sahrens * RETURN: 0 if success 3057789Sahrens * error code if failure 3058789Sahrens * 3059789Sahrens * Timestamps: 3060789Sahrens * sdvp,tdvp - ctime|mtime updated 3061789Sahrens */ 30625331Samw /*ARGSUSED*/ 3063789Sahrens static int 30645331Samw zfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr, 30655331Samw caller_context_t *ct, int flags) 3066789Sahrens { 3067789Sahrens znode_t *tdzp, *szp, *tzp; 3068789Sahrens znode_t *sdzp = VTOZ(sdvp); 3069789Sahrens zfsvfs_t *zfsvfs = sdzp->z_zfsvfs; 30705326Sek110237 zilog_t *zilog; 3071789Sahrens vnode_t *realvp; 3072789Sahrens zfs_dirlock_t *sdl, *tdl; 3073789Sahrens dmu_tx_t *tx; 3074789Sahrens zfs_zlock_t *zl; 30755331Samw int cmp, serr, terr; 30765331Samw int error = 0; 30775331Samw int zflg = 0; 3078789Sahrens 30795367Sahrens ZFS_ENTER(zfsvfs); 30805367Sahrens ZFS_VERIFY_ZP(sdzp); 30815326Sek110237 zilog = zfsvfs->z_log; 3082789Sahrens 3083789Sahrens /* 3084789Sahrens * Make sure we have the real vp for the target directory. 3085789Sahrens */ 30865331Samw if (VOP_REALVP(tdvp, &realvp, ct) == 0) 3087789Sahrens tdvp = realvp; 3088789Sahrens 3089789Sahrens if (tdvp->v_vfsp != sdvp->v_vfsp) { 3090789Sahrens ZFS_EXIT(zfsvfs); 3091789Sahrens return (EXDEV); 3092789Sahrens } 3093789Sahrens 3094789Sahrens tdzp = VTOZ(tdvp); 30955367Sahrens ZFS_VERIFY_ZP(tdzp); 30965498Stimh if (zfsvfs->z_utf8 && u8_validate(tnm, 30975331Samw strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 30985331Samw ZFS_EXIT(zfsvfs); 30995331Samw return (EILSEQ); 31005331Samw } 31015331Samw 31025331Samw if (flags & FIGNORECASE) 31035331Samw zflg |= ZCILOOK; 31045331Samw 3105789Sahrens top: 3106789Sahrens szp = NULL; 3107789Sahrens tzp = NULL; 3108789Sahrens zl = NULL; 3109789Sahrens 3110789Sahrens /* 3111789Sahrens * This is to prevent the creation of links into attribute space 3112789Sahrens * by renaming a linked file into/outof an attribute directory. 3113789Sahrens * See the comment in zfs_link() for why this is considered bad. 3114789Sahrens */ 3115789Sahrens if ((tdzp->z_phys->zp_flags & ZFS_XATTR) != 3116789Sahrens (sdzp->z_phys->zp_flags & ZFS_XATTR)) { 3117789Sahrens ZFS_EXIT(zfsvfs); 3118789Sahrens return (EINVAL); 3119789Sahrens } 3120789Sahrens 3121789Sahrens /* 3122789Sahrens * Lock source and target directory entries. To prevent deadlock, 3123789Sahrens * a lock ordering must be defined. We lock the directory with 3124789Sahrens * the smallest object id first, or if it's a tie, the one with 3125789Sahrens * the lexically first name. 3126789Sahrens */ 3127789Sahrens if (sdzp->z_id < tdzp->z_id) { 3128789Sahrens cmp = -1; 3129789Sahrens } else if (sdzp->z_id > tdzp->z_id) { 3130789Sahrens cmp = 1; 3131789Sahrens } else { 31325331Samw /* 31335331Samw * First compare the two name arguments without 31345331Samw * considering any case folding. 31355331Samw */ 31365331Samw int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER); 31375331Samw 31385331Samw cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error); 31395498Stimh ASSERT(error == 0 || !zfsvfs->z_utf8); 3140789Sahrens if (cmp == 0) { 3141789Sahrens /* 3142789Sahrens * POSIX: "If the old argument and the new argument 3143789Sahrens * both refer to links to the same existing file, 3144789Sahrens * the rename() function shall return successfully 3145789Sahrens * and perform no other action." 3146789Sahrens */ 3147789Sahrens ZFS_EXIT(zfsvfs); 3148789Sahrens return (0); 3149789Sahrens } 31505331Samw /* 31515331Samw * If the file system is case-folding, then we may 31525331Samw * have some more checking to do. A case-folding file 31535331Samw * system is either supporting mixed case sensitivity 31545331Samw * access or is completely case-insensitive. Note 31555331Samw * that the file system is always case preserving. 31565331Samw * 31575331Samw * In mixed sensitivity mode case sensitive behavior 31585331Samw * is the default. FIGNORECASE must be used to 31595331Samw * explicitly request case insensitive behavior. 31605331Samw * 31615331Samw * If the source and target names provided differ only 31625331Samw * by case (e.g., a request to rename 'tim' to 'Tim'), 31635331Samw * we will treat this as a special case in the 31645331Samw * case-insensitive mode: as long as the source name 31655331Samw * is an exact match, we will allow this to proceed as 31665331Samw * a name-change request. 31675331Samw */ 31685498Stimh if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE || 31695498Stimh (zfsvfs->z_case == ZFS_CASE_MIXED && 31705498Stimh flags & FIGNORECASE)) && 31715331Samw u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST, 31725331Samw &error) == 0) { 31735331Samw /* 31745331Samw * case preserving rename request, require exact 31755331Samw * name matches 31765331Samw */ 31775331Samw zflg |= ZCIEXACT; 31785331Samw zflg &= ~ZCILOOK; 31795331Samw } 3180789Sahrens } 31815331Samw 3182*11321SSanjeev.Bagewadi@Sun.COM /* 3183*11321SSanjeev.Bagewadi@Sun.COM * If the source and destination directories are the same, we should 3184*11321SSanjeev.Bagewadi@Sun.COM * grab the z_name_lock of that directory only once. 3185*11321SSanjeev.Bagewadi@Sun.COM */ 3186*11321SSanjeev.Bagewadi@Sun.COM if (sdzp == tdzp) { 3187*11321SSanjeev.Bagewadi@Sun.COM zflg |= ZHAVELOCK; 3188*11321SSanjeev.Bagewadi@Sun.COM rw_enter(&sdzp->z_name_lock, RW_READER); 3189*11321SSanjeev.Bagewadi@Sun.COM } 3190*11321SSanjeev.Bagewadi@Sun.COM 3191789Sahrens if (cmp < 0) { 31925331Samw serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp, 31935331Samw ZEXISTS | zflg, NULL, NULL); 31945331Samw terr = zfs_dirent_lock(&tdl, 31955331Samw tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL); 3196789Sahrens } else { 31975331Samw terr = zfs_dirent_lock(&tdl, 31985331Samw tdzp, tnm, &tzp, zflg, NULL, NULL); 31995331Samw serr = zfs_dirent_lock(&sdl, 32005331Samw sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg, 32015331Samw NULL, NULL); 3202789Sahrens } 3203789Sahrens 3204789Sahrens if (serr) { 3205789Sahrens /* 3206789Sahrens * Source entry invalid or not there. 3207789Sahrens */ 3208789Sahrens if (!terr) { 3209789Sahrens zfs_dirent_unlock(tdl); 3210789Sahrens if (tzp) 3211789Sahrens VN_RELE(ZTOV(tzp)); 3212789Sahrens } 3213*11321SSanjeev.Bagewadi@Sun.COM 3214*11321SSanjeev.Bagewadi@Sun.COM if (sdzp == tdzp) 3215*11321SSanjeev.Bagewadi@Sun.COM rw_exit(&sdzp->z_name_lock); 3216*11321SSanjeev.Bagewadi@Sun.COM 3217789Sahrens if (strcmp(snm, "..") == 0) 3218789Sahrens serr = EINVAL; 3219789Sahrens ZFS_EXIT(zfsvfs); 3220789Sahrens return (serr); 3221789Sahrens } 3222789Sahrens if (terr) { 3223789Sahrens zfs_dirent_unlock(sdl); 3224789Sahrens VN_RELE(ZTOV(szp)); 3225*11321SSanjeev.Bagewadi@Sun.COM 3226*11321SSanjeev.Bagewadi@Sun.COM if (sdzp == tdzp) 3227*11321SSanjeev.Bagewadi@Sun.COM rw_exit(&sdzp->z_name_lock); 3228*11321SSanjeev.Bagewadi@Sun.COM 3229789Sahrens if (strcmp(tnm, "..") == 0) 3230789Sahrens terr = EINVAL; 3231789Sahrens ZFS_EXIT(zfsvfs); 3232789Sahrens return (terr); 3233789Sahrens } 3234789Sahrens 3235789Sahrens /* 3236789Sahrens * Must have write access at the source to remove the old entry 3237789Sahrens * and write access at the target to create the new entry. 3238789Sahrens * Note that if target and source are the same, this can be 3239789Sahrens * done in a single check. 3240789Sahrens */ 3241789Sahrens 3242789Sahrens if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)) 3243789Sahrens goto out; 3244789Sahrens 3245789Sahrens if (ZTOV(szp)->v_type == VDIR) { 3246789Sahrens /* 3247789Sahrens * Check to make sure rename is valid. 3248789Sahrens * Can't do a move like this: /usr/a/b to /usr/a/b/c/d 3249789Sahrens */ 3250789Sahrens if (error = zfs_rename_lock(szp, tdzp, sdzp, &zl)) 3251789Sahrens goto out; 3252789Sahrens } 3253789Sahrens 3254789Sahrens /* 3255789Sahrens * Does target exist? 3256789Sahrens */ 3257789Sahrens if (tzp) { 3258789Sahrens /* 3259789Sahrens * Source and target must be the same type. 3260789Sahrens */ 3261789Sahrens if (ZTOV(szp)->v_type == VDIR) { 3262789Sahrens if (ZTOV(tzp)->v_type != VDIR) { 3263789Sahrens error = ENOTDIR; 3264789Sahrens goto out; 3265789Sahrens } 3266789Sahrens } else { 3267789Sahrens if (ZTOV(tzp)->v_type == VDIR) { 3268789Sahrens error = EISDIR; 3269789Sahrens goto out; 3270789Sahrens } 3271789Sahrens } 3272789Sahrens /* 3273789Sahrens * POSIX dictates that when the source and target 3274789Sahrens * entries refer to the same file object, rename 3275789Sahrens * must do nothing and exit without error. 3276789Sahrens */ 3277789Sahrens if (szp->z_id == tzp->z_id) { 3278789Sahrens error = 0; 3279789Sahrens goto out; 3280789Sahrens } 3281789Sahrens } 3282789Sahrens 32835331Samw vnevent_rename_src(ZTOV(szp), sdvp, snm, ct); 3284789Sahrens if (tzp) 32855331Samw vnevent_rename_dest(ZTOV(tzp), tdvp, tnm, ct); 32864863Spraks 32874863Spraks /* 32884863Spraks * notify the target directory if it is not the same 32894863Spraks * as source directory. 32904863Spraks */ 32914863Spraks if (tdvp != sdvp) { 32925331Samw vnevent_rename_dest_dir(tdvp, ct); 32934863Spraks } 3294789Sahrens 3295789Sahrens tx = dmu_tx_create(zfsvfs->z_os); 3296789Sahrens dmu_tx_hold_bonus(tx, szp->z_id); /* nlink changes */ 3297789Sahrens dmu_tx_hold_bonus(tx, sdzp->z_id); /* nlink changes */ 32981544Seschrock dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); 32991544Seschrock dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); 33001544Seschrock if (sdzp != tdzp) 3301789Sahrens dmu_tx_hold_bonus(tx, tdzp->z_id); /* nlink changes */ 33021544Seschrock if (tzp) 33031544Seschrock dmu_tx_hold_bonus(tx, tzp->z_id); /* parent changes */ 33043461Sahrens dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 33058227SNeil.Perrin@Sun.COM error = dmu_tx_assign(tx, TXG_NOWAIT); 3306789Sahrens if (error) { 3307789Sahrens if (zl != NULL) 3308789Sahrens zfs_rename_unlock(&zl); 3309789Sahrens zfs_dirent_unlock(sdl); 3310789Sahrens zfs_dirent_unlock(tdl); 3311*11321SSanjeev.Bagewadi@Sun.COM 3312*11321SSanjeev.Bagewadi@Sun.COM if (sdzp == tdzp) 3313*11321SSanjeev.Bagewadi@Sun.COM rw_exit(&sdzp->z_name_lock); 3314*11321SSanjeev.Bagewadi@Sun.COM 3315789Sahrens VN_RELE(ZTOV(szp)); 3316789Sahrens if (tzp) 3317789Sahrens VN_RELE(ZTOV(tzp)); 33188227SNeil.Perrin@Sun.COM if (error == ERESTART) { 33192113Sahrens dmu_tx_wait(tx); 33202113Sahrens dmu_tx_abort(tx); 3321789Sahrens goto top; 3322789Sahrens } 33232113Sahrens dmu_tx_abort(tx); 3324789Sahrens ZFS_EXIT(zfsvfs); 3325789Sahrens return (error); 3326789Sahrens } 3327789Sahrens 3328789Sahrens if (tzp) /* Attempt to remove the existing target */ 33295331Samw error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL); 3330789Sahrens 3331789Sahrens if (error == 0) { 3332789Sahrens error = zfs_link_create(tdl, szp, tx, ZRENAMING); 3333789Sahrens if (error == 0) { 33345331Samw szp->z_phys->zp_flags |= ZFS_AV_MODIFIED; 33355331Samw 3336789Sahrens error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL); 3337789Sahrens ASSERT(error == 0); 33385331Samw 33395331Samw zfs_log_rename(zilog, tx, 33405331Samw TX_RENAME | (flags & FIGNORECASE ? TX_CI : 0), 33415331Samw sdzp, sdl->dl_name, tdzp, tdl->dl_name, szp); 33426976Seschrock 33436976Seschrock /* Update path information for the target vnode */ 33446976Seschrock vn_renamepath(tdvp, ZTOV(szp), tnm, strlen(tnm)); 3345789Sahrens } 3346789Sahrens } 3347789Sahrens 3348789Sahrens dmu_tx_commit(tx); 3349789Sahrens out: 3350789Sahrens if (zl != NULL) 3351789Sahrens zfs_rename_unlock(&zl); 3352789Sahrens 3353789Sahrens zfs_dirent_unlock(sdl); 3354789Sahrens zfs_dirent_unlock(tdl); 3355789Sahrens 3356*11321SSanjeev.Bagewadi@Sun.COM if (sdzp == tdzp) 3357*11321SSanjeev.Bagewadi@Sun.COM rw_exit(&sdzp->z_name_lock); 3358*11321SSanjeev.Bagewadi@Sun.COM 3359*11321SSanjeev.Bagewadi@Sun.COM 3360789Sahrens VN_RELE(ZTOV(szp)); 3361789Sahrens if (tzp) 3362789Sahrens VN_RELE(ZTOV(tzp)); 3363789Sahrens 3364789Sahrens ZFS_EXIT(zfsvfs); 3365789Sahrens return (error); 3366789Sahrens } 3367789Sahrens 3368789Sahrens /* 3369789Sahrens * Insert the indicated symbolic reference entry into the directory. 3370789Sahrens * 3371789Sahrens * IN: dvp - Directory to contain new symbolic link. 3372789Sahrens * link - Name for new symlink entry. 3373789Sahrens * vap - Attributes of new entry. 3374789Sahrens * target - Target path of new symlink. 3375789Sahrens * cr - credentials of caller. 33765331Samw * ct - caller context 33775331Samw * flags - case flags 3378789Sahrens * 3379789Sahrens * RETURN: 0 if success 3380789Sahrens * error code if failure 3381789Sahrens * 3382789Sahrens * Timestamps: 3383789Sahrens * dvp - ctime|mtime updated 3384789Sahrens */ 33855331Samw /*ARGSUSED*/ 3386789Sahrens static int 33875331Samw zfs_symlink(vnode_t *dvp, char *name, vattr_t *vap, char *link, cred_t *cr, 33885331Samw caller_context_t *ct, int flags) 3389789Sahrens { 3390789Sahrens znode_t *zp, *dzp = VTOZ(dvp); 3391789Sahrens zfs_dirlock_t *dl; 3392789Sahrens dmu_tx_t *tx; 3393789Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 33945326Sek110237 zilog_t *zilog; 3395789Sahrens int len = strlen(link); 3396789Sahrens int error; 33975331Samw int zflg = ZNEW; 33989179SMark.Shellenbaum@Sun.COM zfs_acl_ids_t acl_ids; 33999179SMark.Shellenbaum@Sun.COM boolean_t fuid_dirtied; 3400789Sahrens 3401789Sahrens ASSERT(vap->va_type == VLNK); 3402789Sahrens 34035367Sahrens ZFS_ENTER(zfsvfs); 34045367Sahrens ZFS_VERIFY_ZP(dzp); 34055326Sek110237 zilog = zfsvfs->z_log; 34065331Samw 34075498Stimh if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 34085331Samw NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 34095331Samw ZFS_EXIT(zfsvfs); 34105331Samw return (EILSEQ); 34115331Samw } 34125331Samw if (flags & FIGNORECASE) 34135331Samw zflg |= ZCILOOK; 3414789Sahrens top: 34155331Samw if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 3416789Sahrens ZFS_EXIT(zfsvfs); 3417789Sahrens return (error); 3418789Sahrens } 3419789Sahrens 3420789Sahrens if (len > MAXPATHLEN) { 3421789Sahrens ZFS_EXIT(zfsvfs); 3422789Sahrens return (ENAMETOOLONG); 3423789Sahrens } 3424789Sahrens 3425789Sahrens /* 3426789Sahrens * Attempt to lock directory; fail if entry already exists. 3427789Sahrens */ 34285331Samw error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL); 34295331Samw if (error) { 3430789Sahrens ZFS_EXIT(zfsvfs); 3431789Sahrens return (error); 3432789Sahrens } 3433789Sahrens 34349179SMark.Shellenbaum@Sun.COM VERIFY(0 == zfs_acl_ids_create(dzp, 0, vap, cr, NULL, &acl_ids)); 34359396SMatthew.Ahrens@Sun.COM if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 34369396SMatthew.Ahrens@Sun.COM zfs_acl_ids_free(&acl_ids); 34379396SMatthew.Ahrens@Sun.COM zfs_dirent_unlock(dl); 34389396SMatthew.Ahrens@Sun.COM ZFS_EXIT(zfsvfs); 34399396SMatthew.Ahrens@Sun.COM return (EDQUOT); 34409396SMatthew.Ahrens@Sun.COM } 3441789Sahrens tx = dmu_tx_create(zfsvfs->z_os); 34429179SMark.Shellenbaum@Sun.COM fuid_dirtied = zfsvfs->z_fuid_dirty; 3443789Sahrens dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); 3444789Sahrens dmu_tx_hold_bonus(tx, dzp->z_id); 34451544Seschrock dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 34469179SMark.Shellenbaum@Sun.COM if (acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) 3447789Sahrens dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, SPA_MAXBLOCKSIZE); 34489396SMatthew.Ahrens@Sun.COM if (fuid_dirtied) 34499396SMatthew.Ahrens@Sun.COM zfs_fuid_txhold(zfsvfs, tx); 34508227SNeil.Perrin@Sun.COM error = dmu_tx_assign(tx, TXG_NOWAIT); 3451789Sahrens if (error) { 34529179SMark.Shellenbaum@Sun.COM zfs_acl_ids_free(&acl_ids); 3453789Sahrens zfs_dirent_unlock(dl); 34548227SNeil.Perrin@Sun.COM if (error == ERESTART) { 34552113Sahrens dmu_tx_wait(tx); 34562113Sahrens dmu_tx_abort(tx); 3457789Sahrens goto top; 3458789Sahrens } 34592113Sahrens dmu_tx_abort(tx); 3460789Sahrens ZFS_EXIT(zfsvfs); 3461789Sahrens return (error); 3462789Sahrens } 3463789Sahrens 3464789Sahrens dmu_buf_will_dirty(dzp->z_dbuf, tx); 3465789Sahrens 3466789Sahrens /* 3467789Sahrens * Create a new object for the symlink. 3468789Sahrens * Put the link content into bonus buffer if it will fit; 3469789Sahrens * otherwise, store it just like any other file data. 3470789Sahrens */ 3471789Sahrens if (sizeof (znode_phys_t) + len <= dmu_bonus_max()) { 34729179SMark.Shellenbaum@Sun.COM zfs_mknode(dzp, vap, tx, cr, 0, &zp, len, &acl_ids); 3473789Sahrens if (len != 0) 3474789Sahrens bcopy(link, zp->z_phys + 1, len); 3475789Sahrens } else { 3476789Sahrens dmu_buf_t *dbp; 34771669Sperrin 34789179SMark.Shellenbaum@Sun.COM zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, &acl_ids); 34799179SMark.Shellenbaum@Sun.COM 34809179SMark.Shellenbaum@Sun.COM if (fuid_dirtied) 34819179SMark.Shellenbaum@Sun.COM zfs_fuid_sync(zfsvfs, tx); 34821669Sperrin /* 34831669Sperrin * Nothing can access the znode yet so no locking needed 34841669Sperrin * for growing the znode's blocksize. 34851669Sperrin */ 34861669Sperrin zfs_grow_blocksize(zp, len, tx); 3487789Sahrens 34885446Sahrens VERIFY(0 == dmu_buf_hold(zfsvfs->z_os, 34895446Sahrens zp->z_id, 0, FTAG, &dbp)); 3490789Sahrens dmu_buf_will_dirty(dbp, tx); 3491789Sahrens 3492789Sahrens ASSERT3U(len, <=, dbp->db_size); 3493789Sahrens bcopy(link, dbp->db_data, len); 34941544Seschrock dmu_buf_rele(dbp, FTAG); 3495789Sahrens } 3496789Sahrens zp->z_phys->zp_size = len; 3497789Sahrens 3498789Sahrens /* 3499789Sahrens * Insert the new object into the directory. 3500789Sahrens */ 3501789Sahrens (void) zfs_link_create(dl, zp, tx, ZNEW); 35025331Samw if (error == 0) { 35035331Samw uint64_t txtype = TX_SYMLINK; 35045331Samw if (flags & FIGNORECASE) 35055331Samw txtype |= TX_CI; 35065331Samw zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); 35075331Samw } 35089179SMark.Shellenbaum@Sun.COM 35099179SMark.Shellenbaum@Sun.COM zfs_acl_ids_free(&acl_ids); 3510789Sahrens 3511789Sahrens dmu_tx_commit(tx); 3512789Sahrens 3513789Sahrens zfs_dirent_unlock(dl); 3514789Sahrens 3515789Sahrens VN_RELE(ZTOV(zp)); 3516789Sahrens 3517789Sahrens ZFS_EXIT(zfsvfs); 3518789Sahrens return (error); 3519789Sahrens } 3520789Sahrens 3521789Sahrens /* 3522789Sahrens * Return, in the buffer contained in the provided uio structure, 3523789Sahrens * the symbolic path referred to by vp. 3524789Sahrens * 3525789Sahrens * IN: vp - vnode of symbolic link. 3526789Sahrens * uoip - structure to contain the link path. 3527789Sahrens * cr - credentials of caller. 35285331Samw * ct - caller context 3529789Sahrens * 3530789Sahrens * OUT: uio - structure to contain the link path. 3531789Sahrens * 3532789Sahrens * RETURN: 0 if success 3533789Sahrens * error code if failure 3534789Sahrens * 3535789Sahrens * Timestamps: 3536789Sahrens * vp - atime updated 3537789Sahrens */ 3538789Sahrens /* ARGSUSED */ 3539789Sahrens static int 35405331Samw zfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) 3541789Sahrens { 3542789Sahrens znode_t *zp = VTOZ(vp); 3543789Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3544789Sahrens size_t bufsz; 3545789Sahrens int error; 3546789Sahrens 35475367Sahrens ZFS_ENTER(zfsvfs); 35485367Sahrens ZFS_VERIFY_ZP(zp); 3549789Sahrens 3550789Sahrens bufsz = (size_t)zp->z_phys->zp_size; 3551789Sahrens if (bufsz + sizeof (znode_phys_t) <= zp->z_dbuf->db_size) { 3552789Sahrens error = uiomove(zp->z_phys + 1, 3553789Sahrens MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio); 3554789Sahrens } else { 35551544Seschrock dmu_buf_t *dbp; 35561544Seschrock error = dmu_buf_hold(zfsvfs->z_os, zp->z_id, 0, FTAG, &dbp); 35571544Seschrock if (error) { 3558789Sahrens ZFS_EXIT(zfsvfs); 3559789Sahrens return (error); 3560789Sahrens } 3561789Sahrens error = uiomove(dbp->db_data, 3562789Sahrens MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio); 35631544Seschrock dmu_buf_rele(dbp, FTAG); 3564789Sahrens } 3565789Sahrens 3566789Sahrens ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 3567789Sahrens ZFS_EXIT(zfsvfs); 3568789Sahrens return (error); 3569789Sahrens } 3570789Sahrens 3571789Sahrens /* 3572789Sahrens * Insert a new entry into directory tdvp referencing svp. 3573789Sahrens * 3574789Sahrens * IN: tdvp - Directory to contain new entry. 3575789Sahrens * svp - vnode of new entry. 3576789Sahrens * name - name of new entry. 3577789Sahrens * cr - credentials of caller. 35785331Samw * ct - caller context 3579789Sahrens * 3580789Sahrens * RETURN: 0 if success 3581789Sahrens * error code if failure 3582789Sahrens * 3583789Sahrens * Timestamps: 3584789Sahrens * tdvp - ctime|mtime updated 3585789Sahrens * svp - ctime updated 3586789Sahrens */ 3587789Sahrens /* ARGSUSED */ 3588789Sahrens static int 35895331Samw zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, 35905331Samw caller_context_t *ct, int flags) 3591789Sahrens { 3592789Sahrens znode_t *dzp = VTOZ(tdvp); 3593789Sahrens znode_t *tzp, *szp; 3594789Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 35955326Sek110237 zilog_t *zilog; 3596789Sahrens zfs_dirlock_t *dl; 3597789Sahrens dmu_tx_t *tx; 3598789Sahrens vnode_t *realvp; 3599789Sahrens int error; 36005331Samw int zf = ZNEW; 36015331Samw uid_t owner; 3602789Sahrens 3603789Sahrens ASSERT(tdvp->v_type == VDIR); 3604789Sahrens 36055367Sahrens ZFS_ENTER(zfsvfs); 36065367Sahrens ZFS_VERIFY_ZP(dzp); 36075326Sek110237 zilog = zfsvfs->z_log; 3608789Sahrens 36095331Samw if (VOP_REALVP(svp, &realvp, ct) == 0) 3610789Sahrens svp = realvp; 3611789Sahrens 3612789Sahrens if (svp->v_vfsp != tdvp->v_vfsp) { 3613789Sahrens ZFS_EXIT(zfsvfs); 3614789Sahrens return (EXDEV); 3615789Sahrens } 36165367Sahrens szp = VTOZ(svp); 36175367Sahrens ZFS_VERIFY_ZP(szp); 3618789Sahrens 36195498Stimh if (zfsvfs->z_utf8 && u8_validate(name, 36205331Samw strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 36215331Samw ZFS_EXIT(zfsvfs); 36225331Samw return (EILSEQ); 36235331Samw } 36245331Samw if (flags & FIGNORECASE) 36255331Samw zf |= ZCILOOK; 36265331Samw 3627789Sahrens top: 3628789Sahrens /* 3629789Sahrens * We do not support links between attributes and non-attributes 3630789Sahrens * because of the potential security risk of creating links 3631789Sahrens * into "normal" file space in order to circumvent restrictions 3632789Sahrens * imposed in attribute space. 3633789Sahrens */ 3634789Sahrens if ((szp->z_phys->zp_flags & ZFS_XATTR) != 3635789Sahrens (dzp->z_phys->zp_flags & ZFS_XATTR)) { 3636789Sahrens ZFS_EXIT(zfsvfs); 3637789Sahrens return (EINVAL); 3638789Sahrens } 3639789Sahrens 3640789Sahrens /* 3641789Sahrens * POSIX dictates that we return EPERM here. 3642789Sahrens * Better choices include ENOTSUP or EISDIR. 3643789Sahrens */ 3644789Sahrens if (svp->v_type == VDIR) { 3645789Sahrens ZFS_EXIT(zfsvfs); 3646789Sahrens return (EPERM); 3647789Sahrens } 3648789Sahrens 36495959Smarks owner = zfs_fuid_map_id(zfsvfs, szp->z_phys->zp_uid, cr, ZFS_OWNER); 36505331Samw if (owner != crgetuid(cr) && 3651789Sahrens secpolicy_basic_link(cr) != 0) { 3652789Sahrens ZFS_EXIT(zfsvfs); 3653789Sahrens return (EPERM); 3654789Sahrens } 3655789Sahrens 36565331Samw if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 3657789Sahrens ZFS_EXIT(zfsvfs); 3658789Sahrens return (error); 3659789Sahrens } 3660789Sahrens 3661789Sahrens /* 3662789Sahrens * Attempt to lock directory; fail if entry already exists. 3663789Sahrens */ 36645331Samw error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL); 36655331Samw if (error) { 3666789Sahrens ZFS_EXIT(zfsvfs); 3667789Sahrens return (error); 3668789Sahrens } 3669789Sahrens 3670789Sahrens tx = dmu_tx_create(zfsvfs->z_os); 3671789Sahrens dmu_tx_hold_bonus(tx, szp->z_id); 36721544Seschrock dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 36738227SNeil.Perrin@Sun.COM error = dmu_tx_assign(tx, TXG_NOWAIT); 3674789Sahrens if (error) { 3675789Sahrens zfs_dirent_unlock(dl); 36768227SNeil.Perrin@Sun.COM if (error == ERESTART) { 36772113Sahrens dmu_tx_wait(tx); 36782113Sahrens dmu_tx_abort(tx); 3679789Sahrens goto top; 3680789Sahrens } 36812113Sahrens dmu_tx_abort(tx); 3682789Sahrens ZFS_EXIT(zfsvfs); 3683789Sahrens return (error); 3684789Sahrens } 3685789Sahrens 3686789Sahrens error = zfs_link_create(dl, szp, tx, 0); 3687789Sahrens 36885331Samw if (error == 0) { 36895331Samw uint64_t txtype = TX_LINK; 36905331Samw if (flags & FIGNORECASE) 36915331Samw txtype |= TX_CI; 36925331Samw zfs_log_link(zilog, tx, txtype, dzp, szp, name); 36935331Samw } 3694789Sahrens 3695789Sahrens dmu_tx_commit(tx); 3696789Sahrens 3697789Sahrens zfs_dirent_unlock(dl); 3698789Sahrens 36994863Spraks if (error == 0) { 37005331Samw vnevent_link(svp, ct); 37014863Spraks } 37024863Spraks 3703789Sahrens ZFS_EXIT(zfsvfs); 3704789Sahrens return (error); 3705789Sahrens } 3706789Sahrens 3707789Sahrens /* 3708789Sahrens * zfs_null_putapage() is used when the file system has been force 3709789Sahrens * unmounted. It just drops the pages. 3710789Sahrens */ 3711789Sahrens /* ARGSUSED */ 3712789Sahrens static int 3713789Sahrens zfs_null_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, 3714789Sahrens size_t *lenp, int flags, cred_t *cr) 3715789Sahrens { 3716789Sahrens pvn_write_done(pp, B_INVAL|B_FORCE|B_ERROR); 3717789Sahrens return (0); 3718789Sahrens } 3719789Sahrens 37202688Smaybee /* 37212688Smaybee * Push a page out to disk, klustering if possible. 37222688Smaybee * 37232688Smaybee * IN: vp - file to push page to. 37242688Smaybee * pp - page to push. 37252688Smaybee * flags - additional flags. 37262688Smaybee * cr - credentials of caller. 37272688Smaybee * 37282688Smaybee * OUT: offp - start of range pushed. 37292688Smaybee * lenp - len of range pushed. 37302688Smaybee * 37312688Smaybee * RETURN: 0 if success 37322688Smaybee * error code if failure 37332688Smaybee * 37342688Smaybee * NOTE: callers must have locked the page to be pushed. On 37352688Smaybee * exit, the page (and all other pages in the kluster) must be 37362688Smaybee * unlocked. 37372688Smaybee */ 3738789Sahrens /* ARGSUSED */ 3739789Sahrens static int 3740789Sahrens zfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, 3741789Sahrens size_t *lenp, int flags, cred_t *cr) 3742789Sahrens { 3743789Sahrens znode_t *zp = VTOZ(vp); 3744789Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3745789Sahrens dmu_tx_t *tx; 37462688Smaybee u_offset_t off, koff; 37472688Smaybee size_t len, klen; 37484709Smaybee uint64_t filesz; 3749789Sahrens int err; 3750789Sahrens 37514709Smaybee filesz = zp->z_phys->zp_size; 37522688Smaybee off = pp->p_offset; 37532688Smaybee len = PAGESIZE; 37542688Smaybee /* 37552688Smaybee * If our blocksize is bigger than the page size, try to kluster 37568227SNeil.Perrin@Sun.COM * multiple pages so that we write a full block (thus avoiding 37572688Smaybee * a read-modify-write). 37582688Smaybee */ 37594709Smaybee if (off < filesz && zp->z_blksz > PAGESIZE) { 37608636SMark.Maybee@Sun.COM klen = P2ROUNDUP((ulong_t)zp->z_blksz, PAGESIZE); 37618636SMark.Maybee@Sun.COM koff = ISP2(klen) ? P2ALIGN(off, (u_offset_t)klen) : 0; 37622688Smaybee ASSERT(koff <= filesz); 37632688Smaybee if (koff + klen > filesz) 37642688Smaybee klen = P2ROUNDUP(filesz - koff, (uint64_t)PAGESIZE); 37652688Smaybee pp = pvn_write_kluster(vp, pp, &off, &len, koff, klen, flags); 37662688Smaybee } 37672688Smaybee ASSERT3U(btop(len), ==, btopr(len)); 37688636SMark.Maybee@Sun.COM 37691819Smaybee /* 37701819Smaybee * Can't push pages past end-of-file. 37711819Smaybee */ 37724709Smaybee if (off >= filesz) { 37734709Smaybee /* ignore all pages */ 37742688Smaybee err = 0; 37752688Smaybee goto out; 37764709Smaybee } else if (off + len > filesz) { 37774709Smaybee int npages = btopr(filesz - off); 37782688Smaybee page_t *trunc; 37792688Smaybee 37802688Smaybee page_list_break(&pp, &trunc, npages); 37814709Smaybee /* ignore pages past end of file */ 37822688Smaybee if (trunc) 37834709Smaybee pvn_write_done(trunc, flags); 37844709Smaybee len = filesz - off; 37851819Smaybee } 37869396SMatthew.Ahrens@Sun.COM 37879396SMatthew.Ahrens@Sun.COM if (zfs_usergroup_overquota(zfsvfs, B_FALSE, zp->z_phys->zp_uid) || 37889396SMatthew.Ahrens@Sun.COM zfs_usergroup_overquota(zfsvfs, B_TRUE, zp->z_phys->zp_gid)) { 37899396SMatthew.Ahrens@Sun.COM err = EDQUOT; 37909396SMatthew.Ahrens@Sun.COM goto out; 37919396SMatthew.Ahrens@Sun.COM } 37928636SMark.Maybee@Sun.COM top: 3793789Sahrens tx = dmu_tx_create(zfsvfs->z_os); 3794789Sahrens dmu_tx_hold_write(tx, zp->z_id, off, len); 3795789Sahrens dmu_tx_hold_bonus(tx, zp->z_id); 37968227SNeil.Perrin@Sun.COM err = dmu_tx_assign(tx, TXG_NOWAIT); 3797789Sahrens if (err != 0) { 37988227SNeil.Perrin@Sun.COM if (err == ERESTART) { 37992113Sahrens dmu_tx_wait(tx); 38002113Sahrens dmu_tx_abort(tx); 3801789Sahrens goto top; 3802789Sahrens } 38032113Sahrens dmu_tx_abort(tx); 3804789Sahrens goto out; 3805789Sahrens } 3806789Sahrens 38072688Smaybee if (zp->z_blksz <= PAGESIZE) { 38087315SJonathan.Adams@Sun.COM caddr_t va = zfs_map_page(pp, S_READ); 38092688Smaybee ASSERT3U(len, <=, PAGESIZE); 38102688Smaybee dmu_write(zfsvfs->z_os, zp->z_id, off, len, va, tx); 38117315SJonathan.Adams@Sun.COM zfs_unmap_page(pp, va); 38122688Smaybee } else { 38132688Smaybee err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, pp, tx); 38142688Smaybee } 38152688Smaybee 38162688Smaybee if (err == 0) { 38172688Smaybee zfs_time_stamper(zp, CONTENT_MODIFIED, tx); 38188636SMark.Maybee@Sun.COM zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0); 38192688Smaybee } 38209951SLin.Ling@Sun.COM dmu_tx_commit(tx); 38212688Smaybee 38222688Smaybee out: 38234709Smaybee pvn_write_done(pp, (err ? B_ERROR : 0) | flags); 3824789Sahrens if (offp) 3825789Sahrens *offp = off; 3826789Sahrens if (lenp) 3827789Sahrens *lenp = len; 3828789Sahrens 3829789Sahrens return (err); 3830789Sahrens } 3831789Sahrens 3832789Sahrens /* 3833789Sahrens * Copy the portion of the file indicated from pages into the file. 3834789Sahrens * The pages are stored in a page list attached to the files vnode. 3835789Sahrens * 3836789Sahrens * IN: vp - vnode of file to push page data to. 3837789Sahrens * off - position in file to put data. 3838789Sahrens * len - amount of data to write. 3839789Sahrens * flags - flags to control the operation. 3840789Sahrens * cr - credentials of caller. 38415331Samw * ct - caller context. 3842789Sahrens * 3843789Sahrens * RETURN: 0 if success 3844789Sahrens * error code if failure 3845789Sahrens * 3846789Sahrens * Timestamps: 3847789Sahrens * vp - ctime|mtime updated 3848789Sahrens */ 38495331Samw /*ARGSUSED*/ 3850789Sahrens static int 38515331Samw zfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr, 38525331Samw caller_context_t *ct) 3853789Sahrens { 3854789Sahrens znode_t *zp = VTOZ(vp); 3855789Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3856789Sahrens page_t *pp; 3857789Sahrens size_t io_len; 3858789Sahrens u_offset_t io_off; 38598636SMark.Maybee@Sun.COM uint_t blksz; 38608636SMark.Maybee@Sun.COM rl_t *rl; 3861789Sahrens int error = 0; 3862789Sahrens 38635367Sahrens ZFS_ENTER(zfsvfs); 38645367Sahrens ZFS_VERIFY_ZP(zp); 3865789Sahrens 38668636SMark.Maybee@Sun.COM /* 38678636SMark.Maybee@Sun.COM * Align this request to the file block size in case we kluster. 38688636SMark.Maybee@Sun.COM * XXX - this can result in pretty aggresive locking, which can 38698636SMark.Maybee@Sun.COM * impact simultanious read/write access. One option might be 38708636SMark.Maybee@Sun.COM * to break up long requests (len == 0) into block-by-block 38718636SMark.Maybee@Sun.COM * operations to get narrower locking. 38728636SMark.Maybee@Sun.COM */ 38738636SMark.Maybee@Sun.COM blksz = zp->z_blksz; 38748636SMark.Maybee@Sun.COM if (ISP2(blksz)) 38758636SMark.Maybee@Sun.COM io_off = P2ALIGN_TYPED(off, blksz, u_offset_t); 38768636SMark.Maybee@Sun.COM else 38778636SMark.Maybee@Sun.COM io_off = 0; 38788636SMark.Maybee@Sun.COM if (len > 0 && ISP2(blksz)) 38799141SMark.Maybee@Sun.COM io_len = P2ROUNDUP_TYPED(len + (off - io_off), blksz, size_t); 38808636SMark.Maybee@Sun.COM else 38818636SMark.Maybee@Sun.COM io_len = 0; 38828636SMark.Maybee@Sun.COM 38838636SMark.Maybee@Sun.COM if (io_len == 0) { 3884789Sahrens /* 38858636SMark.Maybee@Sun.COM * Search the entire vp list for pages >= io_off. 3886789Sahrens */ 38878636SMark.Maybee@Sun.COM rl = zfs_range_lock(zp, io_off, UINT64_MAX, RL_WRITER); 38888636SMark.Maybee@Sun.COM error = pvn_vplist_dirty(vp, io_off, zfs_putapage, flags, cr); 38891472Sperrin goto out; 3890789Sahrens } 38918636SMark.Maybee@Sun.COM rl = zfs_range_lock(zp, io_off, io_len, RL_WRITER); 38928636SMark.Maybee@Sun.COM 38938636SMark.Maybee@Sun.COM if (off > zp->z_phys->zp_size) { 3894789Sahrens /* past end of file */ 38958636SMark.Maybee@Sun.COM zfs_range_unlock(rl); 3896789Sahrens ZFS_EXIT(zfsvfs); 3897789Sahrens return (0); 3898789Sahrens } 3899789Sahrens 39008636SMark.Maybee@Sun.COM len = MIN(io_len, P2ROUNDUP(zp->z_phys->zp_size, PAGESIZE) - io_off); 39018636SMark.Maybee@Sun.COM 39028636SMark.Maybee@Sun.COM for (off = io_off; io_off < off + len; io_off += io_len) { 3903789Sahrens if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) { 39041669Sperrin pp = page_lookup(vp, io_off, 39054339Sperrin (flags & (B_INVAL | B_FREE)) ? SE_EXCL : SE_SHARED); 3906789Sahrens } else { 3907789Sahrens pp = page_lookup_nowait(vp, io_off, 39084339Sperrin (flags & B_FREE) ? SE_EXCL : SE_SHARED); 3909789Sahrens } 3910789Sahrens 3911789Sahrens if (pp != NULL && pvn_getdirty(pp, flags)) { 3912789Sahrens int err; 3913789Sahrens 3914789Sahrens /* 3915789Sahrens * Found a dirty page to push 3916789Sahrens */ 39171669Sperrin err = zfs_putapage(vp, pp, &io_off, &io_len, flags, cr); 39181669Sperrin if (err) 3919789Sahrens error = err; 3920789Sahrens } else { 3921789Sahrens io_len = PAGESIZE; 3922789Sahrens } 3923789Sahrens } 39241472Sperrin out: 39258636SMark.Maybee@Sun.COM zfs_range_unlock(rl); 39262638Sperrin if ((flags & B_ASYNC) == 0) 39272638Sperrin zil_commit(zfsvfs->z_log, UINT64_MAX, zp->z_id); 3928789Sahrens ZFS_EXIT(zfsvfs); 3929789Sahrens return (error); 3930789Sahrens } 3931789Sahrens 39325331Samw /*ARGSUSED*/ 3933789Sahrens void 39345331Samw zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 3935789Sahrens { 3936789Sahrens znode_t *zp = VTOZ(vp); 3937789Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3938789Sahrens int error; 3939789Sahrens 39405326Sek110237 rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 39415446Sahrens if (zp->z_dbuf == NULL) { 39425446Sahrens /* 39435642Smaybee * The fs has been unmounted, or we did a 39445642Smaybee * suspend/resume and this file no longer exists. 39455446Sahrens */ 3946789Sahrens if (vn_has_cached_data(vp)) { 3947789Sahrens (void) pvn_vplist_dirty(vp, 0, zfs_null_putapage, 3948789Sahrens B_INVAL, cr); 3949789Sahrens } 3950789Sahrens 39511544Seschrock mutex_enter(&zp->z_lock); 395210369Schris.kirby@sun.com mutex_enter(&vp->v_lock); 395310369Schris.kirby@sun.com ASSERT(vp->v_count == 1); 395410369Schris.kirby@sun.com vp->v_count = 0; 395510369Schris.kirby@sun.com mutex_exit(&vp->v_lock); 39565446Sahrens mutex_exit(&zp->z_lock); 39575642Smaybee rw_exit(&zfsvfs->z_teardown_inactive_lock); 39585446Sahrens zfs_znode_free(zp); 3959789Sahrens return; 3960789Sahrens } 3961789Sahrens 3962789Sahrens /* 3963789Sahrens * Attempt to push any data in the page cache. If this fails 3964789Sahrens * we will get kicked out later in zfs_zinactive(). 3965789Sahrens */ 39661298Sperrin if (vn_has_cached_data(vp)) { 39671298Sperrin (void) pvn_vplist_dirty(vp, 0, zfs_putapage, B_INVAL|B_ASYNC, 39681298Sperrin cr); 39691298Sperrin } 3970789Sahrens 39713461Sahrens if (zp->z_atime_dirty && zp->z_unlinked == 0) { 3972789Sahrens dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 3973789Sahrens 3974789Sahrens dmu_tx_hold_bonus(tx, zp->z_id); 3975789Sahrens error = dmu_tx_assign(tx, TXG_WAIT); 3976789Sahrens if (error) { 3977789Sahrens dmu_tx_abort(tx); 3978789Sahrens } else { 3979789Sahrens dmu_buf_will_dirty(zp->z_dbuf, tx); 3980789Sahrens mutex_enter(&zp->z_lock); 3981789Sahrens zp->z_atime_dirty = 0; 3982789Sahrens mutex_exit(&zp->z_lock); 3983789Sahrens dmu_tx_commit(tx); 3984789Sahrens } 3985789Sahrens } 3986789Sahrens 3987789Sahrens zfs_zinactive(zp); 39885326Sek110237 rw_exit(&zfsvfs->z_teardown_inactive_lock); 3989789Sahrens } 3990789Sahrens 3991789Sahrens /* 3992789Sahrens * Bounds-check the seek operation. 3993789Sahrens * 3994789Sahrens * IN: vp - vnode seeking within 3995789Sahrens * ooff - old file offset 3996789Sahrens * noffp - pointer to new file offset 39975331Samw * ct - caller context 3998789Sahrens * 3999789Sahrens * RETURN: 0 if success 4000789Sahrens * EINVAL if new offset invalid 4001789Sahrens */ 4002789Sahrens /* ARGSUSED */ 4003789Sahrens static int 40045331Samw zfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, 40055331Samw caller_context_t *ct) 4006789Sahrens { 4007789Sahrens if (vp->v_type == VDIR) 4008789Sahrens return (0); 4009789Sahrens return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); 4010789Sahrens } 4011789Sahrens 4012789Sahrens /* 4013789Sahrens * Pre-filter the generic locking function to trap attempts to place 4014789Sahrens * a mandatory lock on a memory mapped file. 4015789Sahrens */ 4016789Sahrens static int 4017789Sahrens zfs_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset, 40185331Samw flk_callback_t *flk_cbp, cred_t *cr, caller_context_t *ct) 4019789Sahrens { 4020789Sahrens znode_t *zp = VTOZ(vp); 4021789Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4022789Sahrens 40235367Sahrens ZFS_ENTER(zfsvfs); 40245367Sahrens ZFS_VERIFY_ZP(zp); 4025789Sahrens 4026789Sahrens /* 40271544Seschrock * We are following the UFS semantics with respect to mapcnt 40281544Seschrock * here: If we see that the file is mapped already, then we will 40291544Seschrock * return an error, but we don't worry about races between this 40301544Seschrock * function and zfs_map(). 4031789Sahrens */ 40321544Seschrock if (zp->z_mapcnt > 0 && MANDMODE((mode_t)zp->z_phys->zp_mode)) { 4033789Sahrens ZFS_EXIT(zfsvfs); 4034789Sahrens return (EAGAIN); 4035789Sahrens } 4036789Sahrens ZFS_EXIT(zfsvfs); 403710896SMark.Shellenbaum@Sun.COM return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct)); 4038789Sahrens } 4039789Sahrens 4040789Sahrens /* 4041789Sahrens * If we can't find a page in the cache, we will create a new page 4042789Sahrens * and fill it with file data. For efficiency, we may try to fill 40438636SMark.Maybee@Sun.COM * multiple pages at once (klustering) to fill up the supplied page 40449265SMark.Maybee@Sun.COM * list. Note that the pages to be filled are held with an exclusive 40459265SMark.Maybee@Sun.COM * lock to prevent access by other threads while they are being filled. 4046789Sahrens */ 4047789Sahrens static int 4048789Sahrens zfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg, 4049789Sahrens caddr_t addr, page_t *pl[], size_t plsz, enum seg_rw rw) 4050789Sahrens { 4051789Sahrens znode_t *zp = VTOZ(vp); 4052789Sahrens page_t *pp, *cur_pp; 4053789Sahrens objset_t *os = zp->z_zfsvfs->z_os; 4054789Sahrens u_offset_t io_off, total; 4055789Sahrens size_t io_len; 4056789Sahrens int err; 4057789Sahrens 40582688Smaybee if (plsz == PAGESIZE || zp->z_blksz <= PAGESIZE) { 40598636SMark.Maybee@Sun.COM /* 40608636SMark.Maybee@Sun.COM * We only have a single page, don't bother klustering 40618636SMark.Maybee@Sun.COM */ 4062789Sahrens io_off = off; 4063789Sahrens io_len = PAGESIZE; 40649265SMark.Maybee@Sun.COM pp = page_create_va(vp, io_off, io_len, 40659265SMark.Maybee@Sun.COM PG_EXCL | PG_WAIT, seg, addr); 4066789Sahrens } else { 4067789Sahrens /* 40688636SMark.Maybee@Sun.COM * Try to find enough pages to fill the page list 4069789Sahrens */ 4070789Sahrens pp = pvn_read_kluster(vp, off, seg, addr, &io_off, 40718636SMark.Maybee@Sun.COM &io_len, off, plsz, 0); 4072789Sahrens } 4073789Sahrens if (pp == NULL) { 4074789Sahrens /* 40758636SMark.Maybee@Sun.COM * The page already exists, nothing to do here. 4076789Sahrens */ 4077789Sahrens *pl = NULL; 4078789Sahrens return (0); 4079789Sahrens } 4080789Sahrens 4081789Sahrens /* 4082789Sahrens * Fill the pages in the kluster. 4083789Sahrens */ 4084789Sahrens cur_pp = pp; 4085789Sahrens for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) { 40868636SMark.Maybee@Sun.COM caddr_t va; 40878636SMark.Maybee@Sun.COM 40882688Smaybee ASSERT3U(io_off, ==, cur_pp->p_offset); 40897315SJonathan.Adams@Sun.COM va = zfs_map_page(cur_pp, S_WRITE); 40909512SNeil.Perrin@Sun.COM err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va, 40919512SNeil.Perrin@Sun.COM DMU_READ_PREFETCH); 40927315SJonathan.Adams@Sun.COM zfs_unmap_page(cur_pp, va); 4093789Sahrens if (err) { 4094789Sahrens /* On error, toss the entire kluster */ 4095789Sahrens pvn_read_done(pp, B_ERROR); 40967294Sperrin /* convert checksum errors into IO errors */ 40977294Sperrin if (err == ECKSUM) 40987294Sperrin err = EIO; 4099789Sahrens return (err); 4100789Sahrens } 4101789Sahrens cur_pp = cur_pp->p_next; 4102789Sahrens } 41038636SMark.Maybee@Sun.COM 4104789Sahrens /* 41058636SMark.Maybee@Sun.COM * Fill in the page list array from the kluster starting 41068636SMark.Maybee@Sun.COM * from the desired offset `off'. 4107789Sahrens * NOTE: the page list will always be null terminated. 4108789Sahrens */ 4109789Sahrens pvn_plist_init(pp, pl, plsz, off, io_len, rw); 41108636SMark.Maybee@Sun.COM ASSERT(pl == NULL || (*pl)->p_offset == off); 4111789Sahrens 4112789Sahrens return (0); 4113789Sahrens } 4114789Sahrens 4115789Sahrens /* 4116789Sahrens * Return pointers to the pages for the file region [off, off + len] 4117789Sahrens * in the pl array. If plsz is greater than len, this function may 41188636SMark.Maybee@Sun.COM * also return page pointers from after the specified region 41198636SMark.Maybee@Sun.COM * (i.e. the region [off, off + plsz]). These additional pages are 41208636SMark.Maybee@Sun.COM * only returned if they are already in the cache, or were created as 41218636SMark.Maybee@Sun.COM * part of a klustered read. 4122789Sahrens * 4123789Sahrens * IN: vp - vnode of file to get data from. 4124789Sahrens * off - position in file to get data from. 4125789Sahrens * len - amount of data to retrieve. 4126789Sahrens * plsz - length of provided page list. 4127789Sahrens * seg - segment to obtain pages for. 4128789Sahrens * addr - virtual address of fault. 4129789Sahrens * rw - mode of created pages. 4130789Sahrens * cr - credentials of caller. 41315331Samw * ct - caller context. 4132789Sahrens * 4133789Sahrens * OUT: protp - protection mode of created pages. 4134789Sahrens * pl - list of pages created. 4135789Sahrens * 4136789Sahrens * RETURN: 0 if success 4137789Sahrens * error code if failure 4138789Sahrens * 4139789Sahrens * Timestamps: 4140789Sahrens * vp - atime updated 4141789Sahrens */ 4142789Sahrens /* ARGSUSED */ 4143789Sahrens static int 4144789Sahrens zfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp, 4145789Sahrens page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, 41465331Samw enum seg_rw rw, cred_t *cr, caller_context_t *ct) 4147789Sahrens { 4148789Sahrens znode_t *zp = VTOZ(vp); 4149789Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 41508636SMark.Maybee@Sun.COM page_t **pl0 = pl; 41518636SMark.Maybee@Sun.COM int err = 0; 41528636SMark.Maybee@Sun.COM 41538636SMark.Maybee@Sun.COM /* we do our own caching, faultahead is unnecessary */ 41548636SMark.Maybee@Sun.COM if (pl == NULL) 41558636SMark.Maybee@Sun.COM return (0); 41568636SMark.Maybee@Sun.COM else if (len > plsz) 41578636SMark.Maybee@Sun.COM len = plsz; 41588681SMark.Maybee@Sun.COM else 41598681SMark.Maybee@Sun.COM len = P2ROUNDUP(len, PAGESIZE); 41608636SMark.Maybee@Sun.COM ASSERT(plsz >= len); 4161789Sahrens 41625367Sahrens ZFS_ENTER(zfsvfs); 41635367Sahrens ZFS_VERIFY_ZP(zp); 4164789Sahrens 4165789Sahrens if (protp) 4166789Sahrens *protp = PROT_ALL; 4167789Sahrens 4168789Sahrens /* 41699265SMark.Maybee@Sun.COM * Loop through the requested range [off, off + len) looking 4170789Sahrens * for pages. If we don't find a page, we will need to create 4171789Sahrens * a new page and fill it with data from the file. 4172789Sahrens */ 4173789Sahrens while (len > 0) { 41748636SMark.Maybee@Sun.COM if (*pl = page_lookup(vp, off, SE_SHARED)) 41758636SMark.Maybee@Sun.COM *(pl+1) = NULL; 41768636SMark.Maybee@Sun.COM else if (err = zfs_fillpage(vp, off, seg, addr, pl, plsz, rw)) 41778636SMark.Maybee@Sun.COM goto out; 41788636SMark.Maybee@Sun.COM while (*pl) { 41798636SMark.Maybee@Sun.COM ASSERT3U((*pl)->p_offset, ==, off); 4180789Sahrens off += PAGESIZE; 4181789Sahrens addr += PAGESIZE; 41828681SMark.Maybee@Sun.COM if (len > 0) { 41838681SMark.Maybee@Sun.COM ASSERT3U(len, >=, PAGESIZE); 41848636SMark.Maybee@Sun.COM len -= PAGESIZE; 41858681SMark.Maybee@Sun.COM } 41868636SMark.Maybee@Sun.COM ASSERT3U(plsz, >=, PAGESIZE); 4187789Sahrens plsz -= PAGESIZE; 41888636SMark.Maybee@Sun.COM pl++; 4189789Sahrens } 4190789Sahrens } 4191789Sahrens 4192789Sahrens /* 4193789Sahrens * Fill out the page array with any pages already in the cache. 4194789Sahrens */ 41958636SMark.Maybee@Sun.COM while (plsz > 0 && 41968636SMark.Maybee@Sun.COM (*pl++ = page_lookup_nowait(vp, off, SE_SHARED))) { 41978636SMark.Maybee@Sun.COM off += PAGESIZE; 41988636SMark.Maybee@Sun.COM plsz -= PAGESIZE; 4199789Sahrens } 4200789Sahrens out: 42012752Sperrin if (err) { 42022752Sperrin /* 42032752Sperrin * Release any pages we have previously locked. 42042752Sperrin */ 42052752Sperrin while (pl > pl0) 42062752Sperrin page_unlock(*--pl); 42078636SMark.Maybee@Sun.COM } else { 42088636SMark.Maybee@Sun.COM ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 42092752Sperrin } 42102752Sperrin 4211789Sahrens *pl = NULL; 4212789Sahrens 4213789Sahrens ZFS_EXIT(zfsvfs); 4214789Sahrens return (err); 4215789Sahrens } 4216789Sahrens 42171544Seschrock /* 42181544Seschrock * Request a memory map for a section of a file. This code interacts 42191544Seschrock * with common code and the VM system as follows: 42201544Seschrock * 42211544Seschrock * common code calls mmap(), which ends up in smmap_common() 42221544Seschrock * 42231544Seschrock * this calls VOP_MAP(), which takes you into (say) zfs 42241544Seschrock * 42251544Seschrock * zfs_map() calls as_map(), passing segvn_create() as the callback 42261544Seschrock * 42271544Seschrock * segvn_create() creates the new segment and calls VOP_ADDMAP() 42281544Seschrock * 42291544Seschrock * zfs_addmap() updates z_mapcnt 42301544Seschrock */ 42315331Samw /*ARGSUSED*/ 4232789Sahrens static int 4233789Sahrens zfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp, 42345331Samw size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 42355331Samw caller_context_t *ct) 4236789Sahrens { 4237789Sahrens znode_t *zp = VTOZ(vp); 4238789Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4239789Sahrens segvn_crargs_t vn_a; 4240789Sahrens int error; 4241789Sahrens 42425929Smarks ZFS_ENTER(zfsvfs); 42435929Smarks ZFS_VERIFY_ZP(zp); 42445929Smarks 42455331Samw if ((prot & PROT_WRITE) && 42465331Samw (zp->z_phys->zp_flags & (ZFS_IMMUTABLE | ZFS_READONLY | 42475929Smarks ZFS_APPENDONLY))) { 42485929Smarks ZFS_EXIT(zfsvfs); 42495331Samw return (EPERM); 42505929Smarks } 42515929Smarks 42525929Smarks if ((prot & (PROT_READ | PROT_EXEC)) && 42535929Smarks (zp->z_phys->zp_flags & ZFS_AV_QUARANTINED)) { 42545929Smarks ZFS_EXIT(zfsvfs); 42555929Smarks return (EACCES); 42565929Smarks } 4257789Sahrens 4258789Sahrens if (vp->v_flag & VNOMAP) { 4259789Sahrens ZFS_EXIT(zfsvfs); 4260789Sahrens return (ENOSYS); 4261789Sahrens } 4262789Sahrens 4263789Sahrens if (off < 0 || len > MAXOFFSET_T - off) { 4264789Sahrens ZFS_EXIT(zfsvfs); 4265789Sahrens return (ENXIO); 4266789Sahrens } 4267789Sahrens 4268789Sahrens if (vp->v_type != VREG) { 4269789Sahrens ZFS_EXIT(zfsvfs); 4270789Sahrens return (ENODEV); 4271789Sahrens } 4272789Sahrens 4273789Sahrens /* 4274789Sahrens * If file is locked, disallow mapping. 4275789Sahrens */ 42761544Seschrock if (MANDMODE((mode_t)zp->z_phys->zp_mode) && vn_has_flocks(vp)) { 42771544Seschrock ZFS_EXIT(zfsvfs); 42781544Seschrock return (EAGAIN); 4279789Sahrens } 4280789Sahrens 4281789Sahrens as_rangelock(as); 42826036Smec error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags); 42836036Smec if (error != 0) { 42846036Smec as_rangeunlock(as); 42856036Smec ZFS_EXIT(zfsvfs); 42866036Smec return (error); 4287789Sahrens } 4288789Sahrens 4289789Sahrens vn_a.vp = vp; 4290789Sahrens vn_a.offset = (u_offset_t)off; 4291789Sahrens vn_a.type = flags & MAP_TYPE; 4292789Sahrens vn_a.prot = prot; 4293789Sahrens vn_a.maxprot = maxprot; 4294789Sahrens vn_a.cred = cr; 4295789Sahrens vn_a.amp = NULL; 4296789Sahrens vn_a.flags = flags & ~MAP_TYPE; 42971417Skchow vn_a.szc = 0; 42981417Skchow vn_a.lgrp_mem_policy_flags = 0; 4299789Sahrens 4300789Sahrens error = as_map(as, *addrp, len, segvn_create, &vn_a); 4301789Sahrens 4302789Sahrens as_rangeunlock(as); 4303789Sahrens ZFS_EXIT(zfsvfs); 4304789Sahrens return (error); 4305789Sahrens } 4306789Sahrens 4307789Sahrens /* ARGSUSED */ 4308789Sahrens static int 4309789Sahrens zfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 43105331Samw size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 43115331Samw caller_context_t *ct) 4312789Sahrens { 43131544Seschrock uint64_t pages = btopr(len); 43141544Seschrock 43151544Seschrock atomic_add_64(&VTOZ(vp)->z_mapcnt, pages); 4316789Sahrens return (0); 4317789Sahrens } 4318789Sahrens 43191773Seschrock /* 43201773Seschrock * The reason we push dirty pages as part of zfs_delmap() is so that we get a 43211773Seschrock * more accurate mtime for the associated file. Since we don't have a way of 43221773Seschrock * detecting when the data was actually modified, we have to resort to 43231773Seschrock * heuristics. If an explicit msync() is done, then we mark the mtime when the 43241773Seschrock * last page is pushed. The problem occurs when the msync() call is omitted, 43251773Seschrock * which by far the most common case: 43261773Seschrock * 43271773Seschrock * open() 43281773Seschrock * mmap() 43291773Seschrock * <modify memory> 43301773Seschrock * munmap() 43311773Seschrock * close() 43321773Seschrock * <time lapse> 43331773Seschrock * putpage() via fsflush 43341773Seschrock * 43351773Seschrock * If we wait until fsflush to come along, we can have a modification time that 43361773Seschrock * is some arbitrary point in the future. In order to prevent this in the 43371773Seschrock * common case, we flush pages whenever a (MAP_SHARED, PROT_WRITE) mapping is 43381773Seschrock * torn down. 43391773Seschrock */ 4340789Sahrens /* ARGSUSED */ 4341789Sahrens static int 4342789Sahrens zfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 43435331Samw size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr, 43445331Samw caller_context_t *ct) 4345789Sahrens { 43461544Seschrock uint64_t pages = btopr(len); 43471544Seschrock 43481544Seschrock ASSERT3U(VTOZ(vp)->z_mapcnt, >=, pages); 43491544Seschrock atomic_add_64(&VTOZ(vp)->z_mapcnt, -pages); 43501773Seschrock 43511773Seschrock if ((flags & MAP_SHARED) && (prot & PROT_WRITE) && 43521773Seschrock vn_has_cached_data(vp)) 43535331Samw (void) VOP_PUTPAGE(vp, off, len, B_ASYNC, cr, ct); 43541773Seschrock 4355789Sahrens return (0); 4356789Sahrens } 4357789Sahrens 4358789Sahrens /* 4359789Sahrens * Free or allocate space in a file. Currently, this function only 4360789Sahrens * supports the `F_FREESP' command. However, this command is somewhat 4361789Sahrens * misnamed, as its functionality includes the ability to allocate as 4362789Sahrens * well as free space. 4363789Sahrens * 4364789Sahrens * IN: vp - vnode of file to free data in. 4365789Sahrens * cmd - action to take (only F_FREESP supported). 4366789Sahrens * bfp - section of file to free/alloc. 4367789Sahrens * flag - current file open mode flags. 4368789Sahrens * offset - current file offset. 4369789Sahrens * cr - credentials of caller [UNUSED]. 43705331Samw * ct - caller context. 4371789Sahrens * 4372789Sahrens * RETURN: 0 if success 4373789Sahrens * error code if failure 4374789Sahrens * 4375789Sahrens * Timestamps: 4376789Sahrens * vp - ctime|mtime updated 4377789Sahrens */ 4378789Sahrens /* ARGSUSED */ 4379789Sahrens static int 4380789Sahrens zfs_space(vnode_t *vp, int cmd, flock64_t *bfp, int flag, 4381789Sahrens offset_t offset, cred_t *cr, caller_context_t *ct) 4382789Sahrens { 4383789Sahrens znode_t *zp = VTOZ(vp); 4384789Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4385789Sahrens uint64_t off, len; 4386789Sahrens int error; 4387789Sahrens 43885367Sahrens ZFS_ENTER(zfsvfs); 43895367Sahrens ZFS_VERIFY_ZP(zp); 4390789Sahrens 4391789Sahrens if (cmd != F_FREESP) { 4392789Sahrens ZFS_EXIT(zfsvfs); 4393789Sahrens return (EINVAL); 4394789Sahrens } 4395789Sahrens 4396789Sahrens if (error = convoff(vp, bfp, 0, offset)) { 4397789Sahrens ZFS_EXIT(zfsvfs); 4398789Sahrens return (error); 4399789Sahrens } 4400789Sahrens 4401789Sahrens if (bfp->l_len < 0) { 4402789Sahrens ZFS_EXIT(zfsvfs); 4403789Sahrens return (EINVAL); 4404789Sahrens } 4405789Sahrens 4406789Sahrens off = bfp->l_start; 44071669Sperrin len = bfp->l_len; /* 0 means from off to end of file */ 44081878Smaybee 44096992Smaybee error = zfs_freesp(zp, off, len, flag, TRUE); 4410789Sahrens 4411789Sahrens ZFS_EXIT(zfsvfs); 4412789Sahrens return (error); 4413789Sahrens } 4414789Sahrens 44155331Samw /*ARGSUSED*/ 4416789Sahrens static int 44175331Samw zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) 4418789Sahrens { 4419789Sahrens znode_t *zp = VTOZ(vp); 4420789Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 44215326Sek110237 uint32_t gen; 4422789Sahrens uint64_t object = zp->z_id; 4423789Sahrens zfid_short_t *zfid; 4424789Sahrens int size, i; 4425789Sahrens 44265367Sahrens ZFS_ENTER(zfsvfs); 44275367Sahrens ZFS_VERIFY_ZP(zp); 44285326Sek110237 gen = (uint32_t)zp->z_gen; 4429789Sahrens 4430789Sahrens size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN; 4431789Sahrens if (fidp->fid_len < size) { 4432789Sahrens fidp->fid_len = size; 44331512Sek110237 ZFS_EXIT(zfsvfs); 4434789Sahrens return (ENOSPC); 4435789Sahrens } 4436789Sahrens 4437789Sahrens zfid = (zfid_short_t *)fidp; 4438789Sahrens 4439789Sahrens zfid->zf_len = size; 4440789Sahrens 4441789Sahrens for (i = 0; i < sizeof (zfid->zf_object); i++) 4442789Sahrens zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 4443789Sahrens 4444789Sahrens /* Must have a non-zero generation number to distinguish from .zfs */ 4445789Sahrens if (gen == 0) 4446789Sahrens gen = 1; 4447789Sahrens for (i = 0; i < sizeof (zfid->zf_gen); i++) 4448789Sahrens zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 4449789Sahrens 4450789Sahrens if (size == LONG_FID_LEN) { 4451789Sahrens uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 4452789Sahrens zfid_long_t *zlfid; 4453789Sahrens 4454789Sahrens zlfid = (zfid_long_t *)fidp; 4455789Sahrens 4456789Sahrens for (i = 0; i < sizeof (zlfid->zf_setid); i++) 4457789Sahrens zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 4458789Sahrens 4459789Sahrens /* XXX - this should be the generation number for the objset */ 4460789Sahrens for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 4461789Sahrens zlfid->zf_setgen[i] = 0; 4462789Sahrens } 4463789Sahrens 4464789Sahrens ZFS_EXIT(zfsvfs); 4465789Sahrens return (0); 4466789Sahrens } 4467789Sahrens 4468789Sahrens static int 44695331Samw zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 44705331Samw caller_context_t *ct) 4471789Sahrens { 4472789Sahrens znode_t *zp, *xzp; 4473789Sahrens zfsvfs_t *zfsvfs; 4474789Sahrens zfs_dirlock_t *dl; 4475789Sahrens int error; 4476789Sahrens 4477789Sahrens switch (cmd) { 4478789Sahrens case _PC_LINK_MAX: 4479789Sahrens *valp = ULONG_MAX; 4480789Sahrens return (0); 4481789Sahrens 4482789Sahrens case _PC_FILESIZEBITS: 4483789Sahrens *valp = 64; 4484789Sahrens return (0); 4485789Sahrens 4486789Sahrens case _PC_XATTR_EXISTS: 4487789Sahrens zp = VTOZ(vp); 4488789Sahrens zfsvfs = zp->z_zfsvfs; 44895367Sahrens ZFS_ENTER(zfsvfs); 44905367Sahrens ZFS_VERIFY_ZP(zp); 4491789Sahrens *valp = 0; 4492789Sahrens error = zfs_dirent_lock(&dl, zp, "", &xzp, 44935331Samw ZXATTR | ZEXISTS | ZSHARED, NULL, NULL); 4494789Sahrens if (error == 0) { 4495789Sahrens zfs_dirent_unlock(dl); 4496789Sahrens if (!zfs_dirempty(xzp)) 4497789Sahrens *valp = 1; 4498789Sahrens VN_RELE(ZTOV(xzp)); 4499789Sahrens } else if (error == ENOENT) { 4500789Sahrens /* 4501789Sahrens * If there aren't extended attributes, it's the 4502789Sahrens * same as having zero of them. 4503789Sahrens */ 4504789Sahrens error = 0; 4505789Sahrens } 4506789Sahrens ZFS_EXIT(zfsvfs); 4507789Sahrens return (error); 4508789Sahrens 45095331Samw case _PC_SATTR_ENABLED: 45105331Samw case _PC_SATTR_EXISTS: 45117757SJanice.Chang@Sun.COM *valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 45125331Samw (vp->v_type == VREG || vp->v_type == VDIR); 45135331Samw return (0); 45145331Samw 45159749STim.Haley@Sun.COM case _PC_ACCESS_FILTERING: 45169749STim.Haley@Sun.COM *valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) && 45179749STim.Haley@Sun.COM vp->v_type == VDIR; 45189749STim.Haley@Sun.COM return (0); 45199749STim.Haley@Sun.COM 4520789Sahrens case _PC_ACL_ENABLED: 4521789Sahrens *valp = _ACL_ACE_ENABLED; 4522789Sahrens return (0); 4523789Sahrens 4524789Sahrens case _PC_MIN_HOLE_SIZE: 4525789Sahrens *valp = (ulong_t)SPA_MINBLOCKSIZE; 4526789Sahrens return (0); 4527789Sahrens 452810440SRoger.Faulkner@Sun.COM case _PC_TIMESTAMP_RESOLUTION: 452910440SRoger.Faulkner@Sun.COM /* nanosecond timestamp resolution */ 453010440SRoger.Faulkner@Sun.COM *valp = 1L; 453110440SRoger.Faulkner@Sun.COM return (0); 453210440SRoger.Faulkner@Sun.COM 4533789Sahrens default: 45345331Samw return (fs_pathconf(vp, cmd, valp, cr, ct)); 4535789Sahrens } 4536789Sahrens } 4537789Sahrens 4538789Sahrens /*ARGSUSED*/ 4539789Sahrens static int 45405331Samw zfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 45415331Samw caller_context_t *ct) 4542789Sahrens { 4543789Sahrens znode_t *zp = VTOZ(vp); 4544789Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4545789Sahrens int error; 45465331Samw boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 4547789Sahrens 45485367Sahrens ZFS_ENTER(zfsvfs); 45495367Sahrens ZFS_VERIFY_ZP(zp); 45505331Samw error = zfs_getacl(zp, vsecp, skipaclchk, cr); 4551789Sahrens ZFS_EXIT(zfsvfs); 4552789Sahrens 4553789Sahrens return (error); 4554789Sahrens } 4555789Sahrens 4556789Sahrens /*ARGSUSED*/ 4557789Sahrens static int 45585331Samw zfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 45595331Samw caller_context_t *ct) 4560789Sahrens { 4561789Sahrens znode_t *zp = VTOZ(vp); 4562789Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4563789Sahrens int error; 45645331Samw boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 4565789Sahrens 45665367Sahrens ZFS_ENTER(zfsvfs); 45675367Sahrens ZFS_VERIFY_ZP(zp); 45685331Samw error = zfs_setacl(zp, vsecp, skipaclchk, cr); 4569789Sahrens ZFS_EXIT(zfsvfs); 4570789Sahrens return (error); 4571789Sahrens } 4572789Sahrens 4573789Sahrens /* 4574789Sahrens * Predeclare these here so that the compiler assumes that 4575789Sahrens * this is an "old style" function declaration that does 4576789Sahrens * not include arguments => we won't get type mismatch errors 4577789Sahrens * in the initializations that follow. 4578789Sahrens */ 4579789Sahrens static int zfs_inval(); 4580789Sahrens static int zfs_isdir(); 4581789Sahrens 4582789Sahrens static int 4583789Sahrens zfs_inval() 4584789Sahrens { 4585789Sahrens return (EINVAL); 4586789Sahrens } 4587789Sahrens 4588789Sahrens static int 4589789Sahrens zfs_isdir() 4590789Sahrens { 4591789Sahrens return (EISDIR); 4592789Sahrens } 4593789Sahrens /* 4594789Sahrens * Directory vnode operations template 4595789Sahrens */ 4596789Sahrens vnodeops_t *zfs_dvnodeops; 4597789Sahrens const fs_operation_def_t zfs_dvnodeops_template[] = { 45983898Srsb VOPNAME_OPEN, { .vop_open = zfs_open }, 45993898Srsb VOPNAME_CLOSE, { .vop_close = zfs_close }, 46003898Srsb VOPNAME_READ, { .error = zfs_isdir }, 46013898Srsb VOPNAME_WRITE, { .error = zfs_isdir }, 46023898Srsb VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 46033898Srsb VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 46043898Srsb VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 46053898Srsb VOPNAME_ACCESS, { .vop_access = zfs_access }, 46063898Srsb VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 46073898Srsb VOPNAME_CREATE, { .vop_create = zfs_create }, 46083898Srsb VOPNAME_REMOVE, { .vop_remove = zfs_remove }, 46093898Srsb VOPNAME_LINK, { .vop_link = zfs_link }, 46103898Srsb VOPNAME_RENAME, { .vop_rename = zfs_rename }, 46113898Srsb VOPNAME_MKDIR, { .vop_mkdir = zfs_mkdir }, 46123898Srsb VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir }, 46133898Srsb VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, 46143898Srsb VOPNAME_SYMLINK, { .vop_symlink = zfs_symlink }, 46153898Srsb VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 46163898Srsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 46173898Srsb VOPNAME_FID, { .vop_fid = zfs_fid }, 46183898Srsb VOPNAME_SEEK, { .vop_seek = zfs_seek }, 46193898Srsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 46203898Srsb VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 46213898Srsb VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 46224863Spraks VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 46233898Srsb NULL, NULL 4624789Sahrens }; 4625789Sahrens 4626789Sahrens /* 4627789Sahrens * Regular file vnode operations template 4628789Sahrens */ 4629789Sahrens vnodeops_t *zfs_fvnodeops; 4630789Sahrens const fs_operation_def_t zfs_fvnodeops_template[] = { 46313898Srsb VOPNAME_OPEN, { .vop_open = zfs_open }, 46323898Srsb VOPNAME_CLOSE, { .vop_close = zfs_close }, 46333898Srsb VOPNAME_READ, { .vop_read = zfs_read }, 46343898Srsb VOPNAME_WRITE, { .vop_write = zfs_write }, 46353898Srsb VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 46363898Srsb VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 46373898Srsb VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 46383898Srsb VOPNAME_ACCESS, { .vop_access = zfs_access }, 46393898Srsb VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 46403898Srsb VOPNAME_RENAME, { .vop_rename = zfs_rename }, 46413898Srsb VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 46423898Srsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 46433898Srsb VOPNAME_FID, { .vop_fid = zfs_fid }, 46443898Srsb VOPNAME_SEEK, { .vop_seek = zfs_seek }, 46453898Srsb VOPNAME_FRLOCK, { .vop_frlock = zfs_frlock }, 46463898Srsb VOPNAME_SPACE, { .vop_space = zfs_space }, 46473898Srsb VOPNAME_GETPAGE, { .vop_getpage = zfs_getpage }, 46483898Srsb VOPNAME_PUTPAGE, { .vop_putpage = zfs_putpage }, 46493898Srsb VOPNAME_MAP, { .vop_map = zfs_map }, 46503898Srsb VOPNAME_ADDMAP, { .vop_addmap = zfs_addmap }, 46513898Srsb VOPNAME_DELMAP, { .vop_delmap = zfs_delmap }, 46523898Srsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 46533898Srsb VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 46543898Srsb VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 46553898Srsb VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 46563898Srsb NULL, NULL 4657789Sahrens }; 4658789Sahrens 4659789Sahrens /* 4660789Sahrens * Symbolic link vnode operations template 4661789Sahrens */ 4662789Sahrens vnodeops_t *zfs_symvnodeops; 4663789Sahrens const fs_operation_def_t zfs_symvnodeops_template[] = { 46643898Srsb VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 46653898Srsb VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 46663898Srsb VOPNAME_ACCESS, { .vop_access = zfs_access }, 46673898Srsb VOPNAME_RENAME, { .vop_rename = zfs_rename }, 46683898Srsb VOPNAME_READLINK, { .vop_readlink = zfs_readlink }, 46693898Srsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 46703898Srsb VOPNAME_FID, { .vop_fid = zfs_fid }, 46713898Srsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 46723898Srsb VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 46733898Srsb NULL, NULL 4674789Sahrens }; 4675789Sahrens 4676789Sahrens /* 46778845Samw@Sun.COM * special share hidden files vnode operations template 46788845Samw@Sun.COM */ 46798845Samw@Sun.COM vnodeops_t *zfs_sharevnodeops; 46808845Samw@Sun.COM const fs_operation_def_t zfs_sharevnodeops_template[] = { 46818845Samw@Sun.COM VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 46828845Samw@Sun.COM VOPNAME_ACCESS, { .vop_access = zfs_access }, 46838845Samw@Sun.COM VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 46848845Samw@Sun.COM VOPNAME_FID, { .vop_fid = zfs_fid }, 46858845Samw@Sun.COM VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 46868845Samw@Sun.COM VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 46878845Samw@Sun.COM VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 46888845Samw@Sun.COM VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 46898845Samw@Sun.COM NULL, NULL 46908845Samw@Sun.COM }; 46918845Samw@Sun.COM 46928845Samw@Sun.COM /* 4693789Sahrens * Extended attribute directory vnode operations template 4694789Sahrens * This template is identical to the directory vnodes 4695789Sahrens * operation template except for restricted operations: 4696789Sahrens * VOP_MKDIR() 4697789Sahrens * VOP_SYMLINK() 4698789Sahrens * Note that there are other restrictions embedded in: 4699789Sahrens * zfs_create() - restrict type to VREG 4700789Sahrens * zfs_link() - no links into/out of attribute space 4701789Sahrens * zfs_rename() - no moves into/out of attribute space 4702789Sahrens */ 4703789Sahrens vnodeops_t *zfs_xdvnodeops; 4704789Sahrens const fs_operation_def_t zfs_xdvnodeops_template[] = { 47053898Srsb VOPNAME_OPEN, { .vop_open = zfs_open }, 47063898Srsb VOPNAME_CLOSE, { .vop_close = zfs_close }, 47073898Srsb VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 47083898Srsb VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 47093898Srsb VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 47103898Srsb VOPNAME_ACCESS, { .vop_access = zfs_access }, 47113898Srsb VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 47123898Srsb VOPNAME_CREATE, { .vop_create = zfs_create }, 47133898Srsb VOPNAME_REMOVE, { .vop_remove = zfs_remove }, 47143898Srsb VOPNAME_LINK, { .vop_link = zfs_link }, 47153898Srsb VOPNAME_RENAME, { .vop_rename = zfs_rename }, 47163898Srsb VOPNAME_MKDIR, { .error = zfs_inval }, 47173898Srsb VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir }, 47183898Srsb VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, 47193898Srsb VOPNAME_SYMLINK, { .error = zfs_inval }, 47203898Srsb VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 47213898Srsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 47223898Srsb VOPNAME_FID, { .vop_fid = zfs_fid }, 47233898Srsb VOPNAME_SEEK, { .vop_seek = zfs_seek }, 47243898Srsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 47253898Srsb VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 47263898Srsb VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 47273898Srsb VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 47283898Srsb NULL, NULL 4729789Sahrens }; 4730789Sahrens 4731789Sahrens /* 4732789Sahrens * Error vnode operations template 4733789Sahrens */ 4734789Sahrens vnodeops_t *zfs_evnodeops; 4735789Sahrens const fs_operation_def_t zfs_evnodeops_template[] = { 47363898Srsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 47373898Srsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 47383898Srsb NULL, NULL 4739789Sahrens }; 4740