1789Sahrens /*
2789Sahrens * CDDL HEADER START
3789Sahrens *
4789Sahrens * The contents of this file are subject to the terms of the
51484Sek110237 * Common Development and Distribution License (the "License").
61484Sek110237 * You may not use this file except in compliance with the License.
7789Sahrens *
8789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9789Sahrens * or http://www.opensolaris.org/os/licensing.
10789Sahrens * See the License for the specific language governing permissions
11789Sahrens * and limitations under the License.
12789Sahrens *
13789Sahrens * When distributing Covered Code, include this CDDL HEADER in each
14789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15789Sahrens * If applicable, add the following below this CDDL HEADER, with the
16789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying
17789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner]
18789Sahrens *
19789Sahrens * CDDL HEADER END
20789Sahrens */
21789Sahrens /*
2212218SMark.Shellenbaum@Sun.COM * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23789Sahrens */
24789Sahrens
25789Sahrens #include <sys/types.h>
26789Sahrens #include <sys/param.h>
27789Sahrens #include <sys/time.h>
28789Sahrens #include <sys/systm.h>
29789Sahrens #include <sys/sysmacros.h>
30789Sahrens #include <sys/resource.h>
31789Sahrens #include <sys/vfs.h>
32789Sahrens #include <sys/vnode.h>
33789Sahrens #include <sys/file.h>
34789Sahrens #include <sys/mode.h>
35789Sahrens #include <sys/kmem.h>
36789Sahrens #include <sys/uio.h>
37789Sahrens #include <sys/pathname.h>
38789Sahrens #include <sys/cmn_err.h>
39789Sahrens #include <sys/errno.h>
40789Sahrens #include <sys/stat.h>
41789Sahrens #include <sys/unistd.h>
425498Stimh #include <sys/sunddi.h>
43789Sahrens #include <sys/random.h>
44789Sahrens #include <sys/policy.h>
45789Sahrens #include <sys/zfs_dir.h>
46789Sahrens #include <sys/zfs_acl.h>
47789Sahrens #include <sys/fs/zfs.h>
48789Sahrens #include "fs/fs_subr.h"
49789Sahrens #include <sys/zap.h>
50789Sahrens #include <sys/dmu.h>
51789Sahrens #include <sys/atomic.h>
52789Sahrens #include <sys/zfs_ctldir.h>
535331Samw #include <sys/zfs_fuid.h>
5411935SMark.Shellenbaum@Sun.COM #include <sys/sa.h>
5511935SMark.Shellenbaum@Sun.COM #include <sys/zfs_sa.h>
561484Sek110237 #include <sys/dnlc.h>
575331Samw #include <sys/extdirent.h>
585331Samw
595331Samw /*
605331Samw * zfs_match_find() is used by zfs_dirent_lock() to peform zap lookups
615331Samw * of names after deciding which is the appropriate lookup interface.
625331Samw */
635331Samw static int
zfs_match_find(zfsvfs_t * zfsvfs,znode_t * dzp,char * name,boolean_t exact,boolean_t update,int * deflags,pathname_t * rpnp,uint64_t * zoid)645331Samw zfs_match_find(zfsvfs_t *zfsvfs, znode_t *dzp, char *name, boolean_t exact,
655331Samw boolean_t update, int *deflags, pathname_t *rpnp, uint64_t *zoid)
665331Samw {
675331Samw int error;
685331Samw
695331Samw if (zfsvfs->z_norm) {
705331Samw matchtype_t mt = MT_FIRST;
715331Samw boolean_t conflict = B_FALSE;
725331Samw size_t bufsz = 0;
735331Samw char *buf = NULL;
745331Samw
755331Samw if (rpnp) {
766492Stimh buf = rpnp->pn_buf;
775331Samw bufsz = rpnp->pn_bufsize;
785331Samw }
795331Samw if (exact)
805331Samw mt = MT_EXACT;
815331Samw /*
825331Samw * In the non-mixed case we only expect there would ever
835331Samw * be one match, but we need to use the normalizing lookup.
845331Samw */
855331Samw error = zap_lookup_norm(zfsvfs->z_os, dzp->z_id, name, 8, 1,
865331Samw zoid, mt, buf, bufsz, &conflict);
876492Stimh if (!error && deflags)
885331Samw *deflags = conflict ? ED_CASE_CONFLICT : 0;
895331Samw } else {
905331Samw error = zap_lookup(zfsvfs->z_os, dzp->z_id, name, 8, 1, zoid);
915331Samw }
925331Samw *zoid = ZFS_DIRENT_OBJ(*zoid);
935331Samw
945331Samw if (error == ENOENT && update)
955331Samw dnlc_update(ZTOV(dzp), name, DNLC_NO_VNODE);
965331Samw
975331Samw return (error);
985331Samw }
99789Sahrens
100789Sahrens /*
101789Sahrens * Lock a directory entry. A dirlock on <dzp, name> protects that name
102789Sahrens * in dzp's directory zap object. As long as you hold a dirlock, you can
103789Sahrens * assume two things: (1) dzp cannot be reaped, and (2) no other thread
104789Sahrens * can change the zap entry for (i.e. link or unlink) this name.
105789Sahrens *
106789Sahrens * Input arguments:
107789Sahrens * dzp - znode for directory
108789Sahrens * name - name of entry to lock
109789Sahrens * flag - ZNEW: if the entry already exists, fail with EEXIST.
110789Sahrens * ZEXISTS: if the entry does not exist, fail with ENOENT.
111789Sahrens * ZSHARED: allow concurrent access with other ZSHARED callers.
112789Sahrens * ZXATTR: we want dzp's xattr directory
1135331Samw * ZCILOOK: On a mixed sensitivity file system,
1145331Samw * this lookup should be case-insensitive.
1155331Samw * ZCIEXACT: On a purely case-insensitive file system,
1165331Samw * this lookup should be case-sensitive.
1175331Samw * ZRENAMING: we are locking for renaming, force narrow locks
11811321SSanjeev.Bagewadi@Sun.COM * ZHAVELOCK: Don't grab the z_name_lock for this call. The
11911321SSanjeev.Bagewadi@Sun.COM * current thread already holds it.
120789Sahrens *
121789Sahrens * Output arguments:
122789Sahrens * zpp - pointer to the znode for the entry (NULL if there isn't one)
123789Sahrens * dlpp - pointer to the dirlock for this entry (NULL on error)
1245331Samw * direntflags - (case-insensitive lookup only)
1255331Samw * flags if multiple case-sensitive matches exist in directory
1265331Samw * realpnp - (case-insensitive lookup only)
1275331Samw * actual name matched within the directory
128789Sahrens *
129789Sahrens * Return value: 0 on success or errno on failure.
130789Sahrens *
131789Sahrens * NOTE: Always checks for, and rejects, '.' and '..'.
1325331Samw * NOTE: For case-insensitive file systems we take wide locks (see below),
1335331Samw * but return znode pointers to a single match.
134789Sahrens */
135789Sahrens int
zfs_dirent_lock(zfs_dirlock_t ** dlpp,znode_t * dzp,char * name,znode_t ** zpp,int flag,int * direntflags,pathname_t * realpnp)136789Sahrens zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
1375331Samw int flag, int *direntflags, pathname_t *realpnp)
138789Sahrens {
139789Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
140789Sahrens zfs_dirlock_t *dl;
1415331Samw boolean_t update;
1425331Samw boolean_t exact;
143789Sahrens uint64_t zoid;
1445331Samw vnode_t *vp = NULL;
1455331Samw int error = 0;
1465331Samw int cmpflags;
147789Sahrens
148789Sahrens *zpp = NULL;
149789Sahrens *dlpp = NULL;
150789Sahrens
151789Sahrens /*
152789Sahrens * Verify that we are not trying to lock '.', '..', or '.zfs'
153789Sahrens */
154789Sahrens if (name[0] == '.' &&
155789Sahrens (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')) ||
156789Sahrens zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0)
157789Sahrens return (EEXIST);
158789Sahrens
159789Sahrens /*
1605331Samw * Case sensitivity and normalization preferences are set when
1615331Samw * the file system is created. These are stored in the
1625331Samw * zfsvfs->z_case and zfsvfs->z_norm fields. These choices
1635331Samw * affect what vnodes can be cached in the DNLC, how we
1645331Samw * perform zap lookups, and the "width" of our dirlocks.
1655331Samw *
1665331Samw * A normal dirlock locks a single name. Note that with
1675331Samw * normalization a name can be composed multiple ways, but
1685331Samw * when normalized, these names all compare equal. A wide
1695331Samw * dirlock locks multiple names. We need these when the file
1705331Samw * system is supporting mixed-mode access. It is sometimes
1715331Samw * necessary to lock all case permutations of file name at
1725331Samw * once so that simultaneous case-insensitive/case-sensitive
1735331Samw * behaves as rationally as possible.
1745331Samw */
1755331Samw
1765331Samw /*
1775331Samw * Decide if exact matches should be requested when performing
1785331Samw * a zap lookup on file systems supporting case-insensitive
1795331Samw * access.
1805331Samw */
1815498Stimh exact =
1825498Stimh ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE) && (flag & ZCIEXACT)) ||
1835498Stimh ((zfsvfs->z_case == ZFS_CASE_MIXED) && !(flag & ZCILOOK));
1845331Samw
1855331Samw /*
1865331Samw * Only look in or update the DNLC if we are looking for the
1875331Samw * name on a file system that does not require normalization
1885331Samw * or case folding. We can also look there if we happen to be
1895331Samw * on a non-normalizing, mixed sensitivity file system IF we
1905331Samw * are looking for the exact name.
1915331Samw *
1925331Samw * Maybe can add TO-UPPERed version of name to dnlc in ci-only
1935331Samw * case for performance improvement?
1945331Samw */
1955331Samw update = !zfsvfs->z_norm ||
1965498Stimh ((zfsvfs->z_case == ZFS_CASE_MIXED) &&
1975331Samw !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER) && !(flag & ZCILOOK));
1985331Samw
1995331Samw /*
2005331Samw * ZRENAMING indicates we are in a situation where we should
2015331Samw * take narrow locks regardless of the file system's
2025331Samw * preferences for normalizing and case folding. This will
2035331Samw * prevent us deadlocking trying to grab the same wide lock
2045331Samw * twice if the two names happen to be case-insensitive
2055331Samw * matches.
2065331Samw */
2075331Samw if (flag & ZRENAMING)
2085331Samw cmpflags = 0;
2095331Samw else
2105331Samw cmpflags = zfsvfs->z_norm;
2115331Samw
2125331Samw /*
213789Sahrens * Wait until there are no locks on this name.
21411321SSanjeev.Bagewadi@Sun.COM *
21511321SSanjeev.Bagewadi@Sun.COM * Don't grab the the lock if it is already held. However, cannot
21611321SSanjeev.Bagewadi@Sun.COM * have both ZSHARED and ZHAVELOCK together.
217789Sahrens */
21811321SSanjeev.Bagewadi@Sun.COM ASSERT(!(flag & ZSHARED) || !(flag & ZHAVELOCK));
21911321SSanjeev.Bagewadi@Sun.COM if (!(flag & ZHAVELOCK))
22011321SSanjeev.Bagewadi@Sun.COM rw_enter(&dzp->z_name_lock, RW_READER);
22111321SSanjeev.Bagewadi@Sun.COM
222789Sahrens mutex_enter(&dzp->z_lock);
223789Sahrens for (;;) {
2243461Sahrens if (dzp->z_unlinked) {
225789Sahrens mutex_exit(&dzp->z_lock);
22611321SSanjeev.Bagewadi@Sun.COM if (!(flag & ZHAVELOCK))
22711321SSanjeev.Bagewadi@Sun.COM rw_exit(&dzp->z_name_lock);
228789Sahrens return (ENOENT);
229789Sahrens }
2305331Samw for (dl = dzp->z_dirlocks; dl != NULL; dl = dl->dl_next) {
2315331Samw if ((u8_strcmp(name, dl->dl_name, 0, cmpflags,
2325331Samw U8_UNICODE_LATEST, &error) == 0) || error != 0)
233789Sahrens break;
2345331Samw }
2355331Samw if (error != 0) {
2365331Samw mutex_exit(&dzp->z_lock);
23711321SSanjeev.Bagewadi@Sun.COM if (!(flag & ZHAVELOCK))
23811321SSanjeev.Bagewadi@Sun.COM rw_exit(&dzp->z_name_lock);
2395331Samw return (ENOENT);
2405331Samw }
241789Sahrens if (dl == NULL) {
242789Sahrens /*
243789Sahrens * Allocate a new dirlock and add it to the list.
244789Sahrens */
245789Sahrens dl = kmem_alloc(sizeof (zfs_dirlock_t), KM_SLEEP);
246789Sahrens cv_init(&dl->dl_cv, NULL, CV_DEFAULT, NULL);
247789Sahrens dl->dl_name = name;
248789Sahrens dl->dl_sharecnt = 0;
24911321SSanjeev.Bagewadi@Sun.COM dl->dl_namelock = 0;
250789Sahrens dl->dl_namesize = 0;
251789Sahrens dl->dl_dzp = dzp;
252789Sahrens dl->dl_next = dzp->z_dirlocks;
253789Sahrens dzp->z_dirlocks = dl;
254789Sahrens break;
255789Sahrens }
256789Sahrens if ((flag & ZSHARED) && dl->dl_sharecnt != 0)
257789Sahrens break;
258789Sahrens cv_wait(&dl->dl_cv, &dzp->z_lock);
259789Sahrens }
260789Sahrens
26111321SSanjeev.Bagewadi@Sun.COM /*
26211321SSanjeev.Bagewadi@Sun.COM * If the z_name_lock was NOT held for this dirlock record it.
26311321SSanjeev.Bagewadi@Sun.COM */
26411321SSanjeev.Bagewadi@Sun.COM if (flag & ZHAVELOCK)
26511321SSanjeev.Bagewadi@Sun.COM dl->dl_namelock = 1;
26611321SSanjeev.Bagewadi@Sun.COM
267789Sahrens if ((flag & ZSHARED) && ++dl->dl_sharecnt > 1 && dl->dl_namesize == 0) {
268789Sahrens /*
269789Sahrens * We're the second shared reference to dl. Make a copy of
270789Sahrens * dl_name in case the first thread goes away before we do.
271789Sahrens * Note that we initialize the new name before storing its
272789Sahrens * pointer into dl_name, because the first thread may load
273789Sahrens * dl->dl_name at any time. He'll either see the old value,
274789Sahrens * which is his, or the new shared copy; either is OK.
275789Sahrens */
276789Sahrens dl->dl_namesize = strlen(dl->dl_name) + 1;
277789Sahrens name = kmem_alloc(dl->dl_namesize, KM_SLEEP);
278789Sahrens bcopy(dl->dl_name, name, dl->dl_namesize);
279789Sahrens dl->dl_name = name;
280789Sahrens }
281789Sahrens
282789Sahrens mutex_exit(&dzp->z_lock);
283789Sahrens
284789Sahrens /*
285789Sahrens * We have a dirlock on the name. (Note that it is the dirlock,
286789Sahrens * not the dzp's z_lock, that protects the name in the zap object.)
287789Sahrens * See if there's an object by this name; if so, put a hold on it.
288789Sahrens */
289789Sahrens if (flag & ZXATTR) {
29011935SMark.Shellenbaum@Sun.COM error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &zoid,
29111935SMark.Shellenbaum@Sun.COM sizeof (zoid));
29211935SMark.Shellenbaum@Sun.COM if (error == 0)
29311935SMark.Shellenbaum@Sun.COM error = (zoid == 0 ? ENOENT : 0);
294789Sahrens } else {
2955331Samw if (update)
2965331Samw vp = dnlc_lookup(ZTOV(dzp), name);
2971484Sek110237 if (vp == DNLC_NO_VNODE) {
2981484Sek110237 VN_RELE(vp);
2991484Sek110237 error = ENOENT;
3001484Sek110237 } else if (vp) {
3011484Sek110237 if (flag & ZNEW) {
3021484Sek110237 zfs_dirent_unlock(dl);
3031484Sek110237 VN_RELE(vp);
3041484Sek110237 return (EEXIST);
3051484Sek110237 }
3061484Sek110237 *dlpp = dl;
3071484Sek110237 *zpp = VTOZ(vp);
3081484Sek110237 return (0);
3091484Sek110237 } else {
3105331Samw error = zfs_match_find(zfsvfs, dzp, name, exact,
3115331Samw update, direntflags, realpnp, &zoid);
3121484Sek110237 }
313789Sahrens }
314789Sahrens if (error) {
315789Sahrens if (error != ENOENT || (flag & ZEXISTS)) {
316789Sahrens zfs_dirent_unlock(dl);
317789Sahrens return (error);
318789Sahrens }
319789Sahrens } else {
320789Sahrens if (flag & ZNEW) {
321789Sahrens zfs_dirent_unlock(dl);
322789Sahrens return (EEXIST);
323789Sahrens }
324789Sahrens error = zfs_zget(zfsvfs, zoid, zpp);
325789Sahrens if (error) {
326789Sahrens zfs_dirent_unlock(dl);
327789Sahrens return (error);
328789Sahrens }
3295331Samw if (!(flag & ZXATTR) && update)
3301484Sek110237 dnlc_update(ZTOV(dzp), name, ZTOV(*zpp));
331789Sahrens }
332789Sahrens
333789Sahrens *dlpp = dl;
334789Sahrens
335789Sahrens return (0);
336789Sahrens }
337789Sahrens
338789Sahrens /*
339789Sahrens * Unlock this directory entry and wake anyone who was waiting for it.
340789Sahrens */
341789Sahrens void
zfs_dirent_unlock(zfs_dirlock_t * dl)342789Sahrens zfs_dirent_unlock(zfs_dirlock_t *dl)
343789Sahrens {
344789Sahrens znode_t *dzp = dl->dl_dzp;
345789Sahrens zfs_dirlock_t **prev_dl, *cur_dl;
346789Sahrens
347789Sahrens mutex_enter(&dzp->z_lock);
34811321SSanjeev.Bagewadi@Sun.COM
34911321SSanjeev.Bagewadi@Sun.COM if (!dl->dl_namelock)
35011321SSanjeev.Bagewadi@Sun.COM rw_exit(&dzp->z_name_lock);
35111321SSanjeev.Bagewadi@Sun.COM
352789Sahrens if (dl->dl_sharecnt > 1) {
353789Sahrens dl->dl_sharecnt--;
354789Sahrens mutex_exit(&dzp->z_lock);
355789Sahrens return;
356789Sahrens }
357789Sahrens prev_dl = &dzp->z_dirlocks;
358789Sahrens while ((cur_dl = *prev_dl) != dl)
359789Sahrens prev_dl = &cur_dl->dl_next;
360789Sahrens *prev_dl = dl->dl_next;
361789Sahrens cv_broadcast(&dl->dl_cv);
362789Sahrens mutex_exit(&dzp->z_lock);
363789Sahrens
364789Sahrens if (dl->dl_namesize != 0)
365789Sahrens kmem_free(dl->dl_name, dl->dl_namesize);
366789Sahrens cv_destroy(&dl->dl_cv);
367789Sahrens kmem_free(dl, sizeof (*dl));
368789Sahrens }
369789Sahrens
370789Sahrens /*
371789Sahrens * Look up an entry in a directory.
372789Sahrens *
373789Sahrens * NOTE: '.' and '..' are handled as special cases because
374789Sahrens * no directory entries are actually stored for them. If this is
375789Sahrens * the root of a filesystem, then '.zfs' is also treated as a
376789Sahrens * special pseudo-directory.
377789Sahrens */
378789Sahrens int
zfs_dirlook(znode_t * dzp,char * name,vnode_t ** vpp,int flags,int * deflg,pathname_t * rpnp)3795331Samw zfs_dirlook(znode_t *dzp, char *name, vnode_t **vpp, int flags,
3805331Samw int *deflg, pathname_t *rpnp)
381789Sahrens {
382789Sahrens zfs_dirlock_t *dl;
383789Sahrens znode_t *zp;
384789Sahrens int error = 0;
38511935SMark.Shellenbaum@Sun.COM uint64_t parent;
386789Sahrens
387789Sahrens if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
388789Sahrens *vpp = ZTOV(dzp);
389789Sahrens VN_HOLD(*vpp);
390789Sahrens } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
391789Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
39211935SMark.Shellenbaum@Sun.COM
393789Sahrens /*
394789Sahrens * If we are a snapshot mounted under .zfs, return
395789Sahrens * the vp for the snapshot directory.
396789Sahrens */
39711935SMark.Shellenbaum@Sun.COM if ((error = sa_lookup(dzp->z_sa_hdl,
39811935SMark.Shellenbaum@Sun.COM SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0)
39911935SMark.Shellenbaum@Sun.COM return (error);
40011935SMark.Shellenbaum@Sun.COM if (parent == dzp->z_id && zfsvfs->z_parent != zfsvfs) {
401789Sahrens error = zfsctl_root_lookup(zfsvfs->z_parent->z_ctldir,
4025331Samw "snapshot", vpp, NULL, 0, NULL, kcred,
4035331Samw NULL, NULL, NULL);
404789Sahrens return (error);
405789Sahrens }
406789Sahrens rw_enter(&dzp->z_parent_lock, RW_READER);
40711935SMark.Shellenbaum@Sun.COM error = zfs_zget(zfsvfs, parent, &zp);
408789Sahrens if (error == 0)
409789Sahrens *vpp = ZTOV(zp);
410789Sahrens rw_exit(&dzp->z_parent_lock);
411789Sahrens } else if (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) {
412789Sahrens *vpp = zfsctl_root(dzp);
413789Sahrens } else {
4145331Samw int zf;
4155331Samw
4165331Samw zf = ZEXISTS | ZSHARED;
4175331Samw if (flags & FIGNORECASE)
4185331Samw zf |= ZCILOOK;
4195331Samw
4205331Samw error = zfs_dirent_lock(&dl, dzp, name, &zp, zf, deflg, rpnp);
421789Sahrens if (error == 0) {
422789Sahrens *vpp = ZTOV(zp);
423789Sahrens zfs_dirent_unlock(dl);
424869Sperrin dzp->z_zn_prefetch = B_TRUE; /* enable prefetching */
425789Sahrens }
4265331Samw rpnp = NULL;
427789Sahrens }
428789Sahrens
4296492Stimh if ((flags & FIGNORECASE) && rpnp && !error)
4306492Stimh (void) strlcpy(rpnp->pn_buf, name, rpnp->pn_bufsize);
4315331Samw
432789Sahrens return (error);
433789Sahrens }
434789Sahrens
4351544Seschrock /*
4363461Sahrens * unlinked Set (formerly known as the "delete queue") Error Handling
4371544Seschrock *
4383461Sahrens * When dealing with the unlinked set, we dmu_tx_hold_zap(), but we
4391544Seschrock * don't specify the name of the entry that we will be manipulating. We
4401544Seschrock * also fib and say that we won't be adding any new entries to the
4413461Sahrens * unlinked set, even though we might (this is to lower the minimum file
4421544Seschrock * size that can be deleted in a full filesystem). So on the small
4433461Sahrens * chance that the nlink list is using a fat zap (ie. has more than
4441544Seschrock * 2000 entries), we *may* not pre-read a block that's needed.
4451544Seschrock * Therefore it is remotely possible for some of the assertions
4463461Sahrens * regarding the unlinked set below to fail due to i/o error. On a
4471544Seschrock * nondebug system, this will result in the space being leaked.
4481544Seschrock */
449789Sahrens void
zfs_unlinked_add(znode_t * zp,dmu_tx_t * tx)4503461Sahrens zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx)
451789Sahrens {
452789Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs;
453789Sahrens
4543461Sahrens ASSERT(zp->z_unlinked);
45511935SMark.Shellenbaum@Sun.COM ASSERT(zp->z_links == 0);
456789Sahrens
4577046Sahrens VERIFY3U(0, ==,
4587046Sahrens zap_add_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
4596992Smaybee }
4606992Smaybee
461789Sahrens /*
4623461Sahrens * Clean up any znodes that had no links when we either crashed or
4633461Sahrens * (force) umounted the file system.
4643461Sahrens */
4653461Sahrens void
zfs_unlinked_drain(zfsvfs_t * zfsvfs)4663461Sahrens zfs_unlinked_drain(zfsvfs_t *zfsvfs)
4673461Sahrens {
4683461Sahrens zap_cursor_t zc;
4693461Sahrens zap_attribute_t zap;
4703461Sahrens dmu_object_info_t doi;
4713461Sahrens znode_t *zp;
4723461Sahrens int error;
4733461Sahrens
4743461Sahrens /*
4753461Sahrens * Interate over the contents of the unlinked set.
4763461Sahrens */
4773461Sahrens for (zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_unlinkedobj);
4783461Sahrens zap_cursor_retrieve(&zc, &zap) == 0;
4793461Sahrens zap_cursor_advance(&zc)) {
4803461Sahrens
4813461Sahrens /*
4823461Sahrens * See what kind of object we have in list
4833461Sahrens */
4843461Sahrens
4853461Sahrens error = dmu_object_info(zfsvfs->z_os,
4863461Sahrens zap.za_first_integer, &doi);
4873461Sahrens if (error != 0)
4883461Sahrens continue;
4893461Sahrens
4903461Sahrens ASSERT((doi.doi_type == DMU_OT_PLAIN_FILE_CONTENTS) ||
4913461Sahrens (doi.doi_type == DMU_OT_DIRECTORY_CONTENTS));
4923461Sahrens /*
4933461Sahrens * We need to re-mark these list entries for deletion,
4943461Sahrens * so we pull them back into core and set zp->z_unlinked.
4953461Sahrens */
4963461Sahrens error = zfs_zget(zfsvfs, zap.za_first_integer, &zp);
4973461Sahrens
4983461Sahrens /*
4993461Sahrens * We may pick up znodes that are already marked for deletion.
5003461Sahrens * This could happen during the purge of an extended attribute
5013461Sahrens * directory. All we need to do is skip over them, since they
5023461Sahrens * are already in the system marked z_unlinked.
5033461Sahrens */
5043461Sahrens if (error != 0)
5053461Sahrens continue;
5063461Sahrens
5073461Sahrens zp->z_unlinked = B_TRUE;
5083461Sahrens VN_RELE(ZTOV(zp));
5093461Sahrens }
5103461Sahrens zap_cursor_fini(&zc);
5113461Sahrens }
5123461Sahrens
5133461Sahrens /*
514789Sahrens * Delete the entire contents of a directory. Return a count
5155860Sck153898 * of the number of entries that could not be deleted. If we encounter
5165860Sck153898 * an error, return a count of at least one so that the directory stays
5175860Sck153898 * in the unlinked set.
518789Sahrens *
519789Sahrens * NOTE: this function assumes that the directory is inactive,
520789Sahrens * so there is no need to lock its entries before deletion.
521789Sahrens * Also, it assumes the directory contents is *only* regular
522789Sahrens * files.
523789Sahrens */
524789Sahrens static int
zfs_purgedir(znode_t * dzp)525789Sahrens zfs_purgedir(znode_t *dzp)
526789Sahrens {
527789Sahrens zap_cursor_t zc;
528789Sahrens zap_attribute_t zap;
529789Sahrens znode_t *xzp;
530789Sahrens dmu_tx_t *tx;
531789Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
532789Sahrens zfs_dirlock_t dl;
533789Sahrens int skipped = 0;
534789Sahrens int error;
535789Sahrens
536789Sahrens for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
537789Sahrens (error = zap_cursor_retrieve(&zc, &zap)) == 0;
538789Sahrens zap_cursor_advance(&zc)) {
5393912Slling error = zfs_zget(zfsvfs,
5403912Slling ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp);
5415860Sck153898 if (error) {
5425860Sck153898 skipped += 1;
5435860Sck153898 continue;
5445860Sck153898 }
545789Sahrens
546789Sahrens ASSERT((ZTOV(xzp)->v_type == VREG) ||
547789Sahrens (ZTOV(xzp)->v_type == VLNK));
548789Sahrens
549789Sahrens tx = dmu_tx_create(zfsvfs->z_os);
55011935SMark.Shellenbaum@Sun.COM dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
5511544Seschrock dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name);
55211935SMark.Shellenbaum@Sun.COM dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
5533461Sahrens dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
55411935SMark.Shellenbaum@Sun.COM /* Is this really needed ? */
55511935SMark.Shellenbaum@Sun.COM zfs_sa_upgrade_txholds(tx, xzp);
556789Sahrens error = dmu_tx_assign(tx, TXG_WAIT);
557789Sahrens if (error) {
558789Sahrens dmu_tx_abort(tx);
559789Sahrens VN_RELE(ZTOV(xzp));
560789Sahrens skipped += 1;
561789Sahrens continue;
562789Sahrens }
563789Sahrens bzero(&dl, sizeof (dl));
564789Sahrens dl.dl_dzp = dzp;
565789Sahrens dl.dl_name = zap.za_name;
566789Sahrens
567789Sahrens error = zfs_link_destroy(&dl, xzp, tx, 0, NULL);
5685860Sck153898 if (error)
5695860Sck153898 skipped += 1;
570789Sahrens dmu_tx_commit(tx);
571789Sahrens
572789Sahrens VN_RELE(ZTOV(xzp));
573789Sahrens }
574885Sahrens zap_cursor_fini(&zc);
5755860Sck153898 if (error != ENOENT)
5765860Sck153898 skipped += 1;
577789Sahrens return (skipped);
578789Sahrens }
579789Sahrens
580789Sahrens void
zfs_rmnode(znode_t * zp)581789Sahrens zfs_rmnode(znode_t *zp)
582789Sahrens {
583789Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs;
584789Sahrens objset_t *os = zfsvfs->z_os;
585789Sahrens znode_t *xzp = NULL;
586789Sahrens dmu_tx_t *tx;
587789Sahrens uint64_t acl_obj;
58811935SMark.Shellenbaum@Sun.COM uint64_t xattr_obj;
589789Sahrens int error;
590789Sahrens
59111935SMark.Shellenbaum@Sun.COM ASSERT(zp->z_links == 0);
592789Sahrens ASSERT(ZTOV(zp)->v_count == 0);
593789Sahrens
594789Sahrens /*
595789Sahrens * If this is an attribute directory, purge its contents.
596789Sahrens */
59711935SMark.Shellenbaum@Sun.COM if (ZTOV(zp)->v_type == VDIR && (zp->z_pflags & ZFS_XATTR)) {
598789Sahrens if (zfs_purgedir(zp) != 0) {
599789Sahrens /*
6003461Sahrens * Not enough space to delete some xattrs.
6016992Smaybee * Leave it in the unlinked set.
602789Sahrens */
6035745Smaybee zfs_znode_dmu_fini(zp);
6045745Smaybee zfs_znode_free(zp);
605789Sahrens return;
606789Sahrens }
6073461Sahrens }
608789Sahrens
609789Sahrens /*
6106992Smaybee * Free up all the data in the file.
6116992Smaybee */
6126992Smaybee error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END);
6136992Smaybee if (error) {
6146992Smaybee /*
6156992Smaybee * Not enough space. Leave the file in the unlinked set.
6166992Smaybee */
6176992Smaybee zfs_znode_dmu_fini(zp);
6186992Smaybee zfs_znode_free(zp);
6196992Smaybee return;
6206992Smaybee }
6216992Smaybee
6226992Smaybee /*
6233461Sahrens * If the file has extended attributes, we're going to unlink
6243461Sahrens * the xattr dir.
625789Sahrens */
62611935SMark.Shellenbaum@Sun.COM error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
62711935SMark.Shellenbaum@Sun.COM &xattr_obj, sizeof (xattr_obj));
62811935SMark.Shellenbaum@Sun.COM if (error == 0 && xattr_obj) {
62911935SMark.Shellenbaum@Sun.COM error = zfs_zget(zfsvfs, xattr_obj, &xzp);
630789Sahrens ASSERT(error == 0);
631789Sahrens }
632789Sahrens
63312620SMark.Shellenbaum@Oracle.COM acl_obj = zfs_external_acl(zp);
634789Sahrens
635789Sahrens /*
6366992Smaybee * Set up the final transaction.
637789Sahrens */
638789Sahrens tx = dmu_tx_create(os);
639789Sahrens dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
6403461Sahrens dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
641789Sahrens if (xzp) {
6423461Sahrens dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL);
64311935SMark.Shellenbaum@Sun.COM dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
644789Sahrens }
645789Sahrens if (acl_obj)
646789Sahrens dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
64711935SMark.Shellenbaum@Sun.COM
64811935SMark.Shellenbaum@Sun.COM zfs_sa_upgrade_txholds(tx, zp);
649789Sahrens error = dmu_tx_assign(tx, TXG_WAIT);
650789Sahrens if (error) {
6513461Sahrens /*
6523461Sahrens * Not enough space to delete the file. Leave it in the
6533461Sahrens * unlinked set, leaking it until the fs is remounted (at
6543461Sahrens * which point we'll call zfs_unlinked_drain() to process it).
6553461Sahrens */
656789Sahrens dmu_tx_abort(tx);
6575745Smaybee zfs_znode_dmu_fini(zp);
6585745Smaybee zfs_znode_free(zp);
6595745Smaybee goto out;
660789Sahrens }
661789Sahrens
662789Sahrens if (xzp) {
66311935SMark.Shellenbaum@Sun.COM ASSERT(error == 0);
664789Sahrens mutex_enter(&xzp->z_lock);
6653461Sahrens xzp->z_unlinked = B_TRUE; /* mark xzp for deletion */
66611935SMark.Shellenbaum@Sun.COM xzp->z_links = 0; /* no more links to it */
66711935SMark.Shellenbaum@Sun.COM VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs),
66811935SMark.Shellenbaum@Sun.COM &xzp->z_links, sizeof (xzp->z_links), tx));
669789Sahrens mutex_exit(&xzp->z_lock);
6703461Sahrens zfs_unlinked_add(xzp, tx);
671789Sahrens }
672789Sahrens
6733461Sahrens /* Remove this znode from the unlinked set */
6747046Sahrens VERIFY3U(0, ==,
6757046Sahrens zap_remove_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
676789Sahrens
677789Sahrens zfs_znode_delete(zp, tx);
678789Sahrens
679789Sahrens dmu_tx_commit(tx);
6805745Smaybee out:
681789Sahrens if (xzp)
682789Sahrens VN_RELE(ZTOV(xzp));
683789Sahrens }
684789Sahrens
6854577Sahrens static uint64_t
zfs_dirent(znode_t * zp,uint64_t mode)68611935SMark.Shellenbaum@Sun.COM zfs_dirent(znode_t *zp, uint64_t mode)
6874577Sahrens {
6884577Sahrens uint64_t de = zp->z_id;
68911935SMark.Shellenbaum@Sun.COM
6904577Sahrens if (zp->z_zfsvfs->z_version >= ZPL_VERSION_DIRENT_TYPE)
69111935SMark.Shellenbaum@Sun.COM de |= IFTODT(mode) << 60;
6924577Sahrens return (de);
6934577Sahrens }
6944577Sahrens
695789Sahrens /*
6963461Sahrens * Link zp into dl. Can only fail if zp has been unlinked.
697789Sahrens */
698789Sahrens int
zfs_link_create(zfs_dirlock_t * dl,znode_t * zp,dmu_tx_t * tx,int flag)699789Sahrens zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
700789Sahrens {
701789Sahrens znode_t *dzp = dl->dl_dzp;
70211935SMark.Shellenbaum@Sun.COM zfsvfs_t *zfsvfs = zp->z_zfsvfs;
703789Sahrens vnode_t *vp = ZTOV(zp);
7043912Slling uint64_t value;
705789Sahrens int zp_is_dir = (vp->v_type == VDIR);
70611935SMark.Shellenbaum@Sun.COM sa_bulk_attr_t bulk[5];
70711935SMark.Shellenbaum@Sun.COM uint64_t mtime[2], ctime[2];
70811935SMark.Shellenbaum@Sun.COM int count = 0;
709789Sahrens int error;
710789Sahrens
711789Sahrens mutex_enter(&zp->z_lock);
712789Sahrens
713789Sahrens if (!(flag & ZRENAMING)) {
7143461Sahrens if (zp->z_unlinked) { /* no new links to unlinked zp */
715789Sahrens ASSERT(!(flag & (ZNEW | ZEXISTS)));
716789Sahrens mutex_exit(&zp->z_lock);
717789Sahrens return (ENOENT);
718789Sahrens }
71911935SMark.Shellenbaum@Sun.COM zp->z_links++;
72011935SMark.Shellenbaum@Sun.COM SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
72111935SMark.Shellenbaum@Sun.COM &zp->z_links, sizeof (zp->z_links));
72211935SMark.Shellenbaum@Sun.COM
723789Sahrens }
72411935SMark.Shellenbaum@Sun.COM SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL,
72511935SMark.Shellenbaum@Sun.COM &dzp->z_id, sizeof (dzp->z_id));
72611935SMark.Shellenbaum@Sun.COM SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
72711935SMark.Shellenbaum@Sun.COM &zp->z_pflags, sizeof (zp->z_pflags));
728789Sahrens
72911935SMark.Shellenbaum@Sun.COM if (!(flag & ZNEW)) {
73011935SMark.Shellenbaum@Sun.COM SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
73111935SMark.Shellenbaum@Sun.COM ctime, sizeof (ctime));
73211935SMark.Shellenbaum@Sun.COM zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime,
73311935SMark.Shellenbaum@Sun.COM ctime, B_TRUE);
73411935SMark.Shellenbaum@Sun.COM }
73511935SMark.Shellenbaum@Sun.COM error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
73611935SMark.Shellenbaum@Sun.COM ASSERT(error == 0);
73711935SMark.Shellenbaum@Sun.COM
738789Sahrens mutex_exit(&zp->z_lock);
739789Sahrens
740789Sahrens mutex_enter(&dzp->z_lock);
74111935SMark.Shellenbaum@Sun.COM dzp->z_size++;
74211935SMark.Shellenbaum@Sun.COM dzp->z_links += zp_is_dir;
74311935SMark.Shellenbaum@Sun.COM count = 0;
74411935SMark.Shellenbaum@Sun.COM SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
74511935SMark.Shellenbaum@Sun.COM &dzp->z_size, sizeof (dzp->z_size));
74611935SMark.Shellenbaum@Sun.COM SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
74711935SMark.Shellenbaum@Sun.COM &dzp->z_links, sizeof (dzp->z_links));
74811935SMark.Shellenbaum@Sun.COM SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
74911935SMark.Shellenbaum@Sun.COM mtime, sizeof (mtime));
75011935SMark.Shellenbaum@Sun.COM SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
75111935SMark.Shellenbaum@Sun.COM ctime, sizeof (ctime));
75211935SMark.Shellenbaum@Sun.COM SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
75311935SMark.Shellenbaum@Sun.COM &dzp->z_pflags, sizeof (dzp->z_pflags));
75411935SMark.Shellenbaum@Sun.COM zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE);
75511935SMark.Shellenbaum@Sun.COM error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
75611935SMark.Shellenbaum@Sun.COM ASSERT(error == 0);
757789Sahrens mutex_exit(&dzp->z_lock);
758789Sahrens
75911935SMark.Shellenbaum@Sun.COM value = zfs_dirent(zp, zp->z_mode);
760789Sahrens error = zap_add(zp->z_zfsvfs->z_os, dzp->z_id, dl->dl_name,
7613912Slling 8, 1, &value, tx);
762789Sahrens ASSERT(error == 0);
763789Sahrens
7641484Sek110237 dnlc_update(ZTOV(dzp), dl->dl_name, vp);
7651484Sek110237
766789Sahrens return (0);
767789Sahrens }
768789Sahrens
76912413SSam.Falkner@Sun.COM static int
zfs_dropname(zfs_dirlock_t * dl,znode_t * zp,znode_t * dzp,dmu_tx_t * tx,int flag)77012413SSam.Falkner@Sun.COM zfs_dropname(zfs_dirlock_t *dl, znode_t *zp, znode_t *dzp, dmu_tx_t *tx,
77112413SSam.Falkner@Sun.COM int flag)
77212413SSam.Falkner@Sun.COM {
77312413SSam.Falkner@Sun.COM int error;
77412413SSam.Falkner@Sun.COM
77512413SSam.Falkner@Sun.COM if (zp->z_zfsvfs->z_norm) {
77612413SSam.Falkner@Sun.COM if (((zp->z_zfsvfs->z_case == ZFS_CASE_INSENSITIVE) &&
77712413SSam.Falkner@Sun.COM (flag & ZCIEXACT)) ||
77812413SSam.Falkner@Sun.COM ((zp->z_zfsvfs->z_case == ZFS_CASE_MIXED) &&
77912413SSam.Falkner@Sun.COM !(flag & ZCILOOK)))
78012413SSam.Falkner@Sun.COM error = zap_remove_norm(zp->z_zfsvfs->z_os,
78112413SSam.Falkner@Sun.COM dzp->z_id, dl->dl_name, MT_EXACT, tx);
78212413SSam.Falkner@Sun.COM else
78312413SSam.Falkner@Sun.COM error = zap_remove_norm(zp->z_zfsvfs->z_os,
78412413SSam.Falkner@Sun.COM dzp->z_id, dl->dl_name, MT_FIRST, tx);
78512413SSam.Falkner@Sun.COM } else {
78612413SSam.Falkner@Sun.COM error = zap_remove(zp->z_zfsvfs->z_os,
78712413SSam.Falkner@Sun.COM dzp->z_id, dl->dl_name, tx);
78812413SSam.Falkner@Sun.COM }
78912413SSam.Falkner@Sun.COM
79012413SSam.Falkner@Sun.COM return (error);
79112413SSam.Falkner@Sun.COM }
79212413SSam.Falkner@Sun.COM
793789Sahrens /*
7943461Sahrens * Unlink zp from dl, and mark zp for deletion if this was the last link.
795789Sahrens * Can fail if zp is a mount point (EBUSY) or a non-empty directory (EEXIST).
7963461Sahrens * If 'unlinkedp' is NULL, we put unlinked znodes on the unlinked list.
7973461Sahrens * If it's non-NULL, we use it to indicate whether the znode needs deletion,
798789Sahrens * and it's the caller's job to do it.
799789Sahrens */
800789Sahrens int
zfs_link_destroy(zfs_dirlock_t * dl,znode_t * zp,dmu_tx_t * tx,int flag,boolean_t * unlinkedp)801789Sahrens zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag,
8023461Sahrens boolean_t *unlinkedp)
803789Sahrens {
804789Sahrens znode_t *dzp = dl->dl_dzp;
80511935SMark.Shellenbaum@Sun.COM zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
806789Sahrens vnode_t *vp = ZTOV(zp);
807789Sahrens int zp_is_dir = (vp->v_type == VDIR);
8083461Sahrens boolean_t unlinked = B_FALSE;
80911935SMark.Shellenbaum@Sun.COM sa_bulk_attr_t bulk[5];
81011935SMark.Shellenbaum@Sun.COM uint64_t mtime[2], ctime[2];
81111935SMark.Shellenbaum@Sun.COM int count = 0;
812789Sahrens int error;
813789Sahrens
8141484Sek110237 dnlc_remove(ZTOV(dzp), dl->dl_name);
8151484Sek110237
816789Sahrens if (!(flag & ZRENAMING)) {
817789Sahrens if (vn_vfswlock(vp)) /* prevent new mounts on zp */
818789Sahrens return (EBUSY);
819789Sahrens
820789Sahrens if (vn_ismntpt(vp)) { /* don't remove mount point */
821789Sahrens vn_vfsunlock(vp);
822789Sahrens return (EBUSY);
823789Sahrens }
824789Sahrens
825789Sahrens mutex_enter(&zp->z_lock);
82611935SMark.Shellenbaum@Sun.COM
82711935SMark.Shellenbaum@Sun.COM if (zp_is_dir && !zfs_dirempty(zp)) {
828789Sahrens mutex_exit(&zp->z_lock);
829789Sahrens vn_vfsunlock(vp);
830789Sahrens return (EEXIST);
831789Sahrens }
83211935SMark.Shellenbaum@Sun.COM
83312413SSam.Falkner@Sun.COM /*
83412413SSam.Falkner@Sun.COM * If we get here, we are going to try to remove the object.
83512413SSam.Falkner@Sun.COM * First try removing the name from the directory; if that
83612413SSam.Falkner@Sun.COM * fails, return the error.
83712413SSam.Falkner@Sun.COM */
83812413SSam.Falkner@Sun.COM error = zfs_dropname(dl, zp, dzp, tx, flag);
83912413SSam.Falkner@Sun.COM if (error != 0) {
84012413SSam.Falkner@Sun.COM mutex_exit(&zp->z_lock);
84112413SSam.Falkner@Sun.COM vn_vfsunlock(vp);
84212413SSam.Falkner@Sun.COM return (error);
84312413SSam.Falkner@Sun.COM }
84412413SSam.Falkner@Sun.COM
84511935SMark.Shellenbaum@Sun.COM if (zp->z_links <= zp_is_dir) {
8463713Sahrens zfs_panic_recover("zfs: link count on %s is %u, "
8473713Sahrens "should be at least %u",
8483713Sahrens zp->z_vnode->v_path ? zp->z_vnode->v_path :
84911935SMark.Shellenbaum@Sun.COM "<unknown>", (int)zp->z_links,
8503713Sahrens zp_is_dir + 1);
85111935SMark.Shellenbaum@Sun.COM zp->z_links = zp_is_dir + 1;
8523713Sahrens }
85311935SMark.Shellenbaum@Sun.COM if (--zp->z_links == zp_is_dir) {
8543461Sahrens zp->z_unlinked = B_TRUE;
85511935SMark.Shellenbaum@Sun.COM zp->z_links = 0;
8563461Sahrens unlinked = B_TRUE;
857789Sahrens } else {
85811935SMark.Shellenbaum@Sun.COM SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs),
85911935SMark.Shellenbaum@Sun.COM NULL, &ctime, sizeof (ctime));
86011935SMark.Shellenbaum@Sun.COM SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
86111935SMark.Shellenbaum@Sun.COM NULL, &zp->z_pflags, sizeof (zp->z_pflags));
86211935SMark.Shellenbaum@Sun.COM zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime,
86311935SMark.Shellenbaum@Sun.COM B_TRUE);
864789Sahrens }
86511935SMark.Shellenbaum@Sun.COM SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
86611935SMark.Shellenbaum@Sun.COM NULL, &zp->z_links, sizeof (zp->z_links));
86711935SMark.Shellenbaum@Sun.COM error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
86811935SMark.Shellenbaum@Sun.COM count = 0;
86911935SMark.Shellenbaum@Sun.COM ASSERT(error == 0);
870789Sahrens mutex_exit(&zp->z_lock);
871789Sahrens vn_vfsunlock(vp);
87212413SSam.Falkner@Sun.COM } else {
87312413SSam.Falkner@Sun.COM error = zfs_dropname(dl, zp, dzp, tx, flag);
87412413SSam.Falkner@Sun.COM if (error != 0)
87512413SSam.Falkner@Sun.COM return (error);
876789Sahrens }
877789Sahrens
878789Sahrens mutex_enter(&dzp->z_lock);
87911935SMark.Shellenbaum@Sun.COM dzp->z_size--; /* one dirent removed */
88011935SMark.Shellenbaum@Sun.COM dzp->z_links -= zp_is_dir; /* ".." link from zp */
88111935SMark.Shellenbaum@Sun.COM SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
88211935SMark.Shellenbaum@Sun.COM NULL, &dzp->z_links, sizeof (dzp->z_links));
88311935SMark.Shellenbaum@Sun.COM SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs),
88411935SMark.Shellenbaum@Sun.COM NULL, &dzp->z_size, sizeof (dzp->z_size));
88511935SMark.Shellenbaum@Sun.COM SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs),
88611935SMark.Shellenbaum@Sun.COM NULL, ctime, sizeof (ctime));
88711935SMark.Shellenbaum@Sun.COM SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs),
88811935SMark.Shellenbaum@Sun.COM NULL, mtime, sizeof (mtime));
88911935SMark.Shellenbaum@Sun.COM SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
89011935SMark.Shellenbaum@Sun.COM NULL, &dzp->z_pflags, sizeof (dzp->z_pflags));
89111935SMark.Shellenbaum@Sun.COM zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE);
89211935SMark.Shellenbaum@Sun.COM error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
89311935SMark.Shellenbaum@Sun.COM ASSERT(error == 0);
894789Sahrens mutex_exit(&dzp->z_lock);
895789Sahrens
8963461Sahrens if (unlinkedp != NULL)
8973461Sahrens *unlinkedp = unlinked;
8983461Sahrens else if (unlinked)
8993461Sahrens zfs_unlinked_add(zp, tx);
900789Sahrens
901789Sahrens return (0);
902789Sahrens }
903789Sahrens
904789Sahrens /*
905789Sahrens * Indicate whether the directory is empty. Works with or without z_lock
906789Sahrens * held, but can only be consider a hint in the latter case. Returns true
907789Sahrens * if only "." and ".." remain and there's no work in progress.
908789Sahrens */
909789Sahrens boolean_t
zfs_dirempty(znode_t * dzp)910789Sahrens zfs_dirempty(znode_t *dzp)
911789Sahrens {
91211935SMark.Shellenbaum@Sun.COM return (dzp->z_size == 2 && dzp->z_dirlocks == 0);
913789Sahrens }
914789Sahrens
915789Sahrens int
zfs_make_xattrdir(znode_t * zp,vattr_t * vap,vnode_t ** xvpp,cred_t * cr)916789Sahrens zfs_make_xattrdir(znode_t *zp, vattr_t *vap, vnode_t **xvpp, cred_t *cr)
917789Sahrens {
918789Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs;
919789Sahrens znode_t *xzp;
920789Sahrens dmu_tx_t *tx;
921789Sahrens int error;
9229179SMark.Shellenbaum@Sun.COM zfs_acl_ids_t acl_ids;
9239179SMark.Shellenbaum@Sun.COM boolean_t fuid_dirtied;
92411935SMark.Shellenbaum@Sun.COM uint64_t parent;
925789Sahrens
926789Sahrens *xvpp = NULL;
927789Sahrens
9285331Samw if (error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, 0, B_FALSE, cr))
929789Sahrens return (error);
930789Sahrens
9319179SMark.Shellenbaum@Sun.COM if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL,
9329179SMark.Shellenbaum@Sun.COM &acl_ids)) != 0)
9339179SMark.Shellenbaum@Sun.COM return (error);
9349396SMatthew.Ahrens@Sun.COM if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
9359396SMatthew.Ahrens@Sun.COM zfs_acl_ids_free(&acl_ids);
9369396SMatthew.Ahrens@Sun.COM return (EDQUOT);
9379396SMatthew.Ahrens@Sun.COM }
9389179SMark.Shellenbaum@Sun.COM
93912302SMark.Shellenbaum@Sun.COM top:
940789Sahrens tx = dmu_tx_create(zfsvfs->z_os);
94111935SMark.Shellenbaum@Sun.COM dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
94211935SMark.Shellenbaum@Sun.COM ZFS_SA_BASE_ATTR_SIZE);
94311935SMark.Shellenbaum@Sun.COM dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
9441544Seschrock dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
9459179SMark.Shellenbaum@Sun.COM fuid_dirtied = zfsvfs->z_fuid_dirty;
9469396SMatthew.Ahrens@Sun.COM if (fuid_dirtied)
9479396SMatthew.Ahrens@Sun.COM zfs_fuid_txhold(zfsvfs, tx);
9488227SNeil.Perrin@Sun.COM error = dmu_tx_assign(tx, TXG_NOWAIT);
949789Sahrens if (error) {
95012302SMark.Shellenbaum@Sun.COM if (error == ERESTART) {
95112302SMark.Shellenbaum@Sun.COM dmu_tx_wait(tx);
95212302SMark.Shellenbaum@Sun.COM dmu_tx_abort(tx);
95312302SMark.Shellenbaum@Sun.COM goto top;
95412302SMark.Shellenbaum@Sun.COM }
9559179SMark.Shellenbaum@Sun.COM zfs_acl_ids_free(&acl_ids);
956789Sahrens dmu_tx_abort(tx);
957789Sahrens return (error);
958789Sahrens }
95911935SMark.Shellenbaum@Sun.COM zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, &acl_ids);
9609179SMark.Shellenbaum@Sun.COM
9619179SMark.Shellenbaum@Sun.COM if (fuid_dirtied)
9629179SMark.Shellenbaum@Sun.COM zfs_fuid_sync(zfsvfs, tx);
9639179SMark.Shellenbaum@Sun.COM
96412218SMark.Shellenbaum@Sun.COM #ifdef DEBUG
96512218SMark.Shellenbaum@Sun.COM error = sa_lookup(xzp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
96612218SMark.Shellenbaum@Sun.COM &parent, sizeof (parent));
96712218SMark.Shellenbaum@Sun.COM ASSERT(error == 0 && parent == zp->z_id);
96812218SMark.Shellenbaum@Sun.COM #endif
96911935SMark.Shellenbaum@Sun.COM
97011935SMark.Shellenbaum@Sun.COM VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xzp->z_id,
97111935SMark.Shellenbaum@Sun.COM sizeof (xzp->z_id), tx));
972789Sahrens
9735331Samw (void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp,
9749179SMark.Shellenbaum@Sun.COM xzp, "", NULL, acl_ids.z_fuidp, vap);
9759179SMark.Shellenbaum@Sun.COM
9769179SMark.Shellenbaum@Sun.COM zfs_acl_ids_free(&acl_ids);
977789Sahrens dmu_tx_commit(tx);
978789Sahrens
979789Sahrens *xvpp = ZTOV(xzp);
980789Sahrens
981789Sahrens return (0);
982789Sahrens }
983789Sahrens
984789Sahrens /*
985789Sahrens * Return a znode for the extended attribute directory for zp.
986789Sahrens * ** If the directory does not already exist, it is created **
987789Sahrens *
988789Sahrens * IN: zp - znode to obtain attribute directory from
989789Sahrens * cr - credentials of caller
9903280Sck153898 * flags - flags from the VOP_LOOKUP call
991789Sahrens *
992789Sahrens * OUT: xzpp - pointer to extended attribute znode
993789Sahrens *
994789Sahrens * RETURN: 0 on success
995789Sahrens * error number on failure
996789Sahrens */
997789Sahrens int
zfs_get_xattrdir(znode_t * zp,vnode_t ** xvpp,cred_t * cr,int flags)9983280Sck153898 zfs_get_xattrdir(znode_t *zp, vnode_t **xvpp, cred_t *cr, int flags)
999789Sahrens {
1000789Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1001789Sahrens znode_t *xzp;
1002789Sahrens zfs_dirlock_t *dl;
1003789Sahrens vattr_t va;
1004789Sahrens int error;
1005789Sahrens top:
10065331Samw error = zfs_dirent_lock(&dl, zp, "", &xzp, ZXATTR, NULL, NULL);
1007789Sahrens if (error)
1008789Sahrens return (error);
1009789Sahrens
1010789Sahrens if (xzp != NULL) {
1011789Sahrens *xvpp = ZTOV(xzp);
1012789Sahrens zfs_dirent_unlock(dl);
1013789Sahrens return (0);
1014789Sahrens }
1015789Sahrens
1016789Sahrens
10173280Sck153898 if (!(flags & CREATE_XATTR_DIR)) {
10183280Sck153898 zfs_dirent_unlock(dl);
10193280Sck153898 return (ENOENT);
10203280Sck153898 }
10213280Sck153898
1022789Sahrens if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
1023789Sahrens zfs_dirent_unlock(dl);
1024789Sahrens return (EROFS);
1025789Sahrens }
1026789Sahrens
1027789Sahrens /*
1028789Sahrens * The ability to 'create' files in an attribute
1029789Sahrens * directory comes from the write_xattr permission on the base file.
1030789Sahrens *
1031789Sahrens * The ability to 'search' an attribute directory requires
1032789Sahrens * read_xattr permission on the base file.
1033789Sahrens *
1034789Sahrens * Once in a directory the ability to read/write attributes
1035789Sahrens * is controlled by the permissions on the attribute file.
1036789Sahrens */
1037789Sahrens va.va_mask = AT_TYPE | AT_MODE | AT_UID | AT_GID;
1038789Sahrens va.va_type = VDIR;
10391231Smarks va.va_mode = S_IFDIR | S_ISVTX | 0777;
10405771Sjp151216 zfs_fuid_map_ids(zp, cr, &va.va_uid, &va.va_gid);
1041789Sahrens
1042789Sahrens error = zfs_make_xattrdir(zp, &va, xvpp, cr);
1043789Sahrens zfs_dirent_unlock(dl);
1044789Sahrens
10458227SNeil.Perrin@Sun.COM if (error == ERESTART) {
10462113Sahrens /* NB: we already did dmu_tx_wait() if necessary */
1047789Sahrens goto top;
1048789Sahrens }
1049789Sahrens
1050789Sahrens return (error);
1051789Sahrens }
1052789Sahrens
1053789Sahrens /*
1054789Sahrens * Decide whether it is okay to remove within a sticky directory.
1055789Sahrens *
1056789Sahrens * In sticky directories, write access is not sufficient;
1057789Sahrens * you can remove entries from a directory only if:
1058789Sahrens *
1059789Sahrens * you own the directory,
1060789Sahrens * you own the entry,
1061789Sahrens * the entry is a plain file and you have write access,
1062789Sahrens * or you are privileged (checked in secpolicy...).
1063789Sahrens *
1064789Sahrens * The function returns 0 if remove access is granted.
1065789Sahrens */
1066789Sahrens int
zfs_sticky_remove_access(znode_t * zdp,znode_t * zp,cred_t * cr)1067789Sahrens zfs_sticky_remove_access(znode_t *zdp, znode_t *zp, cred_t *cr)
1068789Sahrens {
1069789Sahrens uid_t uid;
1070*13069SMark.Shellenbaum@Oracle.COM uid_t downer;
1071*13069SMark.Shellenbaum@Oracle.COM uid_t fowner;
1072*13069SMark.Shellenbaum@Oracle.COM zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
1073789Sahrens
10748227SNeil.Perrin@Sun.COM if (zdp->z_zfsvfs->z_replay)
1075789Sahrens return (0);
1076789Sahrens
107711935SMark.Shellenbaum@Sun.COM if ((zdp->z_mode & S_ISVTX) == 0)
10785331Samw return (0);
10795331Samw
1080*13069SMark.Shellenbaum@Oracle.COM downer = zfs_fuid_map_id(zfsvfs, zdp->z_uid, cr, ZFS_OWNER);
1081*13069SMark.Shellenbaum@Oracle.COM fowner = zfs_fuid_map_id(zfsvfs, zp->z_uid, cr, ZFS_OWNER);
1082*13069SMark.Shellenbaum@Oracle.COM
1083*13069SMark.Shellenbaum@Oracle.COM if ((uid = crgetuid(cr)) == downer || uid == fowner ||
1084789Sahrens (ZTOV(zp)->v_type == VREG &&
10855331Samw zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr) == 0))
1086789Sahrens return (0);
1087789Sahrens else
1088789Sahrens return (secpolicy_vnode_remove(cr));
1089789Sahrens }
1090