1eda14cbcSMatt Macy /* 2eda14cbcSMatt Macy * CDDL HEADER START 3eda14cbcSMatt Macy * 4eda14cbcSMatt Macy * The contents of this file are subject to the terms of the 5eda14cbcSMatt Macy * Common Development and Distribution License (the "License"). 6eda14cbcSMatt Macy * You may not use this file except in compliance with the License. 7eda14cbcSMatt Macy * 8eda14cbcSMatt Macy * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9271171e0SMartin Matuska * or https://opensource.org/licenses/CDDL-1.0. 10eda14cbcSMatt Macy * See the License for the specific language governing permissions 11eda14cbcSMatt Macy * and limitations under the License. 12eda14cbcSMatt Macy * 13eda14cbcSMatt Macy * When distributing Covered Code, include this CDDL HEADER in each 14eda14cbcSMatt Macy * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15eda14cbcSMatt Macy * If applicable, add the following below this CDDL HEADER, with the 16eda14cbcSMatt Macy * fields enclosed by brackets "[]" replaced with your own identifying 17eda14cbcSMatt Macy * information: Portions Copyright [yyyy] [name of copyright owner] 18eda14cbcSMatt Macy * 19eda14cbcSMatt Macy * CDDL HEADER END 20eda14cbcSMatt Macy */ 21eda14cbcSMatt Macy 22eda14cbcSMatt Macy /* 23eda14cbcSMatt Macy * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 24eda14cbcSMatt Macy * Copyright (c) 2013, 2016 by Delphix. All rights reserved. 25eda14cbcSMatt Macy * Copyright 2017 Nexenta Systems, Inc. 26eda14cbcSMatt Macy */ 27eda14cbcSMatt Macy 28eda14cbcSMatt Macy #include <sys/types.h> 29eda14cbcSMatt Macy #include <sys/param.h> 30eda14cbcSMatt Macy #include <sys/time.h> 31eda14cbcSMatt Macy #include <sys/systm.h> 32eda14cbcSMatt Macy #include <sys/sysmacros.h> 33eda14cbcSMatt Macy #include <sys/resource.h> 34eda14cbcSMatt Macy #include <sys/vfs.h> 35eda14cbcSMatt Macy #include <sys/vnode.h> 36eda14cbcSMatt Macy #include <sys/file.h> 37eda14cbcSMatt Macy #include <sys/kmem.h> 38eda14cbcSMatt Macy #include <sys/uio.h> 39eda14cbcSMatt Macy #include <sys/cmn_err.h> 40eda14cbcSMatt Macy #include <sys/errno.h> 41eda14cbcSMatt Macy #include <sys/stat.h> 42eda14cbcSMatt Macy #include <sys/unistd.h> 43eda14cbcSMatt Macy #include <sys/sunddi.h> 44eda14cbcSMatt Macy #include <sys/random.h> 45eda14cbcSMatt Macy #include <sys/policy.h> 46eda14cbcSMatt Macy #include <sys/condvar.h> 47eda14cbcSMatt Macy #include <sys/callb.h> 48eda14cbcSMatt Macy #include <sys/smp.h> 49eda14cbcSMatt Macy #include <sys/zfs_dir.h> 50eda14cbcSMatt Macy #include <sys/zfs_acl.h> 51eda14cbcSMatt Macy #include <sys/fs/zfs.h> 52eda14cbcSMatt Macy #include <sys/zap.h> 53eda14cbcSMatt Macy #include <sys/dmu.h> 54eda14cbcSMatt Macy #include <sys/atomic.h> 55eda14cbcSMatt Macy #include <sys/zfs_ctldir.h> 56eda14cbcSMatt Macy #include <sys/zfs_fuid.h> 57eda14cbcSMatt Macy #include <sys/sa.h> 58eda14cbcSMatt Macy #include <sys/zfs_sa.h> 59eda14cbcSMatt Macy #include <sys/dmu_objset.h> 60eda14cbcSMatt Macy #include <sys/dsl_dir.h> 61eda14cbcSMatt Macy 62eda14cbcSMatt Macy #include <sys/ccompat.h> 63eda14cbcSMatt Macy 64eda14cbcSMatt Macy /* 65eda14cbcSMatt Macy * zfs_match_find() is used by zfs_dirent_lookup() to perform zap lookups 66eda14cbcSMatt Macy * of names after deciding which is the appropriate lookup interface. 67eda14cbcSMatt Macy */ 68eda14cbcSMatt Macy static int 69eda14cbcSMatt Macy zfs_match_find(zfsvfs_t *zfsvfs, znode_t *dzp, const char *name, 70eda14cbcSMatt Macy matchtype_t mt, uint64_t *zoid) 71eda14cbcSMatt Macy { 72eda14cbcSMatt Macy int error; 73eda14cbcSMatt Macy 74eda14cbcSMatt Macy if (zfsvfs->z_norm) { 75eda14cbcSMatt Macy 76eda14cbcSMatt Macy /* 77eda14cbcSMatt Macy * In the non-mixed case we only expect there would ever 78eda14cbcSMatt Macy * be one match, but we need to use the normalizing lookup. 79eda14cbcSMatt Macy */ 80eda14cbcSMatt Macy error = zap_lookup_norm(zfsvfs->z_os, dzp->z_id, name, 8, 1, 81eda14cbcSMatt Macy zoid, mt, NULL, 0, NULL); 82eda14cbcSMatt Macy } else { 83eda14cbcSMatt Macy error = zap_lookup(zfsvfs->z_os, dzp->z_id, name, 8, 1, zoid); 84eda14cbcSMatt Macy } 85eda14cbcSMatt Macy *zoid = ZFS_DIRENT_OBJ(*zoid); 86eda14cbcSMatt Macy 87eda14cbcSMatt Macy return (error); 88eda14cbcSMatt Macy } 89eda14cbcSMatt Macy 90eda14cbcSMatt Macy /* 91eda14cbcSMatt Macy * Look up a directory entry under a locked vnode. 92eda14cbcSMatt Macy * dvp being locked gives us a guarantee that there are no concurrent 93eda14cbcSMatt Macy * modification of the directory and, thus, if a node can be found in 94eda14cbcSMatt Macy * the directory, then it must not be unlinked. 95eda14cbcSMatt Macy * 96eda14cbcSMatt Macy * Input arguments: 97eda14cbcSMatt Macy * dzp - znode for directory 98eda14cbcSMatt Macy * name - name of entry to lock 99eda14cbcSMatt Macy * flag - ZNEW: if the entry already exists, fail with EEXIST. 100eda14cbcSMatt Macy * ZEXISTS: if the entry does not exist, fail with ENOENT. 101eda14cbcSMatt Macy * ZXATTR: we want dzp's xattr directory 102eda14cbcSMatt Macy * 103eda14cbcSMatt Macy * Output arguments: 104eda14cbcSMatt Macy * zpp - pointer to the znode for the entry (NULL if there isn't one) 105eda14cbcSMatt Macy * 106eda14cbcSMatt Macy * Return value: 0 on success or errno on failure. 107eda14cbcSMatt Macy * 108eda14cbcSMatt Macy * NOTE: Always checks for, and rejects, '.' and '..'. 109eda14cbcSMatt Macy */ 110eda14cbcSMatt Macy int 111eda14cbcSMatt Macy zfs_dirent_lookup(znode_t *dzp, const char *name, znode_t **zpp, int flag) 112eda14cbcSMatt Macy { 113eda14cbcSMatt Macy zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 114eda14cbcSMatt Macy znode_t *zp; 115eda14cbcSMatt Macy matchtype_t mt = 0; 116eda14cbcSMatt Macy uint64_t zoid; 117eda14cbcSMatt Macy int error = 0; 118eda14cbcSMatt Macy 119eda14cbcSMatt Macy if (zfsvfs->z_replay == B_FALSE) 120eda14cbcSMatt Macy ASSERT_VOP_LOCKED(ZTOV(dzp), __func__); 121eda14cbcSMatt Macy 122eda14cbcSMatt Macy *zpp = NULL; 123eda14cbcSMatt Macy 124eda14cbcSMatt Macy /* 125eda14cbcSMatt Macy * Verify that we are not trying to lock '.', '..', or '.zfs' 126eda14cbcSMatt Macy */ 127eda14cbcSMatt Macy if (name[0] == '.' && 128eda14cbcSMatt Macy (((name[1] == '\0') || (name[1] == '.' && name[2] == '\0')) || 129eda14cbcSMatt Macy (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0))) 130eda14cbcSMatt Macy return (SET_ERROR(EEXIST)); 131eda14cbcSMatt Macy 132eda14cbcSMatt Macy /* 133eda14cbcSMatt Macy * Case sensitivity and normalization preferences are set when 134eda14cbcSMatt Macy * the file system is created. These are stored in the 135eda14cbcSMatt Macy * zfsvfs->z_case and zfsvfs->z_norm fields. These choices 136eda14cbcSMatt Macy * affect how we perform zap lookups. 137eda14cbcSMatt Macy * 138eda14cbcSMatt Macy * When matching we may need to normalize & change case according to 139eda14cbcSMatt Macy * FS settings. 140eda14cbcSMatt Macy * 141eda14cbcSMatt Macy * Note that a normalized match is necessary for a case insensitive 142eda14cbcSMatt Macy * filesystem when the lookup request is not exact because normalization 143eda14cbcSMatt Macy * can fold case independent of normalizing code point sequences. 144eda14cbcSMatt Macy * 145eda14cbcSMatt Macy * See the table above zfs_dropname(). 146eda14cbcSMatt Macy */ 147eda14cbcSMatt Macy if (zfsvfs->z_norm != 0) { 148eda14cbcSMatt Macy mt = MT_NORMALIZE; 149eda14cbcSMatt Macy 150eda14cbcSMatt Macy /* 151eda14cbcSMatt Macy * Determine if the match needs to honor the case specified in 152eda14cbcSMatt Macy * lookup, and if so keep track of that so that during 153eda14cbcSMatt Macy * normalization we don't fold case. 154eda14cbcSMatt Macy */ 155eda14cbcSMatt Macy if (zfsvfs->z_case == ZFS_CASE_MIXED) { 156eda14cbcSMatt Macy mt |= MT_MATCH_CASE; 157eda14cbcSMatt Macy } 158eda14cbcSMatt Macy } 159eda14cbcSMatt Macy 160eda14cbcSMatt Macy /* 161eda14cbcSMatt Macy * Only look in or update the DNLC if we are looking for the 162eda14cbcSMatt Macy * name on a file system that does not require normalization 163eda14cbcSMatt Macy * or case folding. We can also look there if we happen to be 164eda14cbcSMatt Macy * on a non-normalizing, mixed sensitivity file system IF we 165eda14cbcSMatt Macy * are looking for the exact name. 166eda14cbcSMatt Macy * 167eda14cbcSMatt Macy * NB: we do not need to worry about this flag for ZFS_CASE_SENSITIVE 168eda14cbcSMatt Macy * because in that case MT_EXACT and MT_FIRST should produce exactly 169eda14cbcSMatt Macy * the same result. 170eda14cbcSMatt Macy */ 171eda14cbcSMatt Macy 172eda14cbcSMatt Macy if (dzp->z_unlinked && !(flag & ZXATTR)) 173eda14cbcSMatt Macy return (ENOENT); 174eda14cbcSMatt Macy if (flag & ZXATTR) { 175eda14cbcSMatt Macy error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &zoid, 176eda14cbcSMatt Macy sizeof (zoid)); 177eda14cbcSMatt Macy if (error == 0) 178eda14cbcSMatt Macy error = (zoid == 0 ? ENOENT : 0); 179eda14cbcSMatt Macy } else { 180eda14cbcSMatt Macy error = zfs_match_find(zfsvfs, dzp, name, mt, &zoid); 181eda14cbcSMatt Macy } 182eda14cbcSMatt Macy if (error) { 183eda14cbcSMatt Macy if (error != ENOENT || (flag & ZEXISTS)) { 184eda14cbcSMatt Macy return (error); 185eda14cbcSMatt Macy } 186eda14cbcSMatt Macy } else { 187eda14cbcSMatt Macy if (flag & ZNEW) { 188eda14cbcSMatt Macy return (SET_ERROR(EEXIST)); 189eda14cbcSMatt Macy } 190eda14cbcSMatt Macy error = zfs_zget(zfsvfs, zoid, &zp); 191eda14cbcSMatt Macy if (error) 192eda14cbcSMatt Macy return (error); 193eda14cbcSMatt Macy ASSERT(!zp->z_unlinked); 194eda14cbcSMatt Macy *zpp = zp; 195eda14cbcSMatt Macy } 196eda14cbcSMatt Macy 197eda14cbcSMatt Macy return (0); 198eda14cbcSMatt Macy } 199eda14cbcSMatt Macy 200eda14cbcSMatt Macy static int 201eda14cbcSMatt Macy zfs_dd_lookup(znode_t *dzp, znode_t **zpp) 202eda14cbcSMatt Macy { 203eda14cbcSMatt Macy zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 204eda14cbcSMatt Macy znode_t *zp; 205eda14cbcSMatt Macy uint64_t parent; 206eda14cbcSMatt Macy int error; 207eda14cbcSMatt Macy 2088ce21ae6SMateusz Guzik #ifdef ZFS_DEBUG 209eda14cbcSMatt Macy if (zfsvfs->z_replay == B_FALSE) 210eda14cbcSMatt Macy ASSERT_VOP_LOCKED(ZTOV(dzp), __func__); 2118ce21ae6SMateusz Guzik #endif 212eda14cbcSMatt Macy if (dzp->z_unlinked) 213eda14cbcSMatt Macy return (ENOENT); 214eda14cbcSMatt Macy 215eda14cbcSMatt Macy if ((error = sa_lookup(dzp->z_sa_hdl, 216eda14cbcSMatt Macy SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0) 217eda14cbcSMatt Macy return (error); 218eda14cbcSMatt Macy 219eda14cbcSMatt Macy error = zfs_zget(zfsvfs, parent, &zp); 220eda14cbcSMatt Macy if (error == 0) 221eda14cbcSMatt Macy *zpp = zp; 222eda14cbcSMatt Macy return (error); 223eda14cbcSMatt Macy } 224eda14cbcSMatt Macy 225eda14cbcSMatt Macy int 226eda14cbcSMatt Macy zfs_dirlook(znode_t *dzp, const char *name, znode_t **zpp) 227eda14cbcSMatt Macy { 228eda14cbcSMatt Macy zfsvfs_t *zfsvfs __unused = dzp->z_zfsvfs; 229eda14cbcSMatt Macy znode_t *zp = NULL; 230eda14cbcSMatt Macy int error = 0; 231eda14cbcSMatt Macy 232eda14cbcSMatt Macy #ifdef ZFS_DEBUG 233eda14cbcSMatt Macy if (zfsvfs->z_replay == B_FALSE) 234eda14cbcSMatt Macy ASSERT_VOP_LOCKED(ZTOV(dzp), __func__); 235eda14cbcSMatt Macy #endif 236eda14cbcSMatt Macy if (dzp->z_unlinked) 237eda14cbcSMatt Macy return (SET_ERROR(ENOENT)); 238eda14cbcSMatt Macy 239eda14cbcSMatt Macy if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) { 240eda14cbcSMatt Macy *zpp = dzp; 241eda14cbcSMatt Macy } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) { 242eda14cbcSMatt Macy error = zfs_dd_lookup(dzp, &zp); 243eda14cbcSMatt Macy if (error == 0) 244eda14cbcSMatt Macy *zpp = zp; 245eda14cbcSMatt Macy } else { 246eda14cbcSMatt Macy error = zfs_dirent_lookup(dzp, name, &zp, ZEXISTS); 247eda14cbcSMatt Macy if (error == 0) { 248eda14cbcSMatt Macy dzp->z_zn_prefetch = B_TRUE; /* enable prefetching */ 249eda14cbcSMatt Macy *zpp = zp; 250eda14cbcSMatt Macy } 251eda14cbcSMatt Macy } 252eda14cbcSMatt Macy return (error); 253eda14cbcSMatt Macy } 254eda14cbcSMatt Macy 255eda14cbcSMatt Macy /* 256eda14cbcSMatt Macy * unlinked Set (formerly known as the "delete queue") Error Handling 257eda14cbcSMatt Macy * 258eda14cbcSMatt Macy * When dealing with the unlinked set, we dmu_tx_hold_zap(), but we 259eda14cbcSMatt Macy * don't specify the name of the entry that we will be manipulating. We 260eda14cbcSMatt Macy * also fib and say that we won't be adding any new entries to the 261eda14cbcSMatt Macy * unlinked set, even though we might (this is to lower the minimum file 262eda14cbcSMatt Macy * size that can be deleted in a full filesystem). So on the small 263eda14cbcSMatt Macy * chance that the nlink list is using a fat zap (ie. has more than 264eda14cbcSMatt Macy * 2000 entries), we *may* not pre-read a block that's needed. 265eda14cbcSMatt Macy * Therefore it is remotely possible for some of the assertions 266eda14cbcSMatt Macy * regarding the unlinked set below to fail due to i/o error. On a 267eda14cbcSMatt Macy * nondebug system, this will result in the space being leaked. 268eda14cbcSMatt Macy */ 269eda14cbcSMatt Macy void 270eda14cbcSMatt Macy zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx) 271eda14cbcSMatt Macy { 272eda14cbcSMatt Macy zfsvfs_t *zfsvfs = zp->z_zfsvfs; 273eda14cbcSMatt Macy 274eda14cbcSMatt Macy ASSERT(zp->z_unlinked); 27516038816SMartin Matuska ASSERT3U(zp->z_links, ==, 0); 276eda14cbcSMatt Macy 27716038816SMartin Matuska VERIFY0(zap_add_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx)); 278eda14cbcSMatt Macy 279eda14cbcSMatt Macy dataset_kstats_update_nunlinks_kstat(&zfsvfs->z_kstat, 1); 280eda14cbcSMatt Macy } 281eda14cbcSMatt Macy 282eda14cbcSMatt Macy /* 283eda14cbcSMatt Macy * Clean up any znodes that had no links when we either crashed or 284eda14cbcSMatt Macy * (force) umounted the file system. 285eda14cbcSMatt Macy */ 286eda14cbcSMatt Macy void 287eda14cbcSMatt Macy zfs_unlinked_drain(zfsvfs_t *zfsvfs) 288eda14cbcSMatt Macy { 289eda14cbcSMatt Macy zap_cursor_t zc; 290*7a7741afSMartin Matuska zap_attribute_t *zap; 291eda14cbcSMatt Macy dmu_object_info_t doi; 292eda14cbcSMatt Macy znode_t *zp; 293eda14cbcSMatt Macy dmu_tx_t *tx; 294eda14cbcSMatt Macy int error; 295eda14cbcSMatt Macy 296eda14cbcSMatt Macy /* 297eda14cbcSMatt Macy * Iterate over the contents of the unlinked set. 298eda14cbcSMatt Macy */ 299*7a7741afSMartin Matuska zap = zap_attribute_alloc(); 300eda14cbcSMatt Macy for (zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_unlinkedobj); 301*7a7741afSMartin Matuska zap_cursor_retrieve(&zc, zap) == 0; 302eda14cbcSMatt Macy zap_cursor_advance(&zc)) { 303eda14cbcSMatt Macy 304eda14cbcSMatt Macy /* 305eda14cbcSMatt Macy * See what kind of object we have in list 306eda14cbcSMatt Macy */ 307eda14cbcSMatt Macy 308eda14cbcSMatt Macy error = dmu_object_info(zfsvfs->z_os, 309*7a7741afSMartin Matuska zap->za_first_integer, &doi); 310eda14cbcSMatt Macy if (error != 0) 311eda14cbcSMatt Macy continue; 312eda14cbcSMatt Macy 313eda14cbcSMatt Macy ASSERT((doi.doi_type == DMU_OT_PLAIN_FILE_CONTENTS) || 314eda14cbcSMatt Macy (doi.doi_type == DMU_OT_DIRECTORY_CONTENTS)); 315eda14cbcSMatt Macy /* 316eda14cbcSMatt Macy * We need to re-mark these list entries for deletion, 317eda14cbcSMatt Macy * so we pull them back into core and set zp->z_unlinked. 318eda14cbcSMatt Macy */ 319*7a7741afSMartin Matuska error = zfs_zget(zfsvfs, zap->za_first_integer, &zp); 320eda14cbcSMatt Macy 321eda14cbcSMatt Macy /* 322eda14cbcSMatt Macy * We may pick up znodes that are already marked for deletion. 323eda14cbcSMatt Macy * This could happen during the purge of an extended attribute 324eda14cbcSMatt Macy * directory. All we need to do is skip over them, since they 325eda14cbcSMatt Macy * are already in the system marked z_unlinked. 326eda14cbcSMatt Macy */ 327eda14cbcSMatt Macy if (error != 0) 328eda14cbcSMatt Macy continue; 329eda14cbcSMatt Macy 330eda14cbcSMatt Macy vn_lock(ZTOV(zp), LK_EXCLUSIVE | LK_RETRY); 331eda14cbcSMatt Macy 332eda14cbcSMatt Macy /* 333eda14cbcSMatt Macy * Due to changes in zfs_rmnode we need to make sure the 334eda14cbcSMatt Macy * link count is set to zero here. 335eda14cbcSMatt Macy */ 336eda14cbcSMatt Macy if (zp->z_links != 0) { 337eda14cbcSMatt Macy tx = dmu_tx_create(zfsvfs->z_os); 338eda14cbcSMatt Macy dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 339eda14cbcSMatt Macy error = dmu_tx_assign(tx, TXG_WAIT); 340eda14cbcSMatt Macy if (error != 0) { 341eda14cbcSMatt Macy dmu_tx_abort(tx); 342eda14cbcSMatt Macy vput(ZTOV(zp)); 343eda14cbcSMatt Macy continue; 344eda14cbcSMatt Macy } 345eda14cbcSMatt Macy zp->z_links = 0; 346eda14cbcSMatt Macy VERIFY0(sa_update(zp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs), 347eda14cbcSMatt Macy &zp->z_links, sizeof (zp->z_links), tx)); 348eda14cbcSMatt Macy dmu_tx_commit(tx); 349eda14cbcSMatt Macy } 350eda14cbcSMatt Macy 351eda14cbcSMatt Macy zp->z_unlinked = B_TRUE; 352eda14cbcSMatt Macy vput(ZTOV(zp)); 353eda14cbcSMatt Macy } 354eda14cbcSMatt Macy zap_cursor_fini(&zc); 355*7a7741afSMartin Matuska zap_attribute_free(zap); 356eda14cbcSMatt Macy } 357eda14cbcSMatt Macy 358eda14cbcSMatt Macy /* 359eda14cbcSMatt Macy * Delete the entire contents of a directory. Return a count 360eda14cbcSMatt Macy * of the number of entries that could not be deleted. If we encounter 361eda14cbcSMatt Macy * an error, return a count of at least one so that the directory stays 362eda14cbcSMatt Macy * in the unlinked set. 363eda14cbcSMatt Macy * 364eda14cbcSMatt Macy * NOTE: this function assumes that the directory is inactive, 365eda14cbcSMatt Macy * so there is no need to lock its entries before deletion. 366eda14cbcSMatt Macy * Also, it assumes the directory contents is *only* regular 367eda14cbcSMatt Macy * files. 368eda14cbcSMatt Macy */ 369eda14cbcSMatt Macy static int 370eda14cbcSMatt Macy zfs_purgedir(znode_t *dzp) 371eda14cbcSMatt Macy { 372eda14cbcSMatt Macy zap_cursor_t zc; 373*7a7741afSMartin Matuska zap_attribute_t *zap; 374eda14cbcSMatt Macy znode_t *xzp; 375eda14cbcSMatt Macy dmu_tx_t *tx; 376eda14cbcSMatt Macy zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 377eda14cbcSMatt Macy int skipped = 0; 378eda14cbcSMatt Macy int error; 379eda14cbcSMatt Macy 380*7a7741afSMartin Matuska zap = zap_attribute_alloc(); 381eda14cbcSMatt Macy for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id); 382*7a7741afSMartin Matuska (error = zap_cursor_retrieve(&zc, zap)) == 0; 383eda14cbcSMatt Macy zap_cursor_advance(&zc)) { 384eda14cbcSMatt Macy error = zfs_zget(zfsvfs, 385*7a7741afSMartin Matuska ZFS_DIRENT_OBJ(zap->za_first_integer), &xzp); 386eda14cbcSMatt Macy if (error) { 387eda14cbcSMatt Macy skipped += 1; 388eda14cbcSMatt Macy continue; 389eda14cbcSMatt Macy } 390eda14cbcSMatt Macy 391eda14cbcSMatt Macy vn_lock(ZTOV(xzp), LK_EXCLUSIVE | LK_RETRY); 392eda14cbcSMatt Macy ASSERT((ZTOV(xzp)->v_type == VREG) || 393eda14cbcSMatt Macy (ZTOV(xzp)->v_type == VLNK)); 394eda14cbcSMatt Macy 395eda14cbcSMatt Macy tx = dmu_tx_create(zfsvfs->z_os); 396eda14cbcSMatt Macy dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 397*7a7741afSMartin Matuska dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap->za_name); 398eda14cbcSMatt Macy dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); 399eda14cbcSMatt Macy dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 400eda14cbcSMatt Macy /* Is this really needed ? */ 401eda14cbcSMatt Macy zfs_sa_upgrade_txholds(tx, xzp); 402eda14cbcSMatt Macy dmu_tx_mark_netfree(tx); 403eda14cbcSMatt Macy error = dmu_tx_assign(tx, TXG_WAIT); 404eda14cbcSMatt Macy if (error) { 405eda14cbcSMatt Macy dmu_tx_abort(tx); 406eda14cbcSMatt Macy vput(ZTOV(xzp)); 407eda14cbcSMatt Macy skipped += 1; 408eda14cbcSMatt Macy continue; 409eda14cbcSMatt Macy } 410eda14cbcSMatt Macy 411*7a7741afSMartin Matuska error = zfs_link_destroy(dzp, zap->za_name, xzp, tx, 0, NULL); 412eda14cbcSMatt Macy if (error) 413eda14cbcSMatt Macy skipped += 1; 414eda14cbcSMatt Macy dmu_tx_commit(tx); 415eda14cbcSMatt Macy 416eda14cbcSMatt Macy vput(ZTOV(xzp)); 417eda14cbcSMatt Macy } 418eda14cbcSMatt Macy zap_cursor_fini(&zc); 419*7a7741afSMartin Matuska zap_attribute_free(zap); 420eda14cbcSMatt Macy if (error != ENOENT) 421eda14cbcSMatt Macy skipped += 1; 422eda14cbcSMatt Macy return (skipped); 423eda14cbcSMatt Macy } 424eda14cbcSMatt Macy 425eda14cbcSMatt Macy extern taskq_t *zfsvfs_taskq; 426eda14cbcSMatt Macy 427eda14cbcSMatt Macy void 428eda14cbcSMatt Macy zfs_rmnode(znode_t *zp) 429eda14cbcSMatt Macy { 430eda14cbcSMatt Macy zfsvfs_t *zfsvfs = zp->z_zfsvfs; 431eda14cbcSMatt Macy objset_t *os = zfsvfs->z_os; 432eda14cbcSMatt Macy dmu_tx_t *tx; 43315f0b8c3SMartin Matuska uint64_t z_id = zp->z_id; 434eda14cbcSMatt Macy uint64_t acl_obj; 435eda14cbcSMatt Macy uint64_t xattr_obj; 436eda14cbcSMatt Macy uint64_t count; 437eda14cbcSMatt Macy int error; 438eda14cbcSMatt Macy 43916038816SMartin Matuska ASSERT3U(zp->z_links, ==, 0); 440eda14cbcSMatt Macy if (zfsvfs->z_replay == B_FALSE) 441eda14cbcSMatt Macy ASSERT_VOP_ELOCKED(ZTOV(zp), __func__); 442eda14cbcSMatt Macy 443eda14cbcSMatt Macy /* 444eda14cbcSMatt Macy * If this is an attribute directory, purge its contents. 445eda14cbcSMatt Macy */ 446eda14cbcSMatt Macy if (ZTOV(zp) != NULL && ZTOV(zp)->v_type == VDIR && 447eda14cbcSMatt Macy (zp->z_pflags & ZFS_XATTR)) { 448eda14cbcSMatt Macy if (zfs_purgedir(zp) != 0) { 449eda14cbcSMatt Macy /* 450eda14cbcSMatt Macy * Not enough space to delete some xattrs. 451eda14cbcSMatt Macy * Leave it in the unlinked set. 452eda14cbcSMatt Macy */ 45315f0b8c3SMartin Matuska ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id); 454eda14cbcSMatt Macy zfs_znode_dmu_fini(zp); 455eda14cbcSMatt Macy zfs_znode_free(zp); 45615f0b8c3SMartin Matuska ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); 457eda14cbcSMatt Macy return; 458eda14cbcSMatt Macy } 459eda14cbcSMatt Macy } else { 460eda14cbcSMatt Macy /* 461eda14cbcSMatt Macy * Free up all the data in the file. We don't do this for 462eda14cbcSMatt Macy * XATTR directories because we need truncate and remove to be 463eda14cbcSMatt Macy * in the same tx, like in zfs_znode_delete(). Otherwise, if 464eda14cbcSMatt Macy * we crash here we'll end up with an inconsistent truncated 465eda14cbcSMatt Macy * zap object in the delete queue. Note a truncated file is 466eda14cbcSMatt Macy * harmless since it only contains user data. 467eda14cbcSMatt Macy */ 468eda14cbcSMatt Macy error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END); 469eda14cbcSMatt Macy if (error) { 470eda14cbcSMatt Macy /* 471eda14cbcSMatt Macy * Not enough space or we were interrupted by unmount. 472eda14cbcSMatt Macy * Leave the file in the unlinked set. 473eda14cbcSMatt Macy */ 47415f0b8c3SMartin Matuska ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id); 475eda14cbcSMatt Macy zfs_znode_dmu_fini(zp); 476eda14cbcSMatt Macy zfs_znode_free(zp); 47715f0b8c3SMartin Matuska ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); 478eda14cbcSMatt Macy return; 479eda14cbcSMatt Macy } 480eda14cbcSMatt Macy } 481eda14cbcSMatt Macy 482eda14cbcSMatt Macy /* 483eda14cbcSMatt Macy * If the file has extended attributes, we're going to unlink 484eda14cbcSMatt Macy * the xattr dir. 485eda14cbcSMatt Macy */ 486eda14cbcSMatt Macy error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 487eda14cbcSMatt Macy &xattr_obj, sizeof (xattr_obj)); 488eda14cbcSMatt Macy if (error) 489eda14cbcSMatt Macy xattr_obj = 0; 490eda14cbcSMatt Macy 491eda14cbcSMatt Macy acl_obj = zfs_external_acl(zp); 492eda14cbcSMatt Macy 493eda14cbcSMatt Macy /* 494eda14cbcSMatt Macy * Set up the final transaction. 495eda14cbcSMatt Macy */ 496eda14cbcSMatt Macy tx = dmu_tx_create(os); 497eda14cbcSMatt Macy dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END); 498eda14cbcSMatt Macy dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 499eda14cbcSMatt Macy if (xattr_obj) 500eda14cbcSMatt Macy dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL); 501eda14cbcSMatt Macy if (acl_obj) 502eda14cbcSMatt Macy dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); 503eda14cbcSMatt Macy 504eda14cbcSMatt Macy zfs_sa_upgrade_txholds(tx, zp); 505eda14cbcSMatt Macy error = dmu_tx_assign(tx, TXG_WAIT); 506eda14cbcSMatt Macy if (error) { 507eda14cbcSMatt Macy /* 508eda14cbcSMatt Macy * Not enough space to delete the file. Leave it in the 509eda14cbcSMatt Macy * unlinked set, leaking it until the fs is remounted (at 510eda14cbcSMatt Macy * which point we'll call zfs_unlinked_drain() to process it). 511eda14cbcSMatt Macy */ 512eda14cbcSMatt Macy dmu_tx_abort(tx); 51315f0b8c3SMartin Matuska ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id); 514eda14cbcSMatt Macy zfs_znode_dmu_fini(zp); 515eda14cbcSMatt Macy zfs_znode_free(zp); 51615f0b8c3SMartin Matuska ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); 517eda14cbcSMatt Macy return; 518eda14cbcSMatt Macy } 519eda14cbcSMatt Macy 520eda14cbcSMatt Macy /* 521eda14cbcSMatt Macy * FreeBSD's implementation of zfs_zget requires a vnode to back it. 522eda14cbcSMatt Macy * This means that we could end up calling into getnewvnode while 523eda14cbcSMatt Macy * calling zfs_rmnode as a result of a prior call to getnewvnode 524eda14cbcSMatt Macy * trying to clear vnodes out of the cache. If this repeats we can 525eda14cbcSMatt Macy * recurse enough that we overflow our stack. To avoid this, we 526eda14cbcSMatt Macy * avoid calling zfs_zget on the xattr znode and instead simply add 527eda14cbcSMatt Macy * it to the unlinked set and schedule a call to zfs_unlinked_drain. 528eda14cbcSMatt Macy */ 529eda14cbcSMatt Macy if (xattr_obj) { 530eda14cbcSMatt Macy /* Add extended attribute directory to the unlinked set. */ 531eda14cbcSMatt Macy VERIFY3U(0, ==, 532eda14cbcSMatt Macy zap_add_int(os, zfsvfs->z_unlinkedobj, xattr_obj, tx)); 533eda14cbcSMatt Macy } 534eda14cbcSMatt Macy 535eda14cbcSMatt Macy mutex_enter(&os->os_dsl_dataset->ds_dir->dd_activity_lock); 536eda14cbcSMatt Macy 537eda14cbcSMatt Macy /* Remove this znode from the unlinked set */ 538eda14cbcSMatt Macy VERIFY3U(0, ==, 539eda14cbcSMatt Macy zap_remove_int(os, zfsvfs->z_unlinkedobj, zp->z_id, tx)); 540eda14cbcSMatt Macy 541eda14cbcSMatt Macy if (zap_count(os, zfsvfs->z_unlinkedobj, &count) == 0 && count == 0) { 542eda14cbcSMatt Macy cv_broadcast(&os->os_dsl_dataset->ds_dir->dd_activity_cv); 543eda14cbcSMatt Macy } 544eda14cbcSMatt Macy 545eda14cbcSMatt Macy mutex_exit(&os->os_dsl_dataset->ds_dir->dd_activity_lock); 546eda14cbcSMatt Macy 547eda14cbcSMatt Macy dataset_kstats_update_nunlinked_kstat(&zfsvfs->z_kstat, 1); 548eda14cbcSMatt Macy 549eda14cbcSMatt Macy zfs_znode_delete(zp, tx); 550aca928a5SMartin Matuska zfs_znode_free(zp); 551eda14cbcSMatt Macy 552eda14cbcSMatt Macy dmu_tx_commit(tx); 553eda14cbcSMatt Macy 554eda14cbcSMatt Macy if (xattr_obj) { 555eda14cbcSMatt Macy /* 556eda14cbcSMatt Macy * We're using the FreeBSD taskqueue API here instead of 557eda14cbcSMatt Macy * the Solaris taskq API since the FreeBSD API allows for a 558eda14cbcSMatt Macy * task to be enqueued multiple times but executed once. 559eda14cbcSMatt Macy */ 560eda14cbcSMatt Macy taskqueue_enqueue(zfsvfs_taskq->tq_queue, 561eda14cbcSMatt Macy &zfsvfs->z_unlinked_drain_task); 562eda14cbcSMatt Macy } 563eda14cbcSMatt Macy } 564eda14cbcSMatt Macy 565eda14cbcSMatt Macy static uint64_t 566eda14cbcSMatt Macy zfs_dirent(znode_t *zp, uint64_t mode) 567eda14cbcSMatt Macy { 568eda14cbcSMatt Macy uint64_t de = zp->z_id; 569eda14cbcSMatt Macy 570eda14cbcSMatt Macy if (zp->z_zfsvfs->z_version >= ZPL_VERSION_DIRENT_TYPE) 571eda14cbcSMatt Macy de |= IFTODT(mode) << 60; 572eda14cbcSMatt Macy return (de); 573eda14cbcSMatt Macy } 574eda14cbcSMatt Macy 575eda14cbcSMatt Macy /* 576eda14cbcSMatt Macy * Link zp into dzp. Can only fail if zp has been unlinked. 577eda14cbcSMatt Macy */ 578eda14cbcSMatt Macy int 579eda14cbcSMatt Macy zfs_link_create(znode_t *dzp, const char *name, znode_t *zp, dmu_tx_t *tx, 580eda14cbcSMatt Macy int flag) 581eda14cbcSMatt Macy { 582eda14cbcSMatt Macy zfsvfs_t *zfsvfs = zp->z_zfsvfs; 583eda14cbcSMatt Macy vnode_t *vp = ZTOV(zp); 584eda14cbcSMatt Macy uint64_t value; 585eda14cbcSMatt Macy int zp_is_dir = (vp->v_type == VDIR); 586eda14cbcSMatt Macy sa_bulk_attr_t bulk[5]; 587eda14cbcSMatt Macy uint64_t mtime[2], ctime[2]; 588eda14cbcSMatt Macy int count = 0; 589eda14cbcSMatt Macy int error; 590eda14cbcSMatt Macy 591eda14cbcSMatt Macy if (zfsvfs->z_replay == B_FALSE) { 592eda14cbcSMatt Macy ASSERT_VOP_ELOCKED(ZTOV(dzp), __func__); 593eda14cbcSMatt Macy ASSERT_VOP_ELOCKED(ZTOV(zp), __func__); 594eda14cbcSMatt Macy } 595eda14cbcSMatt Macy if (zp_is_dir) { 596eda14cbcSMatt Macy if (dzp->z_links >= ZFS_LINK_MAX) 597eda14cbcSMatt Macy return (SET_ERROR(EMLINK)); 598eda14cbcSMatt Macy } 599eda14cbcSMatt Macy if (!(flag & ZRENAMING)) { 600eda14cbcSMatt Macy if (zp->z_unlinked) { /* no new links to unlinked zp */ 601eda14cbcSMatt Macy ASSERT(!(flag & (ZNEW | ZEXISTS))); 602eda14cbcSMatt Macy return (SET_ERROR(ENOENT)); 603eda14cbcSMatt Macy } 604eda14cbcSMatt Macy if (zp->z_links >= ZFS_LINK_MAX - zp_is_dir) { 605eda14cbcSMatt Macy return (SET_ERROR(EMLINK)); 606eda14cbcSMatt Macy } 607eda14cbcSMatt Macy zp->z_links++; 608eda14cbcSMatt Macy SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, 609eda14cbcSMatt Macy &zp->z_links, sizeof (zp->z_links)); 610eda14cbcSMatt Macy 611eda14cbcSMatt Macy } else { 61216038816SMartin Matuska ASSERT(!zp->z_unlinked); 613eda14cbcSMatt Macy } 614eda14cbcSMatt Macy value = zfs_dirent(zp, zp->z_mode); 615eda14cbcSMatt Macy error = zap_add(zp->z_zfsvfs->z_os, dzp->z_id, name, 616eda14cbcSMatt Macy 8, 1, &value, tx); 617eda14cbcSMatt Macy 618eda14cbcSMatt Macy /* 619eda14cbcSMatt Macy * zap_add could fail to add the entry if it exceeds the capacity of the 620eda14cbcSMatt Macy * leaf-block and zap_leaf_split() failed to help. 621eda14cbcSMatt Macy * The caller of this routine is responsible for failing the transaction 622eda14cbcSMatt Macy * which will rollback the SA updates done above. 623eda14cbcSMatt Macy */ 624eda14cbcSMatt Macy if (error != 0) { 625eda14cbcSMatt Macy if (!(flag & ZRENAMING) && !(flag & ZNEW)) 626eda14cbcSMatt Macy zp->z_links--; 627eda14cbcSMatt Macy return (error); 628eda14cbcSMatt Macy } 629eda14cbcSMatt Macy 630*7a7741afSMartin Matuska /* 631*7a7741afSMartin Matuska * If we added a longname activate the SPA_FEATURE_LONGNAME. 632*7a7741afSMartin Matuska */ 633*7a7741afSMartin Matuska if (strlen(name) >= ZAP_MAXNAMELEN) { 634*7a7741afSMartin Matuska dsl_dataset_t *ds = dmu_objset_ds(zfsvfs->z_os); 635*7a7741afSMartin Matuska ds->ds_feature_activation[SPA_FEATURE_LONGNAME] = 636*7a7741afSMartin Matuska (void *)B_TRUE; 637*7a7741afSMartin Matuska } 638*7a7741afSMartin Matuska 639eda14cbcSMatt Macy SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, 640eda14cbcSMatt Macy &dzp->z_id, sizeof (dzp->z_id)); 641eda14cbcSMatt Macy SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 642eda14cbcSMatt Macy &zp->z_pflags, sizeof (zp->z_pflags)); 643eda14cbcSMatt Macy 644eda14cbcSMatt Macy if (!(flag & ZNEW)) { 645eda14cbcSMatt Macy SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 646eda14cbcSMatt Macy ctime, sizeof (ctime)); 647eda14cbcSMatt Macy zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, 648eda14cbcSMatt Macy ctime); 649eda14cbcSMatt Macy } 650eda14cbcSMatt Macy error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 651eda14cbcSMatt Macy ASSERT0(error); 652eda14cbcSMatt Macy 653eda14cbcSMatt Macy dzp->z_size++; 654eda14cbcSMatt Macy dzp->z_links += zp_is_dir; 655eda14cbcSMatt Macy count = 0; 656eda14cbcSMatt Macy SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, 657eda14cbcSMatt Macy &dzp->z_size, sizeof (dzp->z_size)); 658eda14cbcSMatt Macy SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, 659eda14cbcSMatt Macy &dzp->z_links, sizeof (dzp->z_links)); 660eda14cbcSMatt Macy SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 661eda14cbcSMatt Macy mtime, sizeof (mtime)); 662eda14cbcSMatt Macy SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 663eda14cbcSMatt Macy ctime, sizeof (ctime)); 664eda14cbcSMatt Macy SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 665eda14cbcSMatt Macy &dzp->z_pflags, sizeof (dzp->z_pflags)); 666eda14cbcSMatt Macy zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime); 667eda14cbcSMatt Macy error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx); 668eda14cbcSMatt Macy ASSERT0(error); 669eda14cbcSMatt Macy return (0); 670eda14cbcSMatt Macy } 671eda14cbcSMatt Macy 672eda14cbcSMatt Macy /* 673eda14cbcSMatt Macy * The match type in the code for this function should conform to: 674eda14cbcSMatt Macy * 675eda14cbcSMatt Macy * ------------------------------------------------------------------------ 676eda14cbcSMatt Macy * fs type | z_norm | lookup type | match type 677eda14cbcSMatt Macy * ---------|-------------|-------------|---------------------------------- 678eda14cbcSMatt Macy * CS !norm | 0 | 0 | 0 (exact) 679eda14cbcSMatt Macy * CS norm | formX | 0 | MT_NORMALIZE 680eda14cbcSMatt Macy * CI !norm | upper | !ZCIEXACT | MT_NORMALIZE 681eda14cbcSMatt Macy * CI !norm | upper | ZCIEXACT | MT_NORMALIZE | MT_MATCH_CASE 682eda14cbcSMatt Macy * CI norm | upper|formX | !ZCIEXACT | MT_NORMALIZE 683eda14cbcSMatt Macy * CI norm | upper|formX | ZCIEXACT | MT_NORMALIZE | MT_MATCH_CASE 684eda14cbcSMatt Macy * CM !norm | upper | !ZCILOOK | MT_NORMALIZE | MT_MATCH_CASE 685eda14cbcSMatt Macy * CM !norm | upper | ZCILOOK | MT_NORMALIZE 686eda14cbcSMatt Macy * CM norm | upper|formX | !ZCILOOK | MT_NORMALIZE | MT_MATCH_CASE 687eda14cbcSMatt Macy * CM norm | upper|formX | ZCILOOK | MT_NORMALIZE 688eda14cbcSMatt Macy * 689eda14cbcSMatt Macy * Abbreviations: 690eda14cbcSMatt Macy * CS = Case Sensitive, CI = Case Insensitive, CM = Case Mixed 691eda14cbcSMatt Macy * upper = case folding set by fs type on creation (U8_TEXTPREP_TOUPPER) 692eda14cbcSMatt Macy * formX = unicode normalization form set on fs creation 693eda14cbcSMatt Macy */ 694eda14cbcSMatt Macy static int 695eda14cbcSMatt Macy zfs_dropname(znode_t *dzp, const char *name, znode_t *zp, dmu_tx_t *tx, 696eda14cbcSMatt Macy int flag) 697eda14cbcSMatt Macy { 698eda14cbcSMatt Macy int error; 699eda14cbcSMatt Macy 700eda14cbcSMatt Macy if (zp->z_zfsvfs->z_norm) { 701eda14cbcSMatt Macy matchtype_t mt = MT_NORMALIZE; 702eda14cbcSMatt Macy 703eda14cbcSMatt Macy if (zp->z_zfsvfs->z_case == ZFS_CASE_MIXED) { 704eda14cbcSMatt Macy mt |= MT_MATCH_CASE; 705eda14cbcSMatt Macy } 706eda14cbcSMatt Macy 707eda14cbcSMatt Macy error = zap_remove_norm(zp->z_zfsvfs->z_os, dzp->z_id, 708eda14cbcSMatt Macy name, mt, tx); 709eda14cbcSMatt Macy } else { 710eda14cbcSMatt Macy error = zap_remove(zp->z_zfsvfs->z_os, dzp->z_id, name, tx); 711eda14cbcSMatt Macy } 712eda14cbcSMatt Macy 713eda14cbcSMatt Macy return (error); 714eda14cbcSMatt Macy } 715eda14cbcSMatt Macy 716eda14cbcSMatt Macy /* 717eda14cbcSMatt Macy * Unlink zp from dzp, and mark zp for deletion if this was the last link. 718eda14cbcSMatt Macy * Can fail if zp is a mount point (EBUSY) or a non-empty directory (EEXIST). 719eda14cbcSMatt Macy * If 'unlinkedp' is NULL, we put unlinked znodes on the unlinked list. 720eda14cbcSMatt Macy * If it's non-NULL, we use it to indicate whether the znode needs deletion, 721eda14cbcSMatt Macy * and it's the caller's job to do it. 722eda14cbcSMatt Macy */ 723eda14cbcSMatt Macy int 724eda14cbcSMatt Macy zfs_link_destroy(znode_t *dzp, const char *name, znode_t *zp, dmu_tx_t *tx, 725eda14cbcSMatt Macy int flag, boolean_t *unlinkedp) 726eda14cbcSMatt Macy { 727eda14cbcSMatt Macy zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 728eda14cbcSMatt Macy vnode_t *vp = ZTOV(zp); 729eda14cbcSMatt Macy int zp_is_dir = (vp->v_type == VDIR); 730eda14cbcSMatt Macy boolean_t unlinked = B_FALSE; 731eda14cbcSMatt Macy sa_bulk_attr_t bulk[5]; 732eda14cbcSMatt Macy uint64_t mtime[2], ctime[2]; 733eda14cbcSMatt Macy int count = 0; 734eda14cbcSMatt Macy int error; 735eda14cbcSMatt Macy 736eda14cbcSMatt Macy if (zfsvfs->z_replay == B_FALSE) { 737eda14cbcSMatt Macy ASSERT_VOP_ELOCKED(ZTOV(dzp), __func__); 738eda14cbcSMatt Macy ASSERT_VOP_ELOCKED(ZTOV(zp), __func__); 739eda14cbcSMatt Macy } 740eda14cbcSMatt Macy if (!(flag & ZRENAMING)) { 741eda14cbcSMatt Macy 742eda14cbcSMatt Macy if (zp_is_dir && !zfs_dirempty(zp)) 743eda14cbcSMatt Macy return (SET_ERROR(ENOTEMPTY)); 744eda14cbcSMatt Macy 745eda14cbcSMatt Macy /* 746eda14cbcSMatt Macy * If we get here, we are going to try to remove the object. 747eda14cbcSMatt Macy * First try removing the name from the directory; if that 748eda14cbcSMatt Macy * fails, return the error. 749eda14cbcSMatt Macy */ 750eda14cbcSMatt Macy error = zfs_dropname(dzp, name, zp, tx, flag); 751eda14cbcSMatt Macy if (error != 0) { 752eda14cbcSMatt Macy return (error); 753eda14cbcSMatt Macy } 754eda14cbcSMatt Macy 755eda14cbcSMatt Macy if (zp->z_links <= zp_is_dir) { 756eda14cbcSMatt Macy zfs_panic_recover("zfs: link count on vnode %p is %u, " 757eda14cbcSMatt Macy "should be at least %u", zp->z_vnode, 758eda14cbcSMatt Macy (int)zp->z_links, 759eda14cbcSMatt Macy zp_is_dir + 1); 760eda14cbcSMatt Macy zp->z_links = zp_is_dir + 1; 761eda14cbcSMatt Macy } 762eda14cbcSMatt Macy if (--zp->z_links == zp_is_dir) { 763eda14cbcSMatt Macy zp->z_unlinked = B_TRUE; 764eda14cbcSMatt Macy zp->z_links = 0; 765eda14cbcSMatt Macy unlinked = B_TRUE; 766eda14cbcSMatt Macy } else { 767eda14cbcSMatt Macy SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), 768eda14cbcSMatt Macy NULL, &ctime, sizeof (ctime)); 769eda14cbcSMatt Macy SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), 770eda14cbcSMatt Macy NULL, &zp->z_pflags, sizeof (zp->z_pflags)); 771eda14cbcSMatt Macy zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, 772eda14cbcSMatt Macy ctime); 773eda14cbcSMatt Macy } 774eda14cbcSMatt Macy SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), 775eda14cbcSMatt Macy NULL, &zp->z_links, sizeof (zp->z_links)); 776eda14cbcSMatt Macy error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 777eda14cbcSMatt Macy count = 0; 778eda14cbcSMatt Macy ASSERT0(error); 779eda14cbcSMatt Macy } else { 78016038816SMartin Matuska ASSERT(!zp->z_unlinked); 781eda14cbcSMatt Macy error = zfs_dropname(dzp, name, zp, tx, flag); 782eda14cbcSMatt Macy if (error != 0) 783eda14cbcSMatt Macy return (error); 784eda14cbcSMatt Macy } 785eda14cbcSMatt Macy 786eda14cbcSMatt Macy dzp->z_size--; /* one dirent removed */ 787eda14cbcSMatt Macy dzp->z_links -= zp_is_dir; /* ".." link from zp */ 788eda14cbcSMatt Macy SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), 789eda14cbcSMatt Macy NULL, &dzp->z_links, sizeof (dzp->z_links)); 790eda14cbcSMatt Macy SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), 791eda14cbcSMatt Macy NULL, &dzp->z_size, sizeof (dzp->z_size)); 792eda14cbcSMatt Macy SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), 793eda14cbcSMatt Macy NULL, ctime, sizeof (ctime)); 794eda14cbcSMatt Macy SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), 795eda14cbcSMatt Macy NULL, mtime, sizeof (mtime)); 796eda14cbcSMatt Macy SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), 797eda14cbcSMatt Macy NULL, &dzp->z_pflags, sizeof (dzp->z_pflags)); 798eda14cbcSMatt Macy zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime); 799eda14cbcSMatt Macy error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx); 800eda14cbcSMatt Macy ASSERT0(error); 801eda14cbcSMatt Macy 802eda14cbcSMatt Macy if (unlinkedp != NULL) 803eda14cbcSMatt Macy *unlinkedp = unlinked; 804eda14cbcSMatt Macy else if (unlinked) 805eda14cbcSMatt Macy zfs_unlinked_add(zp, tx); 806eda14cbcSMatt Macy 807eda14cbcSMatt Macy return (0); 808eda14cbcSMatt Macy } 809eda14cbcSMatt Macy 810eda14cbcSMatt Macy /* 811eda14cbcSMatt Macy * Indicate whether the directory is empty. 812eda14cbcSMatt Macy */ 813eda14cbcSMatt Macy boolean_t 814eda14cbcSMatt Macy zfs_dirempty(znode_t *dzp) 815eda14cbcSMatt Macy { 816eda14cbcSMatt Macy return (dzp->z_size == 2); 817eda14cbcSMatt Macy } 818eda14cbcSMatt Macy 819eda14cbcSMatt Macy int 820eda14cbcSMatt Macy zfs_make_xattrdir(znode_t *zp, vattr_t *vap, znode_t **xvpp, cred_t *cr) 821eda14cbcSMatt Macy { 822eda14cbcSMatt Macy zfsvfs_t *zfsvfs = zp->z_zfsvfs; 823eda14cbcSMatt Macy znode_t *xzp; 824eda14cbcSMatt Macy dmu_tx_t *tx; 825eda14cbcSMatt Macy int error; 826eda14cbcSMatt Macy zfs_acl_ids_t acl_ids; 827eda14cbcSMatt Macy boolean_t fuid_dirtied; 82816038816SMartin Matuska uint64_t parent __maybe_unused; 829eda14cbcSMatt Macy 830eda14cbcSMatt Macy *xvpp = NULL; 831eda14cbcSMatt Macy 832eda14cbcSMatt Macy if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL, 833dbd5678dSMartin Matuska &acl_ids, NULL)) != 0) 834eda14cbcSMatt Macy return (error); 835eda14cbcSMatt Macy if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, 0)) { 836eda14cbcSMatt Macy zfs_acl_ids_free(&acl_ids); 837eda14cbcSMatt Macy return (SET_ERROR(EDQUOT)); 838eda14cbcSMatt Macy } 839eda14cbcSMatt Macy 840ce4dcb97SMartin Matuska getnewvnode_reserve(); 841eda14cbcSMatt Macy 842eda14cbcSMatt Macy tx = dmu_tx_create(zfsvfs->z_os); 843eda14cbcSMatt Macy dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 844eda14cbcSMatt Macy ZFS_SA_BASE_ATTR_SIZE); 845eda14cbcSMatt Macy dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 846eda14cbcSMatt Macy dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 847eda14cbcSMatt Macy fuid_dirtied = zfsvfs->z_fuid_dirty; 848eda14cbcSMatt Macy if (fuid_dirtied) 849eda14cbcSMatt Macy zfs_fuid_txhold(zfsvfs, tx); 850eda14cbcSMatt Macy error = dmu_tx_assign(tx, TXG_WAIT); 851eda14cbcSMatt Macy if (error) { 852eda14cbcSMatt Macy zfs_acl_ids_free(&acl_ids); 853eda14cbcSMatt Macy dmu_tx_abort(tx); 854eda14cbcSMatt Macy getnewvnode_drop_reserve(); 855eda14cbcSMatt Macy return (error); 856eda14cbcSMatt Macy } 857eda14cbcSMatt Macy zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, &acl_ids); 858eda14cbcSMatt Macy 859eda14cbcSMatt Macy if (fuid_dirtied) 860eda14cbcSMatt Macy zfs_fuid_sync(zfsvfs, tx); 861eda14cbcSMatt Macy 86216038816SMartin Matuska ASSERT0(sa_lookup(xzp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), &parent, 86316038816SMartin Matuska sizeof (parent))); 86416038816SMartin Matuska ASSERT3U(parent, ==, zp->z_id); 865eda14cbcSMatt Macy 86616038816SMartin Matuska VERIFY0(sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xzp->z_id, 867eda14cbcSMatt Macy sizeof (xzp->z_id), tx)); 868eda14cbcSMatt Macy 86916038816SMartin Matuska zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp, xzp, "", NULL, 87016038816SMartin Matuska acl_ids.z_fuidp, vap); 871eda14cbcSMatt Macy 872eda14cbcSMatt Macy zfs_acl_ids_free(&acl_ids); 873eda14cbcSMatt Macy dmu_tx_commit(tx); 874eda14cbcSMatt Macy 875eda14cbcSMatt Macy getnewvnode_drop_reserve(); 876eda14cbcSMatt Macy 877eda14cbcSMatt Macy *xvpp = xzp; 878eda14cbcSMatt Macy 879eda14cbcSMatt Macy return (0); 880eda14cbcSMatt Macy } 881eda14cbcSMatt Macy 882eda14cbcSMatt Macy /* 883eda14cbcSMatt Macy * Return a znode for the extended attribute directory for zp. 884eda14cbcSMatt Macy * ** If the directory does not already exist, it is created ** 885eda14cbcSMatt Macy * 886eda14cbcSMatt Macy * IN: zp - znode to obtain attribute directory from 887eda14cbcSMatt Macy * cr - credentials of caller 888eda14cbcSMatt Macy * flags - flags from the VOP_LOOKUP call 889eda14cbcSMatt Macy * 890eda14cbcSMatt Macy * OUT: xzpp - pointer to extended attribute znode 891eda14cbcSMatt Macy * 892eda14cbcSMatt Macy * RETURN: 0 on success 893eda14cbcSMatt Macy * error number on failure 894eda14cbcSMatt Macy */ 895eda14cbcSMatt Macy int 896eda14cbcSMatt Macy zfs_get_xattrdir(znode_t *zp, znode_t **xzpp, cred_t *cr, int flags) 897eda14cbcSMatt Macy { 898eda14cbcSMatt Macy zfsvfs_t *zfsvfs = zp->z_zfsvfs; 899eda14cbcSMatt Macy znode_t *xzp; 900eda14cbcSMatt Macy vattr_t va; 901eda14cbcSMatt Macy int error; 902eda14cbcSMatt Macy top: 903eda14cbcSMatt Macy error = zfs_dirent_lookup(zp, "", &xzp, ZXATTR); 904eda14cbcSMatt Macy if (error) 905eda14cbcSMatt Macy return (error); 906eda14cbcSMatt Macy 907eda14cbcSMatt Macy if (xzp != NULL) { 908eda14cbcSMatt Macy *xzpp = xzp; 909eda14cbcSMatt Macy return (0); 910eda14cbcSMatt Macy } 911eda14cbcSMatt Macy 912eda14cbcSMatt Macy 913eda14cbcSMatt Macy if (!(flags & CREATE_XATTR_DIR)) 914eda14cbcSMatt Macy return (SET_ERROR(ENOATTR)); 915eda14cbcSMatt Macy 916eda14cbcSMatt Macy if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 917eda14cbcSMatt Macy return (SET_ERROR(EROFS)); 918eda14cbcSMatt Macy } 919eda14cbcSMatt Macy 920eda14cbcSMatt Macy /* 921eda14cbcSMatt Macy * The ability to 'create' files in an attribute 922eda14cbcSMatt Macy * directory comes from the write_xattr permission on the base file. 923eda14cbcSMatt Macy * 924eda14cbcSMatt Macy * The ability to 'search' an attribute directory requires 925eda14cbcSMatt Macy * read_xattr permission on the base file. 926eda14cbcSMatt Macy * 927eda14cbcSMatt Macy * Once in a directory the ability to read/write attributes 928eda14cbcSMatt Macy * is controlled by the permissions on the attribute file. 929eda14cbcSMatt Macy */ 930eda14cbcSMatt Macy va.va_mask = AT_MODE | AT_UID | AT_GID; 931eda14cbcSMatt Macy va.va_type = VDIR; 932eda14cbcSMatt Macy va.va_mode = S_IFDIR | S_ISVTX | 0777; 933eda14cbcSMatt Macy zfs_fuid_map_ids(zp, cr, &va.va_uid, &va.va_gid); 934eda14cbcSMatt Macy 935eda14cbcSMatt Macy error = zfs_make_xattrdir(zp, &va, xzpp, cr); 936eda14cbcSMatt Macy 937eda14cbcSMatt Macy if (error == ERESTART) { 938eda14cbcSMatt Macy /* NB: we already did dmu_tx_wait() if necessary */ 939eda14cbcSMatt Macy goto top; 940eda14cbcSMatt Macy } 941eda14cbcSMatt Macy if (error == 0) 942ce4dcb97SMartin Matuska VOP_UNLOCK(ZTOV(*xzpp)); 943eda14cbcSMatt Macy 944eda14cbcSMatt Macy return (error); 945eda14cbcSMatt Macy } 946eda14cbcSMatt Macy 947eda14cbcSMatt Macy /* 948eda14cbcSMatt Macy * Decide whether it is okay to remove within a sticky directory. 949eda14cbcSMatt Macy * 950eda14cbcSMatt Macy * In sticky directories, write access is not sufficient; 951eda14cbcSMatt Macy * you can remove entries from a directory only if: 952eda14cbcSMatt Macy * 953eda14cbcSMatt Macy * you own the directory, 954eda14cbcSMatt Macy * you own the entry, 955eda14cbcSMatt Macy * the entry is a plain file and you have write access, 956eda14cbcSMatt Macy * or you are privileged (checked in secpolicy...). 957eda14cbcSMatt Macy * 958eda14cbcSMatt Macy * The function returns 0 if remove access is granted. 959eda14cbcSMatt Macy */ 960eda14cbcSMatt Macy int 961eda14cbcSMatt Macy zfs_sticky_remove_access(znode_t *zdp, znode_t *zp, cred_t *cr) 962eda14cbcSMatt Macy { 963eda14cbcSMatt Macy uid_t uid; 964eda14cbcSMatt Macy uid_t downer; 965eda14cbcSMatt Macy uid_t fowner; 966eda14cbcSMatt Macy zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 967eda14cbcSMatt Macy 968eda14cbcSMatt Macy if (zdp->z_zfsvfs->z_replay) 969eda14cbcSMatt Macy return (0); 970eda14cbcSMatt Macy 971eda14cbcSMatt Macy if ((zdp->z_mode & S_ISVTX) == 0) 972eda14cbcSMatt Macy return (0); 973eda14cbcSMatt Macy 974eda14cbcSMatt Macy downer = zfs_fuid_map_id(zfsvfs, zdp->z_uid, cr, ZFS_OWNER); 975eda14cbcSMatt Macy fowner = zfs_fuid_map_id(zfsvfs, zp->z_uid, cr, ZFS_OWNER); 976eda14cbcSMatt Macy 977eda14cbcSMatt Macy if ((uid = crgetuid(cr)) == downer || uid == fowner || 978eda14cbcSMatt Macy (ZTOV(zp)->v_type == VREG && 979dbd5678dSMartin Matuska zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr, NULL) == 0)) 980eda14cbcSMatt Macy return (0); 981eda14cbcSMatt Macy else 982eda14cbcSMatt Macy return (secpolicy_vnode_remove(ZTOV(zp), cr)); 983eda14cbcSMatt Macy } 984