/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/resource.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/kmem.h>
#include <sys/uio.h>
#include <sys/cmn_err.h>
#include <sys/errno.h>
#include <sys/stat.h>
#include <sys/unistd.h>
#include <sys/sunddi.h>
#include <sys/random.h>
#include <sys/policy.h>
#ifdef __FreeBSD__
#include <sys/kcondvar.h>
#include <sys/callb.h>
#include <sys/smp.h>
#endif
#include <sys/zfs_dir.h>
#include <sys/zfs_acl.h>
#include <sys/fs/zfs.h>
#include <sys/zap.h>
#include <sys/dmu.h>
#include <sys/atomic.h>
#include <sys/zfs_ctldir.h>
#include <sys/zfs_fuid.h>
#include <sys/sa.h>
#include <sys/zfs_sa.h>
#include <sys/dnlc.h>
#include <sys/extdirent.h>

/*
 * zfs_match_find() is used by zfs_dirent_lookup() to perform zap lookups
 * of names after deciding which is the appropriate lookup interface.
 */
static int
zfs_match_find(zfsvfs_t *zfsvfs, znode_t *dzp, const char *name,
    boolean_t exact, uint64_t *zoid)
{
	int error;

	if (zfsvfs->z_norm) {
		matchtype_t mt = exact ? MT_EXACT : MT_FIRST;

		/*
		 * In the non-mixed case we only expect there would ever
		 * be one match, but we need to use the normalizing lookup.
		 */
		error = zap_lookup_norm(zfsvfs->z_os, dzp->z_id, name, 8, 1,
		    zoid, mt, NULL, 0, NULL);
	} else {
		error = zap_lookup(zfsvfs->z_os, dzp->z_id, name, 8, 1, zoid);
	}
	*zoid = ZFS_DIRENT_OBJ(*zoid);

	return (error);
}

/*
 * Look up a directory entry under a locked vnode.
 * dvp being locked gives us a guarantee that there are no concurrent
 * modifications of the directory and, thus, if a node can be found in
 * the directory, then it must not be unlinked.
 *
 * Input arguments:
 *	dzp	- znode for directory
 *	name	- name of entry to look up
 *	flag	- ZNEW: if the entry already exists, fail with EEXIST.
 *		  ZEXISTS: if the entry does not exist, fail with ENOENT.
 *		  ZXATTR: we want dzp's xattr directory
 *
 * Output arguments:
 *	zpp	- pointer to the znode for the entry (NULL if there isn't one)
 *
 * Return value: 0 on success or errno on failure.
 *
 * NOTE: Always checks for, and rejects, '.' and '..'.
 */
int
zfs_dirent_lookup(znode_t *dzp, const char *name, znode_t **zpp, int flag)
{
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	boolean_t exact;
	uint64_t zoid;
	int error = 0;

	ASSERT_VOP_LOCKED(ZTOV(dzp), __func__);

	*zpp = NULL;

	/*
	 * Verify that we are not trying to look up '.', '..', or '.zfs'.
	 */
	if ((name[0] == '.' &&
	    (name[1] == '\0' || (name[1] == '.' && name[2] == '\0'))) ||
	    (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0))
		return (SET_ERROR(EEXIST));

	/*
	 * Case sensitivity and normalization preferences are set when
	 * the file system is created.  These are stored in the
	 * zfsvfs->z_case and zfsvfs->z_norm fields.  These choices
	 * affect how we perform zap lookups.
	 *
	 * Decide if exact matches should be requested when performing
	 * a zap lookup on file systems supporting case-insensitive
	 * access.
	 *
	 * NB: we do not need to worry about this flag for ZFS_CASE_SENSITIVE
	 * because in that case MT_EXACT and MT_FIRST should produce exactly
	 * the same result.
	 */
	exact = (zfsvfs->z_case == ZFS_CASE_MIXED);

	if (dzp->z_unlinked && !(flag & ZXATTR))
		return (ENOENT);
	if (flag & ZXATTR) {
		error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &zoid,
		    sizeof (zoid));
		if (error == 0)
			error = (zoid == 0 ? ENOENT : 0);
	} else {
		error = zfs_match_find(zfsvfs, dzp, name, exact, &zoid);
	}
	if (error) {
		if (error != ENOENT || (flag & ZEXISTS)) {
			return (error);
		}
	} else {
		if (flag & ZNEW) {
			return (SET_ERROR(EEXIST));
		}
		error = zfs_zget(zfsvfs, zoid, zpp);
		if (error)
			return (error);
		ASSERT(!(*zpp)->z_unlinked);
	}

	return (0);
}

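/*
 * Look up the parent ("..") of dzp by reading its SA_ZPL_PARENT system
 * attribute and getting a znode for that object.  Used by zfs_dirlook()
 * to resolve ".." without a zap lookup.  Fails with ENOENT if dzp has
 * already been unlinked.
 */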
static int
zfs_dd_lookup(znode_t *dzp, znode_t **zpp)
{
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	znode_t *zp;
	uint64_t parent;
	int error;

	ASSERT_VOP_LOCKED(ZTOV(dzp), __func__);
	ASSERT(RRM_READ_HELD(&zfsvfs->z_teardown_lock));

	if (dzp->z_unlinked)
		return (ENOENT);

	if ((error = sa_lookup(dzp->z_sa_hdl,
	    SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0)
		return (error);

	error = zfs_zget(zfsvfs, parent, &zp);
	if (error == 0)
		*zpp = zp;
	return (error);
}

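/*
 * Resolve 'name' within the directory dzp: "" and "." return dzp itself,
 * ".." is resolved via zfs_dd_lookup(), and any other name goes through
 * zfs_dirent_lookup() with ZEXISTS.  On success *zpp points to the
 * resulting znode.
 */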
int
zfs_dirlook(znode_t *dzp, const char *name, znode_t **zpp)
{
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	znode_t *zp;
	int error = 0;

	ASSERT_VOP_LOCKED(ZTOV(dzp), __func__);
	ASSERT(RRM_READ_HELD(&zfsvfs->z_teardown_lock));

	if (dzp->z_unlinked)
		return (SET_ERROR(ENOENT));

	if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
		*zpp = dzp;
	} else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
		error = zfs_dd_lookup(dzp, zpp);
	} else {
		error = zfs_dirent_lookup(dzp, name, &zp, ZEXISTS);
		if (error == 0) {
			dzp->z_zn_prefetch = B_TRUE; /* enable prefetching */
			*zpp = zp;
		}
	}
	return (error);
}

/*
 * Unlinked Set (formerly known as the "delete queue") Error Handling
 *
 * When dealing with the unlinked set, we dmu_tx_hold_zap(), but we
 * don't specify the name of the entry that we will be manipulating.  We
 * also fib and say that we won't be adding any new entries to the
 * unlinked set, even though we might (this is to lower the minimum file
 * size that can be deleted in a full filesystem).  So on the small
 * chance that the nlink list is using a fat zap (i.e. has more than
 * 2000 entries), we *may* not pre-read a block that's needed.
 * Therefore it is remotely possible for some of the assertions
 * regarding the unlinked set below to fail due to i/o error.  On a
 * nondebug system, this will result in the space being leaked.
 */
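/*
 * Add zp to the unlinked set.  The caller must have already marked the
 * znode z_unlinked and dropped its link count to zero; the object is
 * freed later by zfs_rmnode(), or picked up by zfs_unlinked_drain()
 * after a crash or forced unmount.
 */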
void
zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;

	ASSERT(zp->z_unlinked);
	ASSERT(zp->z_links == 0);

	VERIFY3U(0, ==,
	    zap_add_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
}

/*
 * Clean up any znodes that had no links when we either crashed or
 * (force) umounted the file system.
 */
void
zfs_unlinked_drain(zfsvfs_t *zfsvfs)
{
	zap_cursor_t zc;
	zap_attribute_t zap;
	dmu_object_info_t doi;
	znode_t *zp;
	int error;

	/*
	 * Iterate over the contents of the unlinked set.
	 */
	for (zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_unlinkedobj);
	    zap_cursor_retrieve(&zc, &zap) == 0;
	    zap_cursor_advance(&zc)) {

		/*
		 * See what kind of object we have in the list.
		 */

		error = dmu_object_info(zfsvfs->z_os,
		    zap.za_first_integer, &doi);
		if (error != 0)
			continue;

		ASSERT((doi.doi_type == DMU_OT_PLAIN_FILE_CONTENTS) ||
		    (doi.doi_type == DMU_OT_DIRECTORY_CONTENTS));
		/*
		 * We need to re-mark these list entries for deletion,
		 * so we pull them back into core and set zp->z_unlinked.
		 */
		error = zfs_zget(zfsvfs, zap.za_first_integer, &zp);

		/*
		 * We may pick up znodes that are already marked for deletion.
		 * This could happen during the purge of an extended attribute
		 * directory.  All we need to do is skip over them, since they
		 * are already in the system marked z_unlinked.
		 */
		if (error != 0)
			continue;

		vn_lock(ZTOV(zp), LK_EXCLUSIVE | LK_RETRY);
		zp->z_unlinked = B_TRUE;
		vput(ZTOV(zp));
	}
	zap_cursor_fini(&zc);
}

/*
 * Delete the entire contents of a directory.  Return a count
 * of the number of entries that could not be deleted.  If we encounter
 * an error, return a count of at least one so that the directory stays
 * in the unlinked set.
 *
 * NOTE: this function assumes that the directory is inactive,
 *	so there is no need to lock its entries before deletion.
 *	Also, it assumes the directory contains *only* regular files
 *	and symlinks.
 */
static int
zfs_purgedir(znode_t *dzp)
{
	zap_cursor_t zc;
	zap_attribute_t zap;
	znode_t *xzp;
	dmu_tx_t *tx;
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	int skipped = 0;
	int error;

	for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
	    (error = zap_cursor_retrieve(&zc, &zap)) == 0;
	    zap_cursor_advance(&zc)) {
		error = zfs_zget(zfsvfs,
		    ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp);
		if (error) {
			skipped += 1;
			continue;
		}

		vn_lock(ZTOV(xzp), LK_EXCLUSIVE | LK_RETRY);
		ASSERT((ZTOV(xzp)->v_type == VREG) ||
		    (ZTOV(xzp)->v_type == VLNK));

		tx = dmu_tx_create(zfsvfs->z_os);
		dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
		dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name);
		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
		dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
		/* Is this really needed? */
		zfs_sa_upgrade_txholds(tx, xzp);
		dmu_tx_mark_netfree(tx);
		error = dmu_tx_assign(tx, TXG_WAIT);
		if (error) {
			dmu_tx_abort(tx);
			vput(ZTOV(xzp));
			skipped += 1;
			continue;
		}

		error = zfs_link_destroy(dzp, zap.za_name, xzp, tx, 0, NULL);
		if (error)
			skipped += 1;
		dmu_tx_commit(tx);

		vput(ZTOV(xzp));
	}
	zap_cursor_fini(&zc);
	if (error != ENOENT)
		skipped += 1;
	return (skipped);
}

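/*
 * Free the on-disk object backing an unlinked znode: purge or unlink any
 * extended attribute data, free the file's data blocks, remove the znode
 * from the unlinked set, and delete it.  If a transaction cannot be
 * assigned (e.g. the pool is out of space), the znode is left in the
 * unlinked set to be retried by zfs_unlinked_drain() on the next mount.
 */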
void
zfs_rmnode(znode_t *zp)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	objset_t *os = zfsvfs->z_os;
	znode_t *xzp = NULL;
	dmu_tx_t *tx;
	uint64_t acl_obj;
	uint64_t xattr_obj;
	int error;

	ASSERT(zp->z_links == 0);
#ifndef __NetBSD__
	ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
#endif

	/*
	 * If this is an attribute directory, purge its contents.
	 */
	if (ZTOV(zp) != NULL && ZTOV(zp)->v_type == VDIR &&
	    (zp->z_pflags & ZFS_XATTR)) {
		if (zfs_purgedir(zp) != 0) {
			/*
			 * Not enough space to delete some xattrs.
			 * Leave it in the unlinked set.
			 */
			zfs_znode_dmu_fini(zp);
			zfs_znode_free(zp);
			return;
		}
	} else {
		/*
		 * Free up all the data in the file.  We don't do this for
		 * XATTR directories because we need truncate and remove to be
		 * in the same tx, like in zfs_znode_delete(). Otherwise, if
		 * we crash here we'll end up with an inconsistent truncated
		 * zap object in the delete queue.  Note a truncated file is
		 * harmless since it only contains user data.
		 */
		error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END);
		if (error) {
			/*
			 * Not enough space.  Leave the file in the unlinked
			 * set.
			 */
			zfs_znode_dmu_fini(zp);
			zfs_znode_free(zp);
			return;
		}
	}

	/*
	 * If the file has extended attributes, we're going to unlink
	 * the xattr dir.
	 */
	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
	    &xattr_obj, sizeof (xattr_obj));
	if (error == 0 && xattr_obj) {
		error = zfs_zget(zfsvfs, xattr_obj, &xzp);
		ASSERT3S(error, ==, 0);
		vn_lock(ZTOV(xzp), LK_EXCLUSIVE | LK_RETRY);
	}

	acl_obj = zfs_external_acl(zp);

	/*
	 * Set up the final transaction.
	 */
	tx = dmu_tx_create(os);
	dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
	if (xzp) {
		dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL);
		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
	}
	if (acl_obj)
		dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);

	zfs_sa_upgrade_txholds(tx, zp);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		/*
		 * Not enough space to delete the file.  Leave it in the
		 * unlinked set, leaking it until the fs is remounted (at
		 * which point we'll call zfs_unlinked_drain() to process it).
		 */
		dmu_tx_abort(tx);
		zfs_znode_dmu_fini(zp);
		zfs_znode_free(zp);
		goto out;
	}

	if (xzp) {
		ASSERT(error == 0);
		xzp->z_unlinked = B_TRUE;	/* mark xzp for deletion */
		xzp->z_links = 0;	/* no more links to it */
		VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs),
		    &xzp->z_links, sizeof (xzp->z_links), tx));
		zfs_unlinked_add(xzp, tx);
	}

	/* Remove this znode from the unlinked set */
	VERIFY3U(0, ==,
	    zap_remove_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));

	zfs_znode_delete(zp, tx);

	dmu_tx_commit(tx);
out:
	if (xzp)
		vput(ZTOV(xzp));
}

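/*
 * Construct the 64-bit directory entry value for zp: the object number
 * occupies the low bits, and on pools at or above ZPL_VERSION_DIRENT_TYPE
 * the file type (as a DT_* value) is stored in the top 4 bits.  For
 * example, a regular file (DT_REG == 8) with object number 0x1234 is
 * encoded as 0x8000000000001234.
 */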
static uint64_t
zfs_dirent(znode_t *zp, uint64_t mode)
{
	uint64_t de = zp->z_id;

	if (zp->z_zfsvfs->z_version >= ZPL_VERSION_DIRENT_TYPE)
		de |= IFTODT(mode) << 60;
	return (de);
}

/*
 * Link zp into dzp.  Can only fail if zp has been unlinked.
 */
int
zfs_link_create(znode_t *dzp, const char *name, znode_t *zp, dmu_tx_t *tx,
    int flag)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	vnode_t *vp = ZTOV(zp);
	uint64_t value;
	int zp_is_dir = (vp->v_type == VDIR);
	sa_bulk_attr_t bulk[5];
	uint64_t mtime[2], ctime[2];
	int count = 0;
	int error;

	ASSERT_VOP_ELOCKED(ZTOV(dzp), __func__);
	ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
#if 0
	if (zp_is_dir) {
		error = 0;
		if (dzp->z_links >= LINK_MAX)
			error = SET_ERROR(EMLINK);
		return (error);
	}
#endif
	if (!(flag & ZRENAMING)) {
		if (zp->z_unlinked) {	/* no new links to unlinked zp */
			ASSERT(!(flag & (ZNEW | ZEXISTS)));
			return (SET_ERROR(ENOENT));
		}
#if 0
		if (zp->z_links >= LINK_MAX) {
			return (SET_ERROR(EMLINK));
		}
#endif
		zp->z_links++;
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
		    &zp->z_links, sizeof (zp->z_links));

	} else {
		ASSERT(zp->z_unlinked == 0);
	}
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL,
	    &dzp->z_id, sizeof (dzp->z_id));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
	    &zp->z_pflags, sizeof (zp->z_pflags));

	if (!(flag & ZNEW)) {
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
		    ctime, sizeof (ctime));
		zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime,
		    ctime, B_TRUE);
	}
	error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
	ASSERT0(error);

	dzp->z_size++;
	dzp->z_links += zp_is_dir;
	count = 0;
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
	    &dzp->z_size, sizeof (dzp->z_size));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
	    &dzp->z_links, sizeof (dzp->z_links));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
	    mtime, sizeof (mtime));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
	    ctime, sizeof (ctime));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
	    &dzp->z_pflags, sizeof (dzp->z_pflags));
	zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE);
	error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
	ASSERT0(error);

	value = zfs_dirent(zp, zp->z_mode);
	error = zap_add(zp->z_zfsvfs->z_os, dzp->z_id, name,
	    8, 1, &value, tx);
	VERIFY0(error);

	return (0);
}

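/*
 * Remove the zap entry for 'name' from directory dzp, using the
 * normalization-aware removal interface when the file system has
 * normalization or case folding enabled.
 */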
static int
zfs_dropname(znode_t *dzp, const char *name, znode_t *zp, dmu_tx_t *tx,
    int flag)
{
	int error;

	if (zp->z_zfsvfs->z_norm) {
		if (zp->z_zfsvfs->z_case == ZFS_CASE_MIXED)
			error = zap_remove_norm(zp->z_zfsvfs->z_os,
			    dzp->z_id, name, MT_EXACT, tx);
		else
			error = zap_remove_norm(zp->z_zfsvfs->z_os,
			    dzp->z_id, name, MT_FIRST, tx);
	} else {
		error = zap_remove(zp->z_zfsvfs->z_os,
		    dzp->z_id, name, tx);
	}

	return (error);
}

/*
 * Unlink zp from dzp, and mark zp for deletion if this was the last link.
 * Can fail if zp is a mount point (EBUSY) or a non-empty directory (EEXIST
 * on illumos, ENOTEMPTY on FreeBSD).
 * If 'unlinkedp' is NULL, we put unlinked znodes on the unlinked list.
 * If it's non-NULL, we use it to indicate whether the znode needs deletion,
 * and it's the caller's job to do it.
 */
int
zfs_link_destroy(znode_t *dzp, const char *name, znode_t *zp, dmu_tx_t *tx,
    int flag, boolean_t *unlinkedp)
{
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	vnode_t *vp = ZTOV(zp);
	int zp_is_dir = (vp->v_type == VDIR);
	boolean_t unlinked = B_FALSE;
	sa_bulk_attr_t bulk[5];
	uint64_t mtime[2], ctime[2];
	int count = 0;
	int error;

	ASSERT_VOP_ELOCKED(ZTOV(dzp), __func__);
	ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);

	if (!(flag & ZRENAMING)) {

		if (zp_is_dir && !zfs_dirempty(zp)) {
#ifdef illumos
			return (SET_ERROR(EEXIST));
#else
			return (SET_ERROR(ENOTEMPTY));
#endif
		}

		/*
		 * If we get here, we are going to try to remove the object.
		 * First try removing the name from the directory; if that
		 * fails, return the error.
		 */
		error = zfs_dropname(dzp, name, zp, tx, flag);
		if (error != 0) {
			return (error);
		}

		if (zp->z_links <= zp_is_dir) {
			zfs_panic_recover("zfs: link count on vnode %p is %u, "
			    "should be at least %u", zp->z_vnode,
			    (int)zp->z_links,
			    zp_is_dir + 1);
			zp->z_links = zp_is_dir + 1;
		}
		if (--zp->z_links == zp_is_dir) {
			zp->z_unlinked = B_TRUE;
			zp->z_links = 0;
			unlinked = B_TRUE;
		} else {
			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs),
			    NULL, &ctime, sizeof (ctime));
			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
			    NULL, &zp->z_pflags, sizeof (zp->z_pflags));
			zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime,
			    B_TRUE);
		}
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
		    NULL, &zp->z_links, sizeof (zp->z_links));
		error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
		count = 0;
		ASSERT0(error);
	} else {
		ASSERT(zp->z_unlinked == 0);
		error = zfs_dropname(dzp, name, zp, tx, flag);
		if (error != 0)
			return (error);
	}

	dzp->z_size--;		/* one dirent removed */
	dzp->z_links -= zp_is_dir;	/* ".." link from zp */
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
	    NULL, &dzp->z_links, sizeof (dzp->z_links));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs),
	    NULL, &dzp->z_size, sizeof (dzp->z_size));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs),
	    NULL, ctime, sizeof (ctime));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs),
	    NULL, mtime, sizeof (mtime));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
	    NULL, &dzp->z_pflags, sizeof (dzp->z_pflags));
	zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE);
	error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
	ASSERT0(error);

	if (unlinkedp != NULL)
		*unlinkedp = unlinked;
	else if (unlinked)
		zfs_unlinked_add(zp, tx);

	return (0);
}

/*
 * Indicate whether the directory is empty.  A directory's z_size counts
 * the implicit "." and ".." entries, so an empty directory has a size
 * of exactly 2.
 */
boolean_t
zfs_dirempty(znode_t *dzp)
{
	return (dzp->z_size == 2);
}

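/*
 * Create the extended attribute directory for zp and link it to zp via
 * the SA_ZPL_XATTR system attribute.  On success *xvpp holds the new
 * xattr directory vnode, returned locked.
 */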
int
zfs_make_xattrdir(znode_t *zp, vattr_t *vap, vnode_t **xvpp, cred_t *cr)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	znode_t *xzp;
	dmu_tx_t *tx;
	int error;
	zfs_acl_ids_t acl_ids;
	boolean_t fuid_dirtied;
	uint64_t parent;

	*xvpp = NULL;

	/*
	 * In FreeBSD, access checking for creating an EA is done
	 * in zfs_setextattr().
	 */
#ifndef __FreeBSD_kernel__
	if ((error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, 0, B_FALSE,
	    cr)) != 0)
		return (error);
#endif

	if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL,
	    &acl_ids)) != 0)
		return (error);
	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
		zfs_acl_ids_free(&acl_ids);
		return (SET_ERROR(EDQUOT));
	}

	getnewvnode_reserve(1);

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
	    ZFS_SA_BASE_ATTR_SIZE);
	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
	fuid_dirtied = zfsvfs->z_fuid_dirty;
	if (fuid_dirtied)
		zfs_fuid_txhold(zfsvfs, tx);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		zfs_acl_ids_free(&acl_ids);
		dmu_tx_abort(tx);
		return (error);
	}
	zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, &acl_ids);

	if (fuid_dirtied)
		zfs_fuid_sync(zfsvfs, tx);

#ifdef DEBUG
	error = sa_lookup(xzp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
	    &parent, sizeof (parent));
	ASSERT(error == 0 && parent == zp->z_id);
#endif

	VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xzp->z_id,
	    sizeof (xzp->z_id), tx));

	(void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp,
	    xzp, "", NULL, acl_ids.z_fuidp, vap);

	zfs_acl_ids_free(&acl_ids);
	dmu_tx_commit(tx);

	getnewvnode_drop_reserve();

	*xvpp = ZTOV(xzp);

	return (0);
}

/*
 * Return a znode for the extended attribute directory for zp.
 * ** If the directory does not already exist, it is created **
 *
 *	IN:	zp	- znode to obtain attribute directory from
 *		cr	- credentials of caller
 *		flags	- flags from the VOP_LOOKUP call
 *
 *	OUT:	xvpp	- pointer to extended attribute directory vnode
 *
 *	RETURN:	0 on success
 *		error number on failure
 */
int
zfs_get_xattrdir(znode_t *zp, vnode_t **xvpp, cred_t *cr, int flags)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	znode_t *xzp;
	vattr_t va;
	int error;
top:
	error = zfs_dirent_lookup(zp, "", &xzp, ZXATTR);
	if (error)
		return (error);

	if (xzp != NULL) {
		*xvpp = ZTOV(xzp);
		return (0);
	}

	if (!(flags & CREATE_XATTR_DIR)) {
#ifdef illumos
		return (SET_ERROR(ENOENT));
#else
		return (SET_ERROR(ENOATTR));
#endif
	}

	if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
		return (SET_ERROR(EROFS));
	}

	/*
	 * The ability to 'create' files in an attribute
	 * directory comes from the write_xattr permission on the base file.
	 *
	 * The ability to 'search' an attribute directory requires
	 * read_xattr permission on the base file.
	 *
	 * Once in a directory the ability to read/write attributes
	 * is controlled by the permissions on the attribute file.
	 */
	va.va_mask = AT_TYPE | AT_MODE | AT_UID | AT_GID;
	va.va_type = VDIR;
	va.va_mode = S_IFDIR | S_ISVTX | 0777;
	zfs_fuid_map_ids(zp, cr, &va.va_uid, &va.va_gid);

	error = zfs_make_xattrdir(zp, &va, xvpp, cr);

	if (error == ERESTART) {
		/* NB: we already did dmu_tx_wait() if necessary */
		goto top;
	}
	if (error == 0)
		VOP_UNLOCK(*xvpp, 0);

	return (error);
}

/*
 * Decide whether it is okay to remove within a sticky directory.
 *
 * In sticky directories, write access is not sufficient;
 * you can remove entries from a directory only if:
 *
 *	you own the directory,
 *	you own the entry,
 *	the entry is a plain file and you have write access,
 *	or you are privileged (checked in secpolicy...).
 *
 * The function returns 0 if remove access is granted.
 */
int
zfs_sticky_remove_access(znode_t *zdp, znode_t *zp, cred_t *cr)
{
	uid_t uid;
	uid_t downer;
	uid_t fowner;
	zfsvfs_t *zfsvfs = zdp->z_zfsvfs;

	if (zdp->z_zfsvfs->z_replay)
		return (0);

	if ((zdp->z_mode & S_ISVTX) == 0)
		return (0);

	downer = zfs_fuid_map_id(zfsvfs, zdp->z_uid, cr, ZFS_OWNER);
	fowner = zfs_fuid_map_id(zfsvfs, zp->z_uid, cr, ZFS_OWNER);

	if ((uid = crgetuid(cr)) == downer || uid == fowner ||
	    (ZTOV(zp)->v_type == VREG &&
	    zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr) == 0))
		return (0);
	else
		return (secpolicy_vnode_remove(ZTOV(zp), cr));
}