1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or https://opensource.org/licenses/CDDL-1.0.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
25  * Copyright (c) 2014 Integros [integros.com]
26  * Copyright 2017 Nexenta Systems, Inc.
27  */
28 
29 /* Portions Copyright 2007 Jeremy Teo */
30 /* Portions Copyright 2010 Robert Milkowski */
31 
32 #include <sys/param.h>
33 #include <sys/time.h>
34 #include <sys/systm.h>
35 #include <sys/sysmacros.h>
36 #include <sys/resource.h>
37 #include <security/mac/mac_framework.h>
38 #include <sys/vfs.h>
39 #include <sys/endian.h>
40 #include <sys/vm.h>
41 #include <sys/vnode.h>
42 #include <sys/smr.h>
43 #include <sys/dirent.h>
44 #include <sys/file.h>
45 #include <sys/stat.h>
46 #include <sys/kmem.h>
47 #include <sys/taskq.h>
48 #include <sys/uio.h>
49 #include <sys/atomic.h>
50 #include <sys/namei.h>
51 #include <sys/mman.h>
52 #include <sys/cmn_err.h>
53 #include <sys/kdb.h>
54 #include <sys/sysproto.h>
55 #include <sys/errno.h>
56 #include <sys/unistd.h>
57 #include <sys/zfs_dir.h>
58 #include <sys/zfs_ioctl.h>
59 #include <sys/fs/zfs.h>
60 #include <sys/dmu.h>
61 #include <sys/dmu_objset.h>
62 #include <sys/spa.h>
63 #include <sys/txg.h>
64 #include <sys/dbuf.h>
65 #include <sys/zap.h>
66 #include <sys/sa.h>
67 #include <sys/policy.h>
68 #include <sys/sunddi.h>
69 #include <sys/filio.h>
70 #include <sys/sid.h>
71 #include <sys/zfs_ctldir.h>
72 #include <sys/zfs_fuid.h>
73 #include <sys/zfs_quota.h>
74 #include <sys/zfs_sa.h>
75 #include <sys/zfs_rlock.h>
76 #include <sys/bio.h>
77 #include <sys/buf.h>
78 #include <sys/sched.h>
79 #include <sys/acl.h>
80 #include <sys/vmmeter.h>
81 #include <vm/vm_param.h>
82 #include <sys/zil.h>
83 #include <sys/zfs_vnops.h>
84 #include <sys/module.h>
85 #include <sys/sysent.h>
86 #include <sys/dmu_impl.h>
87 #include <sys/brt.h>
88 #include <sys/zfeature.h>
89 
90 #include <vm/vm_object.h>
91 
92 #include <sys/extattr.h>
93 #include <sys/priv.h>
94 
95 #ifndef VN_OPEN_INVFS
96 #define	VN_OPEN_INVFS	0x0
97 #endif
98 
99 VFS_SMR_DECLARE;
100 
101 #ifdef DEBUG_VFS_LOCKS
102 #define	VNCHECKREF(vp)				  \
103 	VNASSERT((vp)->v_holdcnt > 0 && (vp)->v_usecount > 0, vp,	\
104 	    ("%s: wrong ref counts", __func__));
105 #else
106 #define	VNCHECKREF(vp)
107 #endif
108 
109 #if __FreeBSD_version >= 1400045
110 typedef uint64_t cookie_t;
111 #else
112 typedef ulong_t cookie_t;
113 #endif
114 
115 /*
116  * Programming rules.
117  *
118  * Each vnode op performs some logical unit of work.  To do this, the ZPL must
119  * properly lock its in-core state, create a DMU transaction, do the work,
120  * record this work in the intent log (ZIL), commit the DMU transaction,
121  * and wait for the intent log to commit if it is a synchronous operation.
122  * Moreover, the vnode ops must work in both normal and log replay context.
123  * The ordering of events is important to avoid deadlocks and references
124  * to freed memory.  The example below illustrates the following Big Rules:
125  *
126  *  (1)	A check must be made in each zfs thread for a mounted file system.
 *	This is done, avoiding races, by using zfs_enter(zfsvfs).
 *	A zfs_exit(zfsvfs) is needed before all returns.  Any znodes
 *	must be checked with zfs_verify_zp(zp).  Both of these macros
 *	can cause the calling function to return EIO.
131  *
132  *  (2)	VN_RELE() should always be the last thing except for zil_commit()
133  *	(if necessary) and zfs_exit(). This is for 3 reasons:
134  *	First, if it's the last reference, the vnode/znode
135  *	can be freed, so the zp may point to freed memory.  Second, the last
136  *	reference will call zfs_zinactive(), which may induce a lot of work --
137  *	pushing cached pages (which acquires range locks) and syncing out
138  *	cached atime changes.  Third, zfs_zinactive() may require a new tx,
139  *	which could deadlock the system if you were already holding one.
140  *	If you must call VN_RELE() within a tx then use VN_RELE_ASYNC().
141  *
142  *  (3)	All range locks must be grabbed before calling dmu_tx_assign(),
143  *	as they can span dmu_tx_assign() calls.
144  *
 *  (4)	If ZPL locks are held, pass TXG_NOWAIT as the second argument to
 *	dmu_tx_assign().  This is critical because we don't want to block
 *	while holding locks.
 *
 *	If no ZPL locks are held (aside from zfs_enter()), use TXG_WAIT.  This
 *	reduces lock contention and CPU usage when we must wait (note that if
 *	throughput is constrained by the storage, nearly every transaction
 *	must wait).
 *
 *	Note, in particular, that if a lock is sometimes acquired before
 *	the tx assigns, and sometimes after (e.g. z_lock), then failing
 *	to use a non-blocking assign can deadlock the system.  The scenario:
157  *
158  *	Thread A has grabbed a lock before calling dmu_tx_assign().
159  *	Thread B is in an already-assigned tx, and blocks for this lock.
160  *	Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
161  *	forever, because the previous txg can't quiesce until B's tx commits.
162  *
163  *	If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
164  *	then drop all locks, call dmu_tx_wait(), and try again.  On subsequent
165  *	calls to dmu_tx_assign(), pass TXG_NOTHROTTLE in addition to TXG_NOWAIT,
166  *	to indicate that this operation has already called dmu_tx_wait().
167  *	This will ensure that we don't retry forever, waiting a short bit
168  *	each time.
169  *
170  *  (5)	If the operation succeeded, generate the intent log entry for it
171  *	before dropping locks.  This ensures that the ordering of events
172  *	in the intent log matches the order in which they actually occurred.
173  *	During ZIL replay the zfs_log_* functions will update the sequence
174  *	number to indicate the zil transaction has replayed.
175  *
176  *  (6)	At the end of each vnode op, the DMU tx must always commit,
177  *	regardless of whether there were any errors.
178  *
179  *  (7)	After dropping all locks, invoke zil_commit(zilog, foid)
180  *	to ensure that synchronous semantics are provided when necessary.
181  *
182  * In general, this is how things should be ordered in each vnode op:
183  *
184  *	zfs_enter(zfsvfs);		// exit if unmounted
185  * top:
186  *	zfs_dirent_lookup(&dl, ...)	// lock directory entry (may VN_HOLD())
187  *	rw_enter(...);			// grab any other locks you need
188  *	tx = dmu_tx_create(...);	// get DMU tx
189  *	dmu_tx_hold_*();		// hold each object you might modify
190  *	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
191  *	if (error) {
192  *		rw_exit(...);		// drop locks
193  *		zfs_dirent_unlock(dl);	// unlock directory entry
194  *		VN_RELE(...);		// release held vnodes
195  *		if (error == ERESTART) {
196  *			waited = B_TRUE;
197  *			dmu_tx_wait(tx);
198  *			dmu_tx_abort(tx);
199  *			goto top;
200  *		}
201  *		dmu_tx_abort(tx);	// abort DMU tx
202  *		zfs_exit(zfsvfs);	// finished in zfs
203  *		return (error);		// really out of space
204  *	}
205  *	error = do_real_work();		// do whatever this VOP does
206  *	if (error == 0)
207  *		zfs_log_*(...);		// on success, make ZIL entry
208  *	dmu_tx_commit(tx);		// commit DMU tx -- error or not
209  *	rw_exit(...);			// drop locks
210  *	zfs_dirent_unlock(dl);		// unlock directory entry
211  *	VN_RELE(...);			// release held vnodes
212  *	zil_commit(zilog, foid);	// synchronous when necessary
213  *	zfs_exit(zfsvfs);		// finished in zfs
214  *	return (error);			// done, report error
215  */
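/*
 * Open a file.  Enforce the append-only restriction for writes and keep
 * a count of synchronous opens so later writes can honor O_SYNC ordering.
 */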
216 static int
217 zfs_open(vnode_t **vpp, int flag, cred_t *cr)
218 {
219 	(void) cr;
220 	znode_t	*zp = VTOZ(*vpp);
221 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
222 	int error;
223 
224 	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
225 		return (error);
226 
227 	if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) &&
228 	    ((flag & FAPPEND) == 0)) {
229 		zfs_exit(zfsvfs, FTAG);
230 		return (SET_ERROR(EPERM));
231 	}
232 
233 	/*
234 	 * Keep a count of the synchronous opens in the znode.  On first
235 	 * synchronous open we must convert all previous async transactions
236 	 * into sync to keep correct ordering.
237 	 */
238 	if (flag & O_SYNC) {
239 		if (atomic_inc_32_nv(&zp->z_sync_cnt) == 1)
240 			zil_async_to_sync(zfsvfs->z_log, zp->z_id);
241 	}
242 
243 	zfs_exit(zfsvfs, FTAG);
244 	return (0);
245 }
246 
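/*
 * Release a file.  Undo the synchronous-open accounting done in zfs_open().
 */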
247 static int
248 zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr)
249 {
250 	(void) offset, (void) cr;
251 	znode_t	*zp = VTOZ(vp);
252 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
253 	int error;
254 
255 	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
256 		return (error);
257 
258 	/* Decrement the synchronous opens in the znode */
259 	if ((flag & O_SYNC) && (count == 1))
260 		atomic_dec_32(&zp->z_sync_cnt);
261 
262 	zfs_exit(zfsvfs, FTAG);
263 	return (0);
264 }
265 
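/*
 * Handle the few ioctls ZFS implements directly: the legacy bfu ioctls are
 * faked out, and F_SEEK_DATA/F_SEEK_HOLE are forwarded to zfs_holey() with
 * the offset passed in and out through 'data'.
 */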
266 static int
267 zfs_ioctl(vnode_t *vp, ulong_t com, intptr_t data, int flag, cred_t *cred,
268     int *rvalp)
269 {
270 	(void) flag, (void) cred, (void) rvalp;
271 	loff_t off;
272 	int error;
273 
274 	switch (com) {
	case _FIOFFS:
	{
		return (0);
	}

	/*
	 * The following two ioctls are used by bfu.  Fake them out;
	 * this is necessary to avoid bfu errors.
	 */
	case _FIOGDIO:
	case _FIOSDIO:
	{
		return (0);
	}
289 
290 	case F_SEEK_DATA:
291 	case F_SEEK_HOLE:
292 	{
293 		off = *(offset_t *)data;
294 		/* offset parameter is in/out */
295 		error = zfs_holey(VTOZ(vp), com, &off);
296 		if (error)
297 			return (error);
298 		*(offset_t *)data = off;
299 		return (0);
300 	}
301 	}
302 	return (SET_ERROR(ENOTTY));
303 }
304 
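/*
 * Shared-busy the resident page backing the given byte range of the file
 * and prepare it to be overwritten with data from the DMU: write-protect
 * its mappings and clear the dirty bits of the covered DEV_BSIZE blocks.
 */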
305 static vm_page_t
306 page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes)
307 {
308 	vm_object_t obj;
309 	vm_page_t pp;
310 	int64_t end;
311 
312 	/*
313 	 * At present vm_page_clear_dirty extends the cleared range to DEV_BSIZE
314 	 * aligned boundaries, if the range is not aligned.  As a result a
315 	 * DEV_BSIZE subrange with partially dirty data may get marked as clean.
316 	 * It may happen that all DEV_BSIZE subranges are marked clean and thus
	 * the whole page would be considered clean despite having some
318 	 * dirty data.
319 	 * For this reason we should shrink the range to DEV_BSIZE aligned
320 	 * boundaries before calling vm_page_clear_dirty.
321 	 */
322 	end = rounddown2(off + nbytes, DEV_BSIZE);
323 	off = roundup2(off, DEV_BSIZE);
324 	nbytes = end - off;
325 
326 	obj = vp->v_object;
327 	vm_page_grab_valid_unlocked(&pp, obj, OFF_TO_IDX(start),
328 	    VM_ALLOC_NOCREAT | VM_ALLOC_SBUSY | VM_ALLOC_NORMAL |
329 	    VM_ALLOC_IGN_SBUSY);
330 	if (pp != NULL) {
331 		ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
332 		vm_object_pip_add(obj, 1);
333 		pmap_remove_write(pp);
334 		if (nbytes != 0)
335 			vm_page_clear_dirty(pp, off, nbytes);
336 	}
337 	return (pp);
338 }
339 
340 static void
341 page_unbusy(vm_page_t pp)
342 {
343 
344 	vm_page_sunbusy(pp);
345 	vm_object_pip_wakeup(pp->object);
346 }
347 
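/*
 * Wire the resident page backing the given file offset, if any, so that
 * it cannot be reclaimed while its contents are being copied out.
 */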
348 static vm_page_t
349 page_hold(vnode_t *vp, int64_t start)
350 {
351 	vm_object_t obj;
352 	vm_page_t m;
353 
354 	obj = vp->v_object;
355 	vm_page_grab_valid_unlocked(&m, obj, OFF_TO_IDX(start),
356 	    VM_ALLOC_NOCREAT | VM_ALLOC_WIRED | VM_ALLOC_IGN_SBUSY |
357 	    VM_ALLOC_NOBUSY);
358 	return (m);
359 }
360 
361 static void
362 page_unhold(vm_page_t pp)
363 {
364 	vm_page_unwire(pp, PQ_ACTIVE);
365 }
366 
367 /*
368  * When a file is memory mapped, we must keep the IO data synchronized
369  * between the DMU cache and the memory mapped pages.  What this means:
370  *
371  * On Write:	If we find a memory mapped page, we write to *both*
372  *		the page and the dmu buffer.
373  */
374 void
375 update_pages(znode_t *zp, int64_t start, int len, objset_t *os)
376 {
377 	vm_object_t obj;
378 	struct sf_buf *sf;
379 	vnode_t *vp = ZTOV(zp);
380 	caddr_t va;
381 	int off;
382 
383 	ASSERT3P(vp->v_mount, !=, NULL);
384 	obj = vp->v_object;
385 	ASSERT3P(obj, !=, NULL);
386 
387 	off = start & PAGEOFFSET;
388 	vm_object_pip_add(obj, 1);
389 	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
390 		vm_page_t pp;
391 		int nbytes = imin(PAGESIZE - off, len);
392 
393 		if ((pp = page_busy(vp, start, off, nbytes)) != NULL) {
394 			va = zfs_map_page(pp, &sf);
395 			(void) dmu_read(os, zp->z_id, start + off, nbytes,
396 			    va + off, DMU_READ_PREFETCH);
397 			zfs_unmap_page(sf);
398 			page_unbusy(pp);
399 		}
400 		len -= nbytes;
401 		off = 0;
402 	}
403 	vm_object_pip_wakeup(obj);
404 }
405 
406 /*
407  * Read with UIO_NOCOPY flag means that sendfile(2) requests
408  * ZFS to populate a range of page cache pages with data.
409  *
410  * NOTE: this function could be optimized to pre-allocate
411  * all pages in advance, drain exclusive busy on all of them,
412  * map them into contiguous KVA region and populate them
413  * in one single dmu_read() call.
414  */
415 int
416 mappedread_sf(znode_t *zp, int nbytes, zfs_uio_t *uio)
417 {
418 	vnode_t *vp = ZTOV(zp);
419 	objset_t *os = zp->z_zfsvfs->z_os;
420 	struct sf_buf *sf;
421 	vm_object_t obj;
422 	vm_page_t pp;
423 	int64_t start;
424 	caddr_t va;
425 	int len = nbytes;
426 	int error = 0;
427 
428 	ASSERT3U(zfs_uio_segflg(uio), ==, UIO_NOCOPY);
429 	ASSERT3P(vp->v_mount, !=, NULL);
430 	obj = vp->v_object;
431 	ASSERT3P(obj, !=, NULL);
432 	ASSERT0(zfs_uio_offset(uio) & PAGEOFFSET);
433 
434 	for (start = zfs_uio_offset(uio); len > 0; start += PAGESIZE) {
435 		int bytes = MIN(PAGESIZE, len);
436 
437 		pp = vm_page_grab_unlocked(obj, OFF_TO_IDX(start),
438 		    VM_ALLOC_SBUSY | VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY);
439 		if (vm_page_none_valid(pp)) {
440 			va = zfs_map_page(pp, &sf);
441 			error = dmu_read(os, zp->z_id, start, bytes, va,
442 			    DMU_READ_PREFETCH);
443 			if (bytes != PAGESIZE && error == 0)
444 				memset(va + bytes, 0, PAGESIZE - bytes);
445 			zfs_unmap_page(sf);
446 			if (error == 0) {
447 				vm_page_valid(pp);
448 				vm_page_activate(pp);
449 				vm_page_sunbusy(pp);
450 			} else {
451 				zfs_vmobject_wlock(obj);
452 				if (!vm_page_wired(pp) && pp->valid == 0 &&
453 				    vm_page_busy_tryupgrade(pp))
454 					vm_page_free(pp);
455 				else
456 					vm_page_sunbusy(pp);
457 				zfs_vmobject_wunlock(obj);
458 			}
459 		} else {
460 			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
461 			vm_page_sunbusy(pp);
462 		}
463 		if (error)
464 			break;
465 		zfs_uio_advance(uio, bytes);
466 		len -= bytes;
467 	}
468 	return (error);
469 }
470 
471 /*
472  * When a file is memory mapped, we must keep the IO data synchronized
473  * between the DMU cache and the memory mapped pages.  What this means:
474  *
475  * On Read:	We "read" preferentially from memory mapped pages,
 *		otherwise we fall back to the dmu buffer.
477  *
478  * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
479  *	 the file is memory mapped.
480  */
481 int
482 mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio)
483 {
484 	vnode_t *vp = ZTOV(zp);
485 	vm_object_t obj;
486 	int64_t start;
487 	int len = nbytes;
488 	int off;
489 	int error = 0;
490 
491 	ASSERT3P(vp->v_mount, !=, NULL);
492 	obj = vp->v_object;
493 	ASSERT3P(obj, !=, NULL);
494 
495 	start = zfs_uio_offset(uio);
496 	off = start & PAGEOFFSET;
497 	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
498 		vm_page_t pp;
499 		uint64_t bytes = MIN(PAGESIZE - off, len);
500 
501 		if ((pp = page_hold(vp, start))) {
502 			struct sf_buf *sf;
503 			caddr_t va;
504 
505 			va = zfs_map_page(pp, &sf);
506 			error = vn_io_fault_uiomove(va + off, bytes,
507 			    GET_UIO_STRUCT(uio));
508 			zfs_unmap_page(sf);
509 			page_unhold(pp);
510 		} else {
511 			error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
512 			    uio, bytes);
513 		}
514 		len -= bytes;
515 		off = 0;
516 		if (error)
517 			break;
518 	}
519 	return (error);
520 }
521 
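/*
 * Synchronously write a kernel buffer to the given znode via vn_rdwr().
 * If the caller does not ask for the residual count, a short write is
 * reported as EIO.
 */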
522 int
523 zfs_write_simple(znode_t *zp, const void *data, size_t len,
524     loff_t pos, size_t *presid)
525 {
526 	int error = 0;
527 	ssize_t resid;
528 
529 	error = vn_rdwr(UIO_WRITE, ZTOV(zp), __DECONST(void *, data), len, pos,
530 	    UIO_SYSSPACE, IO_SYNC, kcred, NOCRED, &resid, curthread);
531 
532 	if (error) {
533 		return (SET_ERROR(error));
534 	} else if (presid == NULL) {
535 		if (resid != 0) {
536 			error = SET_ERROR(EIO);
537 		}
538 	} else {
539 		*presid = resid;
540 	}
541 	return (error);
542 }
543 
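/*
 * Release a znode reference asynchronously on the dsl pool's zrele taskq,
 * so that any final-reference cleanup does not run in the caller's context.
 */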
544 void
545 zfs_zrele_async(znode_t *zp)
546 {
547 	vnode_t *vp = ZTOV(zp);
548 	objset_t *os = ITOZSB(vp)->z_os;
549 
550 	VN_RELE_ASYNC(vp, dsl_pool_zrele_taskq(dmu_objset_pool(os)));
551 }
552 
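/*
 * vn_vget_ino_gen() callback used by zfs_lookup_lock() for ".." lookups:
 * lock the parent vnode passed in through 'arg'.
 */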
553 static int
554 zfs_dd_callback(struct mount *mp, void *arg, int lkflags, struct vnode **vpp)
555 {
556 	int error;
557 
558 	*vpp = arg;
559 	error = vn_lock(*vpp, lkflags);
560 	if (error != 0)
561 		vrele(*vpp);
562 	return (error);
563 }
564 
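/*
 * Lock the vnode produced by a lookup relative to the locked directory
 * dvp, handling the "." and ".." cases that would otherwise deadlock or
 * violate the vnode lock order.
 */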
565 static int
566 zfs_lookup_lock(vnode_t *dvp, vnode_t *vp, const char *name, int lkflags)
567 {
568 	znode_t *zdp = VTOZ(dvp);
569 	zfsvfs_t *zfsvfs __unused = zdp->z_zfsvfs;
570 	int error;
571 	int ltype;
572 
573 	if (zfsvfs->z_replay == B_FALSE)
574 		ASSERT_VOP_LOCKED(dvp, __func__);
575 
576 	if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
577 		ASSERT3P(dvp, ==, vp);
578 		vref(dvp);
579 		ltype = lkflags & LK_TYPE_MASK;
580 		if (ltype != VOP_ISLOCKED(dvp)) {
581 			if (ltype == LK_EXCLUSIVE)
582 				vn_lock(dvp, LK_UPGRADE | LK_RETRY);
583 			else /* if (ltype == LK_SHARED) */
584 				vn_lock(dvp, LK_DOWNGRADE | LK_RETRY);
585 
586 			/*
			 * Relocking for the "." case could leave us with a
			 * reclaimed vnode.
589 			 */
590 			if (VN_IS_DOOMED(dvp)) {
591 				vrele(dvp);
592 				return (SET_ERROR(ENOENT));
593 			}
594 		}
595 		return (0);
596 	} else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
597 		/*
598 		 * Note that in this case, dvp is the child vnode, and we
		 * are looking up the parent vnode - exactly the reverse of
600 		 * normal operation.  Unlocking dvp requires some rather
601 		 * tricky unlock/relock dance to prevent mp from being freed;
602 		 * use vn_vget_ino_gen() which takes care of all that.
603 		 *
604 		 * XXX Note that there is a time window when both vnodes are
605 		 * unlocked.  It is possible, although highly unlikely, that
606 		 * during that window the parent-child relationship between
607 		 * the vnodes may change, for example, get reversed.
608 		 * In that case we would have a wrong lock order for the vnodes.
609 		 * All other filesystems seem to ignore this problem, so we
610 		 * do the same here.
611 		 * A potential solution could be implemented as follows:
612 		 * - using LK_NOWAIT when locking the second vnode and retrying
613 		 *   if necessary
614 		 * - checking that the parent-child relationship still holds
615 		 *   after locking both vnodes and retrying if it doesn't
616 		 */
617 		error = vn_vget_ino_gen(dvp, zfs_dd_callback, vp, lkflags, &vp);
618 		return (error);
619 	} else {
620 		error = vn_lock(vp, lkflags);
621 		if (error != 0)
622 			vrele(vp);
623 		return (error);
624 	}
625 }
626 
627 /*
628  * Lookup an entry in a directory, or an extended attribute directory.
629  * If it exists, return a held vnode reference for it.
630  *
631  *	IN:	dvp	- vnode of directory to search.
632  *		nm	- name of entry to lookup.
633  *		pnp	- full pathname to lookup [UNUSED].
634  *		flags	- LOOKUP_XATTR set if looking for an attribute.
635  *		rdir	- root directory vnode [UNUSED].
636  *		cr	- credentials of caller.
637  *		ct	- caller context
638  *
639  *	OUT:	vpp	- vnode of located entry, NULL if not found.
640  *
641  *	RETURN:	0 on success, error code on failure.
642  *
643  * Timestamps:
644  *	NA
645  */
646 static int
647 zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp,
648     struct componentname *cnp, int nameiop, cred_t *cr, int flags,
649     boolean_t cached)
650 {
651 	znode_t *zdp = VTOZ(dvp);
652 	znode_t *zp;
653 	zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
654 	seqc_t dvp_seqc;
655 	int	error = 0;
656 
657 	/*
	 * Fast path lookup.  However, we must skip the DNLC lookup
	 * for case-folding or normalizing lookups because the
	 * DNLC code only stores the passed-in name.  This means
	 * creating 'a' and removing 'A' on a case-insensitive
	 * file system would work, but the DNLC would still think 'a'
	 * exists and would not let you create it again on the next
	 * pass through the fast path.
665 	 */
666 	if (!(flags & LOOKUP_XATTR)) {
667 		if (dvp->v_type != VDIR) {
668 			return (SET_ERROR(ENOTDIR));
669 		} else if (zdp->z_sa_hdl == NULL) {
670 			return (SET_ERROR(EIO));
671 		}
672 	}
673 
674 	DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp,
675 	    const char *, nm);
676 
677 	if ((error = zfs_enter_verify_zp(zfsvfs, zdp, FTAG)) != 0)
678 		return (error);
679 
680 	dvp_seqc = vn_seqc_read_notmodify(dvp);
681 
682 	*vpp = NULL;
683 
684 	if (flags & LOOKUP_XATTR) {
685 		/*
686 		 * If the xattr property is off, refuse the lookup request.
687 		 */
688 		if (!(zfsvfs->z_flags & ZSB_XATTR)) {
689 			zfs_exit(zfsvfs, FTAG);
690 			return (SET_ERROR(EOPNOTSUPP));
691 		}
692 
693 		/*
		 * We don't allow recursive attributes...
695 		 * Maybe someday we will.
696 		 */
697 		if (zdp->z_pflags & ZFS_XATTR) {
698 			zfs_exit(zfsvfs, FTAG);
699 			return (SET_ERROR(EINVAL));
700 		}
701 
702 		if ((error = zfs_get_xattrdir(VTOZ(dvp), &zp, cr, flags))) {
703 			zfs_exit(zfsvfs, FTAG);
704 			return (error);
705 		}
706 		*vpp = ZTOV(zp);
707 
708 		/*
709 		 * Do we have permission to get into attribute directory?
710 		 */
711 		error = zfs_zaccess(zp, ACE_EXECUTE, 0, B_FALSE, cr, NULL);
712 		if (error) {
713 			vrele(ZTOV(zp));
714 		}
715 
716 		zfs_exit(zfsvfs, FTAG);
717 		return (error);
718 	}
719 
720 	/*
721 	 * Check accessibility of directory if we're not coming in via
722 	 * VOP_CACHEDLOOKUP.
723 	 */
724 	if (!cached) {
725 #ifdef NOEXECCHECK
726 		if ((cnp->cn_flags & NOEXECCHECK) != 0) {
727 			cnp->cn_flags &= ~NOEXECCHECK;
728 		} else
729 #endif
730 		if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr,
731 		    NULL))) {
732 			zfs_exit(zfsvfs, FTAG);
733 			return (error);
734 		}
735 	}
736 
737 	if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
738 	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
739 		zfs_exit(zfsvfs, FTAG);
740 		return (SET_ERROR(EILSEQ));
741 	}
742 
743 
744 	/*
745 	 * First handle the special cases.
746 	 */
747 	if ((cnp->cn_flags & ISDOTDOT) != 0) {
748 		/*
749 		 * If we are a snapshot mounted under .zfs, return
750 		 * the vp for the snapshot directory.
751 		 */
752 		if (zdp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) {
753 			struct componentname cn;
754 			vnode_t *zfsctl_vp;
755 			int ltype;
756 
757 			zfs_exit(zfsvfs, FTAG);
758 			ltype = VOP_ISLOCKED(dvp);
759 			VOP_UNLOCK(dvp);
760 			error = zfsctl_root(zfsvfs->z_parent, LK_SHARED,
761 			    &zfsctl_vp);
762 			if (error == 0) {
763 				cn.cn_nameptr = "snapshot";
764 				cn.cn_namelen = strlen(cn.cn_nameptr);
765 				cn.cn_nameiop = cnp->cn_nameiop;
766 				cn.cn_flags = cnp->cn_flags & ~ISDOTDOT;
767 				cn.cn_lkflags = cnp->cn_lkflags;
768 				error = VOP_LOOKUP(zfsctl_vp, vpp, &cn);
769 				vput(zfsctl_vp);
770 			}
771 			vn_lock(dvp, ltype | LK_RETRY);
772 			return (error);
773 		}
774 	}
775 	if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) {
776 		zfs_exit(zfsvfs, FTAG);
777 		if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP)
778 			return (SET_ERROR(ENOTSUP));
779 		error = zfsctl_root(zfsvfs, cnp->cn_lkflags, vpp);
780 		return (error);
781 	}
782 
783 	/*
	 * The loop retries the lookup if the parent-child relationship
	 * changes during the dot-dot locking complexities.
786 	 */
787 	for (;;) {
788 		uint64_t parent;
789 
790 		error = zfs_dirlook(zdp, nm, &zp);
791 		if (error == 0)
792 			*vpp = ZTOV(zp);
793 
794 		zfs_exit(zfsvfs, FTAG);
795 		if (error != 0)
796 			break;
797 
798 		error = zfs_lookup_lock(dvp, *vpp, nm, cnp->cn_lkflags);
799 		if (error != 0) {
800 			/*
801 			 * If we've got a locking error, then the vnode
802 			 * got reclaimed because of a force unmount.
803 			 * We never enter doomed vnodes into the name cache.
804 			 */
805 			*vpp = NULL;
806 			return (error);
807 		}
808 
809 		if ((cnp->cn_flags & ISDOTDOT) == 0)
810 			break;
811 
812 		if ((error = zfs_enter(zfsvfs, FTAG)) != 0) {
813 			vput(ZTOV(zp));
814 			*vpp = NULL;
815 			return (error);
816 		}
817 		if (zdp->z_sa_hdl == NULL) {
818 			error = SET_ERROR(EIO);
819 		} else {
820 			error = sa_lookup(zdp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
821 			    &parent, sizeof (parent));
822 		}
823 		if (error != 0) {
824 			zfs_exit(zfsvfs, FTAG);
825 			vput(ZTOV(zp));
826 			break;
827 		}
828 		if (zp->z_id == parent) {
829 			zfs_exit(zfsvfs, FTAG);
830 			break;
831 		}
832 		vput(ZTOV(zp));
833 	}
834 
835 	if (error != 0)
836 		*vpp = NULL;
837 
838 	/* Translate errors and add SAVENAME when needed. */
839 	if (cnp->cn_flags & ISLASTCN) {
840 		switch (nameiop) {
841 		case CREATE:
842 		case RENAME:
843 			if (error == ENOENT) {
844 				error = EJUSTRETURN;
845 #if __FreeBSD_version < 1400068
846 				cnp->cn_flags |= SAVENAME;
847 #endif
848 				break;
849 			}
850 			zfs_fallthrough;
851 		case DELETE:
852 #if __FreeBSD_version < 1400068
853 			if (error == 0)
854 				cnp->cn_flags |= SAVENAME;
855 #endif
856 			break;
857 		}
858 	}
859 
860 	if ((cnp->cn_flags & ISDOTDOT) != 0) {
861 		/*
862 		 * FIXME: zfs_lookup_lock relocks vnodes and does nothing to
863 		 * handle races. In particular different callers may end up
864 		 * with different vnodes and will try to add conflicting
865 		 * entries to the namecache.
866 		 *
		 * While finding a different result may be acceptable in the face
868 		 * of concurrent modification, adding conflicting entries
869 		 * trips over an assert in the namecache.
870 		 *
871 		 * Ultimately let an entry through once everything settles.
872 		 */
873 		if (!vn_seqc_consistent(dvp, dvp_seqc)) {
874 			cnp->cn_flags &= ~MAKEENTRY;
875 		}
876 	}
877 
878 	/* Insert name into cache (as non-existent) if appropriate. */
879 	if (zfsvfs->z_use_namecache && !zfsvfs->z_replay &&
880 	    error == ENOENT && (cnp->cn_flags & MAKEENTRY) != 0)
881 		cache_enter(dvp, NULL, cnp);
882 
883 	/* Insert name into cache if appropriate. */
884 	if (zfsvfs->z_use_namecache && !zfsvfs->z_replay &&
885 	    error == 0 && (cnp->cn_flags & MAKEENTRY)) {
886 		if (!(cnp->cn_flags & ISLASTCN) ||
887 		    (nameiop != DELETE && nameiop != RENAME)) {
888 			cache_enter(dvp, *vpp, cnp);
889 		}
890 	}
891 
892 	return (error);
893 }
894 
895 /*
896  * Attempt to create a new entry in a directory.  If the entry
897  * already exists, truncate the file if permissible, else return
898  * an error.  Return the vp of the created or trunc'd file.
899  *
900  *	IN:	dvp	- vnode of directory to put new file entry in.
901  *		name	- name of new file entry.
902  *		vap	- attributes of new file.
903  *		excl	- flag indicating exclusive or non-exclusive mode.
904  *		mode	- mode to open file with.
905  *		cr	- credentials of caller.
906  *		flag	- large file flag [UNUSED].
907  *		ct	- caller context
908  *		vsecp	- ACL to be set
909  *		mnt_ns	- Unused on FreeBSD
910  *
911  *	OUT:	vpp	- vnode of created or trunc'd entry.
912  *
913  *	RETURN:	0 on success, error code on failure.
914  *
915  * Timestamps:
916  *	dvp - ctime|mtime updated if new entry created
917  *	 vp - ctime|mtime always, atime if new
918  */
919 int
920 zfs_create(znode_t *dzp, const char *name, vattr_t *vap, int excl, int mode,
921     znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp, zidmap_t *mnt_ns)
922 {
923 	(void) excl, (void) mode, (void) flag;
924 	znode_t		*zp;
925 	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
926 	zilog_t		*zilog;
927 	objset_t	*os;
928 	dmu_tx_t	*tx;
929 	int		error;
930 	uid_t		uid = crgetuid(cr);
931 	gid_t		gid = crgetgid(cr);
932 	uint64_t	projid = ZFS_DEFAULT_PROJID;
933 	zfs_acl_ids_t   acl_ids;
934 	boolean_t	fuid_dirtied;
935 	uint64_t	txtype;
936 #ifdef DEBUG_VFS_LOCKS
937 	vnode_t	*dvp = ZTOV(dzp);
938 #endif
939 
940 	/*
941 	 * If we have an ephemeral id, ACL, or XVATTR then
942 	 * make sure file system is at proper version
943 	 */
944 	if (zfsvfs->z_use_fuids == B_FALSE &&
945 	    (vsecp || (vap->va_mask & AT_XVATTR) ||
946 	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
947 		return (SET_ERROR(EINVAL));
948 
949 	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
950 		return (error);
951 	os = zfsvfs->z_os;
952 	zilog = zfsvfs->z_log;
953 
954 	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
955 	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
956 		zfs_exit(zfsvfs, FTAG);
957 		return (SET_ERROR(EILSEQ));
958 	}
959 
960 	if (vap->va_mask & AT_XVATTR) {
961 		if ((error = secpolicy_xvattr(ZTOV(dzp), (xvattr_t *)vap,
962 		    crgetuid(cr), cr, vap->va_type)) != 0) {
963 			zfs_exit(zfsvfs, FTAG);
964 			return (error);
965 		}
966 	}
967 
968 	*zpp = NULL;
969 
970 	if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr))
971 		vap->va_mode &= ~S_ISVTX;
972 
973 	error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
974 	if (error) {
975 		zfs_exit(zfsvfs, FTAG);
976 		return (error);
977 	}
978 	ASSERT3P(zp, ==, NULL);
979 
980 	/*
981 	 * Create a new file object and update the directory
982 	 * to reference it.
983 	 */
984 	if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr, mnt_ns))) {
985 		goto out;
986 	}
987 
988 	/*
989 	 * We only support the creation of regular files in
990 	 * extended attribute directories.
991 	 */
992 
993 	if ((dzp->z_pflags & ZFS_XATTR) &&
994 	    (vap->va_type != VREG)) {
995 		error = SET_ERROR(EINVAL);
996 		goto out;
997 	}
998 
999 	if ((error = zfs_acl_ids_create(dzp, 0, vap,
1000 	    cr, vsecp, &acl_ids, NULL)) != 0)
1001 		goto out;
1002 
1003 	if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode))
1004 		projid = zfs_inherit_projid(dzp);
1005 	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, projid)) {
1006 		zfs_acl_ids_free(&acl_ids);
1007 		error = SET_ERROR(EDQUOT);
1008 		goto out;
1009 	}
1010 
1011 	getnewvnode_reserve();
1012 
1013 	tx = dmu_tx_create(os);
1014 
1015 	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
1016 	    ZFS_SA_BASE_ATTR_SIZE);
1017 
1018 	fuid_dirtied = zfsvfs->z_fuid_dirty;
1019 	if (fuid_dirtied)
1020 		zfs_fuid_txhold(zfsvfs, tx);
1021 	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
1022 	dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
1023 	if (!zfsvfs->z_use_sa &&
1024 	    acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
1025 		dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
1026 		    0, acl_ids.z_aclp->z_acl_bytes);
1027 	}
1028 	error = dmu_tx_assign(tx, TXG_WAIT);
1029 	if (error) {
1030 		zfs_acl_ids_free(&acl_ids);
1031 		dmu_tx_abort(tx);
1032 		getnewvnode_drop_reserve();
1033 		zfs_exit(zfsvfs, FTAG);
1034 		return (error);
1035 	}
1036 	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
1037 
1038 	error = zfs_link_create(dzp, name, zp, tx, ZNEW);
1039 	if (error != 0) {
1040 		/*
		 * Since we failed to add the directory entry for it,
1042 		 * delete the newly created dnode.
1043 		 */
1044 		zfs_znode_delete(zp, tx);
1045 		VOP_UNLOCK(ZTOV(zp));
1046 		zrele(zp);
1047 		zfs_acl_ids_free(&acl_ids);
1048 		dmu_tx_commit(tx);
1049 		getnewvnode_drop_reserve();
1050 		goto out;
1051 	}
1052 
1053 	if (fuid_dirtied)
1054 		zfs_fuid_sync(zfsvfs, tx);
1055 
1056 	txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
1057 	zfs_log_create(zilog, tx, txtype, dzp, zp, name,
1058 	    vsecp, acl_ids.z_fuidp, vap);
1059 	zfs_acl_ids_free(&acl_ids);
1060 	dmu_tx_commit(tx);
1061 
1062 	getnewvnode_drop_reserve();
1063 
1064 out:
1065 	VNCHECKREF(dvp);
1066 	if (error == 0) {
1067 		*zpp = zp;
1068 	}
1069 
1070 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1071 		zil_commit(zilog, 0);
1072 
1073 	zfs_exit(zfsvfs, FTAG);
1074 	return (error);
1075 }
1076 
1077 /*
1078  * Remove an entry from a directory.
1079  *
1080  *	IN:	dvp	- vnode of directory to remove entry from.
1081  *		name	- name of entry to remove.
1082  *		cr	- credentials of caller.
1083  *		ct	- caller context
1084  *		flags	- case flags
1085  *
1086  *	RETURN:	0 on success, error code on failure.
1087  *
1088  * Timestamps:
1089  *	dvp - ctime|mtime
1090  *	 vp - ctime (if nlink > 0)
1091  */
1092 static int
1093 zfs_remove_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
1094 {
1095 	znode_t		*dzp = VTOZ(dvp);
1096 	znode_t		*zp;
1097 	znode_t		*xzp;
1098 	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
1099 	zilog_t		*zilog;
1100 	uint64_t	xattr_obj;
1101 	uint64_t	obj = 0;
1102 	dmu_tx_t	*tx;
1103 	boolean_t	unlinked;
1104 	uint64_t	txtype;
1105 	int		error;
1106 
1107 
1108 	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
1109 		return (error);
1110 	zp = VTOZ(vp);
1111 	if ((error = zfs_verify_zp(zp)) != 0) {
1112 		zfs_exit(zfsvfs, FTAG);
1113 		return (error);
1114 	}
1115 	zilog = zfsvfs->z_log;
1116 
1117 	xattr_obj = 0;
1118 	xzp = NULL;
1119 
1120 	if ((error = zfs_zaccess_delete(dzp, zp, cr, NULL))) {
1121 		goto out;
1122 	}
1123 
1124 	/*
1125 	 * Need to use rmdir for removing directories.
1126 	 */
1127 	if (vp->v_type == VDIR) {
1128 		error = SET_ERROR(EPERM);
1129 		goto out;
1130 	}
1131 
1132 	vnevent_remove(vp, dvp, name, ct);
1133 
1134 	obj = zp->z_id;
1135 
1136 	/* are there any extended attributes? */
1137 	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
1138 	    &xattr_obj, sizeof (xattr_obj));
1139 	if (error == 0 && xattr_obj) {
1140 		error = zfs_zget(zfsvfs, xattr_obj, &xzp);
1141 		ASSERT0(error);
1142 	}
1143 
1144 	/*
1145 	 * We may delete the znode now, or we may put it in the unlinked set;
1146 	 * it depends on whether we're the last link, and on whether there are
1147 	 * other holds on the vnode.  So we dmu_tx_hold() the right things to
1148 	 * allow for either case.
1149 	 */
1150 	tx = dmu_tx_create(zfsvfs->z_os);
1151 	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
1152 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1153 	zfs_sa_upgrade_txholds(tx, zp);
1154 	zfs_sa_upgrade_txholds(tx, dzp);
1155 
1156 	if (xzp) {
1157 		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
1158 		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
1159 	}
1160 
1161 	/* charge as an update -- would be nice not to charge at all */
1162 	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
1163 
1164 	/*
1165 	 * Mark this transaction as typically resulting in a net free of space
1166 	 */
1167 	dmu_tx_mark_netfree(tx);
1168 
1169 	error = dmu_tx_assign(tx, TXG_WAIT);
1170 	if (error) {
1171 		dmu_tx_abort(tx);
1172 		zfs_exit(zfsvfs, FTAG);
1173 		return (error);
1174 	}
1175 
1176 	/*
1177 	 * Remove the directory entry.
1178 	 */
1179 	error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, &unlinked);
1180 
1181 	if (error) {
1182 		dmu_tx_commit(tx);
1183 		goto out;
1184 	}
1185 
1186 	if (unlinked) {
1187 		zfs_unlinked_add(zp, tx);
1188 		vp->v_vflag |= VV_NOSYNC;
1189 	}
1190 	/* XXX check changes to linux vnops */
1191 	txtype = TX_REMOVE;
1192 	zfs_log_remove(zilog, tx, txtype, dzp, name, obj, unlinked);
1193 
1194 	dmu_tx_commit(tx);
1195 out:
1196 
1197 	if (xzp)
1198 		vrele(ZTOV(xzp));
1199 
1200 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1201 		zil_commit(zilog, 0);
1202 
1203 
1204 	zfs_exit(zfsvfs, FTAG);
1205 	return (error);
1206 }
1207 
1208 
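/*
 * Helper for zfs_remove() and zfs_rmdir(): build a componentname for the
 * given name and look it up under dzp, going through the namecache when
 * it is enabled for this filesystem.
 */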
1209 static int
1210 zfs_lookup_internal(znode_t *dzp, const char *name, vnode_t **vpp,
1211     struct componentname *cnp, int nameiop)
1212 {
1213 	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
1214 	int error;
1215 
1216 	cnp->cn_nameptr = __DECONST(char *, name);
1217 	cnp->cn_namelen = strlen(name);
1218 	cnp->cn_nameiop = nameiop;
1219 	cnp->cn_flags = ISLASTCN;
1220 #if __FreeBSD_version < 1400068
1221 	cnp->cn_flags |= SAVENAME;
1222 #endif
1223 	cnp->cn_lkflags = LK_EXCLUSIVE | LK_RETRY;
1224 	cnp->cn_cred = kcred;
1225 #if __FreeBSD_version < 1400037
1226 	cnp->cn_thread = curthread;
1227 #endif
1228 
1229 	if (zfsvfs->z_use_namecache && !zfsvfs->z_replay) {
1230 		struct vop_lookup_args a;
1231 
1232 		a.a_gen.a_desc = &vop_lookup_desc;
1233 		a.a_dvp = ZTOV(dzp);
1234 		a.a_vpp = vpp;
1235 		a.a_cnp = cnp;
1236 		error = vfs_cache_lookup(&a);
1237 	} else {
1238 		error = zfs_lookup(ZTOV(dzp), name, vpp, cnp, nameiop, kcred, 0,
1239 		    B_FALSE);
1240 	}
1241 #ifdef ZFS_DEBUG
1242 	if (error) {
1243 		printf("got error %d on name %s on op %d\n", error, name,
1244 		    nameiop);
1245 		kdb_backtrace();
1246 	}
1247 #endif
1248 	return (error);
1249 }
1250 
1251 int
1252 zfs_remove(znode_t *dzp, const char *name, cred_t *cr, int flags)
1253 {
1254 	vnode_t *vp;
1255 	int error;
1256 	struct componentname cn;
1257 
1258 	if ((error = zfs_lookup_internal(dzp, name, &vp, &cn, DELETE)))
1259 		return (error);
1260 
1261 	error = zfs_remove_(ZTOV(dzp), vp, name, cr);
1262 	vput(vp);
1263 	return (error);
1264 }
1265 /*
1266  * Create a new directory and insert it into dvp using the name
1267  * provided.  Return a pointer to the inserted directory.
1268  *
1269  *	IN:	dvp	- vnode of directory to add subdir to.
1270  *		dirname	- name of new directory.
1271  *		vap	- attributes of new directory.
1272  *		cr	- credentials of caller.
1273  *		ct	- caller context
1274  *		flags	- case flags
1275  *		vsecp	- ACL to be set
1276  *		mnt_ns	- Unused on FreeBSD
1277  *
1278  *	OUT:	vpp	- vnode of created directory.
1279  *
1280  *	RETURN:	0 on success, error code on failure.
1281  *
1282  * Timestamps:
1283  *	dvp - ctime|mtime updated
1284  *	 vp - ctime|mtime|atime updated
1285  */
1286 int
1287 zfs_mkdir(znode_t *dzp, const char *dirname, vattr_t *vap, znode_t **zpp,
1288     cred_t *cr, int flags, vsecattr_t *vsecp, zidmap_t *mnt_ns)
1289 {
1290 	(void) flags, (void) vsecp;
1291 	znode_t		*zp;
1292 	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
1293 	zilog_t		*zilog;
1294 	uint64_t	txtype;
1295 	dmu_tx_t	*tx;
1296 	int		error;
1297 	uid_t		uid = crgetuid(cr);
1298 	gid_t		gid = crgetgid(cr);
1299 	zfs_acl_ids_t   acl_ids;
1300 	boolean_t	fuid_dirtied;
1301 
1302 	ASSERT3U(vap->va_type, ==, VDIR);
1303 
1304 	/*
1305 	 * If we have an ephemeral id, ACL, or XVATTR then
1306 	 * make sure file system is at proper version
1307 	 */
1308 	if (zfsvfs->z_use_fuids == B_FALSE &&
1309 	    ((vap->va_mask & AT_XVATTR) ||
1310 	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
1311 		return (SET_ERROR(EINVAL));
1312 
1313 	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
1314 		return (error);
1315 	zilog = zfsvfs->z_log;
1316 
1317 	if (dzp->z_pflags & ZFS_XATTR) {
1318 		zfs_exit(zfsvfs, FTAG);
1319 		return (SET_ERROR(EINVAL));
1320 	}
1321 
1322 	if (zfsvfs->z_utf8 && u8_validate(dirname,
1323 	    strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
1324 		zfs_exit(zfsvfs, FTAG);
1325 		return (SET_ERROR(EILSEQ));
1326 	}
1327 
1328 	if (vap->va_mask & AT_XVATTR) {
1329 		if ((error = secpolicy_xvattr(ZTOV(dzp), (xvattr_t *)vap,
1330 		    crgetuid(cr), cr, vap->va_type)) != 0) {
1331 			zfs_exit(zfsvfs, FTAG);
1332 			return (error);
1333 		}
1334 	}
1335 
1336 	if ((error = zfs_acl_ids_create(dzp, 0, vap, cr,
1337 	    NULL, &acl_ids, NULL)) != 0) {
1338 		zfs_exit(zfsvfs, FTAG);
1339 		return (error);
1340 	}
1341 
1342 	/*
1343 	 * First make sure the new directory doesn't exist.
1344 	 *
1345 	 * Existence is checked first to make sure we don't return
1346 	 * EACCES instead of EEXIST which can cause some applications
1347 	 * to fail.
1348 	 */
1349 	*zpp = NULL;
1350 
1351 	if ((error = zfs_dirent_lookup(dzp, dirname, &zp, ZNEW))) {
1352 		zfs_acl_ids_free(&acl_ids);
1353 		zfs_exit(zfsvfs, FTAG);
1354 		return (error);
1355 	}
1356 	ASSERT3P(zp, ==, NULL);
1357 
1358 	if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr,
1359 	    mnt_ns))) {
1360 		zfs_acl_ids_free(&acl_ids);
1361 		zfs_exit(zfsvfs, FTAG);
1362 		return (error);
1363 	}
1364 
1365 	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, zfs_inherit_projid(dzp))) {
1366 		zfs_acl_ids_free(&acl_ids);
1367 		zfs_exit(zfsvfs, FTAG);
1368 		return (SET_ERROR(EDQUOT));
1369 	}
1370 
1371 	/*
1372 	 * Add a new entry to the directory.
1373 	 */
1374 	getnewvnode_reserve();
1375 	tx = dmu_tx_create(zfsvfs->z_os);
1376 	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
1377 	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
1378 	fuid_dirtied = zfsvfs->z_fuid_dirty;
1379 	if (fuid_dirtied)
1380 		zfs_fuid_txhold(zfsvfs, tx);
1381 	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
1382 		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
1383 		    acl_ids.z_aclp->z_acl_bytes);
1384 	}
1385 
1386 	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
1387 	    ZFS_SA_BASE_ATTR_SIZE);
1388 
1389 	error = dmu_tx_assign(tx, TXG_WAIT);
1390 	if (error) {
1391 		zfs_acl_ids_free(&acl_ids);
1392 		dmu_tx_abort(tx);
1393 		getnewvnode_drop_reserve();
1394 		zfs_exit(zfsvfs, FTAG);
1395 		return (error);
1396 	}
1397 
1398 	/*
1399 	 * Create new node.
1400 	 */
1401 	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
1402 
1403 	/*
1404 	 * Now put new name in parent dir.
1405 	 */
1406 	error = zfs_link_create(dzp, dirname, zp, tx, ZNEW);
1407 	if (error != 0) {
1408 		zfs_znode_delete(zp, tx);
1409 		VOP_UNLOCK(ZTOV(zp));
1410 		zrele(zp);
1411 		goto out;
1412 	}
1413 
1414 	if (fuid_dirtied)
1415 		zfs_fuid_sync(zfsvfs, tx);
1416 
1417 	*zpp = zp;
1418 
1419 	txtype = zfs_log_create_txtype(Z_DIR, NULL, vap);
1420 	zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, NULL,
1421 	    acl_ids.z_fuidp, vap);
1422 
1423 out:
1424 	zfs_acl_ids_free(&acl_ids);
1425 
1426 	dmu_tx_commit(tx);
1427 
1428 	getnewvnode_drop_reserve();
1429 
1430 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1431 		zil_commit(zilog, 0);
1432 
1433 	zfs_exit(zfsvfs, FTAG);
1434 	return (error);
1435 }
1436 
1437 /*
1438  * Remove a directory subdir entry.  If the current working
1439  * directory is the same as the subdir to be removed, the
1440  * remove will fail.
1441  *
1442  *	IN:	dvp	- vnode of directory to remove from.
1443  *		name	- name of directory to be removed.
1444  *		cwd	- vnode of current working directory.
1445  *		cr	- credentials of caller.
1446  *		ct	- caller context
1447  *		flags	- case flags
1448  *
1449  *	RETURN:	0 on success, error code on failure.
1450  *
1451  * Timestamps:
1452  *	dvp - ctime|mtime updated
1453  */
1454 static int
1455 zfs_rmdir_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
1456 {
1457 	znode_t		*dzp = VTOZ(dvp);
1458 	znode_t		*zp = VTOZ(vp);
1459 	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
1460 	zilog_t		*zilog;
1461 	dmu_tx_t	*tx;
1462 	int		error;
1463 
1464 	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
1465 		return (error);
1466 	if ((error = zfs_verify_zp(zp)) != 0) {
1467 		zfs_exit(zfsvfs, FTAG);
1468 		return (error);
1469 	}
1470 	zilog = zfsvfs->z_log;
1471 
1472 
1473 	if ((error = zfs_zaccess_delete(dzp, zp, cr, NULL))) {
1474 		goto out;
1475 	}
1476 
1477 	if (vp->v_type != VDIR) {
1478 		error = SET_ERROR(ENOTDIR);
1479 		goto out;
1480 	}
1481 
1482 	vnevent_rmdir(vp, dvp, name, ct);
1483 
1484 	tx = dmu_tx_create(zfsvfs->z_os);
1485 	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
1486 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1487 	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
1488 	zfs_sa_upgrade_txholds(tx, zp);
1489 	zfs_sa_upgrade_txholds(tx, dzp);
1490 	dmu_tx_mark_netfree(tx);
1491 	error = dmu_tx_assign(tx, TXG_WAIT);
1492 	if (error) {
1493 		dmu_tx_abort(tx);
1494 		zfs_exit(zfsvfs, FTAG);
1495 		return (error);
1496 	}
1497 
1498 	error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, NULL);
1499 
1500 	if (error == 0) {
1501 		uint64_t txtype = TX_RMDIR;
1502 		zfs_log_remove(zilog, tx, txtype, dzp, name,
1503 		    ZFS_NO_OBJECT, B_FALSE);
1504 	}
1505 
1506 	dmu_tx_commit(tx);
1507 
1508 	if (zfsvfs->z_use_namecache)
1509 		cache_vop_rmdir(dvp, vp);
1510 out:
1511 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1512 		zil_commit(zilog, 0);
1513 
1514 	zfs_exit(zfsvfs, FTAG);
1515 	return (error);
1516 }
1517 
1518 int
1519 zfs_rmdir(znode_t *dzp, const char *name, znode_t *cwd, cred_t *cr, int flags)
1520 {
1521 	struct componentname cn;
1522 	vnode_t *vp;
1523 	int error;
1524 
1525 	if ((error = zfs_lookup_internal(dzp, name, &vp, &cn, DELETE)))
1526 		return (error);
1527 
1528 	error = zfs_rmdir_(ZTOV(dzp), vp, name, cr);
1529 	vput(vp);
1530 	return (error);
1531 }
1532 
1533 /*
1534  * Read as many directory entries as will fit into the provided
1535  * buffer from the given directory cursor position (specified in
1536  * the uio structure).
1537  *
1538  *	IN:	vp	- vnode of directory to read.
1539  *		uio	- structure supplying read location, range info,
1540  *			  and return buffer.
1541  *		cr	- credentials of caller.
1542  *		ct	- caller context
1543  *
1544  *	OUT:	uio	- updated offset and range, buffer filled.
1545  *		eofp	- set to true if end-of-file detected.
1546  *		ncookies- number of entries in cookies
1547  *		cookies	- offsets to directory entries
1548  *
1549  *	RETURN:	0 on success, error code on failure.
1550  *
1551  * Timestamps:
1552  *	vp - atime updated
1553  *
 * Note that the low 4 bits of the cookie returned by zap are always zero.
1555  * This allows us to use the low range for "special" directory entries:
1556  * We use 0 for '.', and 1 for '..'.  If this is the root of the filesystem,
1557  * we use the offset 2 for the '.zfs' directory.
1558  */
1559 static int
1560 zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp,
1561     int *ncookies, cookie_t **cookies)
1562 {
1563 	znode_t		*zp = VTOZ(vp);
1564 	iovec_t		*iovp;
1565 	dirent64_t	*odp;
1566 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
1567 	objset_t	*os;
1568 	caddr_t		outbuf;
1569 	size_t		bufsize;
1570 	zap_cursor_t	zc;
1571 	zap_attribute_t	zap;
1572 	uint_t		bytes_wanted;
1573 	uint64_t	offset; /* must be unsigned; checks for < 1 */
1574 	uint64_t	parent;
1575 	int		local_eof;
1576 	int		outcount;
1577 	int		error;
1578 	uint8_t		prefetch;
1579 	uint8_t		type;
1580 	int		ncooks;
1581 	cookie_t	*cooks = NULL;
1582 
1583 	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
1584 		return (error);
1585 
1586 	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
1587 	    &parent, sizeof (parent))) != 0) {
1588 		zfs_exit(zfsvfs, FTAG);
1589 		return (error);
1590 	}
1591 
1592 	/*
1593 	 * If we are not given an eof variable,
1594 	 * use a local one.
1595 	 */
1596 	if (eofp == NULL)
1597 		eofp = &local_eof;
1598 
1599 	/*
1600 	 * Check for valid iov_len.
1601 	 */
1602 	if (GET_UIO_STRUCT(uio)->uio_iov->iov_len <= 0) {
1603 		zfs_exit(zfsvfs, FTAG);
1604 		return (SET_ERROR(EINVAL));
1605 	}
1606 
1607 	/*
1608 	 * Quit if directory has been removed (posix)
1609 	 */
1610 	if ((*eofp = zp->z_unlinked) != 0) {
1611 		zfs_exit(zfsvfs, FTAG);
1612 		return (0);
1613 	}
1614 
1615 	error = 0;
1616 	os = zfsvfs->z_os;
1617 	offset = zfs_uio_offset(uio);
1618 	prefetch = zp->z_zn_prefetch;
1619 
1620 	/*
1621 	 * Initialize the iterator cursor.
1622 	 */
1623 	if (offset <= 3) {
1624 		/*
1625 		 * Start iteration from the beginning of the directory.
1626 		 */
1627 		zap_cursor_init(&zc, os, zp->z_id);
1628 	} else {
1629 		/*
1630 		 * The offset is a serialized cursor.
1631 		 */
1632 		zap_cursor_init_serialized(&zc, os, zp->z_id, offset);
1633 	}
1634 
1635 	/*
1636 	 * Get space to change directory entries into fs independent format.
1637 	 */
1638 	iovp = GET_UIO_STRUCT(uio)->uio_iov;
1639 	bytes_wanted = iovp->iov_len;
1640 	if (zfs_uio_segflg(uio) != UIO_SYSSPACE || zfs_uio_iovcnt(uio) != 1) {
1641 		bufsize = bytes_wanted;
1642 		outbuf = kmem_alloc(bufsize, KM_SLEEP);
1643 		odp = (struct dirent64 *)outbuf;
1644 	} else {
1645 		bufsize = bytes_wanted;
1646 		outbuf = NULL;
1647 		odp = (struct dirent64 *)iovp->iov_base;
1648 	}
1649 
1650 	if (ncookies != NULL) {
1651 		/*
1652 		 * Minimum entry size is dirent size and 1 byte for a file name.
1653 		 */
1654 		ncooks = zfs_uio_resid(uio) / (sizeof (struct dirent) -
1655 		    sizeof (((struct dirent *)NULL)->d_name) + 1);
1656 		cooks = malloc(ncooks * sizeof (*cooks), M_TEMP, M_WAITOK);
1657 		*cookies = cooks;
1658 		*ncookies = ncooks;
1659 	}
1660 
1661 	/*
1662 	 * Transform to file-system independent format
1663 	 */
1664 	outcount = 0;
1665 	while (outcount < bytes_wanted) {
1666 		ino64_t objnum;
1667 		ushort_t reclen;
1668 		off64_t *next = NULL;
1669 
1670 		/*
1671 		 * Special case `.', `..', and `.zfs'.
1672 		 */
1673 		if (offset == 0) {
1674 			(void) strcpy(zap.za_name, ".");
1675 			zap.za_normalization_conflict = 0;
1676 			objnum = zp->z_id;
1677 			type = DT_DIR;
1678 		} else if (offset == 1) {
1679 			(void) strcpy(zap.za_name, "..");
1680 			zap.za_normalization_conflict = 0;
1681 			objnum = parent;
1682 			type = DT_DIR;
1683 		} else if (offset == 2 && zfs_show_ctldir(zp)) {
1684 			(void) strcpy(zap.za_name, ZFS_CTLDIR_NAME);
1685 			zap.za_normalization_conflict = 0;
1686 			objnum = ZFSCTL_INO_ROOT;
1687 			type = DT_DIR;
1688 		} else {
1689 			/*
1690 			 * Grab next entry.
1691 			 */
1692 			if ((error = zap_cursor_retrieve(&zc, &zap))) {
1693 				if ((*eofp = (error == ENOENT)) != 0)
1694 					break;
1695 				else
1696 					goto update;
1697 			}
1698 
1699 			if (zap.za_integer_length != 8 ||
1700 			    zap.za_num_integers != 1) {
1701 				cmn_err(CE_WARN, "zap_readdir: bad directory "
1702 				    "entry, obj = %lld, offset = %lld\n",
1703 				    (u_longlong_t)zp->z_id,
1704 				    (u_longlong_t)offset);
1705 				error = SET_ERROR(ENXIO);
1706 				goto update;
1707 			}
1708 
1709 			objnum = ZFS_DIRENT_OBJ(zap.za_first_integer);
1710 			/*
1711 			 * MacOS X can extract the object type here such as:
1712 			 * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer);
1713 			 */
1714 			type = ZFS_DIRENT_TYPE(zap.za_first_integer);
1715 		}
1716 
1717 		reclen = DIRENT64_RECLEN(strlen(zap.za_name));
1718 
1719 		/*
1720 		 * Will this entry fit in the buffer?
1721 		 */
1722 		if (outcount + reclen > bufsize) {
1723 			/*
1724 			 * Did we manage to fit anything in the buffer?
1725 			 */
1726 			if (!outcount) {
1727 				error = SET_ERROR(EINVAL);
1728 				goto update;
1729 			}
1730 			break;
1731 		}
1732 		/*
1733 		 * Add normal entry:
1734 		 */
1735 		odp->d_ino = objnum;
1736 		odp->d_reclen = reclen;
1737 		odp->d_namlen = strlen(zap.za_name);
1738 		/* NOTE: d_off is the offset for the *next* entry. */
1739 		next = &odp->d_off;
1740 		strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1);
1741 		odp->d_type = type;
1742 		dirent_terminate(odp);
1743 		odp = (dirent64_t *)((intptr_t)odp + reclen);
1744 
1745 		outcount += reclen;
1746 
1747 		ASSERT3S(outcount, <=, bufsize);
1748 
1749 		if (prefetch)
1750 			dmu_prefetch_dnode(os, objnum, ZIO_PRIORITY_SYNC_READ);
1751 
1752 		/*
1753 		 * Move to the next entry, fill in the previous offset.
1754 		 */
1755 		if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) {
1756 			zap_cursor_advance(&zc);
1757 			offset = zap_cursor_serialize(&zc);
1758 		} else {
1759 			offset += 1;
1760 		}
1761 
1762 		/* Fill the offset right after advancing the cursor. */
1763 		if (next != NULL)
1764 			*next = offset;
1765 		if (cooks != NULL) {
1766 			*cooks++ = offset;
1767 			ncooks--;
1768 			KASSERT(ncooks >= 0, ("ncookies=%d", ncooks));
1769 		}
1770 	}
1771 	zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */
1772 
1773 	/* Subtract unused cookies */
1774 	if (ncookies != NULL)
1775 		*ncookies -= ncooks;
1776 
1777 	if (zfs_uio_segflg(uio) == UIO_SYSSPACE && zfs_uio_iovcnt(uio) == 1) {
1778 		iovp->iov_base += outcount;
1779 		iovp->iov_len -= outcount;
1780 		zfs_uio_resid(uio) -= outcount;
1781 	} else if ((error =
1782 	    zfs_uiomove(outbuf, (long)outcount, UIO_READ, uio))) {
1783 		/*
1784 		 * Reset the pointer.
1785 		 */
1786 		offset = zfs_uio_offset(uio);
1787 	}
1788 
1789 update:
1790 	zap_cursor_fini(&zc);
1791 	if (zfs_uio_segflg(uio) != UIO_SYSSPACE || zfs_uio_iovcnt(uio) != 1)
1792 		kmem_free(outbuf, bufsize);
1793 
1794 	if (error == ENOENT)
1795 		error = 0;
1796 
1797 	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
1798 
1799 	zfs_uio_setoffset(uio, offset);
1800 	zfs_exit(zfsvfs, FTAG);
1801 	if (error != 0 && cookies != NULL) {
1802 		free(*cookies, M_TEMP);
1803 		*cookies = NULL;
1804 		*ncookies = 0;
1805 	}
1806 	return (error);
1807 }
1808 
1809 /*
1810  * Get the requested file attributes and place them in the provided
1811  * vattr structure.
1812  *
1813  *	IN:	vp	- vnode of file.
1814  *		vap	- va_mask identifies requested attributes.
1815  *			  If AT_XVATTR set, then optional attrs are requested
1816  *		flags	- ATTR_NOACLCHECK (CIFS server context)
1817  *		cr	- credentials of caller.
1818  *
1819  *	OUT:	vap	- attribute values.
1820  *
1821  *	RETURN:	0 (always succeeds).
1822  */
1823 static int
1824 zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr)
1825 {
1826 	znode_t *zp = VTOZ(vp);
1827 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1828 	int	error = 0;
1829 	uint32_t blksize;
1830 	u_longlong_t nblocks;
1831 	uint64_t mtime[2], ctime[2], crtime[2], rdev;
1832 	xvattr_t *xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
1833 	xoptattr_t *xoap = NULL;
1834 	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
1835 	sa_bulk_attr_t bulk[4];
1836 	int count = 0;
1837 
1838 	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
1839 		return (error);
1840 
1841 	zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid);
1842 
1843 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
1844 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
1845 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16);
1846 	if (vp->v_type == VBLK || vp->v_type == VCHR)
1847 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL,
1848 		    &rdev, 8);
1849 
1850 	if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) {
1851 		zfs_exit(zfsvfs, FTAG);
1852 		return (error);
1853 	}
1854 
1855 	/*
1856 	 * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES.
1857 	 * Also, if we are the owner don't bother, since owner should
1858 	 * always be allowed to read basic attributes of file.
1859 	 */
1860 	if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) &&
1861 	    (vap->va_uid != crgetuid(cr))) {
1862 		if ((error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0,
1863 		    skipaclchk, cr, NULL))) {
1864 			zfs_exit(zfsvfs, FTAG);
1865 			return (error);
1866 		}
1867 	}
1868 
1869 	/*
1870 	 * Return all attributes.  It's cheaper to provide the answer
1871 	 * than to determine whether we were asked the question.
1872 	 */
1873 
1874 	vap->va_type = IFTOVT(zp->z_mode);
1875 	vap->va_mode = zp->z_mode & ~S_IFMT;
1876 	vn_fsid(vp, vap);
1877 	vap->va_nodeid = zp->z_id;
1878 	vap->va_nlink = zp->z_links;
1879 	if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp) &&
1880 	    zp->z_links < ZFS_LINK_MAX)
1881 		vap->va_nlink++;
1882 	vap->va_size = zp->z_size;
1883 	if (vp->v_type == VBLK || vp->v_type == VCHR)
1884 		vap->va_rdev = zfs_cmpldev(rdev);
1885 	else
1886 		vap->va_rdev = 0;
1887 	vap->va_gen = zp->z_gen;
1888 	vap->va_flags = 0;	/* FreeBSD: Reset chflags(2) flags. */
1889 	vap->va_filerev = zp->z_seq;
1890 
1891 	/*
1892 	 * Add in any requested optional attributes and the create time.
1893 	 * Also set the corresponding bits in the returned attribute bitmap.
1894 	 */
1895 	if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) {
1896 		if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
1897 			xoap->xoa_archive =
1898 			    ((zp->z_pflags & ZFS_ARCHIVE) != 0);
1899 			XVA_SET_RTN(xvap, XAT_ARCHIVE);
1900 		}
1901 
1902 		if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
1903 			xoap->xoa_readonly =
1904 			    ((zp->z_pflags & ZFS_READONLY) != 0);
1905 			XVA_SET_RTN(xvap, XAT_READONLY);
1906 		}
1907 
1908 		if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
1909 			xoap->xoa_system =
1910 			    ((zp->z_pflags & ZFS_SYSTEM) != 0);
1911 			XVA_SET_RTN(xvap, XAT_SYSTEM);
1912 		}
1913 
1914 		if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
1915 			xoap->xoa_hidden =
1916 			    ((zp->z_pflags & ZFS_HIDDEN) != 0);
1917 			XVA_SET_RTN(xvap, XAT_HIDDEN);
1918 		}
1919 
1920 		if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
1921 			xoap->xoa_nounlink =
1922 			    ((zp->z_pflags & ZFS_NOUNLINK) != 0);
1923 			XVA_SET_RTN(xvap, XAT_NOUNLINK);
1924 		}
1925 
1926 		if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
1927 			xoap->xoa_immutable =
1928 			    ((zp->z_pflags & ZFS_IMMUTABLE) != 0);
1929 			XVA_SET_RTN(xvap, XAT_IMMUTABLE);
1930 		}
1931 
1932 		if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
1933 			xoap->xoa_appendonly =
1934 			    ((zp->z_pflags & ZFS_APPENDONLY) != 0);
1935 			XVA_SET_RTN(xvap, XAT_APPENDONLY);
1936 		}
1937 
1938 		if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
1939 			xoap->xoa_nodump =
1940 			    ((zp->z_pflags & ZFS_NODUMP) != 0);
1941 			XVA_SET_RTN(xvap, XAT_NODUMP);
1942 		}
1943 
1944 		if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
1945 			xoap->xoa_opaque =
1946 			    ((zp->z_pflags & ZFS_OPAQUE) != 0);
1947 			XVA_SET_RTN(xvap, XAT_OPAQUE);
1948 		}
1949 
1950 		if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
1951 			xoap->xoa_av_quarantined =
1952 			    ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0);
1953 			XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
1954 		}
1955 
1956 		if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
1957 			xoap->xoa_av_modified =
1958 			    ((zp->z_pflags & ZFS_AV_MODIFIED) != 0);
1959 			XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
1960 		}
1961 
1962 		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) &&
1963 		    vp->v_type == VREG) {
1964 			zfs_sa_get_scanstamp(zp, xvap);
1965 		}
1966 
1967 		if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
1968 			xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0);
1969 			XVA_SET_RTN(xvap, XAT_REPARSE);
1970 		}
1971 		if (XVA_ISSET_REQ(xvap, XAT_GEN)) {
1972 			xoap->xoa_generation = zp->z_gen;
1973 			XVA_SET_RTN(xvap, XAT_GEN);
1974 		}
1975 
1976 		if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
1977 			xoap->xoa_offline =
1978 			    ((zp->z_pflags & ZFS_OFFLINE) != 0);
1979 			XVA_SET_RTN(xvap, XAT_OFFLINE);
1980 		}
1981 
1982 		if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
1983 			xoap->xoa_sparse =
1984 			    ((zp->z_pflags & ZFS_SPARSE) != 0);
1985 			XVA_SET_RTN(xvap, XAT_SPARSE);
1986 		}
1987 
1988 		if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
1989 			xoap->xoa_projinherit =
1990 			    ((zp->z_pflags & ZFS_PROJINHERIT) != 0);
1991 			XVA_SET_RTN(xvap, XAT_PROJINHERIT);
1992 		}
1993 
1994 		if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
1995 			xoap->xoa_projid = zp->z_projid;
1996 			XVA_SET_RTN(xvap, XAT_PROJID);
1997 		}
1998 	}
1999 
2000 	ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime);
2001 	ZFS_TIME_DECODE(&vap->va_mtime, mtime);
2002 	ZFS_TIME_DECODE(&vap->va_ctime, ctime);
2003 	ZFS_TIME_DECODE(&vap->va_birthtime, crtime);
2004 
2005 
2006 	sa_object_size(zp->z_sa_hdl, &blksize, &nblocks);
2007 	vap->va_blksize = blksize;
2008 	vap->va_bytes = nblocks << 9;	/* nblocks * 512 */
2009 
2010 	if (zp->z_blksz == 0) {
2011 		/*
2012 		 * Block size hasn't been set; suggest maximal I/O transfers.
2013 		 */
2014 		vap->va_blksize = zfsvfs->z_max_blksz;
2015 	}
2016 
2017 	zfs_exit(zfsvfs, FTAG);
2018 	return (0);
2019 }
2020 
2021 /*
2022  * Set the file attributes to the values contained in the
2023  * vattr structure.
2024  *
2025  *	IN:	zp	- znode of file to be modified.
2026  *		vap	- new attribute values.
2027  *			  If AT_XVATTR set, then optional attrs are being set
2028  *		flags	- ATTR_UTIME set if non-default time values provided.
2029  *			- ATTR_NOACLCHECK (CIFS context only).
2030  *		cr	- credentials of caller.
2031  *		mnt_ns	- Unused on FreeBSD
2032  *
2033  *	RETURN:	0 on success, error code on failure.
2034  *
2035  * Timestamps:
2036  *	vp - ctime updated, mtime updated if size changed.
2037  */
2038 int
2039 zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr, zidmap_t *mnt_ns)
2040 {
2041 	vnode_t		*vp = ZTOV(zp);
2042 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
2043 	objset_t	*os;
2044 	zilog_t		*zilog;
2045 	dmu_tx_t	*tx;
2046 	vattr_t		oldva;
2047 	xvattr_t	tmpxvattr;
2048 	uint_t		mask = vap->va_mask;
2049 	uint_t		saved_mask = 0;
2050 	uint64_t	saved_mode;
2051 	int		trim_mask = 0;
2052 	uint64_t	new_mode;
2053 	uint64_t	new_uid, new_gid;
2054 	uint64_t	xattr_obj;
2055 	uint64_t	mtime[2], ctime[2];
2056 	uint64_t	projid = ZFS_INVALID_PROJID;
2057 	znode_t		*attrzp;
2058 	int		need_policy = FALSE;
2059 	int		err, err2;
2060 	zfs_fuid_info_t *fuidp = NULL;
2061 	xvattr_t *xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
2062 	xoptattr_t	*xoap;
2063 	zfs_acl_t	*aclp;
2064 	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
2065 	boolean_t	fuid_dirtied = B_FALSE;
2066 	sa_bulk_attr_t	bulk[7], xattr_bulk[7];
2067 	int		count = 0, xattr_count = 0;
2068 
2069 	if (mask == 0)
2070 		return (0);
2071 
2072 	if (mask & AT_NOSET)
2073 		return (SET_ERROR(EINVAL));
2074 
2075 	if ((err = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
2076 		return (err);
2077 
2078 	os = zfsvfs->z_os;
2079 	zilog = zfsvfs->z_log;
2080 
2081 	/*
2082 	 * Make sure that if we have ephemeral uid/gid or xvattr specified
2083 	 * that the file system is at the proper version level
2084 	 */
2085 
2086 	if (zfsvfs->z_use_fuids == B_FALSE &&
2087 	    (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) ||
2088 	    ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) ||
2089 	    (mask & AT_XVATTR))) {
2090 		zfs_exit(zfsvfs, FTAG);
2091 		return (SET_ERROR(EINVAL));
2092 	}
2093 
2094 	if (mask & AT_SIZE && vp->v_type == VDIR) {
2095 		zfs_exit(zfsvfs, FTAG);
2096 		return (SET_ERROR(EISDIR));
2097 	}
2098 
2099 	if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) {
2100 		zfs_exit(zfsvfs, FTAG);
2101 		return (SET_ERROR(EINVAL));
2102 	}
2103 
2104 	/*
2105 	 * If this is an xvattr_t, then get a pointer to the structure of
2106 	 * optional attributes.  If this is NULL, then we have a vattr_t.
2107 	 */
2108 	xoap = xva_getxoptattr(xvap);
2109 
2110 	xva_init(&tmpxvattr);
2111 
2112 	/*
2113 	 * For immutable files, only the immutable bit and atime can be altered
2114 	 */
2115 	if ((zp->z_pflags & ZFS_IMMUTABLE) &&
2116 	    ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) ||
2117 	    ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) {
2118 		zfs_exit(zfsvfs, FTAG);
2119 		return (SET_ERROR(EPERM));
2120 	}
2121 
2122 	/*
2123 	 * Note: ZFS_READONLY is handled in zfs_zaccess_common.
2124 	 */
2125 
2126 	/*
2127 	 * Verify that the timestamps don't overflow 32 bits.
2128 	 * ZFS can handle large timestamps, but 32-bit syscalls can't
2129 	 * handle times greater than 2039.  This check should be removed
2130 	 * once large timestamps are fully supported.
2131 	 */
2132 	if (mask & (AT_ATIME | AT_MTIME)) {
2133 		if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) ||
2134 		    ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) {
2135 			zfs_exit(zfsvfs, FTAG);
2136 			return (SET_ERROR(EOVERFLOW));
2137 		}
2138 	}
2139 	if (xoap != NULL && (mask & AT_XVATTR)) {
2140 		if (XVA_ISSET_REQ(xvap, XAT_CREATETIME) &&
2141 		    TIMESPEC_OVERFLOW(&vap->va_birthtime)) {
2142 			zfs_exit(zfsvfs, FTAG);
2143 			return (SET_ERROR(EOVERFLOW));
2144 		}
2145 
2146 		if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
2147 			if (!dmu_objset_projectquota_enabled(os) ||
2148 			    (!S_ISREG(zp->z_mode) && !S_ISDIR(zp->z_mode))) {
2149 				zfs_exit(zfsvfs, FTAG);
2150 				return (SET_ERROR(EOPNOTSUPP));
2151 			}
2152 
2153 			projid = xoap->xoa_projid;
2154 			if (unlikely(projid == ZFS_INVALID_PROJID)) {
2155 				zfs_exit(zfsvfs, FTAG);
2156 				return (SET_ERROR(EINVAL));
2157 			}
2158 
2159 			if (projid == zp->z_projid && zp->z_pflags & ZFS_PROJID)
2160 				projid = ZFS_INVALID_PROJID;
2161 			else
2162 				need_policy = TRUE;
2163 		}
2164 
2165 		if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT) &&
2166 		    (xoap->xoa_projinherit !=
2167 		    ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) &&
2168 		    (!dmu_objset_projectquota_enabled(os) ||
2169 		    (!S_ISREG(zp->z_mode) && !S_ISDIR(zp->z_mode)))) {
2170 			zfs_exit(zfsvfs, FTAG);
2171 			return (SET_ERROR(EOPNOTSUPP));
2172 		}
2173 	}
2174 
2175 	attrzp = NULL;
2176 	aclp = NULL;
2177 
2178 	if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
2179 		zfs_exit(zfsvfs, FTAG);
2180 		return (SET_ERROR(EROFS));
2181 	}
2182 
2183 	/*
2184 	 * First validate permissions
2185 	 */
2186 
2187 	if (mask & AT_SIZE) {
2188 		/*
2189 		 * XXX - Note, we are not providing any open
2190 		 * mode flags here (like FNDELAY), so we may
2191 		 * block if there are locks present... this
2192 		 * should be addressed in openat().
2193 		 */
2194 		/* XXX - would it be OK to generate a log record here? */
2195 		err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE);
2196 		if (err) {
2197 			zfs_exit(zfsvfs, FTAG);
2198 			return (err);
2199 		}
2200 	}
2201 
2202 	if (mask & (AT_ATIME|AT_MTIME) ||
2203 	    ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) ||
2204 	    XVA_ISSET_REQ(xvap, XAT_READONLY) ||
2205 	    XVA_ISSET_REQ(xvap, XAT_ARCHIVE) ||
2206 	    XVA_ISSET_REQ(xvap, XAT_OFFLINE) ||
2207 	    XVA_ISSET_REQ(xvap, XAT_SPARSE) ||
2208 	    XVA_ISSET_REQ(xvap, XAT_CREATETIME) ||
2209 	    XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) {
2210 		need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0,
2211 		    skipaclchk, cr, mnt_ns);
2212 	}
2213 
2214 	if (mask & (AT_UID|AT_GID)) {
2215 		int	idmask = (mask & (AT_UID|AT_GID));
2216 		int	take_owner;
2217 		int	take_group;
2218 
2219 		/*
2220 		 * NOTE: even if a new mode is being set,
2221 		 * we may clear S_ISUID/S_ISGID bits.
2222 		 */
2223 
2224 		if (!(mask & AT_MODE))
2225 			vap->va_mode = zp->z_mode;
2226 
2227 		/*
2228 		 * Take ownership or chgrp to a group we are a member of
2229 		 */
2230 
2231 		take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr));
2232 		take_group = (mask & AT_GID) &&
2233 		    zfs_groupmember(zfsvfs, vap->va_gid, cr);
2234 
2235 		/*
2236 		 * If both AT_UID and AT_GID are set then take_owner and
2237 		 * take_group must both be set in order to allow taking
2238 		 * ownership.
2239 		 *
2240 		 * Otherwise, send the check through secpolicy_vnode_setattr()
2241 		 *
2242 		 */
2243 
2244 		if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) ||
2245 		    ((idmask == AT_UID) && take_owner) ||
2246 		    ((idmask == AT_GID) && take_group)) {
2247 			if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0,
2248 			    skipaclchk, cr, mnt_ns) == 0) {
2249 				/*
2250 				 * Remove setuid/setgid for non-privileged users
2251 				 */
2252 				secpolicy_setid_clear(vap, vp, cr);
2253 				trim_mask = (mask & (AT_UID|AT_GID));
2254 			} else {
2255 				need_policy =  TRUE;
2256 			}
2257 		} else {
2258 			need_policy =  TRUE;
2259 		}
2260 	}
2261 
2262 	oldva.va_mode = zp->z_mode;
2263 	zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid);
2264 	if (mask & AT_XVATTR) {
2265 		/*
2266 		 * Update xvattr mask to include only those attributes
2267 		 * that are actually changing.
2268 		 *
2269 		 * The bits will be restored prior to actually setting
2270 		 * the attributes so the caller thinks they were set.
2271 		 */
2272 		if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
2273 			if (xoap->xoa_appendonly !=
2274 			    ((zp->z_pflags & ZFS_APPENDONLY) != 0)) {
2275 				need_policy = TRUE;
2276 			} else {
2277 				XVA_CLR_REQ(xvap, XAT_APPENDONLY);
2278 				XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY);
2279 			}
2280 		}
2281 
2282 		if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
2283 			if (xoap->xoa_projinherit !=
2284 			    ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) {
2285 				need_policy = TRUE;
2286 			} else {
2287 				XVA_CLR_REQ(xvap, XAT_PROJINHERIT);
2288 				XVA_SET_REQ(&tmpxvattr, XAT_PROJINHERIT);
2289 			}
2290 		}
2291 
2292 		if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
2293 			if (xoap->xoa_nounlink !=
2294 			    ((zp->z_pflags & ZFS_NOUNLINK) != 0)) {
2295 				need_policy = TRUE;
2296 			} else {
2297 				XVA_CLR_REQ(xvap, XAT_NOUNLINK);
2298 				XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK);
2299 			}
2300 		}
2301 
2302 		if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
2303 			if (xoap->xoa_immutable !=
2304 			    ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) {
2305 				need_policy = TRUE;
2306 			} else {
2307 				XVA_CLR_REQ(xvap, XAT_IMMUTABLE);
2308 				XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE);
2309 			}
2310 		}
2311 
2312 		if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
2313 			if (xoap->xoa_nodump !=
2314 			    ((zp->z_pflags & ZFS_NODUMP) != 0)) {
2315 				need_policy = TRUE;
2316 			} else {
2317 				XVA_CLR_REQ(xvap, XAT_NODUMP);
2318 				XVA_SET_REQ(&tmpxvattr, XAT_NODUMP);
2319 			}
2320 		}
2321 
2322 		if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
2323 			if (xoap->xoa_av_modified !=
2324 			    ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) {
2325 				need_policy = TRUE;
2326 			} else {
2327 				XVA_CLR_REQ(xvap, XAT_AV_MODIFIED);
2328 				XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED);
2329 			}
2330 		}
2331 
2332 		if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
2333 			if ((vp->v_type != VREG &&
2334 			    xoap->xoa_av_quarantined) ||
2335 			    xoap->xoa_av_quarantined !=
2336 			    ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) {
2337 				need_policy = TRUE;
2338 			} else {
2339 				XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED);
2340 				XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED);
2341 			}
2342 		}
2343 
2344 		if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
2345 			zfs_exit(zfsvfs, FTAG);
2346 			return (SET_ERROR(EPERM));
2347 		}
2348 
2349 		if (need_policy == FALSE &&
2350 		    (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) ||
2351 		    XVA_ISSET_REQ(xvap, XAT_OPAQUE))) {
2352 			need_policy = TRUE;
2353 		}
2354 	}
2355 
2356 	if (mask & AT_MODE) {
2357 		if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr,
2358 		    mnt_ns) == 0) {
2359 			err = secpolicy_setid_setsticky_clear(vp, vap,
2360 			    &oldva, cr);
2361 			if (err) {
2362 				zfs_exit(zfsvfs, FTAG);
2363 				return (err);
2364 			}
2365 			trim_mask |= AT_MODE;
2366 		} else {
2367 			need_policy = TRUE;
2368 		}
2369 	}
2370 
2371 	if (need_policy) {
2372 		/*
2373 		 * If trim_mask is set then take ownership
2374 		 * has been granted or write_acl is present and the user
2375 		 * has the ability to modify the mode.  In that case remove
2376 		 * UID|GID and/or MODE from the mask so that
2377 		 * secpolicy_vnode_setattr() doesn't revoke it.
2378 		 */
2379 
2380 		if (trim_mask) {
2381 			saved_mask = vap->va_mask;
2382 			vap->va_mask &= ~trim_mask;
2383 			if (trim_mask & AT_MODE) {
2384 				/*
2385 				 * Save the mode, as secpolicy_vnode_setattr()
2386 				 * will overwrite it with ova.va_mode.
2387 				 */
2388 				saved_mode = vap->va_mode;
2389 			}
2390 		}
2391 		err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags,
2392 		    (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp);
2393 		if (err) {
2394 			zfs_exit(zfsvfs, FTAG);
2395 			return (err);
2396 		}
2397 
2398 		if (trim_mask) {
2399 			vap->va_mask |= saved_mask;
2400 			if (trim_mask & AT_MODE) {
2401 				/*
2402 				 * Recover the mode after
2403 				 * secpolicy_vnode_setattr().
2404 				 */
2405 				vap->va_mode = saved_mode;
2406 			}
2407 		}
2408 	}
2409 
2410 	/*
2411 	 * secpolicy_vnode_setattr() or the take-ownership path may have
2412 	 * changed va_mask
2413 	 */
2414 	mask = vap->va_mask;
2415 
2416 	if ((mask & (AT_UID | AT_GID)) || projid != ZFS_INVALID_PROJID) {
2417 		err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
2418 		    &xattr_obj, sizeof (xattr_obj));
2419 
2420 		if (err == 0 && xattr_obj) {
2421 			err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp);
2422 			if (err == 0) {
2423 				err = vn_lock(ZTOV(attrzp), LK_EXCLUSIVE);
2424 				if (err != 0)
2425 					vrele(ZTOV(attrzp));
2426 			}
2427 			if (err)
2428 				goto out2;
2429 		}
2430 		if (mask & AT_UID) {
2431 			new_uid = zfs_fuid_create(zfsvfs,
2432 			    (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp);
2433 			if (new_uid != zp->z_uid &&
2434 			    zfs_id_overquota(zfsvfs, DMU_USERUSED_OBJECT,
2435 			    new_uid)) {
2436 				if (attrzp)
2437 					vput(ZTOV(attrzp));
2438 				err = SET_ERROR(EDQUOT);
2439 				goto out2;
2440 			}
2441 		}
2442 
2443 		if (mask & AT_GID) {
2444 			new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid,
2445 			    cr, ZFS_GROUP, &fuidp);
2446 			if (new_gid != zp->z_gid &&
2447 			    zfs_id_overquota(zfsvfs, DMU_GROUPUSED_OBJECT,
2448 			    new_gid)) {
2449 				if (attrzp)
2450 					vput(ZTOV(attrzp));
2451 				err = SET_ERROR(EDQUOT);
2452 				goto out2;
2453 			}
2454 		}
2455 
2456 		if (projid != ZFS_INVALID_PROJID &&
2457 		    zfs_id_overquota(zfsvfs, DMU_PROJECTUSED_OBJECT, projid)) {
2458 			if (attrzp)
2459 				vput(ZTOV(attrzp));
2460 			err = SET_ERROR(EDQUOT);
2461 			goto out2;
2462 		}
2463 	}
2464 	tx = dmu_tx_create(os);
2465 
2466 	if (mask & AT_MODE) {
2467 		uint64_t pmode = zp->z_mode;
2468 		uint64_t acl_obj;
2469 		new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT);
2470 
2471 		if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED &&
2472 		    !(zp->z_pflags & ZFS_ACL_TRIVIAL)) {
2473 			err = SET_ERROR(EPERM);
2474 			goto out;
2475 		}
2476 
2477 		if ((err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)))
2478 			goto out;
2479 
2480 		if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) {
2481 			/*
2482 			 * Are we upgrading ACL from old V0 format
2483 			 * to V1 format?
2484 			 */
2485 			if (zfsvfs->z_version >= ZPL_VERSION_FUID &&
2486 			    zfs_znode_acl_version(zp) ==
2487 			    ZFS_ACL_VERSION_INITIAL) {
2488 				dmu_tx_hold_free(tx, acl_obj, 0,
2489 				    DMU_OBJECT_END);
2490 				dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
2491 				    0, aclp->z_acl_bytes);
2492 			} else {
2493 				dmu_tx_hold_write(tx, acl_obj, 0,
2494 				    aclp->z_acl_bytes);
2495 			}
2496 		} else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) {
2497 			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
2498 			    0, aclp->z_acl_bytes);
2499 		}
2500 		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
2501 	} else {
2502 		if (((mask & AT_XVATTR) &&
2503 		    XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) ||
2504 		    (projid != ZFS_INVALID_PROJID &&
2505 		    !(zp->z_pflags & ZFS_PROJID)))
2506 			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
2507 		else
2508 			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
2509 	}
2510 
2511 	if (attrzp) {
2512 		dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE);
2513 	}
2514 
2515 	fuid_dirtied = zfsvfs->z_fuid_dirty;
2516 	if (fuid_dirtied)
2517 		zfs_fuid_txhold(zfsvfs, tx);
2518 
2519 	zfs_sa_upgrade_txholds(tx, zp);
2520 
2521 	err = dmu_tx_assign(tx, TXG_WAIT);
2522 	if (err)
2523 		goto out;
2524 
2525 	count = 0;
2526 	/*
2527 	 * Set each attribute requested.
2528 	 * We group settings according to the locks they need to acquire.
2529 	 *
2530 	 * Note: you cannot set ctime directly, although it will be
2531 	 * updated as a side-effect of calling this function.
2532 	 */
2533 
2534 	if (projid != ZFS_INVALID_PROJID && !(zp->z_pflags & ZFS_PROJID)) {
2535 		/*
2536 		 * For an existing object that was upgraded from an old system,
2537 		 * the on-disk layout has no slot for the project ID attribute.
2538 		 * But the quota accounting logic needs to access related slots
2539 		 * by offset directly, so we adjust the old object's layout to
2540 		 * place the project ID at a unified and fixed offset.
2541 		 */
2542 		if (attrzp)
2543 			err = sa_add_projid(attrzp->z_sa_hdl, tx, projid);
2544 		if (err == 0)
2545 			err = sa_add_projid(zp->z_sa_hdl, tx, projid);
2546 
2547 		if (unlikely(err == EEXIST))
2548 			err = 0;
2549 		else if (err != 0)
2550 			goto out;
2551 		else
2552 			projid = ZFS_INVALID_PROJID;
2553 	}
2554 
2555 	if (mask & (AT_UID|AT_GID|AT_MODE))
2556 		mutex_enter(&zp->z_acl_lock);
2557 
2558 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
2559 	    &zp->z_pflags, sizeof (zp->z_pflags));
2560 
2561 	if (attrzp) {
2562 		if (mask & (AT_UID|AT_GID|AT_MODE))
2563 			mutex_enter(&attrzp->z_acl_lock);
2564 		SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2565 		    SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags,
2566 		    sizeof (attrzp->z_pflags));
2567 		if (projid != ZFS_INVALID_PROJID) {
2568 			attrzp->z_projid = projid;
2569 			SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2570 			    SA_ZPL_PROJID(zfsvfs), NULL, &attrzp->z_projid,
2571 			    sizeof (attrzp->z_projid));
2572 		}
2573 	}
2574 
2575 	if (mask & (AT_UID|AT_GID)) {
2576 
2577 		if (mask & AT_UID) {
2578 			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
2579 			    &new_uid, sizeof (new_uid));
2580 			zp->z_uid = new_uid;
2581 			if (attrzp) {
2582 				SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2583 				    SA_ZPL_UID(zfsvfs), NULL, &new_uid,
2584 				    sizeof (new_uid));
2585 				attrzp->z_uid = new_uid;
2586 			}
2587 		}
2588 
2589 		if (mask & AT_GID) {
2590 			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs),
2591 			    NULL, &new_gid, sizeof (new_gid));
2592 			zp->z_gid = new_gid;
2593 			if (attrzp) {
2594 				SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2595 				    SA_ZPL_GID(zfsvfs), NULL, &new_gid,
2596 				    sizeof (new_gid));
2597 				attrzp->z_gid = new_gid;
2598 			}
2599 		}
2600 		if (!(mask & AT_MODE)) {
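		/*
		 * SA_ADD_BULK_ATTR() stores the address of new_mode, so the
		 * assignment below takes effect when sa_bulk_update() runs.
		 */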
2601 			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs),
2602 			    NULL, &new_mode, sizeof (new_mode));
2603 			new_mode = zp->z_mode;
2604 		}
2605 		err = zfs_acl_chown_setattr(zp);
2606 		ASSERT0(err);
2607 		if (attrzp) {
2608 			vn_seqc_write_begin(ZTOV(attrzp));
2609 			err = zfs_acl_chown_setattr(attrzp);
2610 			vn_seqc_write_end(ZTOV(attrzp));
2611 			ASSERT0(err);
2612 		}
2613 	}
2614 
2615 	if (mask & AT_MODE) {
2616 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
2617 		    &new_mode, sizeof (new_mode));
2618 		zp->z_mode = new_mode;
2619 		ASSERT3P(aclp, !=, NULL);
2620 		err = zfs_aclset_common(zp, aclp, cr, tx);
2621 		ASSERT0(err);
2622 		if (zp->z_acl_cached)
2623 			zfs_acl_free(zp->z_acl_cached);
2624 		zp->z_acl_cached = aclp;
2625 		aclp = NULL;
2626 	}
2627 
2628 
2629 	if (mask & AT_ATIME) {
2630 		ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime);
2631 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
2632 		    &zp->z_atime, sizeof (zp->z_atime));
2633 	}
2634 
2635 	if (mask & AT_MTIME) {
2636 		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
2637 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
2638 		    mtime, sizeof (mtime));
2639 	}
2640 
2641 	if (projid != ZFS_INVALID_PROJID) {
2642 		zp->z_projid = projid;
2643 		SA_ADD_BULK_ATTR(bulk, count,
2644 		    SA_ZPL_PROJID(zfsvfs), NULL, &zp->z_projid,
2645 		    sizeof (zp->z_projid));
2646 	}
2647 
2648 	/* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */
2649 	if (mask & AT_SIZE && !(mask & AT_MTIME)) {
2650 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs),
2651 		    NULL, mtime, sizeof (mtime));
2652 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
2653 		    &ctime, sizeof (ctime));
2654 		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
2655 	} else if (mask != 0) {
2656 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
2657 		    &ctime, sizeof (ctime));
2658 		zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime);
2659 		if (attrzp) {
2660 			SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2661 			    SA_ZPL_CTIME(zfsvfs), NULL,
2662 			    &ctime, sizeof (ctime));
2663 			zfs_tstamp_update_setup(attrzp, STATE_CHANGED,
2664 			    mtime, ctime);
2665 		}
2666 	}
2667 
2668 	/*
2669 	 * Do this after setting the timestamps to prevent the timestamp
2670 	 * update from toggling a bit
2671 	 */
2672 
2673 	if (xoap && (mask & AT_XVATTR)) {
2674 
2675 		if (XVA_ISSET_REQ(xvap, XAT_CREATETIME))
2676 			xoap->xoa_createtime = vap->va_birthtime;
2677 		/*
2678 		 * Restore the trimmed-off masks
2679 		 * so that the return masks can be set for the caller.
2680 		 */
2681 
2682 		if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) {
2683 			XVA_SET_REQ(xvap, XAT_APPENDONLY);
2684 		}
2685 		if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) {
2686 			XVA_SET_REQ(xvap, XAT_NOUNLINK);
2687 		}
2688 		if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) {
2689 			XVA_SET_REQ(xvap, XAT_IMMUTABLE);
2690 		}
2691 		if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) {
2692 			XVA_SET_REQ(xvap, XAT_NODUMP);
2693 		}
2694 		if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) {
2695 			XVA_SET_REQ(xvap, XAT_AV_MODIFIED);
2696 		}
2697 		if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) {
2698 			XVA_SET_REQ(xvap, XAT_AV_QUARANTINED);
2699 		}
2700 		if (XVA_ISSET_REQ(&tmpxvattr, XAT_PROJINHERIT)) {
2701 			XVA_SET_REQ(xvap, XAT_PROJINHERIT);
2702 		}
2703 
2704 		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
2705 			ASSERT3S(vp->v_type, ==, VREG);
2706 
2707 		zfs_xvattr_set(zp, xvap, tx);
2708 	}
2709 
2710 	if (fuid_dirtied)
2711 		zfs_fuid_sync(zfsvfs, tx);
2712 
2713 	if (mask != 0)
2714 		zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp);
2715 
2716 	if (mask & (AT_UID|AT_GID|AT_MODE))
2717 		mutex_exit(&zp->z_acl_lock);
2718 
2719 	if (attrzp) {
2720 		if (mask & (AT_UID|AT_GID|AT_MODE))
2721 			mutex_exit(&attrzp->z_acl_lock);
2722 	}
2723 out:
2724 	if (err == 0 && attrzp) {
2725 		err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk,
2726 		    xattr_count, tx);
2727 		ASSERT0(err2);
2728 	}
2729 
2730 	if (attrzp)
2731 		vput(ZTOV(attrzp));
2732 
2733 	if (aclp)
2734 		zfs_acl_free(aclp);
2735 
2736 	if (fuidp) {
2737 		zfs_fuid_info_free(fuidp);
2738 		fuidp = NULL;
2739 	}
2740 
2741 	if (err) {
2742 		dmu_tx_abort(tx);
2743 	} else {
2744 		err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
2745 		dmu_tx_commit(tx);
2746 	}
2747 
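/*
 * out2 is reached either before the transaction has been created or after
 * it has already been committed or aborted, so no tx cleanup is done here.
 */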
2748 out2:
2749 	if (os->os_sync == ZFS_SYNC_ALWAYS)
2750 		zil_commit(zilog, 0);
2751 
2752 	zfs_exit(zfsvfs, FTAG);
2753 	return (err);
2754 }
2755 
2756 /*
2757  * Look up the directory entries corresponding to the source and target
2758  * directory/name pairs.
2759  */
2760 static int
2761 zfs_rename_relock_lookup(znode_t *sdzp, const struct componentname *scnp,
2762     znode_t **szpp, znode_t *tdzp, const struct componentname *tcnp,
2763     znode_t **tzpp)
2764 {
2765 	zfsvfs_t *zfsvfs;
2766 	znode_t *szp, *tzp;
2767 	int error;
2768 
2769 	/*
2770 	 * Before using sdzp and tdzp we must ensure that they are live.
2771 	 * As a porting legacy from illumos we have two things to worry
2772 	 * about.  One is typical for FreeBSD and it is that the vnode is
2773 	 * not reclaimed (doomed).  The other is that the znode is live.
2774 	 * The current code can invalidate the znode without acquiring the
2775 	 * corresponding vnode lock if the object represented by the znode
2776 	 * and vnode is no longer valid after a rollback or receive operation.
2777 	 * z_teardown_lock hidden behind zfs_enter and zfs_exit is the lock
2778 	 * that protects the znodes from the invalidation.
2779 	 */
2780 	zfsvfs = sdzp->z_zfsvfs;
2781 	ASSERT3P(zfsvfs, ==, tdzp->z_zfsvfs);
2782 	if ((error = zfs_enter_verify_zp(zfsvfs, sdzp, FTAG)) != 0)
2783 		return (error);
2784 	if ((error = zfs_verify_zp(tdzp)) != 0) {
2785 		zfs_exit(zfsvfs, FTAG);
2786 		return (error);
2787 	}
2788 
2789 	/*
2790 	 * Re-resolve svp to be certain it still exists and fetch the
2791 	 * correct vnode.
2792 	 */
2793 	error = zfs_dirent_lookup(sdzp, scnp->cn_nameptr, &szp, ZEXISTS);
2794 	if (error != 0) {
2795 		/* Source entry invalid or not there. */
2796 		if ((scnp->cn_flags & ISDOTDOT) != 0 ||
2797 		    (scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.'))
2798 			error = SET_ERROR(EINVAL);
2799 		goto out;
2800 	}
2801 	*szpp = szp;
2802 
2803 	/*
2804 	 * Re-resolve tvp; if it disappeared we just carry on.
2805 	 */
2806 	error = zfs_dirent_lookup(tdzp, tcnp->cn_nameptr, &tzp, 0);
2807 	if (error != 0) {
2808 		vrele(ZTOV(szp));
2809 		if ((tcnp->cn_flags & ISDOTDOT) != 0)
2810 			error = SET_ERROR(EINVAL);
2811 		goto out;
2812 	}
2813 	*tzpp = tzp;
2814 out:
2815 	zfs_exit(zfsvfs, FTAG);
2816 	return (error);
2817 }
2818 
2819 /*
2820  * We acquire all but the sdvp lock using non-blocking acquisitions.  If we
2821  * fail to acquire any lock in the path we will drop all held locks,
2822  * acquire the new lock in a blocking fashion, and then release it and
2823  * restart the rename.  This acquire/release step ensures that we do not
2824  * spin on a lock waiting for release.  On error release all vnode locks
2825  * and decrement references the way tmpfs_rename() would do.
2826  */
2827 static int
2828 zfs_rename_relock(struct vnode *sdvp, struct vnode **svpp,
2829     struct vnode *tdvp, struct vnode **tvpp,
2830     const struct componentname *scnp, const struct componentname *tcnp)
2831 {
2832 	struct vnode	*nvp, *svp, *tvp;
2833 	znode_t		*sdzp, *tdzp, *szp, *tzp;
2834 	int		error;
2835 
2836 	VOP_UNLOCK(tdvp);
2837 	if (*tvpp != NULL && *tvpp != tdvp)
2838 		VOP_UNLOCK(*tvpp);
2839 
2840 relock:
2841 	error = vn_lock(sdvp, LK_EXCLUSIVE);
2842 	if (error)
2843 		goto out;
2844 	error = vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT);
2845 	if (error != 0) {
2846 		VOP_UNLOCK(sdvp);
2847 		if (error != EBUSY)
2848 			goto out;
2849 		error = vn_lock(tdvp, LK_EXCLUSIVE);
2850 		if (error)
2851 			goto out;
2852 		VOP_UNLOCK(tdvp);
2853 		goto relock;
2854 	}
2855 	tdzp = VTOZ(tdvp);
2856 	sdzp = VTOZ(sdvp);
2857 
2858 	error = zfs_rename_relock_lookup(sdzp, scnp, &szp, tdzp, tcnp, &tzp);
2859 	if (error != 0) {
2860 		VOP_UNLOCK(sdvp);
2861 		VOP_UNLOCK(tdvp);
2862 		goto out;
2863 	}
2864 	svp = ZTOV(szp);
2865 	tvp = tzp != NULL ? ZTOV(tzp) : NULL;
2866 
2867 	/*
2868 	 * Now try to acquire locks on svp and tvp.
2869 	 */
2870 	nvp = svp;
2871 	error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
2872 	if (error != 0) {
2873 		VOP_UNLOCK(sdvp);
2874 		VOP_UNLOCK(tdvp);
2875 		if (tvp != NULL)
2876 			vrele(tvp);
2877 		if (error != EBUSY) {
2878 			vrele(nvp);
2879 			goto out;
2880 		}
2881 		error = vn_lock(nvp, LK_EXCLUSIVE);
2882 		if (error != 0) {
2883 			vrele(nvp);
2884 			goto out;
2885 		}
2886 		VOP_UNLOCK(nvp);
2887 		/*
2888 		 * Concurrent rename race.
2889 		 * XXX ?
2890 		 */
2891 		if (nvp == tdvp) {
2892 			vrele(nvp);
2893 			error = SET_ERROR(EINVAL);
2894 			goto out;
2895 		}
2896 		vrele(*svpp);
2897 		*svpp = nvp;
2898 		goto relock;
2899 	}
2900 	vrele(*svpp);
2901 	*svpp = nvp;
2902 
2903 	if (*tvpp != NULL)
2904 		vrele(*tvpp);
2905 	*tvpp = NULL;
2906 	if (tvp != NULL) {
2907 		nvp = tvp;
2908 		error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
2909 		if (error != 0) {
2910 			VOP_UNLOCK(sdvp);
2911 			VOP_UNLOCK(tdvp);
2912 			VOP_UNLOCK(*svpp);
2913 			if (error != EBUSY) {
2914 				vrele(nvp);
2915 				goto out;
2916 			}
2917 			error = vn_lock(nvp, LK_EXCLUSIVE);
2918 			if (error != 0) {
2919 				vrele(nvp);
2920 				goto out;
2921 			}
2922 			vput(nvp);
2923 			goto relock;
2924 		}
2925 		*tvpp = nvp;
2926 	}
2927 
2928 	return (0);
2929 
2930 out:
2931 	return (error);
2932 }
2933 
2934 /*
2935  * Note that we must use VRELE_ASYNC in this function as it walks
2936  * up the directory tree and vrele may need to acquire an exclusive
2937  * lock if the last reference to a vnode is dropped.
2938  */
2939 static int
2940 zfs_rename_check(znode_t *szp, znode_t *sdzp, znode_t *tdzp)
2941 {
2942 	zfsvfs_t	*zfsvfs;
2943 	znode_t		*zp, *zp1;
2944 	uint64_t	parent;
2945 	int		error;
2946 
2947 	zfsvfs = tdzp->z_zfsvfs;
2948 	if (tdzp == szp)
2949 		return (SET_ERROR(EINVAL));
2950 	if (tdzp == sdzp)
2951 		return (0);
2952 	if (tdzp->z_id == zfsvfs->z_root)
2953 		return (0);
2954 	zp = tdzp;
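	/*
	 * Walk up from the target directory towards the root; fail if the
	 * source directory is encountered, since that would make the source
	 * an ancestor of the target.
	 */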
2955 	for (;;) {
2956 		ASSERT(!zp->z_unlinked);
2957 		if ((error = sa_lookup(zp->z_sa_hdl,
2958 		    SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0)
2959 			break;
2960 
2961 		if (parent == szp->z_id) {
2962 			error = SET_ERROR(EINVAL);
2963 			break;
2964 		}
2965 		if (parent == zfsvfs->z_root)
2966 			break;
2967 		if (parent == sdzp->z_id)
2968 			break;
2969 
2970 		error = zfs_zget(zfsvfs, parent, &zp1);
2971 		if (error != 0)
2972 			break;
2973 
2974 		if (zp != tdzp)
2975 			VN_RELE_ASYNC(ZTOV(zp),
2976 			    dsl_pool_zrele_taskq(
2977 			    dmu_objset_pool(zfsvfs->z_os)));
2978 		zp = zp1;
2979 	}
2980 
2981 	if (error == ENOTDIR)
2982 		panic("checkpath: .. not a directory\n");
2983 	if (zp != tdzp)
2984 		VN_RELE_ASYNC(ZTOV(zp),
2985 		    dsl_pool_zrele_taskq(dmu_objset_pool(zfsvfs->z_os)));
2986 	return (error);
2987 }
2988 
2989 static int
2990 zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
2991     vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
2992     cred_t *cr);
2993 
2994 /*
2995  * Move an entry from the provided source directory to the target
2996  * directory.  Change the entry name as indicated.
2997  *
2998  *	IN:	sdvp	- Source directory containing the "old entry".
2999  *		scnp	- Old entry name.
3000  *		tdvp	- Target directory to contain the "new entry".
3001  *		tcnp	- New entry name.
3002  *		cr	- credentials of caller.
3003  *	INOUT:	svpp	- Source file
3004  *		tvpp	- Target file, may point to NULL initially
3005  *
3006  *	RETURN:	0 on success, error code on failure.
3007  *
3008  * Timestamps:
3009  *	sdvp,tdvp - ctime|mtime updated
3010  */
3011 static int
3012 zfs_do_rename(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
3013     vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
3014     cred_t *cr)
3015 {
3016 	int	error;
3017 
3018 	ASSERT_VOP_ELOCKED(tdvp, __func__);
3019 	if (*tvpp != NULL)
3020 		ASSERT_VOP_ELOCKED(*tvpp, __func__);
3021 
3022 	/* Reject renames across filesystems. */
3023 	if ((*svpp)->v_mount != tdvp->v_mount ||
3024 	    ((*tvpp) != NULL && (*svpp)->v_mount != (*tvpp)->v_mount)) {
3025 		error = SET_ERROR(EXDEV);
3026 		goto out;
3027 	}
3028 
3029 	if (zfsctl_is_node(tdvp)) {
3030 		error = SET_ERROR(EXDEV);
3031 		goto out;
3032 	}
3033 
3034 	/*
3035 	 * Lock all four vnodes to ensure safety and semantics of renaming.
3036 	 */
3037 	error = zfs_rename_relock(sdvp, svpp, tdvp, tvpp, scnp, tcnp);
3038 	if (error != 0) {
3039 		/* no vnodes are locked in the case of error here */
3040 		return (error);
3041 	}
3042 
3043 	error = zfs_do_rename_impl(sdvp, svpp, scnp, tdvp, tvpp, tcnp, cr);
3044 	VOP_UNLOCK(sdvp);
3045 	VOP_UNLOCK(*svpp);
3046 out:
3047 	if (*tvpp != NULL)
3048 		VOP_UNLOCK(*tvpp);
3049 	if (tdvp != *tvpp)
3050 		VOP_UNLOCK(tdvp);
3051 
3052 	return (error);
3053 }
3054 
3055 static int
3056 zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
3057     vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
3058     cred_t *cr)
3059 {
3060 	dmu_tx_t	*tx;
3061 	zfsvfs_t	*zfsvfs;
3062 	zilog_t		*zilog;
3063 	znode_t		*tdzp, *sdzp, *tzp, *szp;
3064 	const char	*snm = scnp->cn_nameptr;
3065 	const char	*tnm = tcnp->cn_nameptr;
3066 	int		error;
3067 
3068 	tdzp = VTOZ(tdvp);
3069 	sdzp = VTOZ(sdvp);
3070 	zfsvfs = tdzp->z_zfsvfs;
3071 
3072 	if ((error = zfs_enter_verify_zp(zfsvfs, tdzp, FTAG)) != 0)
3073 		return (error);
3074 	if ((error = zfs_verify_zp(sdzp)) != 0) {
3075 		zfs_exit(zfsvfs, FTAG);
3076 		return (error);
3077 	}
3078 	zilog = zfsvfs->z_log;
3079 
3080 	if (zfsvfs->z_utf8 && u8_validate(tnm,
3081 	    strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
3082 		error = SET_ERROR(EILSEQ);
3083 		goto out;
3084 	}
3085 
3086 	/* If source and target are the same file, there is nothing to do. */
3087 	if ((*svpp) == (*tvpp)) {
3088 		error = 0;
3089 		goto out;
3090 	}
3091 
3092 	if (((*svpp)->v_type == VDIR && (*svpp)->v_mountedhere != NULL) ||
3093 	    ((*tvpp) != NULL && (*tvpp)->v_type == VDIR &&
3094 	    (*tvpp)->v_mountedhere != NULL)) {
3095 		error = SET_ERROR(EXDEV);
3096 		goto out;
3097 	}
3098 
3099 	szp = VTOZ(*svpp);
3100 	if ((error = zfs_verify_zp(szp)) != 0) {
3101 		zfs_exit(zfsvfs, FTAG);
3102 		return (error);
3103 	}
3104 	tzp = *tvpp == NULL ? NULL : VTOZ(*tvpp);
3105 	if (tzp != NULL) {
3106 		if ((error = zfs_verify_zp(tzp)) != 0) {
3107 			zfs_exit(zfsvfs, FTAG);
3108 			return (error);
3109 		}
3110 	}
3111 
3112 	/*
3113 	 * This is to prevent the creation of links into attribute space
3114 	 * by renaming a linked file into/out of an attribute directory.
3115 	 * See the comment in zfs_link() for why this is considered bad.
3116 	 */
3117 	if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
3118 		error = SET_ERROR(EINVAL);
3119 		goto out;
3120 	}
3121 
3122 	/*
3123 	 * If we are using project inheritance, meaning the directory has
3124 	 * ZFS_PROJINHERIT set, then its descendant directories will inherit
3125 	 * not only the project ID but also the ZFS_PROJINHERIT flag. In
3126 	 * that case, we only allow renames into our tree when the project
3127 	 * IDs are the same.
3128 	 */
3129 	if (tdzp->z_pflags & ZFS_PROJINHERIT &&
3130 	    tdzp->z_projid != szp->z_projid) {
3131 		error = SET_ERROR(EXDEV);
3132 		goto out;
3133 	}
3134 
3135 	/*
3136 	 * Must have write access at the source to remove the old entry
3137 	 * and write access at the target to create the new entry.
3138 	 * Note that if target and source are the same, this can be
3139 	 * done in a single check.
3140 	 */
3141 	if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr, NULL)))
3142 		goto out;
3143 
3144 	if ((*svpp)->v_type == VDIR) {
3145 		/*
3146 		 * Avoid ".", "..", and aliases of "." for obvious reasons.
3147 		 */
3148 		if ((scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.') ||
3149 		    sdzp == szp ||
3150 		    (scnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) {
3151 			error = EINVAL;
3152 			goto out;
3153 		}
3154 
3155 		/*
3156 		 * Check to make sure rename is valid.
3157 		 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d
3158 		 */
3159 		if ((error = zfs_rename_check(szp, sdzp, tdzp)))
3160 			goto out;
3161 	}
3162 
3163 	/*
3164 	 * Does target exist?
3165 	 */
3166 	if (tzp) {
3167 		/*
3168 		 * Source and target must be the same type.
3169 		 */
3170 		if ((*svpp)->v_type == VDIR) {
3171 			if ((*tvpp)->v_type != VDIR) {
3172 				error = SET_ERROR(ENOTDIR);
3173 				goto out;
3174 			} else {
3175 				cache_purge(tdvp);
3176 				if (sdvp != tdvp)
3177 					cache_purge(sdvp);
3178 			}
3179 		} else {
3180 			if ((*tvpp)->v_type == VDIR) {
3181 				error = SET_ERROR(EISDIR);
3182 				goto out;
3183 			}
3184 		}
3185 	}
3186 
3187 	vn_seqc_write_begin(*svpp);
3188 	vn_seqc_write_begin(sdvp);
3189 	if (*tvpp != NULL)
3190 		vn_seqc_write_begin(*tvpp);
3191 	if (tdvp != *tvpp)
3192 		vn_seqc_write_begin(tdvp);
3193 
3194 	vnevent_rename_src(*svpp, sdvp, scnp->cn_nameptr, ct);
3195 	if (tzp)
3196 		vnevent_rename_dest(*tvpp, tdvp, tnm, ct);
3197 
3198 	/*
3199 	 * Notify the target directory if it is not the same
3200 	 * as the source directory.
3201 	 */
3202 	if (tdvp != sdvp) {
3203 		vnevent_rename_dest_dir(tdvp, ct);
3204 	}
3205 
3206 	tx = dmu_tx_create(zfsvfs->z_os);
3207 	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
3208 	dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
3209 	dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
3210 	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
3211 	if (sdzp != tdzp) {
3212 		dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE);
3213 		zfs_sa_upgrade_txholds(tx, tdzp);
3214 	}
3215 	if (tzp) {
3216 		dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE);
3217 		zfs_sa_upgrade_txholds(tx, tzp);
3218 	}
3219 
3220 	zfs_sa_upgrade_txholds(tx, szp);
3221 	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
3222 	error = dmu_tx_assign(tx, TXG_WAIT);
3223 	if (error) {
3224 		dmu_tx_abort(tx);
3225 		goto out_seq;
3226 	}
3227 
3228 	if (tzp)	/* Attempt to remove the existing target */
3229 		error = zfs_link_destroy(tdzp, tnm, tzp, tx, 0, NULL);
3230 
3231 	if (error == 0) {
3232 		error = zfs_link_create(tdzp, tnm, szp, tx, ZRENAMING);
3233 		if (error == 0) {
3234 			szp->z_pflags |= ZFS_AV_MODIFIED;
3235 
3236 			error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
3237 			    (void *)&szp->z_pflags, sizeof (uint64_t), tx);
3238 			ASSERT0(error);
3239 
3240 			error = zfs_link_destroy(sdzp, snm, szp, tx, ZRENAMING,
3241 			    NULL);
3242 			if (error == 0) {
3243 				zfs_log_rename(zilog, tx, TX_RENAME, sdzp,
3244 				    snm, tdzp, tnm, szp);
3245 			} else {
3246 				/*
3247 				 * At this point, we have successfully created
3248 				 * the target name, but have failed to remove
3249 				 * the source name.  Since the create was done
3250 				 * with the ZRENAMING flag, there are
3251 				 * complications; for one, the link count is
3252 				 * wrong.  The easiest way to deal with this
3253 				 * is to remove the newly created target, and
3254 				 * return the original error.  This must
3255 				 * succeed; fortunately, it is very unlikely to
3256 				 * fail, since we just created it.
3257 				 */
3258 				VERIFY0(zfs_link_destroy(tdzp, tnm, szp, tx,
3259 				    ZRENAMING, NULL));
3260 			}
3261 		}
3262 		if (error == 0) {
3263 			cache_vop_rename(sdvp, *svpp, tdvp, *tvpp, scnp, tcnp);
3264 		}
3265 	}
3266 
3267 	dmu_tx_commit(tx);
3268 
3269 out_seq:
3270 	vn_seqc_write_end(*svpp);
3271 	vn_seqc_write_end(sdvp);
3272 	if (*tvpp != NULL)
3273 		vn_seqc_write_end(*tvpp);
3274 	if (tdvp != *tvpp)
3275 		vn_seqc_write_end(tdvp);
3276 
3277 out:
3278 	if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
3279 		zil_commit(zilog, 0);
3280 	zfs_exit(zfsvfs, FTAG);
3281 
3282 	return (error);
3283 }
3284 
3285 int
3286 zfs_rename(znode_t *sdzp, const char *sname, znode_t *tdzp, const char *tname,
3287     cred_t *cr, int flags, uint64_t rflags, vattr_t *wo_vap, zidmap_t *mnt_ns)
3288 {
3289 	struct componentname scn, tcn;
3290 	vnode_t *sdvp, *tdvp;
3291 	vnode_t *svp, *tvp;
3292 	int error;
3293 	svp = tvp = NULL;
3294 
3295 	if (rflags != 0 || wo_vap != NULL)
3296 		return (SET_ERROR(EINVAL));
3297 
3298 	sdvp = ZTOV(sdzp);
3299 	tdvp = ZTOV(tdzp);
3300 	error = zfs_lookup_internal(sdzp, sname, &svp, &scn, DELETE);
3301 	if (sdzp->z_zfsvfs->z_replay == B_FALSE)
3302 		VOP_UNLOCK(sdvp);
3303 	if (error != 0)
3304 		goto fail;
3305 	VOP_UNLOCK(svp);
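	/*
	 * svp is unlocked here; zfs_do_rename() re-acquires all of the vnode
	 * locks in a deadlock-safe order via zfs_rename_relock().
	 */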
3306 
3307 	vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY);
3308 	error = zfs_lookup_internal(tdzp, tname, &tvp, &tcn, RENAME);
3309 	if (error == EJUSTRETURN)
3310 		tvp = NULL;
3311 	else if (error != 0) {
3312 		VOP_UNLOCK(tdvp);
3313 		goto fail;
3314 	}
3315 
3316 	error = zfs_do_rename(sdvp, &svp, &scn, tdvp, &tvp, &tcn, cr);
3317 fail:
3318 	if (svp != NULL)
3319 		vrele(svp);
3320 	if (tvp != NULL)
3321 		vrele(tvp);
3322 
3323 	return (error);
3324 }
3325 
3326 /*
3327  * Insert the indicated symbolic reference entry into the directory.
3328  *
3329  *	IN:	dvp	- Directory to contain new symbolic link.
3330  *		link	- Target path to store in the new symlink.
3331  *		vap	- Attributes of new entry.
3332  *		cr	- credentials of caller.
3333  *		ct	- caller context
3334  *		flags	- case flags
3335  *		mnt_ns	- Unused on FreeBSD
3336  *
3337  *	RETURN:	0 on success, error code on failure.
3338  *
3339  * Timestamps:
3340  *	dvp - ctime|mtime updated
3341  */
3342 int
3343 zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap,
3344     const char *link, znode_t **zpp, cred_t *cr, int flags, zidmap_t *mnt_ns)
3345 {
3346 	(void) flags;
3347 	znode_t		*zp;
3348 	dmu_tx_t	*tx;
3349 	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
3350 	zilog_t		*zilog;
3351 	uint64_t	len = strlen(link);
3352 	int		error;
3353 	zfs_acl_ids_t	acl_ids;
3354 	boolean_t	fuid_dirtied;
3355 	uint64_t	txtype = TX_SYMLINK;
3356 
3357 	ASSERT3S(vap->va_type, ==, VLNK);
3358 
3359 	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
3360 		return (error);
3361 	zilog = zfsvfs->z_log;
3362 
3363 	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
3364 	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
3365 		zfs_exit(zfsvfs, FTAG);
3366 		return (SET_ERROR(EILSEQ));
3367 	}
3368 
3369 	if (len > MAXPATHLEN) {
3370 		zfs_exit(zfsvfs, FTAG);
3371 		return (SET_ERROR(ENAMETOOLONG));
3372 	}
3373 
3374 	if ((error = zfs_acl_ids_create(dzp, 0,
3375 	    vap, cr, NULL, &acl_ids, NULL)) != 0) {
3376 		zfs_exit(zfsvfs, FTAG);
3377 		return (error);
3378 	}
3379 
3380 	/*
3381 	 * Attempt to lock directory; fail if entry already exists.
3382 	 */
3383 	error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
3384 	if (error) {
3385 		zfs_acl_ids_free(&acl_ids);
3386 		zfs_exit(zfsvfs, FTAG);
3387 		return (error);
3388 	}
3389 
3390 	if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr, mnt_ns))) {
3391 		zfs_acl_ids_free(&acl_ids);
3392 		zfs_exit(zfsvfs, FTAG);
3393 		return (error);
3394 	}
3395 
3396 	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids,
3397 	    0 /* projid */)) {
3398 		zfs_acl_ids_free(&acl_ids);
3399 		zfs_exit(zfsvfs, FTAG);
3400 		return (SET_ERROR(EDQUOT));
3401 	}
3402 
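	/*
	 * Reserve a vnode up front so the allocation in zfs_mknode() does not
	 * have to wait for one while the transaction is open.
	 */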
3403 	getnewvnode_reserve();
3404 	tx = dmu_tx_create(zfsvfs->z_os);
3405 	fuid_dirtied = zfsvfs->z_fuid_dirty;
3406 	dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
3407 	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
3408 	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
3409 	    ZFS_SA_BASE_ATTR_SIZE + len);
3410 	dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
3411 	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
3412 		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
3413 		    acl_ids.z_aclp->z_acl_bytes);
3414 	}
3415 	if (fuid_dirtied)
3416 		zfs_fuid_txhold(zfsvfs, tx);
3417 	error = dmu_tx_assign(tx, TXG_WAIT);
3418 	if (error) {
3419 		zfs_acl_ids_free(&acl_ids);
3420 		dmu_tx_abort(tx);
3421 		getnewvnode_drop_reserve();
3422 		zfs_exit(zfsvfs, FTAG);
3423 		return (error);
3424 	}
3425 
3426 	/*
3427 	 * Create a new object for the symlink.
3428 	 * For version 4 ZPL datasets the symlink will be an SA attribute.
3429 	 */
3430 	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
3431 
3432 	if (fuid_dirtied)
3433 		zfs_fuid_sync(zfsvfs, tx);
3434 
3435 	if (zp->z_is_sa)
3436 		error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs),
3437 		    __DECONST(void *, link), len, tx);
3438 	else
3439 		zfs_sa_symlink(zp, __DECONST(char *, link), len, tx);
3440 
3441 	zp->z_size = len;
3442 	(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
3443 	    &zp->z_size, sizeof (zp->z_size), tx);
3444 	/*
3445 	 * Insert the new object into the directory.
3446 	 */
3447 	error = zfs_link_create(dzp, name, zp, tx, ZNEW);
3448 	if (error != 0) {
3449 		zfs_znode_delete(zp, tx);
3450 		VOP_UNLOCK(ZTOV(zp));
3451 		zrele(zp);
3452 	} else {
3453 		zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
3454 	}
3455 
3456 	zfs_acl_ids_free(&acl_ids);
3457 
3458 	dmu_tx_commit(tx);
3459 
3460 	getnewvnode_drop_reserve();
3461 
3462 	if (error == 0) {
3463 		*zpp = zp;
3464 
3465 		if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
3466 			zil_commit(zilog, 0);
3467 	}
3468 
3469 	zfs_exit(zfsvfs, FTAG);
3470 	return (error);
3471 }
3472 
3473 /*
3474  * Return, in the buffer contained in the provided uio structure,
3475  * the symbolic path referred to by vp.
3476  *
3477  *	IN:	vp	- vnode of symbolic link.
3478  *		uio	- structure to contain the link path.
3479  *		cr	- credentials of caller.
3480  *		ct	- caller context
3481  *
3482  *	OUT:	uio	- structure containing the link path.
3483  *
3484  *	RETURN:	0 on success, error code on failure.
3485  *
3486  * Timestamps:
3487  *	vp - atime updated
3488  */
3489 static int
3490 zfs_readlink(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, caller_context_t *ct)
3491 {
3492 	(void) cr, (void) ct;
3493 	znode_t		*zp = VTOZ(vp);
3494 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
3495 	int		error;
3496 
3497 	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
3498 		return (error);
3499 
3500 	if (zp->z_is_sa)
3501 		error = sa_lookup_uio(zp->z_sa_hdl,
3502 		    SA_ZPL_SYMLINK(zfsvfs), uio);
3503 	else
3504 		error = zfs_sa_readlink(zp, uio);
3505 
3506 	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
3507 
3508 	zfs_exit(zfsvfs, FTAG);
3509 	return (error);
3510 }
3511 
3512 /*
3513  * Insert a new entry into directory tdvp referencing svp.
3514  *
3515  *	IN:	tdvp	- Directory to contain new entry.
3516  *		svp	- vnode of new entry.
3517  *		name	- name of new entry.
3518  *		cr	- credentials of caller.
3519  *
3520  *	RETURN:	0 on success, error code on failure.
3521  *
3522  * Timestamps:
3523  *	tdvp - ctime|mtime updated
3524  *	 svp - ctime updated
3525  */
3526 int
3527 zfs_link(znode_t *tdzp, znode_t *szp, const char *name, cred_t *cr,
3528     int flags)
3529 {
3530 	(void) flags;
3531 	znode_t		*tzp;
3532 	zfsvfs_t	*zfsvfs = tdzp->z_zfsvfs;
3533 	zilog_t		*zilog;
3534 	dmu_tx_t	*tx;
3535 	int		error;
3536 	uint64_t	parent;
3537 	uid_t		owner;
3538 
3539 	ASSERT3S(ZTOV(tdzp)->v_type, ==, VDIR);
3540 
3541 	if ((error = zfs_enter_verify_zp(zfsvfs, tdzp, FTAG)) != 0)
3542 		return (error);
3543 	zilog = zfsvfs->z_log;
3544 
3545 	/*
3546 	 * POSIX dictates that we return EPERM here.
3547 	 * Better choices include ENOTSUP or EISDIR.
3548 	 */
3549 	if (ZTOV(szp)->v_type == VDIR) {
3550 		zfs_exit(zfsvfs, FTAG);
3551 		return (SET_ERROR(EPERM));
3552 	}
3553 
3554 	if ((error = zfs_verify_zp(szp)) != 0) {
3555 		zfs_exit(zfsvfs, FTAG);
3556 		return (error);
3557 	}
3558 
3559 	/*
3560 	 * If we are using project inheritance, meaning the directory has
3561 	 * ZFS_PROJINHERIT set, then its descendant directories will inherit
3562 	 * not only the project ID but also the ZFS_PROJINHERIT flag. In
3563 	 * that case, we only allow hard link creation in our tree when the
3564 	 * project IDs are the same.
3565 	 */
3566 	if (tdzp->z_pflags & ZFS_PROJINHERIT &&
3567 	    tdzp->z_projid != szp->z_projid) {
3568 		zfs_exit(zfsvfs, FTAG);
3569 		return (SET_ERROR(EXDEV));
3570 	}
3571 
3572 	if (szp->z_pflags & (ZFS_APPENDONLY |
3573 	    ZFS_IMMUTABLE | ZFS_READONLY)) {
3574 		zfs_exit(zfsvfs, FTAG);
3575 		return (SET_ERROR(EPERM));
3576 	}
3577 
3578 	/* Prevent links to .zfs/shares files */
3579 
3580 	if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
3581 	    &parent, sizeof (uint64_t))) != 0) {
3582 		zfs_exit(zfsvfs, FTAG);
3583 		return (error);
3584 	}
3585 	if (parent == zfsvfs->z_shares_dir) {
3586 		zfs_exit(zfsvfs, FTAG);
3587 		return (SET_ERROR(EPERM));
3588 	}
3589 
3590 	if (zfsvfs->z_utf8 && u8_validate(name,
3591 	    strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
3592 		zfs_exit(zfsvfs, FTAG);
3593 		return (SET_ERROR(EILSEQ));
3594 	}
3595 
3596 	/*
3597 	 * We do not support links between attributes and non-attributes
3598 	 * because of the potential security risk of creating links
3599 	 * into "normal" file space in order to circumvent restrictions
3600 	 * imposed in attribute space.
3601 	 */
3602 	if ((szp->z_pflags & ZFS_XATTR) != (tdzp->z_pflags & ZFS_XATTR)) {
3603 		zfs_exit(zfsvfs, FTAG);
3604 		return (SET_ERROR(EINVAL));
3605 	}
3606 
3607 
3608 	owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER);
3609 	if (owner != crgetuid(cr) && secpolicy_basic_link(ZTOV(szp), cr) != 0) {
3610 		zfs_exit(zfsvfs, FTAG);
3611 		return (SET_ERROR(EPERM));
3612 	}
3613 
3614 	if ((error = zfs_zaccess(tdzp, ACE_ADD_FILE, 0, B_FALSE, cr, NULL))) {
3615 		zfs_exit(zfsvfs, FTAG);
3616 		return (error);
3617 	}
3618 
3619 	/*
3620 	 * Attempt to lock directory; fail if entry already exists.
3621 	 */
3622 	error = zfs_dirent_lookup(tdzp, name, &tzp, ZNEW);
3623 	if (error) {
3624 		zfs_exit(zfsvfs, FTAG);
3625 		return (error);
3626 	}
3627 
3628 	tx = dmu_tx_create(zfsvfs->z_os);
3629 	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
3630 	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, name);
3631 	zfs_sa_upgrade_txholds(tx, szp);
3632 	zfs_sa_upgrade_txholds(tx, tdzp);
3633 	error = dmu_tx_assign(tx, TXG_WAIT);
3634 	if (error) {
3635 		dmu_tx_abort(tx);
3636 		zfs_exit(zfsvfs, FTAG);
3637 		return (error);
3638 	}
3639 
3640 	error = zfs_link_create(tdzp, name, szp, tx, 0);
3641 
3642 	if (error == 0) {
3643 		uint64_t txtype = TX_LINK;
3644 		zfs_log_link(zilog, tx, txtype, tdzp, szp, name);
3645 	}
3646 
3647 	dmu_tx_commit(tx);
3648 
3649 	if (error == 0) {
3650 		vnevent_link(ZTOV(szp), ct);
3651 	}
3652 
3653 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
3654 		zil_commit(zilog, 0);
3655 
3656 	zfs_exit(zfsvfs, FTAG);
3657 	return (error);
3658 }
3659 
3660 /*
3661  * Free or allocate space in a file.  Currently, this function only
3662  * supports the `F_FREESP' command.  However, this command is somewhat
3663  * misnamed, as its functionality includes the ability to allocate as
3664  * well as free space.
3665  *
3666  *	IN:	ip	- inode of file to free data in.
3667  *		cmd	- action to take (only F_FREESP supported).
3668  *		bfp	- section of file to free/alloc.
3669  *		flag	- current file open mode flags.
3670  *		offset	- current file offset.
3671  *		cr	- credentials of caller.
3672  *
3673  *	RETURN:	0 on success, error code on failure.
3674  *
3675  * Timestamps:
3676  *	ip - ctime|mtime updated
3677  */
3678 int
3679 zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
3680     offset_t offset, cred_t *cr)
3681 {
3682 	(void) offset;
3683 	zfsvfs_t	*zfsvfs = ZTOZSB(zp);
3684 	uint64_t	off, len;
3685 	int		error;
3686 
3687 	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
3688 		return (error);
3689 
3690 	if (cmd != F_FREESP) {
3691 		zfs_exit(zfsvfs, FTAG);
3692 		return (SET_ERROR(EINVAL));
3693 	}
3694 
3695 	/*
3696 	 * Callers might not be able to detect properly that we are read-only,
3697 	 * so check it explicitly here.
3698 	 */
3699 	if (zfs_is_readonly(zfsvfs)) {
3700 		zfs_exit(zfsvfs, FTAG);
3701 		return (SET_ERROR(EROFS));
3702 	}
3703 
3704 	if (bfp->l_len < 0) {
3705 		zfs_exit(zfsvfs, FTAG);
3706 		return (SET_ERROR(EINVAL));
3707 	}
3708 
3709 	/*
3710 	 * Permissions aren't checked on Solaris because on this OS
3711 	 * zfs_space() can only be called with an opened file handle.
3712 	 * On Linux we can get here through truncate_range() which
3713 	 * operates directly on inodes, so we need to check access rights.
3714 	 */
3715 	if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr, NULL))) {
3716 		zfs_exit(zfsvfs, FTAG);
3717 		return (error);
3718 	}
3719 
3720 	off = bfp->l_start;
3721 	len = bfp->l_len; /* 0 means from off to end of file */
3722 
3723 	error = zfs_freesp(zp, off, len, flag, TRUE);
3724 
3725 	zfs_exit(zfsvfs, FTAG);
3726 	return (error);
3727 }
3728 
3729 static void
3730 zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
3731 {
3732 	(void) cr, (void) ct;
3733 	znode_t	*zp = VTOZ(vp);
3734 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
3735 	int error;
3736 
3737 	ZFS_TEARDOWN_INACTIVE_ENTER_READ(zfsvfs);
3738 	if (zp->z_sa_hdl == NULL) {
3739 		/*
3740 		 * The fs has been unmounted, or we did a
3741 		 * suspend/resume and this file no longer exists.
3742 		 */
3743 		ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
3744 		vrecycle(vp);
3745 		return;
3746 	}
3747 
3748 	if (zp->z_unlinked) {
3749 		/*
3750 		 * Fast path to recycle a vnode of a removed file.
3751 		 */
3752 		ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
3753 		vrecycle(vp);
3754 		return;
3755 	}
3756 
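	/*
	 * Write back a dirty atime before the vnode can be recycled.
	 */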
3757 	if (zp->z_atime_dirty && zp->z_unlinked == 0) {
3758 		dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);
3759 
3760 		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
3761 		zfs_sa_upgrade_txholds(tx, zp);
3762 		error = dmu_tx_assign(tx, TXG_WAIT);
3763 		if (error) {
3764 			dmu_tx_abort(tx);
3765 		} else {
3766 			(void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs),
3767 			    (void *)&zp->z_atime, sizeof (zp->z_atime), tx);
3768 			zp->z_atime_dirty = 0;
3769 			dmu_tx_commit(tx);
3770 		}
3771 	}
3772 	ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
3773 }
3774 
3775 
3776 _Static_assert(sizeof (struct zfid_short) <= sizeof (struct fid),
3777 	"struct zfid_short bigger than struct fid");
3778 _Static_assert(sizeof (struct zfid_long) <= sizeof (struct fid),
3779 	"struct zfid_long bigger than struct fid");
3780 
3781 static int
3782 zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
3783 {
3784 	(void) ct;
3785 	znode_t		*zp = VTOZ(vp);
3786 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
3787 	uint32_t	gen;
3788 	uint64_t	gen64;
3789 	uint64_t	object = zp->z_id;
3790 	zfid_short_t	*zfid;
3791 	int		size, i, error;
3792 
3793 	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
3794 		return (error);
3795 
3796 	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs),
3797 	    &gen64, sizeof (uint64_t))) != 0) {
3798 		zfs_exit(zfsvfs, FTAG);
3799 		return (error);
3800 	}
3801 
3802 	gen = (uint32_t)gen64;
3803 
3804 	size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN;
3805 	fidp->fid_len = size;
3806 
3807 	zfid = (zfid_short_t *)fidp;
3808 
3809 	zfid->zf_len = size;
3810 
3811 	for (i = 0; i < sizeof (zfid->zf_object); i++)
3812 		zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
3813 
3814 	/* Must have a non-zero generation number to distinguish from .zfs */
3815 	if (gen == 0)
3816 		gen = 1;
3817 	for (i = 0; i < sizeof (zfid->zf_gen); i++)
3818 		zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i));
3819 
3820 	if (size == LONG_FID_LEN) {
3821 		uint64_t	objsetid = dmu_objset_id(zfsvfs->z_os);
3822 		zfid_long_t	*zlfid;
3823 
3824 		zlfid = (zfid_long_t *)fidp;
3825 
3826 		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
3827 			zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i));
3828 
3829 		/* XXX - this should be the generation number for the objset */
3830 		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
3831 			zlfid->zf_setgen[i] = 0;
3832 	}
3833 
3834 	zfs_exit(zfsvfs, FTAG);
3835 	return (0);
3836 }
3837 
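/*
 * Answer pathconf(2) queries that ZFS can resolve directly; anything else
 * returns EOPNOTSUPP so the FreeBSD-level caller can fall back to
 * vop_stdpathconf().
 */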
3838 static int
3839 zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
3840     caller_context_t *ct)
3841 {
3842 	znode_t *zp;
3843 	zfsvfs_t *zfsvfs;
3844 	int error;
3845 
3846 	switch (cmd) {
3847 	case _PC_LINK_MAX:
3848 		*valp = MIN(LONG_MAX, ZFS_LINK_MAX);
3849 		return (0);
3850 
3851 	case _PC_FILESIZEBITS:
3852 		*valp = 64;
3853 		return (0);
3854 	case _PC_MIN_HOLE_SIZE:
3855 		*valp = (int)SPA_MINBLOCKSIZE;
3856 		return (0);
3857 	case _PC_ACL_EXTENDED:
3858 #if 0		/* POSIX ACLs are not implemented for ZFS on FreeBSD yet. */
3859 		zp = VTOZ(vp);
3860 		zfsvfs = zp->z_zfsvfs;
3861 		if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
3862 			return (error);
3863 		*valp = zfsvfs->z_acl_type == ZFS_ACLTYPE_POSIX ? 1 : 0;
3864 		zfs_exit(zfsvfs, FTAG);
3865 #else
3866 		*valp = 0;
3867 #endif
3868 		return (0);
3869 
3870 	case _PC_ACL_NFS4:
3871 		zp = VTOZ(vp);
3872 		zfsvfs = zp->z_zfsvfs;
3873 		if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
3874 			return (error);
3875 		*valp = zfsvfs->z_acl_type == ZFS_ACLTYPE_NFSV4 ? 1 : 0;
3876 		zfs_exit(zfsvfs, FTAG);
3877 		return (0);
3878 
3879 	case _PC_ACL_PATH_MAX:
3880 		*valp = ACL_MAX_ENTRIES;
3881 		return (0);
3882 
3883 	default:
3884 		return (EOPNOTSUPP);
3885 	}
3886 }
3887 
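/*
 * Pager read (VOP_GETPAGES).  Take a range lock covering whole blocks for
 * the requested pages, clamp the optional read-behind/read-ahead counts to
 * the locked range and the object size, then fill the pages via
 * dmu_read_pages().
 */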
3888 static int
3889 zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
3890     int *rahead)
3891 {
3892 	znode_t *zp = VTOZ(vp);
3893 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
3894 	zfs_locked_range_t *lr;
3895 	vm_object_t object;
3896 	off_t start, end, obj_size;
3897 	uint_t blksz;
3898 	int pgsin_b, pgsin_a;
3899 	int error;
3900 
3901 	if (zfs_enter_verify_zp(zfsvfs, zp, FTAG) != 0)
3902 		return (zfs_vm_pagerret_error);
3903 
3904 	start = IDX_TO_OFF(ma[0]->pindex);
3905 	end = IDX_TO_OFF(ma[count - 1]->pindex + 1);
3906 
3907 	/*
3908 	 * Lock a range covering all required and optional pages.
3909 	 * Note that we need to handle the case of the block size growing.
3910 	 */
3911 	for (;;) {
3912 		blksz = zp->z_blksz;
3913 		lr = zfs_rangelock_tryenter(&zp->z_rangelock,
3914 		    rounddown(start, blksz),
3915 		    roundup(end, blksz) - rounddown(start, blksz), RL_READER);
3916 		if (lr == NULL) {
3917 			if (rahead != NULL) {
3918 				*rahead = 0;
3919 				rahead = NULL;
3920 			}
3921 			if (rbehind != NULL) {
3922 				*rbehind = 0;
3923 				rbehind = NULL;
3924 			}
3925 			break;
3926 		}
3927 		if (blksz == zp->z_blksz)
3928 			break;
3929 		zfs_rangelock_exit(lr);
3930 	}
3931 
3932 	object = ma[0]->object;
3933 	zfs_vmobject_wlock(object);
3934 	obj_size = object->un_pager.vnp.vnp_size;
3935 	zfs_vmobject_wunlock(object);
3936 	if (IDX_TO_OFF(ma[count - 1]->pindex) >= obj_size) {
3937 		if (lr != NULL)
3938 			zfs_rangelock_exit(lr);
3939 		zfs_exit(zfsvfs, FTAG);
3940 		return (zfs_vm_pagerret_bad);
3941 	}
3942 
3943 	pgsin_b = 0;
3944 	if (rbehind != NULL) {
3945 		pgsin_b = OFF_TO_IDX(start - rounddown(start, blksz));
3946 		pgsin_b = MIN(*rbehind, pgsin_b);
3947 	}
3948 
3949 	pgsin_a = 0;
3950 	if (rahead != NULL) {
3951 		pgsin_a = OFF_TO_IDX(roundup(end, blksz) - end);
3952 		if (end + IDX_TO_OFF(pgsin_a) >= obj_size)
3953 			pgsin_a = OFF_TO_IDX(round_page(obj_size) - end);
3954 		pgsin_a = MIN(*rahead, pgsin_a);
3955 	}
3956 
3957 	/*
3958 	 * NB: we need to pass the exact byte size of the data that we expect
3959 	 * to read after accounting for the file size.  This is required because
3960 	 * ZFS will panic if we request DMU to read beyond the end of the last
3961 	 * allocated block.
3962 	 */
3963 	error = dmu_read_pages(zfsvfs->z_os, zp->z_id, ma, count, &pgsin_b,
3964 	    &pgsin_a, MIN(end, obj_size) - (end - PAGE_SIZE));
3965 
3966 	if (lr != NULL)
3967 		zfs_rangelock_exit(lr);
3968 	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
3969 
3970 	dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, count*PAGE_SIZE);
3971 
3972 	zfs_exit(zfsvfs, FTAG);
3973 
3974 	if (error != 0)
3975 		return (zfs_vm_pagerret_error);
3976 
3977 	VM_CNT_INC(v_vnodein);
3978 	VM_CNT_ADD(v_vnodepgsin, count + pgsin_b + pgsin_a);
3979 	if (rbehind != NULL)
3980 		*rbehind = pgsin_b;
3981 	if (rahead != NULL)
3982 		*rahead = pgsin_a;
3983 	return (zfs_vm_pagerret_ok);
3984 }
3985 
3986 #ifndef _SYS_SYSPROTO_H_
3987 struct vop_getpages_args {
3988 	struct vnode *a_vp;
3989 	vm_page_t *a_m;
3990 	int a_count;
3991 	int *a_rbehind;
3992 	int *a_rahead;
3993 };
3994 #endif
3995 
3996 static int
3997 zfs_freebsd_getpages(struct vop_getpages_args *ap)
3998 {
3999 
4000 	return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind,
4001 	    ap->a_rahead));
4002 }
4003 
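/*
 * Pager write (VOP_PUTPAGES).  Take a write range lock covering whole
 * blocks, trim the request to the vnode size, copy the pages into a DMU
 * transaction, log the write to the ZIL, and undirty the pages that were
 * written successfully.
 */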
4004 static int
4005 zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
4006     int *rtvals)
4007 {
4008 	znode_t		*zp = VTOZ(vp);
4009 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
4010 	zfs_locked_range_t		*lr;
4011 	dmu_tx_t	*tx;
4012 	struct sf_buf	*sf;
4013 	vm_object_t	object;
4014 	vm_page_t	m;
4015 	caddr_t		va;
4016 	size_t		tocopy;
4017 	size_t		lo_len;
4018 	vm_ooffset_t	lo_off;
4019 	vm_ooffset_t	off;
4020 	uint_t		blksz;
4021 	int		ncount;
4022 	int		pcount;
4023 	int		err;
4024 	int		i;
4025 
4026 	object = vp->v_object;
4027 	KASSERT(ma[0]->object == object, ("mismatching object"));
4028 	KASSERT(len > 0 && (len & PAGE_MASK) == 0, ("unexpected length"));
4029 
4030 	pcount = btoc(len);
4031 	ncount = pcount;
4032 	for (i = 0; i < pcount; i++)
4033 		rtvals[i] = zfs_vm_pagerret_error;
4034 
4035 	if (zfs_enter_verify_zp(zfsvfs, zp, FTAG) != 0)
4036 		return (zfs_vm_pagerret_error);
4037 
4038 	off = IDX_TO_OFF(ma[0]->pindex);
4039 	blksz = zp->z_blksz;
4040 	lo_off = rounddown(off, blksz);
4041 	lo_len = roundup(len + (off - lo_off), blksz);
4042 	lr = zfs_rangelock_enter(&zp->z_rangelock, lo_off, lo_len, RL_WRITER);
4043 
4044 	zfs_vmobject_wlock(object);
4045 	if (len + off > object->un_pager.vnp.vnp_size) {
4046 		if (object->un_pager.vnp.vnp_size > off) {
4047 			int pgoff;
4048 
4049 			len = object->un_pager.vnp.vnp_size - off;
4050 			ncount = btoc(len);
4051 			if ((pgoff = (int)len & PAGE_MASK) != 0) {
4052 				/*
4053 				 * If the object is locked and the following
4054 				 * conditions hold, then the page's dirty
4055 				 * field cannot be concurrently changed by a
4056 				 * pmap operation.
4057 				 */
4058 				m = ma[ncount - 1];
4059 				vm_page_assert_sbusied(m);
4060 				KASSERT(!pmap_page_is_write_mapped(m),
4061 				    ("zfs_putpages: page %p is not read-only",
4062 				    m));
4063 				vm_page_clear_dirty(m, pgoff, PAGE_SIZE -
4064 				    pgoff);
4065 			}
4066 		} else {
4067 			len = 0;
4068 			ncount = 0;
4069 		}
4070 		if (ncount < pcount) {
4071 			for (i = ncount; i < pcount; i++) {
4072 				rtvals[i] = zfs_vm_pagerret_bad;
4073 			}
4074 		}
4075 	}
4076 	zfs_vmobject_wunlock(object);
4077 
4078 	boolean_t commit = (flags & (zfs_vm_pagerput_sync |
4079 	    zfs_vm_pagerput_inval)) != 0 ||
4080 	    zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS;
4081 
4082 	if (ncount == 0)
4083 		goto out;
4084 
4085 	if (zfs_id_overblockquota(zfsvfs, DMU_USERUSED_OBJECT, zp->z_uid) ||
4086 	    zfs_id_overblockquota(zfsvfs, DMU_GROUPUSED_OBJECT, zp->z_gid) ||
4087 	    (zp->z_projid != ZFS_DEFAULT_PROJID &&
4088 	    zfs_id_overblockquota(zfsvfs, DMU_PROJECTUSED_OBJECT,
4089 	    zp->z_projid))) {
4090 		goto out;
4091 	}
4092 
4093 	tx = dmu_tx_create(zfsvfs->z_os);
4094 	dmu_tx_hold_write(tx, zp->z_id, off, len);
4095 
4096 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
4097 	zfs_sa_upgrade_txholds(tx, zp);
4098 	err = dmu_tx_assign(tx, TXG_WAIT);
4099 	if (err != 0) {
4100 		dmu_tx_abort(tx);
4101 		goto out;
4102 	}
4103 
4104 	if (zp->z_blksz < PAGE_SIZE) {
4105 		for (i = 0; len > 0; off += tocopy, len -= tocopy, i++) {
4106 			tocopy = len > PAGE_SIZE ? PAGE_SIZE : len;
4107 			va = zfs_map_page(ma[i], &sf);
4108 			dmu_write(zfsvfs->z_os, zp->z_id, off, tocopy, va, tx);
4109 			zfs_unmap_page(sf);
4110 		}
4111 	} else {
4112 		err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, ma, tx);
4113 	}
4114 
4115 	if (err == 0) {
4116 		uint64_t mtime[2], ctime[2];
4117 		sa_bulk_attr_t bulk[3];
4118 		int count = 0;
4119 
4120 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
4121 		    &mtime, 16);
4122 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
4123 		    &ctime, 16);
4124 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
4125 		    &zp->z_pflags, 8);
4126 		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
4127 		err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
4128 		ASSERT0(err);
4129 		/*
4130 		 * XXX we should be passing a callback to undirty
4131 		 * but that would make the locking messier
4132 		 */
4133 		zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off,
4134 		    len, commit, NULL, NULL);
4135 
4136 		zfs_vmobject_wlock(object);
4137 		for (i = 0; i < ncount; i++) {
4138 			rtvals[i] = zfs_vm_pagerret_ok;
4139 			vm_page_undirty(ma[i]);
4140 		}
4141 		zfs_vmobject_wunlock(object);
4142 		VM_CNT_INC(v_vnodeout);
4143 		VM_CNT_ADD(v_vnodepgsout, ncount);
4144 	}
4145 	dmu_tx_commit(tx);
4146 
4147 out:
4148 	zfs_rangelock_exit(lr);
4149 	if (commit)
4150 		zil_commit(zfsvfs->z_log, zp->z_id);
4151 
4152 	dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, len);
4153 
4154 	zfs_exit(zfsvfs, FTAG);
4155 	return (rtvals[0]);
4156 }
4157 
4158 #ifndef _SYS_SYSPROTO_H_
4159 struct vop_putpages_args {
4160 	struct vnode *a_vp;
4161 	vm_page_t *a_m;
4162 	int a_count;
4163 	int a_sync;
4164 	int *a_rtvals;
4165 };
4166 #endif
4167 
4168 static int
4169 zfs_freebsd_putpages(struct vop_putpages_args *ap)
4170 {
4171 
4172 	return (zfs_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync,
4173 	    ap->a_rtvals));
4174 }
4175 
4176 #ifndef _SYS_SYSPROTO_H_
4177 struct vop_bmap_args {
4178 	struct vnode *a_vp;
4179 	daddr_t  a_bn;
4180 	struct bufobj **a_bop;
4181 	daddr_t *a_bnp;
4182 	int *a_runp;
4183 	int *a_runb;
4184 };
4185 #endif
4186 
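/*
 * Trivial VOP_BMAP implementation: report an identity block mapping with no
 * read-ahead/read-behind runs, since ZFS does its own caching and I/O
 * clustering.
 */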
4187 static int
4188 zfs_freebsd_bmap(struct vop_bmap_args *ap)
4189 {
4190 
4191 	if (ap->a_bop != NULL)
4192 		*ap->a_bop = &ap->a_vp->v_bufobj;
4193 	if (ap->a_bnp != NULL)
4194 		*ap->a_bnp = ap->a_bn;
4195 	if (ap->a_runp != NULL)
4196 		*ap->a_runp = 0;
4197 	if (ap->a_runb != NULL)
4198 		*ap->a_runb = 0;
4199 
4200 	return (0);
4201 }
4202 
4203 #ifndef _SYS_SYSPROTO_H_
4204 struct vop_open_args {
4205 	struct vnode *a_vp;
4206 	int a_mode;
4207 	struct ucred *a_cred;
4208 	struct thread *a_td;
4209 };
4210 #endif
4211 
4212 static int
4213 zfs_freebsd_open(struct vop_open_args *ap)
4214 {
4215 	vnode_t	*vp = ap->a_vp;
4216 	znode_t *zp = VTOZ(vp);
4217 	int error;
4218 
4219 	error = zfs_open(&vp, ap->a_mode, ap->a_cred);
4220 	if (error == 0)
4221 		vnode_create_vobject(vp, zp->z_size, ap->a_td);
4222 	return (error);
4223 }
4224 
4225 #ifndef _SYS_SYSPROTO_H_
4226 struct vop_close_args {
4227 	struct vnode *a_vp;
4228 	int  a_fflag;
4229 	struct ucred *a_cred;
4230 	struct thread *a_td;
4231 };
4232 #endif
4233 
4234 static int
4235 zfs_freebsd_close(struct vop_close_args *ap)
4236 {
4237 
4238 	return (zfs_close(ap->a_vp, ap->a_fflag, 1, 0, ap->a_cred));
4239 }
4240 
4241 #ifndef _SYS_SYSPROTO_H_
4242 struct vop_ioctl_args {
4243 	struct vnode *a_vp;
4244 	ulong_t a_command;
4245 	caddr_t a_data;
4246 	int a_fflag;
4247 	struct ucred *cred;
4248 	struct thread *td;
4249 };
4250 #endif
4251 
4252 static int
4253 zfs_freebsd_ioctl(struct vop_ioctl_args *ap)
4254 {
4255 
4256 	return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data,
4257 	    ap->a_fflag, ap->a_cred, NULL));
4258 }
4259 
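/*
 * Translate FreeBSD IO_* flags from a vnode operation into the O_* flags
 * expected by the common ZFS read/write code.
 */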
4260 static int
4261 ioflags(int ioflags)
4262 {
4263 	int flags = 0;
4264 
4265 	if (ioflags & IO_APPEND)
4266 		flags |= O_APPEND;
4267 	if (ioflags & IO_NDELAY)
4268 		flags |= O_NONBLOCK;
4269 	if (ioflags & IO_SYNC)
4270 		flags |= O_SYNC;
4271 
4272 	return (flags);
4273 }
4274 
4275 #ifndef _SYS_SYSPROTO_H_
4276 struct vop_read_args {
4277 	struct vnode *a_vp;
4278 	struct uio *a_uio;
4279 	int a_ioflag;
4280 	struct ucred *a_cred;
4281 };
4282 #endif
4283 
4284 static int
4285 zfs_freebsd_read(struct vop_read_args *ap)
4286 {
4287 	zfs_uio_t uio;
4288 	zfs_uio_init(&uio, ap->a_uio);
4289 	return (zfs_read(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag),
4290 	    ap->a_cred));
4291 }
4292 
4293 #ifndef _SYS_SYSPROTO_H_
4294 struct vop_write_args {
4295 	struct vnode *a_vp;
4296 	struct uio *a_uio;
4297 	int a_ioflag;
4298 	struct ucred *a_cred;
4299 };
4300 #endif
4301 
4302 static int
4303 zfs_freebsd_write(struct vop_write_args *ap)
4304 {
4305 	zfs_uio_t uio;
4306 	zfs_uio_init(&uio, ap->a_uio);
4307 	return (zfs_write(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag),
4308 	    ap->a_cred));
4309 }
4310 
4311 /*
4312  * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see
4313  * the comment above cache_fplookup for details.
4314  */
4315 static int
4316 zfs_freebsd_fplookup_vexec(struct vop_fplookup_vexec_args *v)
4317 {
4318 	vnode_t *vp;
4319 	znode_t *zp;
4320 	uint64_t pflags;
4321 
4322 	vp = v->a_vp;
4323 	zp = VTOZ_SMR(vp);
4324 	if (__predict_false(zp == NULL))
4325 		return (EAGAIN);
4326 	pflags = atomic_load_64(&zp->z_pflags);
4327 	if (pflags & ZFS_AV_QUARANTINED)
4328 		return (EAGAIN);
4329 	if (pflags & ZFS_XATTR)
4330 		return (EAGAIN);
4331 	if ((pflags & ZFS_NO_EXECS_DENIED) == 0)
4332 		return (EAGAIN);
4333 	return (0);
4334 }
4335 
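/*
 * Lockless symlink resolution for the fast path lookup: only succeeds when
 * the link target is already cached in z_cached_symlink.
 */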
4336 static int
4337 zfs_freebsd_fplookup_symlink(struct vop_fplookup_symlink_args *v)
4338 {
4339 	vnode_t *vp;
4340 	znode_t *zp;
4341 	char *target;
4342 
4343 	vp = v->a_vp;
4344 	zp = VTOZ_SMR(vp);
4345 	if (__predict_false(zp == NULL)) {
4346 		return (EAGAIN);
4347 	}
4348 
4349 	target = atomic_load_consume_ptr(&zp->z_cached_symlink);
4350 	if (target == NULL) {
4351 		return (EAGAIN);
4352 	}
4353 	return (cache_symlink_resolve(v->a_fpl, target, strlen(target)));
4354 }
4355 
4356 #ifndef _SYS_SYSPROTO_H_
4357 struct vop_access_args {
4358 	struct vnode *a_vp;
4359 	accmode_t a_accmode;
4360 	struct ucred *a_cred;
4361 	struct thread *a_td;
4362 };
4363 #endif
4364 
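/*
 * VOP_ACCESS: try the cached fast path for VEXEC first, let ZFS check the
 * VREAD/VWRITE/VEXEC/VAPPEND bits, hand anything else (such as VADMIN) to
 * vaccess(), and finally require at least one execute bit for VEXEC on
 * non-directories.
 */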
4365 static int
4366 zfs_freebsd_access(struct vop_access_args *ap)
4367 {
4368 	vnode_t *vp = ap->a_vp;
4369 	znode_t *zp = VTOZ(vp);
4370 	accmode_t accmode;
4371 	int error = 0;
4372 
4373 
4374 	if (ap->a_accmode == VEXEC) {
4375 		if (zfs_fastaccesschk_execute(zp, ap->a_cred) == 0)
4376 			return (0);
4377 	}
4378 
4379 	/*
4380 	 * ZFS itself only knows about VREAD, VWRITE, VEXEC, and VAPPEND.
4381 	 */
4382 	accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND);
4383 	if (accmode != 0)
4384 		error = zfs_access(zp, accmode, 0, ap->a_cred);
4385 
4386 	/*
4387 	 * VADMIN has to be handled by vaccess().
4388 	 */
4389 	if (error == 0) {
4390 		accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND);
4391 		if (accmode != 0) {
4392 			error = vaccess(vp->v_type, zp->z_mode, zp->z_uid,
4393 			    zp->z_gid, accmode, ap->a_cred);
4394 		}
4395 	}
4396 
4397 	/*
4398 	 * For VEXEC, ensure that at least one execute bit is set for
4399 	 * non-directories.
4400 	 */
4401 	if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR &&
4402 	    (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) {
4403 		error = EACCES;
4404 	}
4405 
4406 	return (error);
4407 }
4408 
4409 #ifndef _SYS_SYSPROTO_H_
4410 struct vop_lookup_args {
4411 	struct vnode *a_dvp;
4412 	struct vnode **a_vpp;
4413 	struct componentname *a_cnp;
4414 };
4415 #endif
4416 
4417 static int
4418 zfs_freebsd_lookup(struct vop_lookup_args *ap, boolean_t cached)
4419 {
4420 	struct componentname *cnp = ap->a_cnp;
4421 	char nm[NAME_MAX + 1];
4422 
4423 	ASSERT3U(cnp->cn_namelen, <, sizeof (nm));
4424 	strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof (nm)));
4425 
4426 	return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop,
4427 	    cnp->cn_cred, 0, cached));
4428 }
4429 
4430 static int
4431 zfs_freebsd_cachedlookup(struct vop_cachedlookup_args *ap)
4432 {
4433 
4434 	return (zfs_freebsd_lookup((struct vop_lookup_args *)ap, B_TRUE));
4435 }
4436 
4437 #ifndef _SYS_SYSPROTO_H_
4438 struct vop_lookup_args {
4439 	struct vnode *a_dvp;
4440 	struct vnode **a_vpp;
4441 	struct componentname *a_cnp;
4442 };
4443 #endif
4444 
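/*
 * VOP_LOOKUP entry point: use the name cache when the dataset allows it
 * (vfs_cache_lookup() calls back into zfs_freebsd_cachedlookup() on a miss);
 * otherwise fall through to an uncached ZFS lookup.
 */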
4445 static int
4446 zfs_cache_lookup(struct vop_lookup_args *ap)
4447 {
4448 	zfsvfs_t *zfsvfs;
4449 
4450 	zfsvfs = ap->a_dvp->v_mount->mnt_data;
4451 	if (zfsvfs->z_use_namecache)
4452 		return (vfs_cache_lookup(ap));
4453 	else
4454 		return (zfs_freebsd_lookup(ap, B_FALSE));
4455 }
4456 
4457 #ifndef _SYS_SYSPROTO_H_
4458 struct vop_create_args {
4459 	struct vnode *a_dvp;
4460 	struct vnode **a_vpp;
4461 	struct componentname *a_cnp;
4462 	struct vattr *a_vap;
4463 };
4464 #endif
4465 
4466 static int
4467 zfs_freebsd_create(struct vop_create_args *ap)
4468 {
4469 	zfsvfs_t *zfsvfs;
4470 	struct componentname *cnp = ap->a_cnp;
4471 	vattr_t *vap = ap->a_vap;
4472 	znode_t *zp = NULL;
4473 	int rc, mode;
4474 
4475 #if __FreeBSD_version < 1400068
4476 	ASSERT(cnp->cn_flags & SAVENAME);
4477 #endif
4478 
4479 	vattr_init_mask(vap);
4480 	mode = vap->va_mode & ALLPERMS;
4481 	zfsvfs = ap->a_dvp->v_mount->mnt_data;
4482 	*ap->a_vpp = NULL;
4483 
4484 	rc = zfs_create(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap, 0, mode,
4485 	    &zp, cnp->cn_cred, 0 /* flag */, NULL /* vsecattr */, NULL);
4486 	if (rc == 0)
4487 		*ap->a_vpp = ZTOV(zp);
4488 	if (zfsvfs->z_use_namecache &&
4489 	    rc == 0 && (cnp->cn_flags & MAKEENTRY) != 0)
4490 		cache_enter(ap->a_dvp, *ap->a_vpp, cnp);
4491 
4492 	return (rc);
4493 }
4494 
4495 #ifndef _SYS_SYSPROTO_H_
4496 struct vop_remove_args {
4497 	struct vnode *a_dvp;
4498 	struct vnode *a_vp;
4499 	struct componentname *a_cnp;
4500 };
4501 #endif
4502 
4503 static int
4504 zfs_freebsd_remove(struct vop_remove_args *ap)
4505 {
4506 
4507 #if __FreeBSD_version < 1400068
4508 	ASSERT(ap->a_cnp->cn_flags & SAVENAME);
4509 #endif
4510 
4511 	return (zfs_remove_(ap->a_dvp, ap->a_vp, ap->a_cnp->cn_nameptr,
4512 	    ap->a_cnp->cn_cred));
4513 }
4514 
4515 #ifndef _SYS_SYSPROTO_H_
4516 struct vop_mkdir_args {
4517 	struct vnode *a_dvp;
4518 	struct vnode **a_vpp;
4519 	struct componentname *a_cnp;
4520 	struct vattr *a_vap;
4521 };
4522 #endif
4523 
4524 static int
4525 zfs_freebsd_mkdir(struct vop_mkdir_args *ap)
4526 {
4527 	vattr_t *vap = ap->a_vap;
4528 	znode_t *zp = NULL;
4529 	int rc;
4530 
4531 #if __FreeBSD_version < 1400068
4532 	ASSERT(ap->a_cnp->cn_flags & SAVENAME);
4533 #endif
4534 
4535 	vattr_init_mask(vap);
4536 	*ap->a_vpp = NULL;
4537 
4538 	rc = zfs_mkdir(VTOZ(ap->a_dvp), ap->a_cnp->cn_nameptr, vap, &zp,
4539 	    ap->a_cnp->cn_cred, 0, NULL, NULL);
4540 
4541 	if (rc == 0)
4542 		*ap->a_vpp = ZTOV(zp);
4543 	return (rc);
4544 }
4545 
4546 #ifndef _SYS_SYSPROTO_H_
4547 struct vop_rmdir_args {
4548 	struct vnode *a_dvp;
4549 	struct vnode *a_vp;
4550 	struct componentname *a_cnp;
4551 };
4552 #endif
4553 
4554 static int
4555 zfs_freebsd_rmdir(struct vop_rmdir_args *ap)
4556 {
4557 	struct componentname *cnp = ap->a_cnp;
4558 
4559 #if __FreeBSD_version < 1400068
4560 	ASSERT(cnp->cn_flags & SAVENAME);
4561 #endif
4562 
4563 	return (zfs_rmdir_(ap->a_dvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred));
4564 }
4565 
4566 #ifndef _SYS_SYSPROTO_H_
4567 struct vop_readdir_args {
4568 	struct vnode *a_vp;
4569 	struct uio *a_uio;
4570 	struct ucred *a_cred;
4571 	int *a_eofflag;
4572 	int *a_ncookies;
4573 	cookie_t **a_cookies;
4574 };
4575 #endif
4576 
4577 static int
4578 zfs_freebsd_readdir(struct vop_readdir_args *ap)
4579 {
4580 	zfs_uio_t uio;
4581 	zfs_uio_init(&uio, ap->a_uio);
4582 	return (zfs_readdir(ap->a_vp, &uio, ap->a_cred, ap->a_eofflag,
4583 	    ap->a_ncookies, ap->a_cookies));
4584 }
4585 
4586 #ifndef _SYS_SYSPROTO_H_
4587 struct vop_fsync_args {
4588 	struct vnode *a_vp;
4589 	int a_waitfor;
4590 	struct thread *a_td;
4591 };
4592 #endif
4593 
4594 static int
4595 zfs_freebsd_fsync(struct vop_fsync_args *ap)
4596 {
4597 
4598 	return (zfs_fsync(VTOZ(ap->a_vp), 0, ap->a_td->td_ucred));
4599 }
4600 
4601 #ifndef _SYS_SYSPROTO_H_
4602 struct vop_getattr_args {
4603 	struct vnode *a_vp;
4604 	struct vattr *a_vap;
4605 	struct ucred *a_cred;
4606 };
4607 #endif
4608 
4609 static int
4610 zfs_freebsd_getattr(struct vop_getattr_args *ap)
4611 {
4612 	vattr_t *vap = ap->a_vap;
4613 	xvattr_t xvap;
4614 	ulong_t fflags = 0;
4615 	int error;
4616 
4617 	xva_init(&xvap);
4618 	xvap.xva_vattr = *vap;
4619 	xvap.xva_vattr.va_mask |= AT_XVATTR;
4620 
4621 	/* Convert chflags into ZFS-type flags. */
4622 	/* XXX: what about SF_SETTABLE? */
4623 	XVA_SET_REQ(&xvap, XAT_IMMUTABLE);
4624 	XVA_SET_REQ(&xvap, XAT_APPENDONLY);
4625 	XVA_SET_REQ(&xvap, XAT_NOUNLINK);
4626 	XVA_SET_REQ(&xvap, XAT_NODUMP);
4627 	XVA_SET_REQ(&xvap, XAT_READONLY);
4628 	XVA_SET_REQ(&xvap, XAT_ARCHIVE);
4629 	XVA_SET_REQ(&xvap, XAT_SYSTEM);
4630 	XVA_SET_REQ(&xvap, XAT_HIDDEN);
4631 	XVA_SET_REQ(&xvap, XAT_REPARSE);
4632 	XVA_SET_REQ(&xvap, XAT_OFFLINE);
4633 	XVA_SET_REQ(&xvap, XAT_SPARSE);
4634 
4635 	error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred);
4636 	if (error != 0)
4637 		return (error);
4638 
4639 	/* Convert ZFS xattr into chflags. */
4640 #define	FLAG_CHECK(fflag, xflag, xfield)	do {			\
4641 	if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0)		\
4642 		fflags |= (fflag);					\
4643 } while (0)
4644 	FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE,
4645 	    xvap.xva_xoptattrs.xoa_immutable);
4646 	FLAG_CHECK(SF_APPEND, XAT_APPENDONLY,
4647 	    xvap.xva_xoptattrs.xoa_appendonly);
4648 	FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK,
4649 	    xvap.xva_xoptattrs.xoa_nounlink);
4650 	FLAG_CHECK(UF_ARCHIVE, XAT_ARCHIVE,
4651 	    xvap.xva_xoptattrs.xoa_archive);
4652 	FLAG_CHECK(UF_NODUMP, XAT_NODUMP,
4653 	    xvap.xva_xoptattrs.xoa_nodump);
4654 	FLAG_CHECK(UF_READONLY, XAT_READONLY,
4655 	    xvap.xva_xoptattrs.xoa_readonly);
4656 	FLAG_CHECK(UF_SYSTEM, XAT_SYSTEM,
4657 	    xvap.xva_xoptattrs.xoa_system);
4658 	FLAG_CHECK(UF_HIDDEN, XAT_HIDDEN,
4659 	    xvap.xva_xoptattrs.xoa_hidden);
4660 	FLAG_CHECK(UF_REPARSE, XAT_REPARSE,
4661 	    xvap.xva_xoptattrs.xoa_reparse);
4662 	FLAG_CHECK(UF_OFFLINE, XAT_OFFLINE,
4663 	    xvap.xva_xoptattrs.xoa_offline);
4664 	FLAG_CHECK(UF_SPARSE, XAT_SPARSE,
4665 	    xvap.xva_xoptattrs.xoa_sparse);
4666 
4667 #undef	FLAG_CHECK
4668 	*vap = xvap.xva_vattr;
4669 	vap->va_flags = fflags;
4670 	return (0);
4671 }
4672 
4673 #ifndef _SYS_SYSPROTO_H_
4674 struct vop_setattr_args {
4675 	struct vnode *a_vp;
4676 	struct vattr *a_vap;
4677 	struct ucred *a_cred;
4678 };
4679 #endif
4680 
4681 static int
4682 zfs_freebsd_setattr(struct vop_setattr_args *ap)
4683 {
4684 	vnode_t *vp = ap->a_vp;
4685 	vattr_t *vap = ap->a_vap;
4686 	cred_t *cred = ap->a_cred;
4687 	xvattr_t xvap;
4688 	ulong_t fflags;
4689 	uint64_t zflags;
4690 
4691 	vattr_init_mask(vap);
4692 	vap->va_mask &= ~AT_NOSET;
4693 
4694 	xva_init(&xvap);
4695 	xvap.xva_vattr = *vap;
4696 
4697 	zflags = VTOZ(vp)->z_pflags;
4698 
4699 	if (vap->va_flags != VNOVAL) {
4700 		zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs;
4701 		int error;
4702 
4703 		if (zfsvfs->z_use_fuids == B_FALSE)
4704 			return (EOPNOTSUPP);
4705 
4706 		fflags = vap->va_flags;
4707 		/*
4708 		 * XXX KDM
4709 		 * We need to figure out whether it makes sense to allow
4710 		 * UF_REPARSE through, since we don't really have other
4711 		 * facilities to handle reparse points and zfs_setattr()
4712 		 * doesn't currently allow setting that attribute anyway.
4713 		 */
4714 		if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_ARCHIVE|
4715 		    UF_NODUMP|UF_SYSTEM|UF_HIDDEN|UF_READONLY|UF_REPARSE|
4716 		    UF_OFFLINE|UF_SPARSE)) != 0)
4717 			return (EOPNOTSUPP);
4718 		/*
4719 		 * Unprivileged processes are not permitted to unset system
4720 		 * flags, or modify flags if any system flags are set.
4721 		 * Privileged non-jail processes may not modify system flags
4722 		 * if securelevel > 0 and any existing system flags are set.
4723 		 * Privileged jail processes behave like privileged non-jail
4724 		 * processes if the PR_ALLOW_CHFLAGS permission bit is set;
4725 		 * otherwise, they behave like unprivileged processes.
4726 		 */
4727 		if (secpolicy_fs_owner(vp->v_mount, cred) == 0 ||
4728 		    priv_check_cred(cred, PRIV_VFS_SYSFLAGS) == 0) {
4729 			if (zflags &
4730 			    (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) {
4731 				error = securelevel_gt(cred, 0);
4732 				if (error != 0)
4733 					return (error);
4734 			}
4735 		} else {
4736 			/*
4737 			 * Callers may only modify the file flags on
4738 			 * objects they have VADMIN rights for.
4739 			 */
4740 			if ((error = VOP_ACCESS(vp, VADMIN, cred,
4741 			    curthread)) != 0)
4742 				return (error);
4743 			if (zflags &
4744 			    (ZFS_IMMUTABLE | ZFS_APPENDONLY |
4745 			    ZFS_NOUNLINK)) {
4746 				return (EPERM);
4747 			}
4748 			if (fflags &
4749 			    (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) {
4750 				return (EPERM);
4751 			}
4752 		}
4753 
4754 #define	FLAG_CHANGE(fflag, zflag, xflag, xfield)	do {		\
4755 	if (((fflags & (fflag)) && !(zflags & (zflag))) ||		\
4756 	    ((zflags & (zflag)) && !(fflags & (fflag)))) {		\
4757 		XVA_SET_REQ(&xvap, (xflag));				\
4758 		(xfield) = ((fflags & (fflag)) != 0);			\
4759 	}								\
4760 } while (0)
4761 		/* Convert chflags into ZFS-type flags. */
4762 		/* XXX: what about SF_SETTABLE? */
4763 		FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE,
4764 		    xvap.xva_xoptattrs.xoa_immutable);
4765 		FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY,
4766 		    xvap.xva_xoptattrs.xoa_appendonly);
4767 		FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK,
4768 		    xvap.xva_xoptattrs.xoa_nounlink);
4769 		FLAG_CHANGE(UF_ARCHIVE, ZFS_ARCHIVE, XAT_ARCHIVE,
4770 		    xvap.xva_xoptattrs.xoa_archive);
4771 		FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP,
4772 		    xvap.xva_xoptattrs.xoa_nodump);
4773 		FLAG_CHANGE(UF_READONLY, ZFS_READONLY, XAT_READONLY,
4774 		    xvap.xva_xoptattrs.xoa_readonly);
4775 		FLAG_CHANGE(UF_SYSTEM, ZFS_SYSTEM, XAT_SYSTEM,
4776 		    xvap.xva_xoptattrs.xoa_system);
4777 		FLAG_CHANGE(UF_HIDDEN, ZFS_HIDDEN, XAT_HIDDEN,
4778 		    xvap.xva_xoptattrs.xoa_hidden);
4779 		FLAG_CHANGE(UF_REPARSE, ZFS_REPARSE, XAT_REPARSE,
4780 		    xvap.xva_xoptattrs.xoa_reparse);
4781 		FLAG_CHANGE(UF_OFFLINE, ZFS_OFFLINE, XAT_OFFLINE,
4782 		    xvap.xva_xoptattrs.xoa_offline);
4783 		FLAG_CHANGE(UF_SPARSE, ZFS_SPARSE, XAT_SPARSE,
4784 		    xvap.xva_xoptattrs.xoa_sparse);
4785 #undef	FLAG_CHANGE
4786 	}
4787 	if (vap->va_birthtime.tv_sec != VNOVAL) {
4788 		xvap.xva_vattr.va_mask |= AT_XVATTR;
4789 		XVA_SET_REQ(&xvap, XAT_CREATETIME);
4790 	}
4791 	return (zfs_setattr(VTOZ(vp), (vattr_t *)&xvap, 0, cred, NULL));
4792 }
4793 
4794 #ifndef _SYS_SYSPROTO_H_
4795 struct vop_rename_args {
4796 	struct vnode *a_fdvp;
4797 	struct vnode *a_fvp;
4798 	struct componentname *a_fcnp;
4799 	struct vnode *a_tdvp;
4800 	struct vnode *a_tvp;
4801 	struct componentname *a_tcnp;
4802 };
4803 #endif
4804 
4805 static int
4806 zfs_freebsd_rename(struct vop_rename_args *ap)
4807 {
4808 	vnode_t *fdvp = ap->a_fdvp;
4809 	vnode_t *fvp = ap->a_fvp;
4810 	vnode_t *tdvp = ap->a_tdvp;
4811 	vnode_t *tvp = ap->a_tvp;
4812 	int error;
4813 
4814 #if __FreeBSD_version < 1400068
4815 	ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART));
4816 	ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART));
4817 #endif
4818 
4819 	error = zfs_do_rename(fdvp, &fvp, ap->a_fcnp, tdvp, &tvp,
4820 	    ap->a_tcnp, ap->a_fcnp->cn_cred);
4821 
4822 	vrele(fdvp);
4823 	vrele(fvp);
4824 	vrele(tdvp);
4825 	if (tvp != NULL)
4826 		vrele(tvp);
4827 
4828 	return (error);
4829 }
4830 
4831 #ifndef _SYS_SYSPROTO_H_
4832 struct vop_symlink_args {
4833 	struct vnode *a_dvp;
4834 	struct vnode **a_vpp;
4835 	struct componentname *a_cnp;
4836 	struct vattr *a_vap;
4837 	char *a_target;
4838 };
4839 #endif
4840 
4841 static int
4842 zfs_freebsd_symlink(struct vop_symlink_args *ap)
4843 {
4844 	struct componentname *cnp = ap->a_cnp;
4845 	vattr_t *vap = ap->a_vap;
4846 	znode_t *zp = NULL;
4847 	char *symlink;
4848 	size_t symlink_len;
4849 	int rc;
4850 
4851 #if __FreeBSD_version < 1400068
4852 	ASSERT(cnp->cn_flags & SAVENAME);
4853 #endif
4854 
4855 	vap->va_type = VLNK;	/* FreeBSD: Syscall only sets va_mode. */
4856 	vattr_init_mask(vap);
4857 	*ap->a_vpp = NULL;
4858 
4859 	rc = zfs_symlink(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap,
4860 	    ap->a_target, &zp, cnp->cn_cred, 0 /* flags */, NULL);
4861 	if (rc == 0) {
4862 		*ap->a_vpp = ZTOV(zp);
4863 		ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
4864 		MPASS(zp->z_cached_symlink == NULL);
4865 		symlink_len = strlen(ap->a_target);
4866 		symlink = cache_symlink_alloc(symlink_len + 1, M_WAITOK);
4867 		if (symlink != NULL) {
4868 			memcpy(symlink, ap->a_target, symlink_len);
4869 			symlink[symlink_len] = '\0';
4870 			atomic_store_rel_ptr((uintptr_t *)&zp->z_cached_symlink,
4871 			    (uintptr_t)symlink);
4872 		}
4873 	}
4874 	return (rc);
4875 }
4876 
4877 #ifndef _SYS_SYSPROTO_H_
4878 struct vop_readlink_args {
4879 	struct vnode *a_vp;
4880 	struct uio *a_uio;
4881 	struct ucred *a_cred;
4882 };
4883 #endif
4884 
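/*
 * VOP_READLINK: read the link target and, when the destination buffer is a
 * single kernel-space iovec, opportunistically populate z_cached_symlink for
 * the lockless fast path.
 */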
4885 static int
4886 zfs_freebsd_readlink(struct vop_readlink_args *ap)
4887 {
4888 	zfs_uio_t uio;
4889 	int error;
4890 	znode_t	*zp = VTOZ(ap->a_vp);
4891 	char *symlink, *base;
4892 	size_t symlink_len;
4893 	bool trycache;
4894 
4895 	zfs_uio_init(&uio, ap->a_uio);
4896 	trycache = false;
4897 	if (zfs_uio_segflg(&uio) == UIO_SYSSPACE &&
4898 	    zfs_uio_iovcnt(&uio) == 1) {
4899 		base = zfs_uio_iovbase(&uio, 0);
4900 		symlink_len = zfs_uio_iovlen(&uio, 0);
4901 		trycache = true;
4902 	}
4903 	error = zfs_readlink(ap->a_vp, &uio, ap->a_cred, NULL);
4904 	if (atomic_load_ptr(&zp->z_cached_symlink) != NULL ||
4905 	    error != 0 || !trycache) {
4906 		return (error);
4907 	}
4908 	symlink_len -= zfs_uio_resid(&uio);
4909 	symlink = cache_symlink_alloc(symlink_len + 1, M_WAITOK);
4910 	if (symlink != NULL) {
4911 		memcpy(symlink, base, symlink_len);
4912 		symlink[symlink_len] = '\0';
4913 		if (!atomic_cmpset_rel_ptr((uintptr_t *)&zp->z_cached_symlink,
4914 		    (uintptr_t)NULL, (uintptr_t)symlink)) {
4915 			cache_symlink_free(symlink, symlink_len + 1);
4916 		}
4917 	}
4918 	return (error);
4919 }
4920 
4921 #ifndef _SYS_SYSPROTO_H_
4922 struct vop_link_args {
4923 	struct vnode *a_tdvp;
4924 	struct vnode *a_vp;
4925 	struct componentname *a_cnp;
4926 };
4927 #endif
4928 
4929 static int
4930 zfs_freebsd_link(struct vop_link_args *ap)
4931 {
4932 	struct componentname *cnp = ap->a_cnp;
4933 	vnode_t *vp = ap->a_vp;
4934 	vnode_t *tdvp = ap->a_tdvp;
4935 
4936 	if (tdvp->v_mount != vp->v_mount)
4937 		return (EXDEV);
4938 
4939 #if __FreeBSD_version < 1400068
4940 	ASSERT(cnp->cn_flags & SAVENAME);
4941 #endif
4942 
4943 	return (zfs_link(VTOZ(tdvp), VTOZ(vp),
4944 	    cnp->cn_nameptr, cnp->cn_cred, 0));
4945 }
4946 
4947 #ifndef _SYS_SYSPROTO_H_
4948 struct vop_inactive_args {
4949 	struct vnode *a_vp;
4950 	struct thread *a_td;
4951 };
4952 #endif
4953 
4954 static int
4955 zfs_freebsd_inactive(struct vop_inactive_args *ap)
4956 {
4957 	vnode_t *vp = ap->a_vp;
4958 
4959 	zfs_inactive(vp, curthread->td_ucred, NULL);
4960 	return (0);
4961 }
4962 
4963 #ifndef _SYS_SYSPROTO_H_
4964 struct vop_need_inactive_args {
4965 	struct vnode *a_vp;
4966 	struct thread *a_td;
4967 };
4968 #endif
4969 
4970 static int
4971 zfs_freebsd_need_inactive(struct vop_need_inactive_args *ap)
4972 {
4973 	vnode_t *vp = ap->a_vp;
4974 	znode_t	*zp = VTOZ(vp);
4975 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
4976 	int need;
4977 
4978 	if (vn_need_pageq_flush(vp))
4979 		return (1);
4980 
4981 	if (!ZFS_TEARDOWN_INACTIVE_TRY_ENTER_READ(zfsvfs))
4982 		return (1);
4983 	need = (zp->z_sa_hdl == NULL || zp->z_unlinked || zp->z_atime_dirty);
4984 	ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
4985 
4986 	return (need);
4987 }
4988 
4989 #ifndef _SYS_SYSPROTO_H_
4990 struct vop_reclaim_args {
4991 	struct vnode *a_vp;
4992 	struct thread *a_td;
4993 };
4994 #endif
4995 
4996 static int
4997 zfs_freebsd_reclaim(struct vop_reclaim_args *ap)
4998 {
4999 	vnode_t	*vp = ap->a_vp;
5000 	znode_t	*zp = VTOZ(vp);
5001 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
5002 
5003 	ASSERT3P(zp, !=, NULL);
5004 
5005 	/*
5006 	 * z_teardown_inactive_lock protects from a race with
5007 	 * zfs_znode_dmu_fini in zfsvfs_teardown during
5008 	 * force unmount.
5009 	 */
5010 	ZFS_TEARDOWN_INACTIVE_ENTER_READ(zfsvfs);
5011 	if (zp->z_sa_hdl == NULL)
5012 		zfs_znode_free(zp);
5013 	else
5014 		zfs_zinactive(zp);
5015 	ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
5016 
5017 	vp->v_data = NULL;
5018 	return (0);
5019 }
5020 
5021 #ifndef _SYS_SYSPROTO_H_
5022 struct vop_fid_args {
5023 	struct vnode *a_vp;
5024 	struct fid *a_fid;
5025 };
5026 #endif
5027 
5028 static int
5029 zfs_freebsd_fid(struct vop_fid_args *ap)
5030 {
5031 
5032 	return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL));
5033 }
5034 
5035 
5036 #ifndef _SYS_SYSPROTO_H_
5037 struct vop_pathconf_args {
5038 	struct vnode *a_vp;
5039 	int a_name;
5040 	register_t *a_retval;
5041 } *ap;
5042 #endif
5043 
5044 static int
5045 zfs_freebsd_pathconf(struct vop_pathconf_args *ap)
5046 {
5047 	ulong_t val;
5048 	int error;
5049 
5050 	error = zfs_pathconf(ap->a_vp, ap->a_name, &val,
5051 	    curthread->td_ucred, NULL);
5052 	if (error == 0) {
5053 		*ap->a_retval = val;
5054 		return (error);
5055 	}
5056 	if (error != EOPNOTSUPP)
5057 		return (error);
5058 
5059 	switch (ap->a_name) {
5060 	case _PC_NAME_MAX:
5061 		*ap->a_retval = NAME_MAX;
5062 		return (0);
5063 #if __FreeBSD_version >= 1400032
5064 	case _PC_DEALLOC_PRESENT:
5065 		*ap->a_retval = 1;
5066 		return (0);
5067 #endif
5068 	case _PC_PIPE_BUF:
5069 		if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) {
5070 			*ap->a_retval = PIPE_BUF;
5071 			return (0);
5072 		}
5073 		return (EINVAL);
5074 	default:
5075 		return (vop_stdpathconf(ap));
5076 	}
5077 }
5078 
5079 static int zfs_xattr_compat = 1;
5080 
5081 static int
5082 zfs_check_attrname(const char *name)
5083 {
5084 	/* We don't allow the '/' character in attribute names. */
5085 	if (strchr(name, '/') != NULL)
5086 		return (SET_ERROR(EINVAL));
5087 	/* We don't allow attribute names that start with a namespace prefix. */
5088 	if (ZFS_XA_NS_PREFIX_FORBIDDEN(name))
5089 		return (SET_ERROR(EINVAL));
5090 	return (0);
5091 }
5092 
5093 /*
5094  * FreeBSD's extended attributes namespace defines file name prefix for ZFS'
5095  * extended attribute name:
5096  *
5097  *	NAMESPACE	XATTR_COMPAT	PREFIX
5098  *	system		*		freebsd:system:
5099  *	user		1		(none, can be used to access ZFS
5100  *					fsattr(5) attributes created on Solaris)
5101  *	user		0		user.
5102  */
5103 static int
5104 zfs_create_attrname(int attrnamespace, const char *name, char *attrname,
5105     size_t size, boolean_t compat)
5106 {
5107 	const char *namespace, *prefix, *suffix;
5108 
5109 	memset(attrname, 0, size);
5110 
5111 	switch (attrnamespace) {
5112 	case EXTATTR_NAMESPACE_USER:
5113 		if (compat) {
5114 			/*
5115 			 * This is the default namespace by which we can access
5116 			 * all attributes created on Solaris.
5117 			 */
5118 			prefix = namespace = suffix = "";
5119 		} else {
5120 			/*
5121 			 * This is compatible with the user namespace encoding
5122 			 * on Linux prior to xattr_compat, but nothing
5123 			 * else.
5124 			 */
5125 			prefix = "";
5126 			namespace = "user";
5127 			suffix = ".";
5128 		}
5129 		break;
5130 	case EXTATTR_NAMESPACE_SYSTEM:
5131 		prefix = "freebsd:";
5132 		namespace = EXTATTR_NAMESPACE_SYSTEM_STRING;
5133 		suffix = ":";
5134 		break;
5135 	case EXTATTR_NAMESPACE_EMPTY:
5136 	default:
5137 		return (SET_ERROR(EINVAL));
5138 	}
5139 	if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix,
5140 	    name) >= size) {
5141 		return (SET_ERROR(ENAMETOOLONG));
5142 	}
5143 	return (0);
5144 }
5145 
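/*
 * Make sure zp->z_xattr_cached (the nvlist of SA-based extended attributes)
 * is loaded.  The caller must hold z_xattr_lock; a reader holder may have
 * the lock transiently dropped and re-acquired as writer, after which it is
 * downgraded back to reader.
 */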
5146 static int
5147 zfs_ensure_xattr_cached(znode_t *zp)
5148 {
5149 	int error = 0;
5150 
5151 	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
5152 
5153 	if (zp->z_xattr_cached != NULL)
5154 		return (0);
5155 
5156 	if (rw_write_held(&zp->z_xattr_lock))
5157 		return (zfs_sa_get_xattr(zp));
5158 
5159 	if (!rw_tryupgrade(&zp->z_xattr_lock)) {
5160 		rw_exit(&zp->z_xattr_lock);
5161 		rw_enter(&zp->z_xattr_lock, RW_WRITER);
5162 	}
5163 	if (zp->z_xattr_cached == NULL)
5164 		error = zfs_sa_get_xattr(zp);
5165 	rw_downgrade(&zp->z_xattr_lock);
5166 	return (error);
5167 }
5168 
5169 #ifndef _SYS_SYSPROTO_H_
5170 struct vop_getextattr {
5171 	IN struct vnode *a_vp;
5172 	IN int a_attrnamespace;
5173 	IN const char *a_name;
5174 	INOUT struct uio *a_uio;
5175 	OUT size_t *a_size;
5176 	IN struct ucred *a_cred;
5177 	IN struct thread *a_td;
5178 };
5179 #endif
5180 
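/*
 * Read a directory-based extended attribute: look up the hidden xattr
 * directory, open the attribute file under it, and either report its size
 * or copy its contents into the caller's uio.
 */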
5181 static int
5182 zfs_getextattr_dir(struct vop_getextattr_args *ap, const char *attrname)
5183 {
5184 	struct thread *td = ap->a_td;
5185 	struct nameidata nd;
5186 	struct vattr va;
5187 	vnode_t *xvp = NULL, *vp;
5188 	int error, flags;
5189 
5190 	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
5191 	    LOOKUP_XATTR, B_FALSE);
5192 	if (error != 0)
5193 		return (error);
5194 
5195 	flags = FREAD;
5196 #if __FreeBSD_version < 1400043
5197 	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname,
5198 	    xvp, td);
5199 #else
5200 	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp);
5201 #endif
5202 	error = vn_open_cred(&nd, &flags, 0, VN_OPEN_INVFS, ap->a_cred, NULL);
5203 	if (error != 0)
5204 		return (SET_ERROR(error));
5205 	vp = nd.ni_vp;
5206 	NDFREE_PNBUF(&nd);
5207 
5208 	if (ap->a_size != NULL) {
5209 		error = VOP_GETATTR(vp, &va, ap->a_cred);
5210 		if (error == 0)
5211 			*ap->a_size = (size_t)va.va_size;
5212 	} else if (ap->a_uio != NULL)
5213 		error = VOP_READ(vp, ap->a_uio, IO_UNIT, ap->a_cred);
5214 
5215 	VOP_UNLOCK(vp);
5216 	vn_close(vp, flags, ap->a_cred, td);
5217 	return (error);
5218 }
5219 
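/*
 * Read an SA-based (System Attribute) extended attribute from the cached
 * nvlist and either report its size or copy it into the caller's uio.
 */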
5220 static int
5221 zfs_getextattr_sa(struct vop_getextattr_args *ap, const char *attrname)
5222 {
5223 	znode_t *zp = VTOZ(ap->a_vp);
5224 	uchar_t *nv_value;
5225 	uint_t nv_size;
5226 	int error;
5227 
5228 	error = zfs_ensure_xattr_cached(zp);
5229 	if (error != 0)
5230 		return (error);
5231 
5232 	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
5233 	ASSERT3P(zp->z_xattr_cached, !=, NULL);
5234 
5235 	error = nvlist_lookup_byte_array(zp->z_xattr_cached, attrname,
5236 	    &nv_value, &nv_size);
5237 	if (error != 0)
5238 		return (SET_ERROR(error));
5239 
5240 	if (ap->a_size != NULL)
5241 		*ap->a_size = nv_size;
5242 	else if (ap->a_uio != NULL)
5243 		error = uiomove(nv_value, nv_size, ap->a_uio);
5244 	if (error != 0)
5245 		return (SET_ERROR(error));
5246 
5247 	return (0);
5248 }
5249 
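/*
 * Look up a named attribute in the SA-based store first (when enabled) and
 * fall back to the directory-based store on ENOENT.
 */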
5250 static int
5251 zfs_getextattr_impl(struct vop_getextattr_args *ap, boolean_t compat)
5252 {
5253 	znode_t *zp = VTOZ(ap->a_vp);
5254 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
5255 	char attrname[EXTATTR_MAXNAMELEN+1];
5256 	int error;
5257 
5258 	error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
5259 	    sizeof (attrname), compat);
5260 	if (error != 0)
5261 		return (error);
5262 
5263 	error = ENOENT;
5264 	if (zfsvfs->z_use_sa && zp->z_is_sa)
5265 		error = zfs_getextattr_sa(ap, attrname);
5266 	if (error == ENOENT)
5267 		error = zfs_getextattr_dir(ap, attrname);
5268 	return (error);
5269 }
5270 
5271 /*
5272  * Vnode operation to retrieve a named extended attribute.
5273  */
5274 static int
5275 zfs_getextattr(struct vop_getextattr_args *ap)
5276 {
5277 	znode_t *zp = VTOZ(ap->a_vp);
5278 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
5279 	int error;
5280 
5281 	/*
5282 	 * If the xattr property is off, refuse the request.
5283 	 */
5284 	if (!(zfsvfs->z_flags & ZSB_XATTR))
5285 		return (SET_ERROR(EOPNOTSUPP));
5286 
5287 	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5288 	    ap->a_cred, ap->a_td, VREAD);
5289 	if (error != 0)
5290 		return (SET_ERROR(error));
5291 
5292 	error = zfs_check_attrname(ap->a_name);
5293 	if (error != 0)
5294 		return (error);
5295 
5296 	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
5297 		return (error);
5298 	error = ENOENT;
5299 	rw_enter(&zp->z_xattr_lock, RW_READER);
5300 
5301 	error = zfs_getextattr_impl(ap, zfs_xattr_compat);
5302 	if ((error == ENOENT || error == ENOATTR) &&
5303 	    ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) {
5304 		/*
5305 		 * Fall back to the alternate namespace format if we failed to
5306 		 * find a user xattr.
5307 		 */
5308 		error = zfs_getextattr_impl(ap, !zfs_xattr_compat);
5309 	}
5310 
5311 	rw_exit(&zp->z_xattr_lock);
5312 	zfs_exit(zfsvfs, FTAG);
5313 	if (error == ENOENT)
5314 		error = SET_ERROR(ENOATTR);
5315 	return (error);
5316 }
5317 
5318 #ifndef _SYS_SYSPROTO_H_
5319 struct vop_deleteextattr {
5320 	IN struct vnode *a_vp;
5321 	IN int a_attrnamespace;
5322 	IN const char *a_name;
5323 	IN struct ucred *a_cred;
5324 	IN struct thread *a_td;
5325 };
5326 #endif
5327 
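/*
 * Remove a directory-based extended attribute by unlinking the attribute
 * file from the hidden xattr directory.
 */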
5328 static int
5329 zfs_deleteextattr_dir(struct vop_deleteextattr_args *ap, const char *attrname)
5330 {
5331 	struct nameidata nd;
5332 	vnode_t *xvp = NULL, *vp;
5333 	int error;
5334 
5335 	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
5336 	    LOOKUP_XATTR, B_FALSE);
5337 	if (error != 0)
5338 		return (error);
5339 
5340 #if __FreeBSD_version < 1400043
5341 	NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF,
5342 	    UIO_SYSSPACE, attrname, xvp, ap->a_td);
5343 #else
5344 	NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF,
5345 	    UIO_SYSSPACE, attrname, xvp);
5346 #endif
5347 	error = namei(&nd);
5348 	if (error != 0)
5349 		return (SET_ERROR(error));
5350 
5351 	vp = nd.ni_vp;
5352 	error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
5353 	NDFREE_PNBUF(&nd);
5354 
5355 	vput(nd.ni_dvp);
5356 	if (vp == nd.ni_dvp)
5357 		vrele(vp);
5358 	else
5359 		vput(vp);
5360 
5361 	return (error);
5362 }
5363 
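/*
 * Remove an SA-based extended attribute from the cached nvlist and write the
 * updated nvlist back; on failure the cached nvlist is discarded so it will
 * be reloaded from disk.
 */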
5364 static int
5365 zfs_deleteextattr_sa(struct vop_deleteextattr_args *ap, const char *attrname)
5366 {
5367 	znode_t *zp = VTOZ(ap->a_vp);
5368 	nvlist_t *nvl;
5369 	int error;
5370 
5371 	error = zfs_ensure_xattr_cached(zp);
5372 	if (error != 0)
5373 		return (error);
5374 
5375 	ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock));
5376 	ASSERT3P(zp->z_xattr_cached, !=, NULL);
5377 
5378 	nvl = zp->z_xattr_cached;
5379 	error = nvlist_remove(nvl, attrname, DATA_TYPE_BYTE_ARRAY);
5380 	if (error != 0)
5381 		error = SET_ERROR(error);
5382 	else
5383 		error = zfs_sa_set_xattr(zp, attrname, NULL, 0);
5384 	if (error != 0) {
5385 		zp->z_xattr_cached = NULL;
5386 		nvlist_free(nvl);
5387 	}
5388 	return (error);
5389 }
5390 
5391 static int
5392 zfs_deleteextattr_impl(struct vop_deleteextattr_args *ap, boolean_t compat)
5393 {
5394 	znode_t *zp = VTOZ(ap->a_vp);
5395 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
5396 	char attrname[EXTATTR_MAXNAMELEN+1];
5397 	int error;
5398 
5399 	error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
5400 	    sizeof (attrname), compat);
5401 	if (error != 0)
5402 		return (error);
5403 
5404 	error = ENOENT;
5405 	if (zfsvfs->z_use_sa && zp->z_is_sa)
5406 		error = zfs_deleteextattr_sa(ap, attrname);
5407 	if (error == ENOENT)
5408 		error = zfs_deleteextattr_dir(ap, attrname);
5409 	return (error);
5410 }
5411 
5412 /*
5413  * Vnode operation to remove a named attribute.
5414  */
5415 static int
5416 zfs_deleteextattr(struct vop_deleteextattr_args *ap)
5417 {
5418 	znode_t *zp = VTOZ(ap->a_vp);
5419 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
5420 	int error;
5421 
5422 	/*
5423 	 * If the xattr property is off, refuse the request.
5424 	 */
5425 	if (!(zfsvfs->z_flags & ZSB_XATTR))
5426 		return (SET_ERROR(EOPNOTSUPP));
5427 
5428 	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5429 	    ap->a_cred, ap->a_td, VWRITE);
5430 	if (error != 0)
5431 		return (SET_ERROR(error));
5432 
5433 	error = zfs_check_attrname(ap->a_name);
5434 	if (error != 0)
5435 		return (error);
5436 
5437 	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
5438 		return (error);
5439 	rw_enter(&zp->z_xattr_lock, RW_WRITER);
5440 
5441 	error = zfs_deleteextattr_impl(ap, zfs_xattr_compat);
5442 	if ((error == ENOENT || error == ENOATTR) &&
5443 	    ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) {
5444 		/*
5445 		 * Fall back to the alternate namespace format if we failed to
5446 		 * find a user xattr.
5447 		 */
5448 		error = zfs_deleteextattr_impl(ap, !zfs_xattr_compat);
5449 	}
5450 
5451 	rw_exit(&zp->z_xattr_lock);
5452 	zfs_exit(zfsvfs, FTAG);
5453 	if (error == ENOENT)
5454 		error = SET_ERROR(ENOATTR);
5455 	return (error);
5456 }
5457 
5458 #ifndef _SYS_SYSPROTO_H_
5459 struct vop_setextattr {
5460 	IN struct vnode *a_vp;
5461 	IN int a_attrnamespace;
5462 	IN const char *a_name;
5463 	INOUT struct uio *a_uio;
5464 	IN struct ucred *a_cred;
5465 	IN struct thread *a_td;
5466 };
5467 #endif
5468 
5469 static int
5470 zfs_setextattr_dir(struct vop_setextattr_args *ap, const char *attrname)
5471 {
5472 	struct thread *td = ap->a_td;
5473 	struct nameidata nd;
5474 	struct vattr va;
5475 	vnode_t *xvp = NULL, *vp;
5476 	int error, flags;
5477 
5478 	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
5479 	    LOOKUP_XATTR | CREATE_XATTR_DIR, B_FALSE);
5480 	if (error != 0)
5481 		return (error);
5482 
5483 	flags = FFLAGS(O_WRONLY | O_CREAT);
5484 #if __FreeBSD_version < 1400043
5485 	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp, td);
5486 #else
5487 	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp);
5488 #endif
5489 	error = vn_open_cred(&nd, &flags, 0600, VN_OPEN_INVFS, ap->a_cred,
5490 	    NULL);
5491 	if (error != 0)
5492 		return (SET_ERROR(error));
5493 	vp = nd.ni_vp;
5494 	NDFREE_PNBUF(&nd);
5495 
5496 	VATTR_NULL(&va);
5497 	va.va_size = 0;
5498 	error = VOP_SETATTR(vp, &va, ap->a_cred);
5499 	if (error == 0)
5500 		VOP_WRITE(vp, ap->a_uio, IO_UNIT, ap->a_cred);
5501 
5502 	VOP_UNLOCK(vp);
5503 	vn_close(vp, flags, ap->a_cred, td);
5504 	return (error);
5505 }
5506 
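/*
 * Store an extended attribute in the SA-based nvlist, enforcing the
 * per-entry (DXATTR_MAX_ENTRY_SIZE) and total SA (DXATTR_MAX_SA_SIZE)
 * limits before copying the value in and syncing the nvlist back to disk.
 */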
5507 static int
5508 zfs_setextattr_sa(struct vop_setextattr_args *ap, const char *attrname)
5509 {
5510 	znode_t *zp = VTOZ(ap->a_vp);
5511 	nvlist_t *nvl;
5512 	size_t sa_size;
5513 	int error;
5514 
5515 	error = zfs_ensure_xattr_cached(zp);
5516 	if (error != 0)
5517 		return (error);
5518 
5519 	ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock));
5520 	ASSERT3P(zp->z_xattr_cached, !=, NULL);
5521 
5522 	nvl = zp->z_xattr_cached;
5523 	size_t entry_size = ap->a_uio->uio_resid;
5524 	if (entry_size > DXATTR_MAX_ENTRY_SIZE)
5525 		return (SET_ERROR(EFBIG));
5526 	error = nvlist_size(nvl, &sa_size, NV_ENCODE_XDR);
5527 	if (error != 0)
5528 		return (SET_ERROR(error));
5529 	if (sa_size > DXATTR_MAX_SA_SIZE)
5530 		return (SET_ERROR(EFBIG));
5531 	uchar_t *buf = kmem_alloc(entry_size, KM_SLEEP);
5532 	error = uiomove(buf, entry_size, ap->a_uio);
5533 	if (error != 0) {
5534 		error = SET_ERROR(error);
5535 	} else {
5536 		error = nvlist_add_byte_array(nvl, attrname, buf, entry_size);
5537 		if (error != 0)
5538 			error = SET_ERROR(error);
5539 	}
5540 	if (error == 0)
5541 		error = zfs_sa_set_xattr(zp, attrname, buf, entry_size);
5542 	kmem_free(buf, entry_size);
5543 	if (error != 0) {
5544 		zp->z_xattr_cached = NULL;
5545 		nvlist_free(nvl);
5546 	}
5547 	return (error);
5548 }
5549 
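/*
 * Store a named attribute, preferring SA-based storage when the dataset
 * allows it and falling back to the directory-based store.  Whichever copy
 * was not updated is deleted so only one representation of the attribute
 * survives, and for the user namespace any alternate-format copies are
 * removed as well.
 */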
5550 static int
5551 zfs_setextattr_impl(struct vop_setextattr_args *ap, boolean_t compat)
5552 {
5553 	znode_t *zp = VTOZ(ap->a_vp);
5554 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
5555 	char attrname[EXTATTR_MAXNAMELEN+1];
5556 	int error;
5557 
5558 	error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
5559 	    sizeof (attrname), compat);
5560 	if (error != 0)
5561 		return (error);
5562 
5563 	struct vop_deleteextattr_args vda = {
5564 		.a_vp = ap->a_vp,
5565 		.a_attrnamespace = ap->a_attrnamespace,
5566 		.a_name = ap->a_name,
5567 		.a_cred = ap->a_cred,
5568 		.a_td = ap->a_td,
5569 	};
5570 	error = ENOENT;
5571 	if (zfsvfs->z_use_sa && zp->z_is_sa && zfsvfs->z_xattr_sa) {
5572 		error = zfs_setextattr_sa(ap, attrname);
5573 		if (error == 0) {
5574 			/*
5575 			 * The attribute was stored in the SA; remove any
5576 			 * directory-based copy that may still exist.
5577 			 */
5578 			zfs_deleteextattr_dir(&vda, attrname);
5579 		}
5580 	}
5581 	if (error != 0) {
5582 		error = zfs_setextattr_dir(ap, attrname);
5583 		if (error == 0 && zp->z_is_sa) {
5584 			/*
5585 			 * The attribute was stored in the directory; remove
5586 			 * any SA-based copy that may still exist.
5587 			 */
5588 			zfs_deleteextattr_sa(&vda, attrname);
5589 		}
5590 	}
5591 	if (error == 0 && ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) {
5592 		/*
5593 		 * Also clear all versions of the alternate compat name.
5594 		 */
5595 		zfs_deleteextattr_impl(&vda, !compat);
5596 	}
5597 	return (error);
5598 }
5599 
5600 /*
5601  * Vnode operation to set a named attribute.
5602  */
5603 static int
5604 zfs_setextattr(struct vop_setextattr_args *ap)
5605 {
5606 	znode_t *zp = VTOZ(ap->a_vp);
5607 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
5608 	int error;
5609 
5610 	/*
5611 	 * If the xattr property is off, refuse the request.
5612 	 */
5613 	if (!(zfsvfs->z_flags & ZSB_XATTR))
5614 		return (SET_ERROR(EOPNOTSUPP));
5615 
5616 	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5617 	    ap->a_cred, ap->a_td, VWRITE);
5618 	if (error != 0)
5619 		return (SET_ERROR(error));
5620 
5621 	error = zfs_check_attrname(ap->a_name);
5622 	if (error != 0)
5623 		return (error);
5624 
5625 	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
5626 		return (error);
5627 	rw_enter(&zp->z_xattr_lock, RW_WRITER);
5628 
5629 	error = zfs_setextattr_impl(ap, zfs_xattr_compat);
5630 
5631 	rw_exit(&zp->z_xattr_lock);
5632 	zfs_exit(zfsvfs, FTAG);
5633 	return (error);
5634 }
5635 
5636 #ifndef _SYS_SYSPROTO_H_
5637 struct vop_listextattr {
5638 	IN struct vnode *a_vp;
5639 	IN int a_attrnamespace;
5640 	INOUT struct uio *a_uio;
5641 	OUT size_t *a_size;
5642 	IN struct ucred *a_cred;
5643 	IN struct thread *a_td;
5644 };
5645 #endif
5646 
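/*
 * List directory-based extended attributes: read the hidden xattr directory
 * and emit, for every entry matching the requested prefix, one length byte
 * followed by the attribute name with the prefix stripped.
 */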
5647 static int
5648 zfs_listextattr_dir(struct vop_listextattr_args *ap, const char *attrprefix)
5649 {
5650 	struct thread *td = ap->a_td;
5651 	struct nameidata nd;
5652 	uint8_t dirbuf[sizeof (struct dirent)];
5653 	struct iovec aiov;
5654 	struct uio auio;
5655 	vnode_t *xvp = NULL, *vp;
5656 	int error, eof;
5657 
5658 	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
5659 	    LOOKUP_XATTR, B_FALSE);
5660 	if (error != 0) {
5661 		/*
5662 		 * ENOATTR means that the EA directory does not yet exist,
5663 		 * i.e. there are no extended attributes there.
5664 		 */
5665 		if (error == ENOATTR)
5666 			error = 0;
5667 		return (error);
5668 	}
5669 
5670 #if __FreeBSD_version < 1400043
5671 	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED,
5672 	    UIO_SYSSPACE, ".", xvp, td);
5673 #else
5674 	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED,
5675 	    UIO_SYSSPACE, ".", xvp);
5676 #endif
5677 	error = namei(&nd);
5678 	if (error != 0)
5679 		return (SET_ERROR(error));
5680 	vp = nd.ni_vp;
5681 	NDFREE_PNBUF(&nd);
5682 
5683 	auio.uio_iov = &aiov;
5684 	auio.uio_iovcnt = 1;
5685 	auio.uio_segflg = UIO_SYSSPACE;
5686 	auio.uio_td = td;
5687 	auio.uio_rw = UIO_READ;
5688 	auio.uio_offset = 0;
5689 
5690 	size_t plen = strlen(attrprefix);
5691 
5692 	do {
5693 		aiov.iov_base = (void *)dirbuf;
5694 		aiov.iov_len = sizeof (dirbuf);
5695 		auio.uio_resid = sizeof (dirbuf);
5696 		error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL);
5697 		if (error != 0)
5698 			break;
5699 		int done = sizeof (dirbuf) - auio.uio_resid;
5700 		for (int pos = 0; pos < done; ) {
5701 			struct dirent *dp = (struct dirent *)(dirbuf + pos);
5702 			pos += dp->d_reclen;
5703 			/*
5704 			 * XXX: Temporarily we also accept DT_UNKNOWN, as this
5705 			 * is what we get when the attribute was created on Solaris.
5706 			 */
5707 			if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN)
5708 				continue;
5709 			else if (plen == 0 &&
5710 			    ZFS_XA_NS_PREFIX_FORBIDDEN(dp->d_name))
5711 				continue;
5712 			else if (strncmp(dp->d_name, attrprefix, plen) != 0)
5713 				continue;
5714 			uint8_t nlen = dp->d_namlen - plen;
5715 			if (ap->a_size != NULL) {
5716 				*ap->a_size += 1 + nlen;
5717 			} else if (ap->a_uio != NULL) {
5718 				/*
5719 				 * Format of extattr name entry is one byte for
5720 				 * length and the rest for name.
5721 				 */
5722 				error = uiomove(&nlen, 1, ap->a_uio);
5723 				if (error == 0) {
5724 					char *namep = dp->d_name + plen;
5725 					error = uiomove(namep, nlen, ap->a_uio);
5726 				}
5727 				if (error != 0) {
5728 					error = SET_ERROR(error);
5729 					break;
5730 				}
5731 			}
5732 		}
5733 	} while (!eof && error == 0);
5734 
5735 	vput(vp);
5736 	return (error);
5737 }
5738 
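/*
 * List SA-based extended attributes from the cached nvlist in the same
 * one-byte-length-plus-name format used for the directory-based store.
 */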
5739 static int
5740 zfs_listextattr_sa(struct vop_listextattr_args *ap, const char *attrprefix)
5741 {
5742 	znode_t *zp = VTOZ(ap->a_vp);
5743 	int error;
5744 
5745 	error = zfs_ensure_xattr_cached(zp);
5746 	if (error != 0)
5747 		return (error);
5748 
5749 	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
5750 	ASSERT3P(zp->z_xattr_cached, !=, NULL);
5751 
5752 	size_t plen = strlen(attrprefix);
5753 	nvpair_t *nvp = NULL;
5754 	while ((nvp = nvlist_next_nvpair(zp->z_xattr_cached, nvp)) != NULL) {
5755 		ASSERT3U(nvpair_type(nvp), ==, DATA_TYPE_BYTE_ARRAY);
5756 
5757 		const char *name = nvpair_name(nvp);
5758 		if (plen == 0 && ZFS_XA_NS_PREFIX_FORBIDDEN(name))
5759 			continue;
5760 		else if (strncmp(name, attrprefix, plen) != 0)
5761 			continue;
5762 		uint8_t nlen = strlen(name) - plen;
5763 		if (ap->a_size != NULL) {
5764 			*ap->a_size += 1 + nlen;
5765 		} else if (ap->a_uio != NULL) {
5766 			/*
5767 			 * Format of extattr name entry is one byte for
5768 			 * length and the rest for name.
5769 			 */
5770 			error = uiomove(&nlen, 1, ap->a_uio);
5771 			if (error == 0) {
5772 				char *namep = __DECONST(char *, name) + plen;
5773 				error = uiomove(namep, nlen, ap->a_uio);
5774 			}
5775 			if (error != 0) {
5776 				error = SET_ERROR(error);
5777 				break;
5778 			}
5779 		}
5780 	}
5781 
5782 	return (error);
5783 }
5784 
5785 static int
5786 zfs_listextattr_impl(struct vop_listextattr_args *ap, boolean_t compat)
5787 {
5788 	znode_t *zp = VTOZ(ap->a_vp);
5789 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
5790 	char attrprefix[16];
5791 	int error;
5792 
5793 	error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix,
5794 	    sizeof (attrprefix), compat);
5795 	if (error != 0)
5796 		return (error);
5797 
5798 	if (zfsvfs->z_use_sa && zp->z_is_sa)
5799 		error = zfs_listextattr_sa(ap, attrprefix);
5800 	if (error == 0)
5801 		error = zfs_listextattr_dir(ap, attrprefix);
5802 	return (error);
5803 }
5804 
5805 /*
5806  * Vnode operation to retrieve extended attributes on a vnode.
5807  */
5808 static int
5809 zfs_listextattr(struct vop_listextattr_args *ap)
5810 {
5811 	znode_t *zp = VTOZ(ap->a_vp);
5812 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
5813 	int error;
5814 
5815 	if (ap->a_size != NULL)
5816 		*ap->a_size = 0;
5817 
5818 	/*
5819 	 * If the xattr property is off, refuse the request.
5820 	 */
5821 	if (!(zfsvfs->z_flags & ZSB_XATTR))
5822 		return (SET_ERROR(EOPNOTSUPP));
5823 
5824 	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5825 	    ap->a_cred, ap->a_td, VREAD);
5826 	if (error != 0)
5827 		return (SET_ERROR(error));
5828 
5829 	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
5830 		return (error);
5831 	rw_enter(&zp->z_xattr_lock, RW_READER);
5832 
5833 	error = zfs_listextattr_impl(ap, zfs_xattr_compat);
5834 	if (error == 0 && ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) {
5835 		/* Also list user xattrs with the alternate format. */
5836 		error = zfs_listextattr_impl(ap, !zfs_xattr_compat);
5837 	}
5838 
5839 	rw_exit(&zp->z_xattr_lock);
5840 	zfs_exit(zfsvfs, FTAG);
5841 	return (error);
5842 }
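/*
 * Illustrative, non-compiled userland sketch of how the list produced by
 * this VOP is consumed: extattr_list_file(2) returns a buffer of entries,
 * each a single length byte followed by that many bytes of name.  The
 * function name and buffer size below are hypothetical examples.
 */
#if 0
#include <sys/types.h>
#include <sys/extattr.h>
#include <stdio.h>

static void
print_user_xattrs(const char *path)
{
	char buf[1024];
	ssize_t nbytes, off;

	/* Fetch the packed list of user-namespace attribute names. */
	nbytes = extattr_list_file(path, EXTATTR_NAMESPACE_USER,
	    buf, sizeof (buf));
	for (off = 0; nbytes > 0 && off < nbytes; ) {
		unsigned char nlen = (unsigned char)buf[off++];

		/* Names are not NUL-terminated; print exactly nlen bytes. */
		printf("%.*s\n", (int)nlen, buf + off);
		off += nlen;
	}
}
#endif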
5843 
5844 #ifndef _SYS_SYSPROTO_H_
5845 struct vop_getacl_args {
5846 	struct vnode *vp;
5847 	acl_type_t type;
5848 	struct acl *aclp;
5849 	struct ucred *cred;
5850 	struct thread *td;
5851 };
5852 #endif
5853 
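/*
 * Fetch the NFSv4 ACL: retrieve the ZFS ACEs via zfs_getsecattr() and
 * convert them to a FreeBSD struct acl with acl_from_aces().
 */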
5854 static int
5855 zfs_freebsd_getacl(struct vop_getacl_args *ap)
5856 {
5857 	int		error;
5858 	vsecattr_t	vsecattr;
5859 
5860 	if (ap->a_type != ACL_TYPE_NFS4)
5861 		return (EINVAL);
5862 
5863 	vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT;
5864 	if ((error = zfs_getsecattr(VTOZ(ap->a_vp),
5865 	    &vsecattr, 0, ap->a_cred)))
5866 		return (error);
5867 
5868 	error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp,
5869 	    vsecattr.vsa_aclcnt);
5870 	if (vsecattr.vsa_aclentp != NULL)
5871 		kmem_free(vsecattr.vsa_aclentp, vsecattr.vsa_aclentsz);
5872 
5873 	return (error);
5874 }
5875 
5876 #ifndef _SYS_SYSPROTO_H_
5877 struct vop_setacl_args {
5878 	struct vnode *vp;
5879 	acl_type_t type;
5880 	struct acl *aclp;
5881 	struct ucred *cred;
5882 	struct thread *td;
5883 };
5884 #endif
5885 
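/*
 * Set the NFSv4 ACL: validate it, convert the FreeBSD ACL entries to ZFS
 * ACEs with aces_from_acl(), and apply them via zfs_setsecattr().
 */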
5886 static int
5887 zfs_freebsd_setacl(struct vop_setacl_args *ap)
5888 {
5889 	int		error;
5890 	vsecattr_t vsecattr;
5891 	int		aclbsize;	/* size of acl list in bytes */
5892 	aclent_t	*aaclp;
5893 
5894 	if (ap->a_type != ACL_TYPE_NFS4)
5895 		return (EINVAL);
5896 
5897 	if (ap->a_aclp == NULL)
5898 		return (EINVAL);
5899 
5900 	if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES)
5901 		return (EINVAL);
5902 
5903 	/*
5904 	 * With NFSv4 ACLs, chmod(2) may need to add entries, splitting every
5905 	 * entry into two and appending the "canonical six" entries at the
5906 	 * end.  Don't allow setting an ACL that would cause chmod(2) to run
5907 	 * out of entries, i.e. require acl_cnt * 2 + 6 <= ACL_MAX_ENTRIES.
5908 	 */
5909 	if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES)
5910 		return (ENOSPC);
5911 
5912 	error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR);
5913 	if (error != 0)
5914 		return (error);
5915 
5916 	vsecattr.vsa_mask = VSA_ACE;
5917 	aclbsize = ap->a_aclp->acl_cnt * sizeof (ace_t);
5918 	vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP);
5919 	aaclp = vsecattr.vsa_aclentp;
5920 	vsecattr.vsa_aclentsz = aclbsize;
5921 
5922 	aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp);
5923 	error = zfs_setsecattr(VTOZ(ap->a_vp), &vsecattr, 0, ap->a_cred);
5924 	kmem_free(aaclp, aclbsize);
5925 
5926 	return (error);
5927 }
5928 
5929 #ifndef _SYS_SYSPROTO_H_
5930 struct vop_aclcheck_args {
5931 	struct vnode *vp;
5932 	acl_type_t type;
5933 	struct acl *aclp;
5934 	struct ucred *cred;
5935 	struct thread *td;
5936 };
5937 #endif
5938 
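/*
 * ACL consistency checking is not implemented; always report EOPNOTSUPP.
 */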
5939 static int
5940 zfs_freebsd_aclcheck(struct vop_aclcheck_args *ap)
5941 {
5942 
5943 	return (EOPNOTSUPP);
5944 }
5945 
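/*
 * Translate a vnode to its parent directory vnode and name component for
 * reverse (vnode-to-path) lookups.  The root of a snapshot mounted under
 * .zfs forwards the query to the covered vnode.
 */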
5946 static int
5947 zfs_vptocnp(struct vop_vptocnp_args *ap)
5948 {
5949 	vnode_t *covered_vp;
5950 	vnode_t *vp = ap->a_vp;
5951 	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
5952 	znode_t *zp = VTOZ(vp);
5953 	int ltype;
5954 	int error;
5955 
5956 	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
5957 		return (error);
5958 
5959 	/*
5960 	 * If this is the root of a snapshot mounted under .zfs, run the
5961 	 * operation on the covered vnode; otherwise resolve the name here.
5962 	 */
5963 	if (zp->z_id != zfsvfs->z_root || zfsvfs->z_parent == zfsvfs) {
5964 		char name[MAXNAMLEN + 1];
5965 		znode_t *dzp;
5966 		size_t len;
5967 
5968 		error = zfs_znode_parent_and_name(zp, &dzp, name);
5969 		if (error == 0) {
5970 			len = strlen(name);
5971 			if (*ap->a_buflen < len)
5972 				error = SET_ERROR(ENOMEM);
5973 		}
5974 		if (error == 0) {
5975 			*ap->a_buflen -= len;
5976 			memcpy(ap->a_buf + *ap->a_buflen, name, len);
5977 			*ap->a_vpp = ZTOV(dzp);
5978 		}
5979 		zfs_exit(zfsvfs, FTAG);
5980 		return (error);
5981 	}
5982 	zfs_exit(zfsvfs, FTAG);
5983 
5984 	covered_vp = vp->v_mount->mnt_vnodecovered;
5985 	enum vgetstate vs = vget_prep(covered_vp);
5986 	ltype = VOP_ISLOCKED(vp);
5987 	VOP_UNLOCK(vp);
5988 	error = vget_finish(covered_vp, LK_SHARED, vs);
5989 	if (error == 0) {
5990 		error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_buf,
5991 		    ap->a_buflen);
5992 		vput(covered_vp);
5993 	}
5994 	vn_lock(vp, ltype | LK_RETRY);
5995 	if (VN_IS_DOOMED(vp))
5996 		error = SET_ERROR(ENOENT);
5997 	return (error);
5998 }
5999 
6000 #if __FreeBSD_version >= 1400032
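/*
 * VOP_DEALLOCATE: free (hole-punch) a byte range of a file via zfs_freesp()
 * and commit the ZIL when synchronous semantics are requested.
 */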
6001 static int
6002 zfs_deallocate(struct vop_deallocate_args *ap)
6003 {
6004 	znode_t *zp = VTOZ(ap->a_vp);
6005 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
6006 	zilog_t *zilog;
6007 	off_t off, len, file_sz;
6008 	int error;
6009 
6010 	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
6011 		return (error);
6012 
6013 	/*
6014 	 * Callers might not be able to properly detect that we are read-only,
6015 	 * so check it explicitly here.
6016 	 */
6017 	if (zfs_is_readonly(zfsvfs)) {
6018 		zfs_exit(zfsvfs, FTAG);
6019 		return (SET_ERROR(EROFS));
6020 	}
6021 
6022 	zilog = zfsvfs->z_log;
6023 	off = *ap->a_offset;
6024 	len = *ap->a_len;
6025 	file_sz = zp->z_size;
6026 	if (off + len > file_sz)
6027 		len = file_sz - off;
6028 	/* Fast path for out-of-range request. */
6029 	if (len <= 0) {
6030 		*ap->a_len = 0;
6031 		zfs_exit(zfsvfs, FTAG);
6032 		return (0);
6033 	}
6034 
6035 	error = zfs_freesp(zp, off, len, O_RDWR, TRUE);
6036 	if (error == 0) {
6037 		if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS ||
6038 		    (ap->a_ioflag & IO_SYNC) != 0)
6039 			zil_commit(zilog, zp->z_id);
6040 		*ap->a_offset = off + len;
6041 		*ap->a_len = 0;
6042 	}
6043 
6044 	zfs_exit(zfsvfs, FTAG);
6045 	return (error);
6046 }
6047 #endif
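/*
 * Illustrative, non-compiled userland sketch of what drives the VOP above
 * on FreeBSD 14 and later: fspacectl(2) with SPACECTL_DEALLOC asks the
 * kernel to deallocate (zero) a byte range.  The helper name below is a
 * hypothetical example.
 */
#if 0
#include <fcntl.h>

static int
punch_hole(int fd, off_t off, off_t len)
{
	struct spacectl_range rqsr = { .r_offset = off, .r_len = len };

	/* Passing NULL for rmsr means we do not need the remaining range. */
	return (fspacectl(fd, SPACECTL_DEALLOC, &rqsr, 0, NULL));
}
#endif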
6048 
6049 #ifndef _SYS_SYSPROTO_H_
6050 struct vop_copy_file_range_args {
6051 	struct vnode *a_invp;
6052 	off_t *a_inoffp;
6053 	struct vnode *a_outvp;
6054 	off_t *a_outoffp;
6055 	size_t *a_lenp;
6056 	unsigned int a_flags;
6057 	struct ucred *a_incred;
6058 	struct ucred *a_outcred;
6059 	struct thread *a_fsizetd;
6060 };
6061 #endif
6062 /*
6063  * TODO: FreeBSD will only call the file system-specific copy_file_range()
6064  * if both files reside under the same mountpoint.  In the case of ZFS we
6065  * want to be called even if the files are in different datasets (but on
6066  * the same pool; we need to check that ourselves).
6067  */
6068 static int
6069 zfs_freebsd_copy_file_range(struct vop_copy_file_range_args *ap)
6070 {
6071 	zfsvfs_t *outzfsvfs;
6072 	struct vnode *invp = ap->a_invp;
6073 	struct vnode *outvp = ap->a_outvp;
6074 	struct mount *mp;
6075 	int error;
6076 	uint64_t len = *ap->a_lenp;
6077 
6078 	if (!zfs_bclone_enabled) {
6079 		mp = NULL;
6080 		goto bad_write_fallback;
6081 	}
6082 
6083 	/*
6084 	 * TODO: If the offset/length is not aligned to the recordsize, use
6085 	 * vn_generic_copy_file_range() on this fragment.
6086 	 * It would be better to do this after we lock the vnodes, but then we
6087 	 * would need something other than vn_generic_copy_file_range().
6088 	 */
6089 
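	/*
	 * Suspend writes on the output mount and verify that block cloning
	 * is enabled on the destination pool before attempting to clone;
	 * otherwise fall back to the generic copy path.
	 */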
6090 	vn_start_write(outvp, &mp, V_WAIT);
6091 	if (__predict_true(mp == outvp->v_mount)) {
6092 		outzfsvfs = (zfsvfs_t *)mp->mnt_data;
6093 		if (!spa_feature_is_enabled(dmu_objset_spa(outzfsvfs->z_os),
6094 		    SPA_FEATURE_BLOCK_CLONING)) {
6095 			goto bad_write_fallback;
6096 		}
6097 	}
6098 	if (invp == outvp) {
6099 		if (vn_lock(outvp, LK_EXCLUSIVE) != 0) {
6100 			goto bad_write_fallback;
6101 		}
6102 	} else {
6103 #if (__FreeBSD_version >= 1302506 && __FreeBSD_version < 1400000) || \
6104 	__FreeBSD_version >= 1400086
6105 		vn_lock_pair(invp, false, LK_EXCLUSIVE, outvp, false,
6106 		    LK_EXCLUSIVE);
6107 #else
6108 		vn_lock_pair(invp, false, outvp, false);
6109 #endif
6110 		if (VN_IS_DOOMED(invp) || VN_IS_DOOMED(outvp)) {
6111 			goto bad_locked_fallback;
6112 		}
6113 	}
6114 
6115 #ifdef MAC
6116 	error = mac_vnode_check_write(curthread->td_ucred, ap->a_outcred,
6117 	    outvp);
6118 	if (error != 0)
6119 		goto out_locked;
6120 #endif
6121 
6122 	error = zfs_clone_range(VTOZ(invp), ap->a_inoffp, VTOZ(outvp),
6123 	    ap->a_outoffp, &len, ap->a_outcred);
6124 	if (error == EXDEV || error == EAGAIN || error == EINVAL ||
6125 	    error == EOPNOTSUPP)
6126 		goto bad_locked_fallback;
6127 	*ap->a_lenp = (size_t)len;
6128 out_locked:
6129 	if (invp != outvp)
6130 		VOP_UNLOCK(invp);
6131 	VOP_UNLOCK(outvp);
6132 	if (mp != NULL)
6133 		vn_finished_write(mp);
6134 	return (error);
6135 
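	/*
	 * Returning ENOSYS tells the VFS to fall back to
	 * vn_generic_copy_file_range() and perform an ordinary copy.
	 */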
6136 bad_locked_fallback:
6137 	if (invp != outvp)
6138 		VOP_UNLOCK(invp);
6139 	VOP_UNLOCK(outvp);
6140 bad_write_fallback:
6141 	if (mp != NULL)
6142 		vn_finished_write(mp);
6143 	error = ENOSYS;
6144 	return (error);
6145 }
6146 
6147 struct vop_vector zfs_vnodeops;
6148 struct vop_vector zfs_fifoops;
6149 struct vop_vector zfs_shareops;
6150 
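/*
 * Vnode operation vectors.  Operations not listed in a vector fall through
 * to its vop_default vector (default_vnodeops or fifo_specops).
 */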
6151 struct vop_vector zfs_vnodeops = {
6152 	.vop_default =		&default_vnodeops,
6153 	.vop_inactive =		zfs_freebsd_inactive,
6154 	.vop_need_inactive =	zfs_freebsd_need_inactive,
6155 	.vop_reclaim =		zfs_freebsd_reclaim,
6156 	.vop_fplookup_vexec = zfs_freebsd_fplookup_vexec,
6157 	.vop_fplookup_symlink = zfs_freebsd_fplookup_symlink,
6158 	.vop_access =		zfs_freebsd_access,
6159 	.vop_allocate =		VOP_EINVAL,
6160 #if __FreeBSD_version >= 1400032
6161 	.vop_deallocate =	zfs_deallocate,
6162 #endif
6163 	.vop_lookup =		zfs_cache_lookup,
6164 	.vop_cachedlookup =	zfs_freebsd_cachedlookup,
6165 	.vop_getattr =		zfs_freebsd_getattr,
6166 	.vop_setattr =		zfs_freebsd_setattr,
6167 	.vop_create =		zfs_freebsd_create,
6168 	.vop_mknod =		(vop_mknod_t *)zfs_freebsd_create,
6169 	.vop_mkdir =		zfs_freebsd_mkdir,
6170 	.vop_readdir =		zfs_freebsd_readdir,
6171 	.vop_fsync =		zfs_freebsd_fsync,
6172 	.vop_open =		zfs_freebsd_open,
6173 	.vop_close =		zfs_freebsd_close,
6174 	.vop_rmdir =		zfs_freebsd_rmdir,
6175 	.vop_ioctl =		zfs_freebsd_ioctl,
6176 	.vop_link =		zfs_freebsd_link,
6177 	.vop_symlink =		zfs_freebsd_symlink,
6178 	.vop_readlink =		zfs_freebsd_readlink,
6179 	.vop_read =		zfs_freebsd_read,
6180 	.vop_write =		zfs_freebsd_write,
6181 	.vop_remove =		zfs_freebsd_remove,
6182 	.vop_rename =		zfs_freebsd_rename,
6183 	.vop_pathconf =		zfs_freebsd_pathconf,
6184 	.vop_bmap =		zfs_freebsd_bmap,
6185 	.vop_fid =		zfs_freebsd_fid,
6186 	.vop_getextattr =	zfs_getextattr,
6187 	.vop_deleteextattr =	zfs_deleteextattr,
6188 	.vop_setextattr =	zfs_setextattr,
6189 	.vop_listextattr =	zfs_listextattr,
6190 	.vop_getacl =		zfs_freebsd_getacl,
6191 	.vop_setacl =		zfs_freebsd_setacl,
6192 	.vop_aclcheck =		zfs_freebsd_aclcheck,
6193 	.vop_getpages =		zfs_freebsd_getpages,
6194 	.vop_putpages =		zfs_freebsd_putpages,
6195 	.vop_vptocnp =		zfs_vptocnp,
6196 	.vop_lock1 =		vop_lock,
6197 	.vop_unlock =		vop_unlock,
6198 	.vop_islocked =		vop_islocked,
6199 #if __FreeBSD_version >= 1400043
6200 	.vop_add_writecount =	vop_stdadd_writecount_nomsync,
6201 #endif
6202 	.vop_copy_file_range =	zfs_freebsd_copy_file_range,
6203 };
6204 VFS_VOP_VECTOR_REGISTER(zfs_vnodeops);
6205 
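/*
 * Vnode operations for FIFO (named pipe) vnodes.  Data I/O on a FIFO is
 * handled by the fifo/pipe layer, so vop_read/vop_write should never be
 * reached and are wired to VOP_PANIC.
 */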
6206 struct vop_vector zfs_fifoops = {
6207 	.vop_default =		&fifo_specops,
6208 	.vop_fsync =		zfs_freebsd_fsync,
6209 	.vop_fplookup_vexec =	zfs_freebsd_fplookup_vexec,
6210 	.vop_fplookup_symlink = zfs_freebsd_fplookup_symlink,
6211 	.vop_access =		zfs_freebsd_access,
6212 	.vop_getattr =		zfs_freebsd_getattr,
6213 	.vop_inactive =		zfs_freebsd_inactive,
6214 	.vop_read =		VOP_PANIC,
6215 	.vop_reclaim =		zfs_freebsd_reclaim,
6216 	.vop_setattr =		zfs_freebsd_setattr,
6217 	.vop_write =		VOP_PANIC,
6218 	.vop_pathconf = 	zfs_freebsd_pathconf,
6219 	.vop_fid =		zfs_freebsd_fid,
6220 	.vop_getacl =		zfs_freebsd_getacl,
6221 	.vop_setacl =		zfs_freebsd_setacl,
6222 	.vop_aclcheck =		zfs_freebsd_aclcheck,
6223 #if __FreeBSD_version >= 1400043
6224 	.vop_add_writecount =	vop_stdadd_writecount_nomsync,
6225 #endif
6226 };
6227 VFS_VOP_VECTOR_REGISTER(zfs_fifoops);
6228 
6229 /*
6230  * Vnode operations template for the hidden special share files.
6231  */
6232 struct vop_vector zfs_shareops = {
6233 	.vop_default =		&default_vnodeops,
6234 	.vop_fplookup_vexec =	VOP_EAGAIN,
6235 	.vop_fplookup_symlink =	VOP_EAGAIN,
6236 	.vop_access =		zfs_freebsd_access,
6237 	.vop_inactive =		zfs_freebsd_inactive,
6238 	.vop_reclaim =		zfs_freebsd_reclaim,
6239 	.vop_fid =		zfs_freebsd_fid,
6240 	.vop_pathconf =		zfs_freebsd_pathconf,
6241 #if __FreeBSD_version >= 1400043
6242 	.vop_add_writecount =	vop_stdadd_writecount_nomsync,
6243 #endif
6244 };
6245 VFS_VOP_VECTOR_REGISTER(zfs_shareops);
6246 
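/*
 * On FreeBSD this module parameter is exposed as the vfs.zfs.xattr_compat
 * sysctl and may be toggled at runtime, e.g. "sysctl vfs.zfs.xattr_compat=1"
 * (an illustrative invocation).
 */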
6247 ZFS_MODULE_PARAM(zfs, zfs_, xattr_compat, INT, ZMOD_RW,
6248 	"Use legacy ZFS xattr naming for writing new user namespace xattrs");
6249