xref: /netbsd-src/sys/fs/tmpfs/tmpfs_vnops.c (revision 9ddb6ab554e70fb9bbd90c3d96b812bc57755a14)
1 /*	$NetBSD: tmpfs_vnops.c,v 1.95 2012/02/27 16:10:56 chs Exp $	*/
2 
3 /*
4  * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
9  * 2005 program.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * tmpfs vnode interface.
35  */
36 
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: tmpfs_vnops.c,v 1.95 2012/02/27 16:10:56 chs Exp $");
39 
40 #include <sys/param.h>
41 #include <sys/dirent.h>
42 #include <sys/fcntl.h>
43 #include <sys/event.h>
44 #include <sys/malloc.h>
45 #include <sys/namei.h>
46 #include <sys/stat.h>
47 #include <sys/uio.h>
48 #include <sys/unistd.h>
49 #include <sys/vnode.h>
50 #include <sys/lockf.h>
51 #include <sys/kauth.h>
52 
53 #include <uvm/uvm.h>
54 
55 #include <miscfs/fifofs/fifo.h>
56 #include <miscfs/genfs/genfs.h>
57 #include <fs/tmpfs/tmpfs_vnops.h>
58 #include <fs/tmpfs/tmpfs.h>
59 
60 /*
61  * vnode operations vector used for files stored in a tmpfs file system.
62  */
63 int (**tmpfs_vnodeop_p)(void *);
64 const struct vnodeopv_entry_desc tmpfs_vnodeop_entries[] = {
65 	{ &vop_default_desc,		vn_default_error },
66 	{ &vop_lookup_desc,		tmpfs_lookup },
67 	{ &vop_create_desc,		tmpfs_create },
68 	{ &vop_mknod_desc,		tmpfs_mknod },
69 	{ &vop_open_desc,		tmpfs_open },
70 	{ &vop_close_desc,		tmpfs_close },
71 	{ &vop_access_desc,		tmpfs_access },
72 	{ &vop_getattr_desc,		tmpfs_getattr },
73 	{ &vop_setattr_desc,		tmpfs_setattr },
74 	{ &vop_read_desc,		tmpfs_read },
75 	{ &vop_write_desc,		tmpfs_write },
76 	{ &vop_ioctl_desc,		tmpfs_ioctl },
77 	{ &vop_fcntl_desc,		tmpfs_fcntl },
78 	{ &vop_poll_desc,		tmpfs_poll },
79 	{ &vop_kqfilter_desc,		tmpfs_kqfilter },
80 	{ &vop_revoke_desc,		tmpfs_revoke },
81 	{ &vop_mmap_desc,		tmpfs_mmap },
82 	{ &vop_fsync_desc,		tmpfs_fsync },
83 	{ &vop_seek_desc,		tmpfs_seek },
84 	{ &vop_remove_desc,		tmpfs_remove },
85 	{ &vop_link_desc,		tmpfs_link },
86 	{ &vop_rename_desc,		tmpfs_rename },
87 	{ &vop_mkdir_desc,		tmpfs_mkdir },
88 	{ &vop_rmdir_desc,		tmpfs_rmdir },
89 	{ &vop_symlink_desc,		tmpfs_symlink },
90 	{ &vop_readdir_desc,		tmpfs_readdir },
91 	{ &vop_readlink_desc,		tmpfs_readlink },
92 	{ &vop_abortop_desc,		tmpfs_abortop },
93 	{ &vop_inactive_desc,		tmpfs_inactive },
94 	{ &vop_reclaim_desc,		tmpfs_reclaim },
95 	{ &vop_lock_desc,		tmpfs_lock },
96 	{ &vop_unlock_desc,		tmpfs_unlock },
97 	{ &vop_bmap_desc,		tmpfs_bmap },
98 	{ &vop_strategy_desc,		tmpfs_strategy },
99 	{ &vop_print_desc,		tmpfs_print },
100 	{ &vop_pathconf_desc,		tmpfs_pathconf },
101 	{ &vop_islocked_desc,		tmpfs_islocked },
102 	{ &vop_advlock_desc,		tmpfs_advlock },
103 	{ &vop_bwrite_desc,		tmpfs_bwrite },
104 	{ &vop_getpages_desc,		tmpfs_getpages },
105 	{ &vop_putpages_desc,		tmpfs_putpages },
106 	{ &vop_whiteout_desc,		tmpfs_whiteout },
107 	{ NULL, NULL }
108 };
109 
110 const struct vnodeopv_desc tmpfs_vnodeop_opv_desc = {
111 	&tmpfs_vnodeop_p, tmpfs_vnodeop_entries
112 };
113 
114 /*
115  * tmpfs_lookup: path name traversal routine.
116  *
117  * Arguments: dvp (directory being searched), vpp (result),
118  * cnp (component name - path).
119  *
120  * => Caller holds a reference and lock on dvp.
121  * => We return looked-up vnode (vpp) locked, with a reference held.
122  */
123 int
124 tmpfs_lookup(void *v)
125 {
126 	struct vop_lookup_args /* {
127 		struct vnode *a_dvp;
128 		struct vnode **a_vpp;
129 		struct componentname *a_cnp;
130 	} */ *ap = v;
131 	vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
132 	struct componentname *cnp = ap->a_cnp;
133 	const bool lastcn = (cnp->cn_flags & ISLASTCN) != 0;
134 	tmpfs_node_t *dnode, *tnode;
135 	tmpfs_dirent_t *de;
136 	int error;
137 
138 	KASSERT(VOP_ISLOCKED(dvp));
139 
140 	dnode = VP_TO_TMPFS_DIR(dvp);
141 	*vpp = NULL;
142 
143 	/* Check accessibility of directory. */
144 	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred);
145 	if (error) {
146 		goto out;
147 	}
148 
149 	/*
150 	 * If requesting the last path component on a read-only file system
151 	 * with a write operation, deny it.
152 	 */
153 	if (lastcn && (dvp->v_mount->mnt_flag & MNT_RDONLY) != 0 &&
154 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
155 		error = EROFS;
156 		goto out;
157 	}
158 
159 	/*
160 	 * Avoid doing a linear scan of the directory if the requested
161 	 * directory/name couple is already in the cache.
162 	 */
163 	error = cache_lookup(dvp, vpp, cnp);
164 	if (error >= 0) {
165 		/* Both cache-hit or an error case. */
166 		goto out;
167 	}
168 
169 	if (cnp->cn_flags & ISDOTDOT) {
170 		tmpfs_node_t *pnode;
171 
172 		/*
173 		 * Lookup of ".." case.
174 		 */
175 		if (lastcn && cnp->cn_nameiop == RENAME) {
176 			error = EINVAL;
177 			goto out;
178 		}
179 		KASSERT(dnode->tn_type == VDIR);
180 		pnode = dnode->tn_spec.tn_dir.tn_parent;
181 		if (pnode == NULL) {
182 			error = ENOENT;
183 			goto out;
184 		}
185 
186 		/*
187 		 * Lock the parent tn_vlock before releasing the vnode lock,
188 		 * and thus prevents parent from disappearing.
189 		 */
190 		mutex_enter(&pnode->tn_vlock);
191 		VOP_UNLOCK(dvp);
192 
193 		/*
194 		 * Get a vnode of the '..' entry and re-acquire the lock.
195 		 * Release the tn_vlock.
196 		 */
197 		error = tmpfs_vnode_get(dvp->v_mount, pnode, vpp);
198 		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
199 		goto out;
200 
201 	} else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
202 		/*
203 		 * Lookup of "." case.
204 		 */
205 		if (lastcn && cnp->cn_nameiop == RENAME) {
206 			error = EISDIR;
207 			goto out;
208 		}
209 		vref(dvp);
210 		*vpp = dvp;
211 		error = 0;
212 		goto done;
213 	}
214 
215 	/*
216 	 * Other lookup cases: perform directory scan.
217 	 */
218 	de = tmpfs_dir_lookup(dnode, cnp);
219 	if (de == NULL || de->td_node == TMPFS_NODE_WHITEOUT) {
220 		/*
221 		 * The entry was not found in the directory.  This is valid
222 		 * if we are creating or renaming an entry and are working
223 		 * on the last component of the path name.
224 		 */
225 		if (lastcn && (cnp->cn_nameiop == CREATE ||
226 		    cnp->cn_nameiop == RENAME)) {
227 			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
228 			if (error) {
229 				goto out;
230 			}
231 			error = EJUSTRETURN;
232 		} else {
233 			error = ENOENT;
234 		}
235 		if (de) {
236 			KASSERT(de->td_node == TMPFS_NODE_WHITEOUT);
237 			cnp->cn_flags |= ISWHITEOUT;
238 		}
239 		goto done;
240 	}
241 
242 	tnode = de->td_node;
243 
244 	/*
245 	 * If it is not the last path component and found a non-directory
246 	 * or non-link entry (which may itself be pointing to a directory),
247 	 * raise an error.
248 	 */
249 	if (!lastcn && tnode->tn_type != VDIR && tnode->tn_type != VLNK) {
250 		error = ENOTDIR;
251 		goto out;
252 	}
253 
254 	/* Check the permissions. */
255 	if (lastcn && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
256 		kauth_action_t action = 0;
257 
258 		/* This is the file-system's decision. */
259 		if ((dnode->tn_mode & S_ISTXT) != 0 &&
260 		    kauth_cred_geteuid(cnp->cn_cred) != dnode->tn_uid &&
261 		    kauth_cred_geteuid(cnp->cn_cred) != tnode->tn_uid) {
262 			error = EPERM;
263 		} else {
264 			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
265 		}
266 
267 		if (cnp->cn_nameiop == DELETE) {
268 			action |= KAUTH_VNODE_DELETE;
269 		} else {
270 			KASSERT(cnp->cn_nameiop == RENAME);
271 			action |= KAUTH_VNODE_RENAME;
272 		}
273 		error = kauth_authorize_vnode(cnp->cn_cred,
274 		    action, *vpp, dvp, error);
275 		if (error) {
276 			goto out;
277 		}
278 	}
279 
280 	/* Get a vnode for the matching entry. */
281 	mutex_enter(&tnode->tn_vlock);
282 	error = tmpfs_vnode_get(dvp->v_mount, tnode, vpp);
283 done:
284 	/*
285 	 * Cache the result, unless request was for creation (as it does
286 	 * not improve the performance).
287 	 */
288 	if ((cnp->cn_flags & MAKEENTRY) != 0 && cnp->cn_nameiop != CREATE) {
289 		cache_enter(dvp, *vpp, cnp);
290 	}
291 out:
292 	KASSERT((*vpp && VOP_ISLOCKED(*vpp)) || error);
293 	KASSERT(VOP_ISLOCKED(dvp));
294 
295 	return error;
296 }
297 
298 int
299 tmpfs_create(void *v)
300 {
301 	struct vop_create_args /* {
302 		struct vnode		*a_dvp;
303 		struct vnode		**a_vpp;
304 		struct componentname	*a_cnp;
305 		struct vattr		*a_vap;
306 	} */ *ap = v;
307 	vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
308 	struct componentname *cnp = ap->a_cnp;
309 	struct vattr *vap = ap->a_vap;
310 
311 	KASSERT(VOP_ISLOCKED(dvp));
312 	KASSERT(vap->va_type == VREG || vap->va_type == VSOCK);
313 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
314 }
315 
316 int
317 tmpfs_mknod(void *v)
318 {
319 	struct vop_mknod_args /* {
320 		struct vnode		*a_dvp;
321 		struct vnode		**a_vpp;
322 		struct componentname	*a_cnp;
323 		struct vattr		*a_vap;
324 	} */ *ap = v;
325 	vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
326 	struct componentname *cnp = ap->a_cnp;
327 	struct vattr *vap = ap->a_vap;
328 	enum vtype vt = vap->va_type;
329 
330 	if (vt != VBLK && vt != VCHR && vt != VFIFO) {
331 		vput(dvp);
332 		return EINVAL;
333 	}
334 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
335 }
336 
337 int
338 tmpfs_open(void *v)
339 {
340 	struct vop_open_args /* {
341 		struct vnode	*a_vp;
342 		int		a_mode;
343 		kauth_cred_t	a_cred;
344 	} */ *ap = v;
345 	vnode_t *vp = ap->a_vp;
346 	mode_t mode = ap->a_mode;
347 	tmpfs_node_t *node;
348 
349 	KASSERT(VOP_ISLOCKED(vp));
350 
351 	node = VP_TO_TMPFS_NODE(vp);
352 	if (node->tn_links < 1) {
353 		/*
354 		 * The file is still active, but all its names have been
355 		 * removed (e.g. by a "rmdir $(pwd)").  It cannot be opened
356 		 * any more, as it is about to be destroyed.
357 		 */
358 		return ENOENT;
359 	}
360 
361 	/* If the file is marked append-only, deny write requests. */
362 	if ((node->tn_flags & APPEND) != 0 &&
363 	    (mode & (FWRITE | O_APPEND)) == FWRITE) {
364 		return EPERM;
365 	}
366 	return 0;
367 }
368 
369 int
370 tmpfs_close(void *v)
371 {
372 	struct vop_close_args /* {
373 		struct vnode	*a_vp;
374 		int		a_fflag;
375 		kauth_cred_t	a_cred;
376 	} */ *ap = v;
377 	vnode_t *vp = ap->a_vp;
378 
379 	KASSERT(VOP_ISLOCKED(vp));
380 
381 	tmpfs_update(vp, NULL, NULL, NULL, UPDATE_CLOSE);
382 	return 0;
383 }
384 
385 int
386 tmpfs_access(void *v)
387 {
388 	struct vop_access_args /* {
389 		struct vnode	*a_vp;
390 		int		a_mode;
391 		kauth_cred_t	a_cred;
392 	} */ *ap = v;
393 	vnode_t *vp = ap->a_vp;
394 	mode_t mode = ap->a_mode;
395 	kauth_cred_t cred = ap->a_cred;
396 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
397 	const bool writing = (mode & VWRITE) != 0;
398 	int error;
399 
400 	KASSERT(VOP_ISLOCKED(vp));
401 
402 	/* Possible? */
403 	switch (vp->v_type) {
404 	case VDIR:
405 	case VLNK:
406 	case VREG:
407 		if (writing && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) {
408 			return EROFS;
409 		}
410 		break;
411 	case VBLK:
412 	case VCHR:
413 	case VSOCK:
414 	case VFIFO:
415 		break;
416 	default:
417 		return EINVAL;
418 	}
419 	if (writing && (node->tn_flags & IMMUTABLE) != 0) {
420 		return EPERM;
421 	}
422 
423 	/* Permitted? */
424 	error = genfs_can_access(vp->v_type, node->tn_mode, node->tn_uid,
425 	    node->tn_gid, mode, cred);
426 
427 	return kauth_authorize_vnode(cred, kauth_mode_to_action(mode), vp,
428 	    NULL, error);
429 }
430 
431 int
432 tmpfs_getattr(void *v)
433 {
434 	struct vop_getattr_args /* {
435 		struct vnode	*a_vp;
436 		struct vattr	*a_vap;
437 		kauth_cred_t	a_cred;
438 	} */ *ap = v;
439 	vnode_t *vp = ap->a_vp;
440 	struct vattr *vap = ap->a_vap;
441 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
442 
443 	vattr_null(vap);
444 
445 	tmpfs_update(vp, NULL, NULL, NULL, 0);
446 
447 	vap->va_type = vp->v_type;
448 	vap->va_mode = node->tn_mode;
449 	vap->va_nlink = node->tn_links;
450 	vap->va_uid = node->tn_uid;
451 	vap->va_gid = node->tn_gid;
452 	vap->va_fsid = vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
453 	vap->va_fileid = node->tn_id;
454 	vap->va_size = node->tn_size;
455 	vap->va_blocksize = PAGE_SIZE;
456 	vap->va_atime = node->tn_atime;
457 	vap->va_mtime = node->tn_mtime;
458 	vap->va_ctime = node->tn_ctime;
459 	vap->va_birthtime = node->tn_birthtime;
460 	vap->va_gen = TMPFS_NODE_GEN(node);
461 	vap->va_flags = node->tn_flags;
462 	vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
463 	    node->tn_spec.tn_dev.tn_rdev : VNOVAL;
464 	vap->va_bytes = round_page(node->tn_size);
465 	vap->va_filerev = VNOVAL;
466 	vap->va_vaflags = 0;
467 	vap->va_spare = VNOVAL; /* XXX */
468 
469 	return 0;
470 }
471 
472 #define GOODTIME(tv)	((tv)->tv_sec != VNOVAL || (tv)->tv_nsec != VNOVAL)
473 /* XXX Should this operation be atomic?  I think it should, but code in
474  * XXX other places (e.g., ufs) doesn't seem to be... */
475 int
476 tmpfs_setattr(void *v)
477 {
478 	struct vop_setattr_args /* {
479 		struct vnode	*a_vp;
480 		struct vattr	*a_vap;
481 		kauth_cred_t	a_cred;
482 	} */ *ap = v;
483 	vnode_t *vp = ap->a_vp;
484 	struct vattr *vap = ap->a_vap;
485 	kauth_cred_t cred = ap->a_cred;
486 	lwp_t *l = curlwp;
487 	int error = 0;
488 
489 	KASSERT(VOP_ISLOCKED(vp));
490 
491 	/* Abort if any unsettable attribute is given. */
492 	if (vap->va_type != VNON || vap->va_nlink != VNOVAL ||
493 	    vap->va_fsid != VNOVAL || vap->va_fileid != VNOVAL ||
494 	    vap->va_blocksize != VNOVAL || GOODTIME(&vap->va_ctime) ||
495 	    vap->va_gen != VNOVAL || vap->va_rdev != VNOVAL ||
496 	    vap->va_bytes != VNOVAL) {
497 		return EINVAL;
498 	}
499 	if (error == 0 && (vap->va_flags != VNOVAL))
500 		error = tmpfs_chflags(vp, vap->va_flags, cred, l);
501 
502 	if (error == 0 && (vap->va_size != VNOVAL))
503 		error = tmpfs_chsize(vp, vap->va_size, cred, l);
504 
505 	if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL))
506 		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred, l);
507 
508 	if (error == 0 && (vap->va_mode != VNOVAL))
509 		error = tmpfs_chmod(vp, vap->va_mode, cred, l);
510 
511 	if (error == 0 && (GOODTIME(&vap->va_atime) || GOODTIME(&vap->va_mtime)
512 	    || GOODTIME(&vap->va_birthtime))) {
513 		error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime,
514 		    &vap->va_birthtime, vap->va_vaflags, cred, l);
515 		if (error == 0)
516 			return 0;
517 	}
518 	tmpfs_update(vp, NULL, NULL, NULL, 0);
519 	return error;
520 }
521 
522 int
523 tmpfs_read(void *v)
524 {
525 	struct vop_read_args /* {
526 		struct vnode *a_vp;
527 		struct uio *a_uio;
528 		int a_ioflag;
529 		kauth_cred_t a_cred;
530 	} */ *ap = v;
531 	vnode_t *vp = ap->a_vp;
532 	struct uio *uio = ap->a_uio;
533 	const int ioflag = ap->a_ioflag;
534 	tmpfs_node_t *node;
535 	struct uvm_object *uobj;
536 	int error;
537 
538 	KASSERT(VOP_ISLOCKED(vp));
539 
540 	if (vp->v_type != VREG) {
541 		return EISDIR;
542 	}
543 	if (uio->uio_offset < 0) {
544 		return EINVAL;
545 	}
546 
547 	node = VP_TO_TMPFS_NODE(vp);
548 	node->tn_status |= TMPFS_NODE_ACCESSED;
549 	uobj = node->tn_spec.tn_reg.tn_aobj;
550 	error = 0;
551 
552 	while (error == 0 && uio->uio_resid > 0) {
553 		vsize_t len;
554 
555 		if (node->tn_size <= uio->uio_offset) {
556 			break;
557 		}
558 		len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
559 		if (len == 0) {
560 			break;
561 		}
562 		error = ubc_uiomove(uobj, uio, len, IO_ADV_DECODE(ioflag),
563 		    UBC_READ | UBC_PARTIALOK | UBC_UNMAP_FLAG(vp));
564 	}
565 	return error;
566 }
567 
568 int
569 tmpfs_write(void *v)
570 {
571 	struct vop_write_args /* {
572 		struct vnode	*a_vp;
573 		struct uio	*a_uio;
574 		int		a_ioflag;
575 		kauth_cred_t	a_cred;
576 	} */ *ap = v;
577 	vnode_t *vp = ap->a_vp;
578 	struct uio *uio = ap->a_uio;
579 	const int ioflag = ap->a_ioflag;
580 	tmpfs_node_t *node;
581 	struct uvm_object *uobj;
582 	off_t oldsize;
583 	bool extended;
584 	int error;
585 
586 	KASSERT(VOP_ISLOCKED(vp));
587 
588 	node = VP_TO_TMPFS_NODE(vp);
589 	oldsize = node->tn_size;
590 
591 	if (uio->uio_offset < 0 || vp->v_type != VREG) {
592 		error = EINVAL;
593 		goto out;
594 	}
595 	if (uio->uio_resid == 0) {
596 		error = 0;
597 		goto out;
598 	}
599 	if (ioflag & IO_APPEND) {
600 		uio->uio_offset = node->tn_size;
601 	}
602 
603 	extended = uio->uio_offset + uio->uio_resid > node->tn_size;
604 	if (extended) {
605 		error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid);
606 		if (error)
607 			goto out;
608 	}
609 
610 	uobj = node->tn_spec.tn_reg.tn_aobj;
611 	error = 0;
612 	while (error == 0 && uio->uio_resid > 0) {
613 		vsize_t len;
614 
615 		len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
616 		if (len == 0) {
617 			break;
618 		}
619 		error = ubc_uiomove(uobj, uio, len, IO_ADV_DECODE(ioflag),
620 		    UBC_WRITE | UBC_UNMAP_FLAG(vp));
621 	}
622 	if (error) {
623 		(void)tmpfs_reg_resize(vp, oldsize);
624 	}
625 
626 	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED |
627 	    (extended ? TMPFS_NODE_CHANGED : 0);
628 	VN_KNOTE(vp, NOTE_WRITE);
629 out:
630 	if (error) {
631 		KASSERT(oldsize == node->tn_size);
632 	} else {
633 		KASSERT(uio->uio_resid == 0);
634 	}
635 	return error;
636 }
637 
638 int
639 tmpfs_fsync(void *v)
640 {
641 	struct vop_fsync_args /* {
642 		struct vnode *a_vp;
643 		kauth_cred_t a_cred;
644 		int a_flags;
645 		off_t a_offlo;
646 		off_t a_offhi;
647 		struct lwp *a_l;
648 	} */ *ap = v;
649 	vnode_t *vp = ap->a_vp;
650 
651 	/* Nothing to do.  Just update. */
652 	KASSERT(VOP_ISLOCKED(vp));
653 	tmpfs_update(vp, NULL, NULL, NULL, 0);
654 	return 0;
655 }
656 
657 /*
658  * tmpfs_remove: unlink a file.
659  *
660  * => Both directory (dvp) and file (vp) are locked.
661  * => We unlock and drop the reference on both.
662  */
663 int
664 tmpfs_remove(void *v)
665 {
666 	struct vop_remove_args /* {
667 		struct vnode *a_dvp;
668 		struct vnode *a_vp;
669 		struct componentname *a_cnp;
670 	} */ *ap = v;
671 	vnode_t *dvp = ap->a_dvp, *vp = ap->a_vp;
672 	tmpfs_node_t *node;
673 	tmpfs_dirent_t *de;
674 	int error;
675 
676 	KASSERT(VOP_ISLOCKED(dvp));
677 	KASSERT(VOP_ISLOCKED(vp));
678 
679 	if (vp->v_type == VDIR) {
680 		error = EPERM;
681 		goto out;
682 	}
683 	node = VP_TO_TMPFS_NODE(vp);
684 
685 	/* Files marked as immutable or append-only cannot be deleted. */
686 	if (node->tn_flags & (IMMUTABLE | APPEND)) {
687 		error = EPERM;
688 		goto out;
689 	}
690 
691 	/* Lookup the directory entry (check the cached hint first). */
692 	de = tmpfs_dir_cached(node);
693 	if (de == NULL) {
694 		tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
695 		struct componentname *cnp = ap->a_cnp;
696 		de = tmpfs_dir_lookup(dnode, cnp);
697 	}
698 	KASSERT(de && de->td_node == node);
699 
700 	/*
701 	 * Remove the entry from the directory (drops the link count) and
702 	 * destroy it or replace it with a whiteout.
703 	 * Note: the inode referred by it will not be destroyed
704 	 * until the vnode is reclaimed/recycled.
705 	 */
706 	tmpfs_dir_detach(dvp, de);
707 	if (ap->a_cnp->cn_flags & DOWHITEOUT)
708 		tmpfs_dir_attach(dvp, de, TMPFS_NODE_WHITEOUT);
709 	else
710 		tmpfs_free_dirent(VFS_TO_TMPFS(vp->v_mount), de);
711 	error = 0;
712 out:
713 	/* Drop the references and unlock the vnodes. */
714 	vput(vp);
715 	if (dvp == vp) {
716 		vrele(dvp);
717 	} else {
718 		vput(dvp);
719 	}
720 	return error;
721 }
722 
723 /*
724  * tmpfs_link: create a hard link.
725  */
726 int
727 tmpfs_link(void *v)
728 {
729 	struct vop_link_args /* {
730 		struct vnode *a_dvp;
731 		struct vnode *a_vp;
732 		struct componentname *a_cnp;
733 	} */ *ap = v;
734 	vnode_t *dvp = ap->a_dvp;
735 	vnode_t *vp = ap->a_vp;
736 	struct componentname *cnp = ap->a_cnp;
737 	tmpfs_node_t *dnode, *node;
738 	tmpfs_dirent_t *de;
739 	int error;
740 
741 	KASSERT(dvp != vp);
742 	KASSERT(VOP_ISLOCKED(dvp));
743 	KASSERT(vp->v_type != VDIR);
744 	KASSERT(dvp->v_mount == vp->v_mount);
745 
746 	dnode = VP_TO_TMPFS_DIR(dvp);
747 	node = VP_TO_TMPFS_NODE(vp);
748 
749 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
750 
751 	/* Check for maximum number of links limit. */
752 	if (node->tn_links == LINK_MAX) {
753 		error = EMLINK;
754 		goto out;
755 	}
756 	KASSERT(node->tn_links < LINK_MAX);
757 
758 	/* We cannot create links of files marked immutable or append-only. */
759 	if (node->tn_flags & (IMMUTABLE | APPEND)) {
760 		error = EPERM;
761 		goto out;
762 	}
763 
764 	/* Allocate a new directory entry to represent the inode. */
765 	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount),
766 	    cnp->cn_nameptr, cnp->cn_namelen, &de);
767 	if (error) {
768 		goto out;
769 	}
770 
771 	/*
772 	 * Insert the entry into the directory.
773 	 * It will increase the inode link count.
774 	 */
775 	tmpfs_dir_attach(dvp, de, node);
776 
777 	/* Update the timestamps and trigger the event. */
778 	if (node->tn_vnode) {
779 		VN_KNOTE(node->tn_vnode, NOTE_LINK);
780 	}
781 	node->tn_status |= TMPFS_NODE_CHANGED;
782 	tmpfs_update(vp, NULL, NULL, NULL, 0);
783 	error = 0;
784 out:
785 	VOP_UNLOCK(vp);
786 	vput(dvp);
787 	return error;
788 }
789 
790 /*
791  * tmpfs_rename: rename routine, the hairiest system call, with the
792  * insane API.
793  *
794  * Arguments: fdvp (from-parent vnode), fvp (from-leaf), tdvp (to-parent)
795  * and tvp (to-leaf), if exists (NULL if not).
796  *
797  * => Caller holds a reference on fdvp and fvp, they are unlocked.
798  *    Note: fdvp and fvp can refer to the same object (i.e. when it is root).
799  *
800  * => Both tdvp and tvp are referenced and locked.  It is our responsibility
801  *    to release the references and unlock them (or destroy).
802  */
803 
804 /*
805  * First, some forward declarations of subroutines.
806  */
807 
808 static int tmpfs_sane_rename(struct vnode *, struct componentname *,
809     struct vnode *, struct componentname *, kauth_cred_t, bool);
810 static int tmpfs_rename_enter(struct mount *, struct tmpfs_mount *,
811     kauth_cred_t,
812     struct vnode *, struct tmpfs_node *, struct componentname *,
813     struct tmpfs_dirent **, struct vnode **,
814     struct vnode *, struct tmpfs_node *, struct componentname *,
815     struct tmpfs_dirent **, struct vnode **);
816 static int tmpfs_rename_enter_common(struct mount *, struct tmpfs_mount *,
817     kauth_cred_t,
818     struct vnode *, struct tmpfs_node *,
819     struct componentname *, struct tmpfs_dirent **, struct vnode **,
820     struct componentname *, struct tmpfs_dirent **, struct vnode **);
821 static int tmpfs_rename_enter_separate(struct mount *, struct tmpfs_mount *,
822     kauth_cred_t,
823     struct vnode *, struct tmpfs_node *, struct componentname *,
824     struct tmpfs_dirent **, struct vnode **,
825     struct vnode *, struct tmpfs_node *, struct componentname *,
826     struct tmpfs_dirent **, struct vnode **);
827 static void tmpfs_rename_exit(struct tmpfs_mount *,
828     struct vnode *, struct vnode *, struct vnode *, struct vnode *);
829 static int tmpfs_rename_lock_directory(struct vnode *, struct tmpfs_node *);
830 static int tmpfs_rename_genealogy(struct tmpfs_node *, struct tmpfs_node *,
831     struct tmpfs_node **);
832 static int tmpfs_rename_lock(struct mount *, kauth_cred_t, int,
833     struct vnode *, struct tmpfs_node *, struct componentname *, bool,
834     struct tmpfs_dirent **, struct vnode **,
835     struct vnode *, struct tmpfs_node *, struct componentname *, bool,
836     struct tmpfs_dirent **, struct vnode **);
837 static void tmpfs_rename_attachdetach(struct tmpfs_mount *,
838     struct vnode *, struct tmpfs_dirent *, struct vnode *,
839     struct vnode *, struct tmpfs_dirent *, struct vnode *);
840 static int tmpfs_do_remove(struct tmpfs_mount *, struct vnode *,
841     struct tmpfs_node *, struct tmpfs_dirent *, struct vnode *, kauth_cred_t);
842 static int tmpfs_rename_check_possible(struct tmpfs_node *,
843     struct tmpfs_node *, struct tmpfs_node *, struct tmpfs_node *);
844 static int tmpfs_rename_check_permitted(kauth_cred_t,
845     struct tmpfs_node *, struct tmpfs_node *,
846     struct tmpfs_node *, struct tmpfs_node *);
847 static int tmpfs_remove_check_possible(struct tmpfs_node *,
848     struct tmpfs_node *);
849 static int tmpfs_remove_check_permitted(kauth_cred_t,
850     struct tmpfs_node *, struct tmpfs_node *);
851 static int tmpfs_check_sticky(kauth_cred_t,
852     struct tmpfs_node *, struct tmpfs_node *);
853 
854 int
855 tmpfs_rename(void *v)
856 {
857 	struct vop_rename_args  /* {
858 		struct vnode		*a_fdvp;
859 		struct vnode		*a_fvp;
860 		struct componentname	*a_fcnp;
861 		struct vnode		*a_tdvp;
862 		struct vnode		*a_tvp;
863 		struct componentname	*a_tcnp;
864 	} */ *ap = v;
865 	struct vnode *fdvp = ap->a_fdvp;
866 	struct vnode *fvp = ap->a_fvp;
867 	struct componentname *fcnp = ap->a_fcnp;
868 	struct vnode *tdvp = ap->a_tdvp;
869 	struct vnode *tvp = ap->a_tvp;
870 	struct componentname *tcnp = ap->a_tcnp;
871 	kauth_cred_t cred;
872 	int error;
873 
874 	KASSERT(fdvp != NULL);
875 	KASSERT(fvp != NULL);
876 	KASSERT(fcnp != NULL);
877 	KASSERT(fcnp->cn_nameptr != NULL);
878 	KASSERT(tdvp != NULL);
879 	KASSERT(tcnp != NULL);
880 	KASSERT(fcnp->cn_nameptr != NULL);
881 	/* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
882 	/* KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
883 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
884 	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
885 	KASSERT(fdvp->v_type == VDIR);
886 	KASSERT(tdvp->v_type == VDIR);
887 
888 	cred = fcnp->cn_cred;
889 	KASSERT(tcnp->cn_cred == cred);
890 
891 	/*
892 	 * Sanitize our world from the VFS insanity.  Unlock the target
893 	 * directory and node, which are locked.  Release the children,
894 	 * which are referenced.  Check for rename("x", "y/."), which
895 	 * it is our responsibility to reject, not the caller's.  (But
896 	 * the caller does reject rename("x/.", "y").  Go figure.)
897 	 */
898 
899 	VOP_UNLOCK(tdvp);
900 	if ((tvp != NULL) && (tvp != tdvp))
901 		VOP_UNLOCK(tvp);
902 
903 	vrele(fvp);
904 	if (tvp != NULL)
905 		vrele(tvp);
906 
907 	if (tvp == tdvp) {
908 		error = EINVAL;
909 		goto out;
910 	}
911 
912 	error = tmpfs_sane_rename(fdvp, fcnp, tdvp, tcnp, cred, false);
913 
914 out:	/*
915 	 * All done, whether with success or failure.  Release the
916 	 * directory nodes now, as the caller expects from the VFS
917 	 * protocol.
918 	 */
919 	vrele(fdvp);
920 	vrele(tdvp);
921 
922 	return error;
923 }
924 
925 /*
926  * tmpfs_sane_rename: rename routine, the hairiest system call, with
927  * the sane API.
928  *
929  * Arguments:
930  *
931  * . fdvp (from directory vnode),
932  * . fcnp (from component name),
933  * . tdvp (to directory vnode), and
934  * . tcnp (to component name).
935  *
936  * fdvp and tdvp must be referenced and unlocked.
937  */
938 static int
939 tmpfs_sane_rename(struct vnode *fdvp, struct componentname *fcnp,
940     struct vnode *tdvp, struct componentname *tcnp, kauth_cred_t cred,
941     bool posixly_correct)
942 {
943 	struct mount *mount;
944 	struct tmpfs_mount *tmpfs;
945 	struct tmpfs_node *fdnode, *tdnode;
946 	struct tmpfs_dirent *fde, *tde;
947 	struct vnode *fvp, *tvp;
948 	char *newname;
949 	int error;
950 
951 	KASSERT(fdvp != NULL);
952 	KASSERT(fcnp != NULL);
953 	KASSERT(tdvp != NULL);
954 	KASSERT(tcnp != NULL);
955 	/* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
956 	/* KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
957 	KASSERT(fdvp->v_type == VDIR);
958 	KASSERT(tdvp->v_type == VDIR);
959 	KASSERT(fdvp->v_mount == tdvp->v_mount);
960 	KASSERT((fcnp->cn_flags & ISDOTDOT) == 0);
961 	KASSERT((tcnp->cn_flags & ISDOTDOT) == 0);
962 	KASSERT((fcnp->cn_namelen != 1) || (fcnp->cn_nameptr[0] != '.'));
963 	KASSERT((tcnp->cn_namelen != 1) || (tcnp->cn_nameptr[0] != '.'));
964 	KASSERT((fcnp->cn_namelen != 2) || (fcnp->cn_nameptr[0] != '.') ||
965 	    (fcnp->cn_nameptr[1] != '.'));
966 	KASSERT((tcnp->cn_namelen != 2) || (tcnp->cn_nameptr[0] != '.') ||
967 	    (tcnp->cn_nameptr[1] != '.'));
968 
969 	/*
970 	 * Pull out the tmpfs data structures.
971 	 */
972 	fdnode = VP_TO_TMPFS_NODE(fdvp);
973 	tdnode = VP_TO_TMPFS_NODE(tdvp);
974 	KASSERT(fdnode != NULL);
975 	KASSERT(tdnode != NULL);
976 	KASSERT(fdnode->tn_vnode == fdvp);
977 	KASSERT(tdnode->tn_vnode == tdvp);
978 	KASSERT(fdnode->tn_type == VDIR);
979 	KASSERT(tdnode->tn_type == VDIR);
980 
981 	mount = fdvp->v_mount;
982 	KASSERT(mount != NULL);
983 	KASSERT(mount == tdvp->v_mount);
984 	/* XXX How can we be sure this stays true?  (Not that you're
985 	 * likely to mount a tmpfs read-only...)  */
986 	KASSERT((mount->mnt_flag & MNT_RDONLY) == 0);
987 	tmpfs = VFS_TO_TMPFS(mount);
988 	KASSERT(tmpfs != NULL);
989 
990 	/*
991 	 * Decide whether we need a new name, and allocate memory for
992 	 * it if so.  Do this before locking anything or taking
993 	 * destructive actions so that we can back out safely and sleep
994 	 * safely.  XXX Is sleeping an issue here?  Can this just be
995 	 * moved into tmpfs_rename_attachdetach?
996 	 */
997 	if (tmpfs_strname_neqlen(fcnp, tcnp)) {
998 		newname = tmpfs_strname_alloc(tmpfs, tcnp->cn_namelen);
999 		if (newname == NULL) {
1000 			error = ENOSPC;
1001 			goto out_unlocked;
1002 		}
1003 	} else {
1004 		newname = NULL;
1005 	}
1006 
1007 	/*
1008 	 * Lock and look up everything.  GCC is not very clever.
1009 	 */
1010 	fde = tde = NULL;
1011 	fvp = tvp = NULL;
1012 	error = tmpfs_rename_enter(mount, tmpfs, cred,
1013 	    fdvp, fdnode, fcnp, &fde, &fvp,
1014 	    tdvp, tdnode, tcnp, &tde, &tvp);
1015 	if (error)
1016 		goto out_unlocked;
1017 
1018 	/*
1019 	 * Check that everything is locked and looks right.
1020 	 */
1021 	KASSERT(fde != NULL);
1022 	KASSERT(fvp != NULL);
1023 	KASSERT(fde->td_node != NULL);
1024 	KASSERT(fde->td_node->tn_vnode == fvp);
1025 	KASSERT(fde->td_node->tn_type == fvp->v_type);
1026 	KASSERT((tde == NULL) == (tvp == NULL));
1027 	KASSERT((tde == NULL) || (tde->td_node != NULL));
1028 	KASSERT((tde == NULL) || (tde->td_node->tn_vnode == tvp));
1029 	KASSERT((tde == NULL) || (tde->td_node->tn_type == tvp->v_type));
1030 	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
1031 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
1032 	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
1033 	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
1034 
1035 	/*
1036 	 * If the source and destination are the same object, we need
1037 	 * only at most delete the source entry.
1038 	 */
1039 	if (fvp == tvp) {
1040 		KASSERT(tvp != NULL);
1041 		if (fde->td_node->tn_type == VDIR) {
1042 			/* XXX How can this possibly happen?  */
1043 			error = EINVAL;
1044 			goto out_locked;
1045 		}
1046 		if (!posixly_correct && (fde != tde)) {
1047 			/* XXX Doesn't work because of locking.
1048 			 * error = VOP_REMOVE(fdvp, fvp);
1049 			 */
1050 			error = tmpfs_do_remove(tmpfs, fdvp, fdnode, fde, fvp,
1051 			    cred);
1052 			if (error)
1053 				goto out_locked;
1054 		}
1055 		goto success;
1056 	}
1057 	KASSERT(fde != tde);
1058 	KASSERT(fvp != tvp);
1059 
1060 	/*
1061 	 * If the target exists, refuse to rename a directory over a
1062 	 * non-directory or vice versa, or to clobber a non-empty
1063 	 * directory.
1064 	 */
1065 	if (tvp != NULL) {
1066 		KASSERT(tde != NULL);
1067 		KASSERT(tde->td_node != NULL);
1068 		if (fvp->v_type == VDIR && tvp->v_type == VDIR)
1069 			error = ((tde->td_node->tn_size > 0)? ENOTEMPTY : 0);
1070 		else if (fvp->v_type == VDIR && tvp->v_type != VDIR)
1071 			error = ENOTDIR;
1072 		else if (fvp->v_type != VDIR && tvp->v_type == VDIR)
1073 			error = EISDIR;
1074 		else
1075 			error = 0;
1076 		if (error)
1077 			goto out_locked;
1078 		KASSERT((fvp->v_type == VDIR) == (tvp->v_type == VDIR));
1079 	}
1080 
1081 	/*
1082 	 * Authorize the rename.
1083 	 */
1084 	error = tmpfs_rename_check_possible(fdnode, fde->td_node,
1085 	    tdnode, (tde? tde->td_node : NULL));
1086 	if (error)
1087 		goto out_locked;
1088 	error = tmpfs_rename_check_permitted(cred, fdnode, fde->td_node,
1089 	    tdnode, (tde? tde->td_node : NULL));
1090 	error = kauth_authorize_vnode(cred, KAUTH_VNODE_DELETE, fvp, fdvp,
1091 	    error);
1092 	error = kauth_authorize_vnode(cred, KAUTH_VNODE_RENAME, tvp, tdvp,
1093 	    error);
1094 	if (error)
1095 		goto out_locked;
1096 
1097 	/*
1098 	 * Everything is hunky-dory.  Shuffle the directory entries.
1099 	 */
1100 	tmpfs_rename_attachdetach(tmpfs, fdvp, fde, fvp, tdvp, tde, tvp);
1101 
1102 	/*
1103 	 * Update the directory entry's name if necessary, and flag
1104 	 * metadata updates.  A memory allocation failure here is not
1105 	 * OK because we've already committed some changes that we
1106 	 * can't back out at this point, and we have things locked so
1107 	 * we can't sleep, hence the early allocation above.
1108 	 */
1109 	if (newname != NULL) {
1110 		KASSERT(tcnp->cn_namelen <= TMPFS_MAXNAMLEN);
1111 
1112 		tmpfs_strname_free(tmpfs, fde->td_name, fde->td_namelen);
1113 		fde->td_namelen = (uint16_t)tcnp->cn_namelen;
1114 		(void)memcpy(newname, tcnp->cn_nameptr, tcnp->cn_namelen);
1115 		/* Commit newname and don't free it on the way out.  */
1116 		fde->td_name = newname;
1117 		newname = NULL;
1118 
1119 		fde->td_node->tn_status |= TMPFS_NODE_CHANGED;
1120 		tdnode->tn_status |= TMPFS_NODE_MODIFIED;
1121 	}
1122 
1123 success:
1124 	VN_KNOTE(fvp, NOTE_RENAME);
1125 	error = 0;
1126 
1127 out_locked:
1128 	tmpfs_rename_exit(tmpfs, fdvp, fvp, tdvp, tvp);
1129 
1130 out_unlocked:
1131 	/* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
1132 	/* KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
1133 	/* KASSERT((fvp == NULL) || (VOP_ISLOCKED(fvp) != LK_EXCLUSIVE)); */
1134 	/* KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */
1135 
1136 	if (newname != NULL)
1137 		tmpfs_strname_free(tmpfs, newname, tcnp->cn_namelen);
1138 
1139 	return error;
1140 }
1141 
1142 /*
1143  * Look up fcnp in fdnode/fdvp and store its directory entry in *fde_ret
1144  * and the associated vnode in *fvp_ret; fail if not found.  Look up
1145  * tcnp in tdnode/tdvp and store its directory entry in *tde_ret and the
1146  * associated vnode in *tvp_ret; store null instead if not found.  Fail
1147  * if anything has been mounted on any of the nodes involved.
1148  *
1149  * fdvp and tdvp must be referenced.  Returns 0 or an errno value.
1150  *
1151  * On entry, nothing is locked.
1152  *
1153  * On success, everything is locked, and *fvp_ret, and *tvp_ret if
1154  * nonnull, are referenced.  The only pairs of vnodes that may be
1155  * identical are {fdvp, tdvp} and {fvp, tvp}.
1156  *
1157  * On failure, everything remains as it was.
1158  *
1159  * Locking everything including the source and target nodes is
1160  * necessary to make sure that, e.g., link count updates are OK.  The
1161  * locking order is, in general, ancestor-first, matching the order you
1162  * need to use to look up a descendant anyway.
1163  */
1164 static int
1165 tmpfs_rename_enter(struct mount *mount, struct tmpfs_mount *tmpfs,
1166     kauth_cred_t cred,
1167     struct vnode *fdvp, struct tmpfs_node *fdnode, struct componentname *fcnp,
1168     struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
1169     struct vnode *tdvp, struct tmpfs_node *tdnode, struct componentname *tcnp,
1170     struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret)
1171 {
1172 	int error;
1173 
1174 	KASSERT(mount != NULL);
1175 	KASSERT(tmpfs != NULL);
1176 	KASSERT(fdvp != NULL);
1177 	KASSERT(fdnode != NULL);
1178 	KASSERT(fcnp != NULL);
1179 	KASSERT(fde_ret != NULL);
1180 	KASSERT(fvp_ret != NULL);
1181 	KASSERT(tdvp != NULL);
1182 	KASSERT(tdnode != NULL);
1183 	KASSERT(tcnp != NULL);
1184 	KASSERT(tde_ret != NULL);
1185 	KASSERT(tvp_ret != NULL);
1186 	KASSERT(fdnode->tn_vnode == fdvp);
1187 	KASSERT(tdnode->tn_vnode == tdvp);
1188 	KASSERT(fdnode->tn_type == VDIR);
1189 	KASSERT(tdnode->tn_type == VDIR);
1190 	/* A shared directory is locked once; distinct ones need ordering.  */
1191 	if (fdvp == tdvp) {
1192 		KASSERT(fdnode == tdnode);
1193 		error = tmpfs_rename_enter_common(mount, tmpfs, cred, fdvp,
1194 		    fdnode, fcnp, fde_ret, fvp_ret, tcnp, tde_ret, tvp_ret);
1195 	} else {
1196 		KASSERT(fdnode != tdnode);
1197 		error = tmpfs_rename_enter_separate(mount, tmpfs, cred,
1198 		    fdvp, fdnode, fcnp, fde_ret, fvp_ret,
1199 		    tdvp, tdnode, tcnp, tde_ret, tvp_ret);
1200 	}
1201 
1202 	if (error)
1203 		return error;
1204 	/* Verify the postconditions promised in the header comment.  */
1205 	KASSERT(*fde_ret != NULL);
1206 	KASSERT(*fvp_ret != NULL);
1207 	KASSERT((*tde_ret == NULL) == (*tvp_ret == NULL));
1208 	KASSERT((*tde_ret == NULL) || ((*tde_ret)->td_node != NULL));
1209 	KASSERT((*tde_ret == NULL) ||
1210 	    ((*tde_ret)->td_node->tn_vnode == *tvp_ret));
1211 	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
1212 	KASSERT(VOP_ISLOCKED(*fvp_ret) == LK_EXCLUSIVE);
1213 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
1214 	KASSERT((*tvp_ret == NULL) ||
1215 	    (VOP_ISLOCKED(*tvp_ret) == LK_EXCLUSIVE));
1216 	KASSERT(*fvp_ret != fdvp);
1217 	KASSERT(*fvp_ret != tdvp);
1218 	KASSERT(*tvp_ret != fdvp);
1219 	KASSERT(*tvp_ret != tdvp);
1220 	return 0;
1221 }
1222 
1223 /*
1224  * Lock dvp, then look up and lock fcnp's and (if present) tcnp's nodes.
1225  */
1226 static int
1227 tmpfs_rename_enter_common(struct mount *mount, struct tmpfs_mount *tmpfs,
1228     kauth_cred_t cred,
1229     struct vnode *dvp, struct tmpfs_node *dnode,
1230     struct componentname *fcnp,
1231     struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
1232     struct componentname *tcnp,
1233     struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret)
1234 {
1235 	struct tmpfs_dirent *fde, *tde;
1236 	struct vnode *fvp, *tvp;
1237 	int error;
1238 
1239 	error = tmpfs_rename_lock_directory(dvp, dnode);
1240 	if (error)
1241 		goto fail0;
1242 
1243 	/* Did we lose a race with mount?  */
1244 	if (dvp->v_mountedhere != NULL) {
1245 		error = EBUSY;
1246 		goto fail1;
1247 	}
1248 
1249 	/* Make sure the caller may search (VEXEC) the directory.  */
1250 	error = VOP_ACCESS(dvp, VEXEC, cred);
1251 	if (error)
1252 		goto fail1;
1253 
1254 	/*
1255 	 * The order in which we lock the source and target nodes is
1256 	 * irrelevant because there can only be one rename on this
1257 	 * directory in flight at a time, and we have it locked.
1258 	 */
1259 
1260 	fde = tmpfs_dir_lookup(dnode, fcnp);
1261 	if (fde == NULL) {
1262 		error = ENOENT;
1263 		goto fail1;
1264 	}
1265 
1266 	KASSERT(fde->td_node != NULL);
1267 	/* We ruled out `.' earlier.  */
1268 	KASSERT(fde->td_node != dnode);
1269 	/* We ruled out `..' earlier.  */
1270 	KASSERT(fde->td_node != dnode->tn_spec.tn_dir.tn_parent);
1271 	mutex_enter(&fde->td_node->tn_vlock);
1272 	error = tmpfs_vnode_get(mount, fde->td_node, &fvp);
1273 	if (error)
1274 		goto fail1;
1275 	KASSERT(fvp != NULL);
1276 	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
1277 	KASSERT(fvp != dvp);
1278 	KASSERT(fvp->v_mount == mount);
1279 
1280 	/* Refuse to rename a mount point.  */
1281 	if ((fvp->v_type == VDIR) && (fvp->v_mountedhere != NULL)) {
1282 		error = EBUSY;
1283 		goto fail2;
1284 	}
1285 
1286 	tde = tmpfs_dir_lookup(dnode, tcnp);
1287 	if (tde == NULL) {
1288 		tvp = NULL;
1289 	} else {
1290 		KASSERT(tde->td_node != NULL);
1291 		/* We ruled out `.' earlier.  */
1292 		KASSERT(tde->td_node != dnode);
1293 		/* We ruled out `..' earlier.  */
1294 		KASSERT(tde->td_node != dnode->tn_spec.tn_dir.tn_parent);
1295 		if (tde->td_node != fde->td_node) {
1296 			mutex_enter(&tde->td_node->tn_vlock);
1297 			error = tmpfs_vnode_get(mount, tde->td_node, &tvp);
1298 			if (error)
1299 				goto fail2;
1300 			KASSERT(tvp->v_mount == mount);
1301 			/* Refuse to rename over a mount point.  */
1302 			if ((tvp->v_type == VDIR) &&
1303 			    (tvp->v_mountedhere != NULL)) {
1304 				error = EBUSY;
1305 				goto fail3;
1306 			}
1307 		} else {
1308 			tvp = fvp;
1309 			vref(tvp);	/* extra ref; the lock is fvp's */
1310 		}
1311 		KASSERT(tvp != NULL);
1312 		KASSERT(VOP_ISLOCKED(tvp) == LK_EXCLUSIVE);
1313 	}
1314 	KASSERT(tvp != dvp);
1315 
1316 	*fde_ret = fde;
1317 	*fvp_ret = fvp;
1318 	*tde_ret = tde;
1319 	*tvp_ret = tvp;
1320 	return 0;
1321 
1322 fail3:	if (tvp != NULL) {
1323 		if (tvp != fvp)
1324 			vput(tvp);
1325 		else
1326 			vrele(tvp);
1327 	}
1328 
1329 fail2:	vput(fvp);		/* undo the lookup of the source node */
1330 fail1:	VOP_UNLOCK(dvp);	/* undo tmpfs_rename_lock_directory */
1331 fail0:	return error;
1332 }
1333 
1334 /*
1335  * Lock and look up with separate source and target directories.
1336  */
1337 static int
1338 tmpfs_rename_enter_separate(struct mount *mount, struct tmpfs_mount *tmpfs,
1339     kauth_cred_t cred,
1340     struct vnode *fdvp, struct tmpfs_node *fdnode, struct componentname *fcnp,
1341     struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
1342     struct vnode *tdvp, struct tmpfs_node *tdnode, struct componentname *tcnp,
1343     struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret)
1344 {
1345 	struct tmpfs_node *intermediate_node;
1346 	struct tmpfs_dirent *fde, *tde;
1347 	struct vnode *fvp, *tvp;
1348 	int error;
1349 
1350 	KASSERT(fdvp != tdvp);
1351 	KASSERT(fdnode != tdnode);
1352 
1353 #if 0				/* XXX */
1354 	mutex_enter(&tmpfs->tm_rename_lock);
1355 #endif
1356 
1357 	error = tmpfs_rename_genealogy(fdnode, tdnode, &intermediate_node);
1358 	if (error)
1359 		goto fail;
1360 
1361 	/*
1362 	 * intermediate_node == NULL means fdnode is not an ancestor of
1363 	 * tdnode: lock the target side first.  Otherwise, source first.
1364 	 */
1365 	if (intermediate_node == NULL)
1366 		error = tmpfs_rename_lock(mount, cred, ENOTEMPTY,
1367 		    tdvp, tdnode, tcnp, true, &tde, &tvp,
1368 		    fdvp, fdnode, fcnp, false, &fde, &fvp);
1369 	else
1370 		error = tmpfs_rename_lock(mount, cred, EINVAL,
1371 		    fdvp, fdnode, fcnp, false, &fde, &fvp,
1372 		    tdvp, tdnode, tcnp, true, &tde, &tvp);
1373 	if (error)
1374 		goto fail;
1375 
1376 	KASSERT(fde != NULL);
1377 	KASSERT(fde->td_node != NULL);
1378 
1379 	/*
1380 	 * Reject rename("foo/bar", "foo/bar/baz/quux/zot"); undo the locks.
1381 	 */
1382 	if (fde->td_node == intermediate_node) {
1383 		tmpfs_rename_exit(tmpfs, fdvp, fvp, tdvp, tvp);
1384 		return EINVAL;
1385 	}
1386 
1387 	*fde_ret = fde;
1388 	*fvp_ret = fvp;
1389 	*tde_ret = tde;
1390 	*tvp_ret = tvp;
1391 	return 0;
1392 
1393 fail:
1394 #if 0				/* XXX */
1395 	mutex_exit(&tmpfs->tm_rename_lock);
1396 #endif
1397 	return error;
1398 }
1399 
1400 /*
1401  * Unlock everything we locked for rename.
1402  *
1403  * fdvp and tdvp must be referenced, and stay referenced on exit.
1404  *
1405  * On entry, everything is locked, and fvp and tvp (if nonnull) referenced.
1406  *
1407  * On exit, everything is unlocked, and fvp and tvp are released.
1408  */
1409 static void
1410 tmpfs_rename_exit(struct tmpfs_mount *tmpfs,
1411     struct vnode *fdvp, struct vnode *fvp,
1412     struct vnode *tdvp, struct vnode *tvp)
1413 {
1414 
1415 	KASSERT(tmpfs != NULL);
1416 	KASSERT(fdvp != NULL);
1417 	KASSERT(fvp != NULL);
1418 	KASSERT(fdvp != fvp);
1419 	KASSERT(fdvp != tvp);
1420 	KASSERT(tdvp != tvp);
1421 	KASSERT(tdvp != fvp);
1422 	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
1423 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
1424 	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
1425 	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
1426 
1427 	if (tvp != NULL) {
1428 		if (tvp != fvp)
1429 			vput(tvp);
1430 		else
1431 			vrele(tvp);	/* lock is dropped via fvp below */
1432 	}
1433 	VOP_UNLOCK(tdvp);
1434 	vput(fvp);
1435 	if (fdvp != tdvp)	/* a shared directory was unlocked above */
1436 		VOP_UNLOCK(fdvp);
1437 
1438 #if 0				/* XXX */
1439 	if (fdvp != tdvp)
1440 		mutex_exit(&tmpfs->tm_rename_lock);
1441 #endif
1442 }
1443 
1444 /*
1445  * Lock a directory exclusively, but fail with ENOENT, leaving it
1446  * unlocked, if it has been rmdir'd (its parent pointer is null).
1447  * vp must be referenced.
1448  */
1449 static int
1450 tmpfs_rename_lock_directory(struct vnode *vp, struct tmpfs_node *node)
1451 {
1452 
1453 	KASSERT(vp != NULL);
1454 	KASSERT(node != NULL);
1455 	KASSERT(node->tn_vnode == vp);
1456 	KASSERT(node->tn_type == VDIR);
1457 
1458 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1459 	if (node->tn_spec.tn_dir.tn_parent == NULL) {
1460 		VOP_UNLOCK(vp);
1461 		return ENOENT;
1462 	}
1463 
1464 	return 0;
1465 }
1466 
1467 /*
1468  * Analyze the genealogy of the source and target nodes.
1469  *
1470  * On success, stores in *intermediate_node_ret either the child of
1471  * fdnode of which tdnode is a descendant, or null if tdnode is not a
1472  * descendant of fdnode at all.  Fails (ENOENT) if tdnode was rmdir'd.
1473  *
1474  * fdnode and tdnode must be unlocked and referenced.  The file
1475  * system's rename lock must also be held, to exclude concurrent
1476  * changes to the file system's genealogy other than rmdir.
1477  * NOTE(review): tmpfs_rename_enter_separate has that lock #if 0'd out.
1478  * XXX This causes an extra lock/unlock of tdnode in the case when
1479  * we're just about to lock it again before locking anything else.
1480  * However, changing that requires reorganizing the code to make it
1481  * even more horrifically obscure.
1482  */
1483 static int
1484 tmpfs_rename_genealogy(struct tmpfs_node *fdnode, struct tmpfs_node *tdnode,
1485     struct tmpfs_node **intermediate_node_ret)
1486 {
1487 	struct tmpfs_node *node = tdnode, *parent;
1488 	int error;
1489 
1490 	KASSERT(fdnode != NULL);
1491 	KASSERT(tdnode != NULL);
1492 	KASSERT(fdnode != tdnode);
1493 	KASSERT(intermediate_node_ret != NULL);
1494 
1495 	KASSERT(fdnode->tn_vnode != NULL);
1496 	KASSERT(tdnode->tn_vnode != NULL);
1497 	KASSERT(fdnode->tn_type == VDIR);
1498 	KASSERT(tdnode->tn_type == VDIR);
1499 
1500 	/*
1501 	 * We need to provisionally lock tdnode->tn_vnode to keep rmdir
1502 	 * from deleting it -- or any ancestor -- at an inopportune
1503 	 * moment.
1504 	 */
1505 	error = tmpfs_rename_lock_directory(tdnode->tn_vnode, tdnode);
1506 	if (error)
1507 		return error;
1508 
1509 	for (;;) {
1510 		parent = node->tn_spec.tn_dir.tn_parent;
1511 		KASSERT(parent != NULL);
1512 		KASSERT(parent->tn_type == VDIR);
1513 
1514 		/* Did we hit the root without finding fdnode?  */
1515 		if (parent == node) {
1516 			*intermediate_node_ret = NULL;
1517 			break;
1518 		}
1519 
1520 		/* Did we find that fdnode is an ancestor?  */
1521 		if (parent == fdnode) {
1522 			*intermediate_node_ret = node;
1523 			break;
1524 		}
1525 
1526 		/* Neither -- keep ascending the family tree.  */
1527 		node = parent;
1528 	}
1529 
1530 	VOP_UNLOCK(tdnode->tn_vnode);
1531 	return 0;
1532 }
1533 
1534 /*
1535  * Lock directories a and b, which must be distinct, and look up and
1536  * lock nodes a and b.  Do a first and then b.  Directory b may not be
1537  * an ancestor of directory a, although directory a may be an ancestor
1538  * of directory b.  Fail with overlap_error if node a is directory b.
1539  * Neither componentname may be `.' or `..'.  Exactly one of
1540  * a_missing_ok and b_missing_ok is true: that entry may be absent.
1541  * a_dvp and b_dvp must be referenced.
1542  *
1543  * On entry, a_dvp and b_dvp are unlocked.
1544  *
1545  * On success,
1546  * . a_dvp and b_dvp are locked,
1547  * . *a_dirent_ret is filled either with null (only if a_missing_ok)
1548  *     or with a directory entry whose node is locked and referenced,
1549  * . *a_vp_ret is filled either with null or with the corresponding vnode,
1550  * . *b_dirent_ret is filled either with null (only if b_missing_ok)
1551  *     or with a directory entry whose node is locked and referenced,
1552  * . *b_vp_ret is filled either with null or with the corresponding vnode,
1553  *     and
1554  * . the only pair of vnodes that may be identical is a_vp and b_vp.
1555  *
1556  * On failure, a_dvp and b_dvp are left unlocked, and *a_dirent_ret,
1557  * *a_vp_ret, *b_dirent_ret, and *b_vp_ret are left alone.
1558  */
1559 static int
1560 tmpfs_rename_lock(struct mount *mount, kauth_cred_t cred, int overlap_error,
1561     struct vnode *a_dvp, struct tmpfs_node *a_dnode,
1562     struct componentname *a_cnp, bool a_missing_ok,
1563     struct tmpfs_dirent **a_dirent_ret, struct vnode **a_vp_ret,
1564     struct vnode *b_dvp, struct tmpfs_node *b_dnode,
1565     struct componentname *b_cnp, bool b_missing_ok,
1566     struct tmpfs_dirent **b_dirent_ret, struct vnode **b_vp_ret)
1567 {
1568 	struct tmpfs_dirent *a_dirent, *b_dirent;
1569 	struct vnode *a_vp, *b_vp;
1570 	int error;
1571 
1572 	KASSERT(a_dvp != NULL);
1573 	KASSERT(a_dnode != NULL);
1574 	KASSERT(a_cnp != NULL);
1575 	KASSERT(a_dirent_ret != NULL);
1576 	KASSERT(a_vp_ret != NULL);
1577 	KASSERT(b_dvp != NULL);
1578 	KASSERT(b_dnode != NULL);
1579 	KASSERT(b_cnp != NULL);
1580 	KASSERT(b_dirent_ret != NULL);
1581 	KASSERT(b_vp_ret != NULL);
1582 	KASSERT(a_dvp != b_dvp);
1583 	KASSERT(a_dnode != b_dnode);
1584 	KASSERT(a_dnode->tn_vnode == a_dvp);
1585 	KASSERT(b_dnode->tn_vnode == b_dvp);
1586 	KASSERT(a_dnode->tn_type == VDIR);
1587 	KASSERT(b_dnode->tn_type == VDIR);
1588 	KASSERT(a_missing_ok != b_missing_ok);
1589 
1590 	error = tmpfs_rename_lock_directory(a_dvp, a_dnode);
1591 	if (error)
1592 		goto fail0;
1593 
1594 	/* Did we lose a race with mount?  */
1595 	if (a_dvp->v_mountedhere != NULL) {
1596 		error = EBUSY;
1597 		goto fail1;
1598 	}
1599 
1600 	/* Make sure the caller may search (VEXEC) the directory.  */
1601 	error = VOP_ACCESS(a_dvp, VEXEC, cred);
1602 	if (error)
1603 		goto fail1;
1604 
1605 	a_dirent = tmpfs_dir_lookup(a_dnode, a_cnp);
1606 	if (a_dirent != NULL) {
1607 		KASSERT(a_dirent->td_node != NULL);
1608 		/* We ruled out `.' earlier.  */
1609 		KASSERT(a_dirent->td_node != a_dnode);
1610 		/* We ruled out `..' earlier.  */
1611 		KASSERT(a_dirent->td_node !=
1612 		    a_dnode->tn_spec.tn_dir.tn_parent);
1613 		if (a_dirent->td_node == b_dnode) {	/* node a is dir b */
1614 			error = overlap_error;
1615 			goto fail1;
1616 		}
1617 		mutex_enter(&a_dirent->td_node->tn_vlock);
1618 		error = tmpfs_vnode_get(mount, a_dirent->td_node, &a_vp);
1619 		if (error)
1620 			goto fail1;
1621 		KASSERT(a_vp->v_mount == mount);
1622 		/* Refuse to rename (over) a mount point.  */
1623 		if ((a_vp->v_type == VDIR) && (a_vp->v_mountedhere != NULL)) {
1624 			error = EBUSY;
1625 			goto fail2;
1626 		}
1627 	} else if (!a_missing_ok) {
1628 		error = ENOENT;
1629 		goto fail1;
1630 	} else {
1631 		a_vp = NULL;
1632 	}
1633 	KASSERT(a_vp != a_dvp);
1634 	KASSERT(a_vp != b_dvp);
1635 
1636 	error = tmpfs_rename_lock_directory(b_dvp, b_dnode);
1637 	if (error)
1638 		goto fail2;
1639 
1640 	/* Did we lose a race with mount?  */
1641 	if (b_dvp->v_mountedhere != NULL) {
1642 		error = EBUSY;
1643 		goto fail3;
1644 	}
1645 
1646 	/* Make sure the caller may search (VEXEC) the directory.  */
1647 	error = VOP_ACCESS(b_dvp, VEXEC, cred);
1648 	if (error)
1649 		goto fail3;
1650 
1651 	b_dirent = tmpfs_dir_lookup(b_dnode, b_cnp);
1652 	if (b_dirent != NULL) {
1653 		KASSERT(b_dirent->td_node != NULL);
1654 		/* We ruled out `.' earlier.  */
1655 		KASSERT(b_dirent->td_node != b_dnode);
1656 		/* We ruled out `..' earlier.  */
1657 		KASSERT(b_dirent->td_node !=
1658 		    b_dnode->tn_spec.tn_dir.tn_parent);
1659 		/* b is not an ancestor of a.  */
1660 		KASSERT(b_dirent->td_node != a_dnode);
1661 		/* But the source and target nodes might be the same.  */
1662 		if ((a_dirent == NULL) ||
1663 		    (a_dirent->td_node != b_dirent->td_node)) {
1664 			mutex_enter(&b_dirent->td_node->tn_vlock);
1665 			error = tmpfs_vnode_get(mount, b_dirent->td_node,
1666 			    &b_vp);
1667 			if (error)
1668 				goto fail3;
1669 			KASSERT(b_vp->v_mount == mount);
1670 			KASSERT(a_vp != b_vp);
1671 			/* Refuse to rename (over) a mount point.  */
1672 			if ((b_vp->v_type == VDIR) &&
1673 			    (b_vp->v_mountedhere != NULL)) {
1674 				error = EBUSY;
1675 				goto fail4;
1676 			}
1677 		} else {
1678 			b_vp = a_vp;
1679 			vref(b_vp);	/* extra ref; the lock is a_vp's */
1680 		}
1681 	} else if (!b_missing_ok) {
1682 		error = ENOENT;
1683 		goto fail3;
1684 	} else {
1685 		b_vp = NULL;
1686 	}
1687 	KASSERT(b_vp != a_dvp);
1688 	KASSERT(b_vp != b_dvp);
1689 
1690 	KASSERT(VOP_ISLOCKED(a_dvp) == LK_EXCLUSIVE);
1691 	KASSERT(VOP_ISLOCKED(b_dvp) == LK_EXCLUSIVE);
1692 	KASSERT(a_missing_ok || (a_dirent != NULL));
1693 	KASSERT(a_missing_ok || (a_dirent->td_node != NULL));
1694 	KASSERT(b_missing_ok || (b_dirent != NULL));
1695 	KASSERT(b_missing_ok || (b_dirent->td_node != NULL));
1696 	KASSERT((a_dirent == NULL) || (a_dirent->td_node != NULL));
1697 	KASSERT((a_dirent == NULL) || (a_dirent->td_node->tn_vnode == a_vp));
1698 	KASSERT((b_dirent == NULL) || (b_dirent->td_node != NULL));
1699 	KASSERT((b_dirent == NULL) || (b_dirent->td_node->tn_vnode == b_vp));
1700 	KASSERT((a_vp == NULL) || (VOP_ISLOCKED(a_vp) == LK_EXCLUSIVE));
1701 	KASSERT((b_vp == NULL) || (VOP_ISLOCKED(b_vp) == LK_EXCLUSIVE));
1702 
1703 	*a_dirent_ret = a_dirent;
1704 	*b_dirent_ret = b_dirent;
1705 	*a_vp_ret = a_vp;
1706 	*b_vp_ret = b_vp;
1707 	return 0;
1708 
1709 fail4:	if (b_vp != NULL) {
1710 		KASSERT(VOP_ISLOCKED(b_vp) == LK_EXCLUSIVE);
1711 		if (b_vp != a_vp)
1712 			vput(b_vp);
1713 		else
1714 			vrele(a_vp);
1715 	}
1716 
1717 fail3:	KASSERT(VOP_ISLOCKED(b_dvp) == LK_EXCLUSIVE);
1718 	VOP_UNLOCK(b_dvp);
1719 
1720 fail2:	if (a_vp != NULL) {
1721 		KASSERT(VOP_ISLOCKED(a_vp) == LK_EXCLUSIVE);
1722 		vput(a_vp);
1723 	}
1724 
1725 fail1:	KASSERT(VOP_ISLOCKED(a_dvp) == LK_EXCLUSIVE);
1726 	VOP_UNLOCK(a_dvp);
1727 
1728 fail0:	/* KASSERT(VOP_ISLOCKED(a_dvp) != LK_EXCLUSIVE); */
1729 	/* KASSERT(VOP_ISLOCKED(b_dvp) != LK_EXCLUSIVE); */
1730 	/* KASSERT((a_vp == NULL) || (VOP_ISLOCKED(a_vp) != LK_EXCLUSIVE)); */
1731 	/* KASSERT((b_vp == NULL) || (VOP_ISLOCKED(b_vp) != LK_EXCLUSIVE)); */
1732 	return error;
1733 }
1734 
1735 /*
1736  * Shuffle the directory entries to move fvp from the directory fdvp
1737  * into the directory tdvp.  fde is fvp's directory entry in fdvp.  If
1738  * we are overwriting a target node, it is tvp, and tde is its
1739  * directory entry in tdvp, which is detached and freed here.
1740  *
1741  * fdvp, fvp, tdvp, and tvp (if nonnull) must be locked and referenced.
1742  */
1743 static void
1744 tmpfs_rename_attachdetach(struct tmpfs_mount *tmpfs,
1745     struct vnode *fdvp, struct tmpfs_dirent *fde, struct vnode *fvp,
1746     struct vnode *tdvp, struct tmpfs_dirent *tde, struct vnode *tvp)
1747 {
1748 
1749 	KASSERT(tmpfs != NULL);
1750 	KASSERT(fdvp != NULL);
1751 	KASSERT(fde != NULL);
1752 	KASSERT(fvp != NULL);
1753 	KASSERT(tdvp != NULL);
1754 	KASSERT(fde->td_node != NULL);
1755 	KASSERT(fde->td_node->tn_vnode == fvp);
1756 	KASSERT((tde == NULL) == (tvp == NULL));
1757 	KASSERT((tde == NULL) || (tde->td_node != NULL));
1758 	KASSERT((tde == NULL) || (tde->td_node->tn_vnode == tvp));
1759 	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
1760 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
1761 	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
1762 	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
1763 
1764 	/*
1765 	 * If we are moving from one directory to another, detach the
1766 	 * source entry and reattach it to the target directory.
1767 	 */
1768 	if (fdvp != tdvp) {
1769 		/* tmpfs_dir_detach clobbers fde->td_node, so save it.  */
1770 		struct tmpfs_node *fnode = fde->td_node;
1771 		tmpfs_dir_detach(fdvp, fde);
1772 		tmpfs_dir_attach(tdvp, fde, fnode);
1773 	} else if (tvp == NULL) {
1774 		/*
1775 		 * We are changing the directory.  tmpfs_dir_attach and
1776 		 * tmpfs_dir_detach note the events for us, but for
1777 		 * this case we don't call them, so we must note the
1778 		 * event explicitly.
1779 		 */
1780 		VN_KNOTE(fdvp, NOTE_WRITE);
1781 	}
1782 
1783 	/*
1784 	 * If we are replacing an existing target entry, delete it.
1785 	 */
1786 	if (tde != NULL) {
1787 		KASSERT(tvp != NULL);
1788 		KASSERT(tde->td_node != NULL);
1789 		KASSERT((fvp->v_type == VDIR) == (tvp->v_type == VDIR));
1790 		if (tde->td_node->tn_type == VDIR) {
1791 			KASSERT(tde->td_node->tn_size == 0);
1792 			KASSERT(tde->td_node->tn_links == 2);
1793 			/* Decrement the extra link count for `.' so
1794 			 * the vnode will be recycled when released.  */
1795 			tde->td_node->tn_links--;
1796 		}
1797 		tmpfs_dir_detach(tdvp, tde);
1798 		tmpfs_free_dirent(tmpfs, tde);
1799 	}
1800 }
1801 
1802 /*
1803  * Remove the entry de for the non-directory vp from the directory dvp.
1804  * On success de is detached and freed; on failure nothing changes.
1805  * Everything must be locked and referenced.
1806  */
1807 static int
1808 tmpfs_do_remove(struct tmpfs_mount *tmpfs, struct vnode *dvp,
1809     struct tmpfs_node *dnode, struct tmpfs_dirent *de, struct vnode *vp,
1810     kauth_cred_t cred)
1811 {
1812 	int error;
1813 
1814 	KASSERT(tmpfs != NULL);
1815 	KASSERT(dvp != NULL);
1816 	KASSERT(dnode != NULL);
1817 	KASSERT(de != NULL);
1818 	KASSERT(vp != NULL);
1819 	KASSERT(dnode->tn_vnode == dvp);
1820 	KASSERT(de->td_node != NULL);
1821 	KASSERT(de->td_node->tn_vnode == vp);
1822 	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
1823 	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
1824 
1825 	error = tmpfs_remove_check_possible(dnode, de->td_node);
1826 	if (error)
1827 		return error;
1828 	/* The fs-level verdict is handed to kauth as its last argument.  */
1829 	error = tmpfs_remove_check_permitted(cred, dnode, de->td_node);
1830 	error = kauth_authorize_vnode(cred, KAUTH_VNODE_DELETE, vp, dvp,
1831 	    error);
1832 	if (error)
1833 		return error;
1834 
1835 	tmpfs_dir_detach(dvp, de);
1836 	tmpfs_free_dirent(tmpfs, de);
1837 
1838 	return 0;
1839 }
1840 
1841 /*
1842  * Check whether a rename is possible independent of credentials.
1843  * Returns EPERM if file flags forbid it, 0 otherwise.  tnode may be
1844  * null.  Everything must be locked and referenced.
1845  */
1846 static int
1847 tmpfs_rename_check_possible(
1848     struct tmpfs_node *fdnode, struct tmpfs_node *fnode,
1849     struct tmpfs_node *tdnode, struct tmpfs_node *tnode)
1850 {
1851 
1852 	KASSERT(fdnode != NULL);
1853 	KASSERT(fnode != NULL);
1854 	KASSERT(tdnode != NULL);
1855 	KASSERT(fdnode != fnode);
1856 	KASSERT(tdnode != tnode);
1857 	KASSERT(fnode != tnode);
1858 	KASSERT(fdnode->tn_vnode != NULL);
1859 	KASSERT(fnode->tn_vnode != NULL);
1860 	KASSERT(tdnode->tn_vnode != NULL);
1861 	KASSERT((tnode == NULL) || (tnode->tn_vnode != NULL));
1862 	KASSERT(VOP_ISLOCKED(fdnode->tn_vnode) == LK_EXCLUSIVE);
1863 	KASSERT(VOP_ISLOCKED(fnode->tn_vnode) == LK_EXCLUSIVE);
1864 	KASSERT(VOP_ISLOCKED(tdnode->tn_vnode) == LK_EXCLUSIVE);
1865 	KASSERT((tnode == NULL) ||
1866 	    (VOP_ISLOCKED(tnode->tn_vnode) == LK_EXCLUSIVE));
1867 
1868 	/*
1869 	 * If fdnode is immutable, we can't write to it.  If fdnode is
1870 	 * append-only, the only change we can make is to add entries
1871 	 * to it.  If fnode is immutable, we can't change the links to
1872 	 * it.  If fnode is append-only...well, this is what UFS does.
1873 	 */
1874 	if ((fdnode->tn_flags | fnode->tn_flags) & (IMMUTABLE | APPEND))
1875 		return EPERM;
1876 
1877 	/*
1878 	 * If tdnode is immutable, we can't write to it.  If tdnode is
1879 	 * append-only, we can add entries, but we can't change
1880 	 * existing entries.
1881 	 */
1882 	if (tdnode->tn_flags & (IMMUTABLE | (tnode? APPEND : 0)))
1883 		return EPERM;
1884 
1885 	/*
1886 	 * If tnode is immutable, we can't replace links to it.  If
1887 	 * tnode is append-only...well, this is what UFS does.
1888 	 */
1889 	if (tnode != NULL) {
1890 		KASSERT(tnode != NULL);
1891 		if ((tnode->tn_flags & (IMMUTABLE | APPEND)) != 0)
1892 			return EPERM;
1893 	}
1894 
1895 	return 0;
1896 }
1897 
1898 /*
1899  * Check whether a rename is permitted given our credentials.
1900  * Returns 0 if so, otherwise the first failing check's error.
1901  * tnode may be null.  Everything must be locked and referenced.
1902  */
1903 static int
1904 tmpfs_rename_check_permitted(kauth_cred_t cred,
1905     struct tmpfs_node *fdnode, struct tmpfs_node *fnode,
1906     struct tmpfs_node *tdnode, struct tmpfs_node *tnode)
1907 {
1908 	int error;
1909 
1910 	KASSERT(fdnode != NULL);
1911 	KASSERT(fnode != NULL);
1912 	KASSERT(tdnode != NULL);
1913 	KASSERT(fdnode != fnode);
1914 	KASSERT(tdnode != tnode);
1915 	KASSERT(fnode != tnode);
1916 	KASSERT(fdnode->tn_vnode != NULL);
1917 	KASSERT(fnode->tn_vnode != NULL);
1918 	KASSERT(tdnode->tn_vnode != NULL);
1919 	KASSERT((tnode == NULL) || (tnode->tn_vnode != NULL));
1920 	KASSERT(VOP_ISLOCKED(fdnode->tn_vnode) == LK_EXCLUSIVE);
1921 	KASSERT(VOP_ISLOCKED(fnode->tn_vnode) == LK_EXCLUSIVE);
1922 	KASSERT(VOP_ISLOCKED(tdnode->tn_vnode) == LK_EXCLUSIVE);
1923 	KASSERT((tnode == NULL) ||
1924 	    (VOP_ISLOCKED(tnode->tn_vnode) == LK_EXCLUSIVE));
1925 
1926 	/*
1927 	 * We need to remove or change an entry in the source directory.
1928 	 */
1929 	error = VOP_ACCESS(fdnode->tn_vnode, VWRITE, cred);
1930 	if (error)
1931 		return error;
1932 
1933 	/*
1934 	 * If we are changing directories, then we need to write to the
1935 	 * target directory to add or change an entry.  Also, if fnode
1936 	 * is a directory, we need to write to it to change its `..'
1937 	 * entry.
1938 	 */
1939 	if (fdnode != tdnode) {
1940 		error = VOP_ACCESS(tdnode->tn_vnode, VWRITE, cred);
1941 		if (error)
1942 			return error;
1943 		if (fnode->tn_type == VDIR) {
1944 			error = VOP_ACCESS(fnode->tn_vnode, VWRITE, cred);
1945 			if (error)
1946 				return error;
1947 		}
1948 	}
1949 	/* Sticky-bit rules apply to both the source and target entries.  */
1950 	error = tmpfs_check_sticky(cred, fdnode, fnode);
1951 	if (error)
1952 		return error;
1953 
1954 	error = tmpfs_check_sticky(cred, tdnode, tnode);
1955 	if (error)
1956 		return error;
1957 
1958 	return 0;
1959 }
1960 
1961 /*
1962  * Check whether removing node's entry in dnode is possible independent
1963  * of credentials.  Returns EPERM if file flags forbid it, else 0.
1964  *
1965  * Everything must be locked and referenced.
1966  */
1967 static int
1968 tmpfs_remove_check_possible(struct tmpfs_node *dnode, struct tmpfs_node *node)
1969 {
1970 
1971 	KASSERT(dnode != NULL);
1972 	KASSERT(dnode->tn_vnode != NULL);
1973 	KASSERT(node != NULL);
1974 	KASSERT(dnode != node);
1975 	KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE);
1976 	KASSERT(VOP_ISLOCKED(node->tn_vnode) == LK_EXCLUSIVE);
1977 
1978 	/*
1979 	 * We want to delete the entry.  If dnode is immutable, we
1980 	 * can't write to it to delete the entry.  If dnode is
1981 	 * append-only, the only change we can make is to add entries,
1982 	 * so we can't delete entries.  If node is immutable, we can't
1983 	 * change the links to it, so we can't delete the entry.  If
1984 	 * node is append-only...well, this is what UFS does.
1985 	 */
1986 	if ((dnode->tn_flags | node->tn_flags) & (IMMUTABLE | APPEND))
1987 		return EPERM;
1988 
1989 	return 0;
1990 }
1991 
1992 /*
1993  * Check whether removing node's entry in dnode is permitted given our
1994  * credentials.  Returns 0 or the first failing check's error.
1995  *
1996  * Everything must be locked and referenced.
1997  */
1998 static int
1999 tmpfs_remove_check_permitted(kauth_cred_t cred,
2000     struct tmpfs_node *dnode, struct tmpfs_node *node)
2001 {
2002 	int error;
2003 
2004 	KASSERT(dnode != NULL);
2005 	KASSERT(dnode->tn_vnode != NULL);
2006 	KASSERT(node != NULL);
2007 	KASSERT(dnode != node);
2008 	KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE);
2009 	KASSERT(VOP_ISLOCKED(node->tn_vnode) == LK_EXCLUSIVE);
2010 
2011 	/*
2012 	 * Check whether we are permitted to write to the source
2013 	 * directory in order to delete an entry from it.
2014 	 */
2015 	error = VOP_ACCESS(dnode->tn_vnode, VWRITE, cred);
2016 	if (error)
2017 		return error;
2018 	/* A sticky directory further restricts who may delete the entry.  */
2019 	error = tmpfs_check_sticky(cred, dnode, node);
2020 	if (error)
2021 		return error;
2022 
2023 	return 0;
2024 }
2025 
2026 /*
2027  * Check whether we may change an entry in a sticky directory.  If the
2028  * directory is sticky, the user must own either the directory or, if
2029  * it exists, the node, in order to change the entry.
2030  *
2031  * Everything must be locked and referenced.
2032  */
2033 static int
2034 tmpfs_check_sticky(kauth_cred_t cred,
2035     struct tmpfs_node *dnode, struct tmpfs_node *node)
2036 {
2037 
2038 	KASSERT(dnode != NULL);
2039 	KASSERT(dnode->tn_vnode != NULL);
2040 	KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE);
2041 	KASSERT((node == NULL) || (node->tn_vnode != NULL));
2042 	KASSERT((node == NULL) ||
2043 	    (VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE));
2044 
2045 	if (dnode->tn_mode & S_ISTXT) {
2046 		uid_t euid = kauth_cred_geteuid(cred);
2047 		if (euid == dnode->tn_uid)
2048 			return 0;
2049 		if ((node == NULL) || (euid == node->tn_uid))
2050 			return 0;
2051 		return EPERM;
2052 	}
2053 
2054 	return 0;
2055 }
2056 
2057 int
2058 tmpfs_mkdir(void *v)
2059 {
2060 	struct vop_mkdir_args /* {
2061 		struct vnode		*a_dvp;
2062 		struct vnode		**a_vpp;
2063 		struct componentname	*a_cnp;
2064 		struct vattr		*a_vap;
2065 	} */ *ap = v;
2066 	vnode_t *dvp = ap->a_dvp;
2067 	vnode_t **vpp = ap->a_vpp;
2068 	struct componentname *cnp = ap->a_cnp;
2069 	struct vattr *vap = ap->a_vap;
2070 
2071 	KASSERT(vap->va_type == VDIR);
2072 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
2073 }
2074 
2075 int
2076 tmpfs_rmdir(void *v)
2077 {
2078 	struct vop_rmdir_args /* {
2079 		struct vnode		*a_dvp;
2080 		struct vnode		*a_vp;
2081 		struct componentname	*a_cnp;
2082 	} */ *ap = v;
2083 	vnode_t *dvp = ap->a_dvp;
2084 	vnode_t *vp = ap->a_vp;
2085 	tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
2086 	tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
2087 	tmpfs_node_t *node = VP_TO_TMPFS_DIR(vp);
2088 	tmpfs_dirent_t *de;
2089 	int error = 0;
2090 
2091 	KASSERT(VOP_ISLOCKED(dvp));
2092 	KASSERT(VOP_ISLOCKED(vp));
2093 	KASSERT(node->tn_spec.tn_dir.tn_parent == dnode);
2094 
2095 	/*
2096 	 * Directories with more than two non-whiteout
2097 	 * entries ('.' and '..') cannot be removed.
2098 	 */
2099 	if (node->tn_size > 0) {
2100 		KASSERT(error == 0);
2101 		TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
2102 			if (de->td_node != TMPFS_NODE_WHITEOUT) {
2103 				error = ENOTEMPTY;
2104 				break;
2105 			}
2106 		}
2107 		if (error)
2108 			goto out;
2109 	}
2110 
2111 	/* Lookup the directory entry (check the cached hint first). */
2112 	de = tmpfs_dir_cached(node);
2113 	if (de == NULL) {
2114 		struct componentname *cnp = ap->a_cnp;
2115 		de = tmpfs_dir_lookup(dnode, cnp);
2116 	}
2117 	KASSERT(de && de->td_node == node);
2118 
2119 	/* Check flags to see if we are allowed to remove the directory. */
2120 	if (dnode->tn_flags & APPEND || node->tn_flags & (IMMUTABLE | APPEND)) {
2121 		error = EPERM;
2122 		goto out;
2123 	}
2124 
2125 	/* Decrement the link count for the virtual '.' entry. */
2126 	node->tn_links--;
2127 	node->tn_status |= TMPFS_NODE_STATUSALL;
2128 
2129 	/* Detach the directory entry from the directory. */
2130 	tmpfs_dir_detach(dvp, de);
2131 
2132 	/* Purge the cache for parent. */
2133 	cache_purge(dvp);
2134 
2135 	/*
2136 	 * Destroy the directory entry or replace it with a whiteout.
2137 	 * Note: the inode referred by it will not be destroyed
2138 	 * until the vnode is reclaimed.
2139 	 */
2140 	if (ap->a_cnp->cn_flags & DOWHITEOUT)
2141 		tmpfs_dir_attach(dvp, de, TMPFS_NODE_WHITEOUT);
2142 	else
2143 		tmpfs_free_dirent(tmp, de);
2144 
2145 	/* Destroy the whiteout entries from the node. */
2146 	while ((de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir)) != NULL) {
2147 		KASSERT(de->td_node == TMPFS_NODE_WHITEOUT);
2148 		tmpfs_dir_detach(vp, de);
2149 		tmpfs_free_dirent(tmp, de);
2150 	}
2151 
2152 	KASSERT(node->tn_links == 0);
2153 out:
2154 	/* Release the nodes. */
2155 	vput(dvp);
2156 	vput(vp);
2157 	return error;
2158 }
2159 
2160 int
2161 tmpfs_symlink(void *v)
2162 {
2163 	struct vop_symlink_args /* {
2164 		struct vnode		*a_dvp;
2165 		struct vnode		**a_vpp;
2166 		struct componentname	*a_cnp;
2167 		struct vattr		*a_vap;
2168 		char			*a_target;
2169 	} */ *ap = v;
2170 	vnode_t *dvp = ap->a_dvp;
2171 	vnode_t **vpp = ap->a_vpp;
2172 	struct componentname *cnp = ap->a_cnp;
2173 	struct vattr *vap = ap->a_vap;
2174 	char *target = ap->a_target;
2175 
2176 	KASSERT(vap->va_type == VLNK);
2177 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, target);
2178 }
2179 
2180 int
2181 tmpfs_readdir(void *v)
2182 {
2183 	struct vop_readdir_args /* {
2184 		struct vnode	*a_vp;
2185 		struct uio	*a_uio;
2186 		kauth_cred_t	a_cred;
2187 		int		*a_eofflag;
2188 		off_t		**a_cookies;
2189 		int		*ncookies;
2190 	} */ *ap = v;
2191 	vnode_t *vp = ap->a_vp;
2192 	struct uio *uio = ap->a_uio;
2193 	int *eofflag = ap->a_eofflag;
2194 	off_t **cookies = ap->a_cookies;
2195 	int *ncookies = ap->a_ncookies;
2196 	off_t startoff, cnt;
2197 	tmpfs_node_t *node;
2198 	int error;
2199 
2200 	KASSERT(VOP_ISLOCKED(vp));
2201 
2202 	/* This operation only makes sense on directory nodes. */
2203 	if (vp->v_type != VDIR) {
2204 		return ENOTDIR;
2205 	}
2206 	node = VP_TO_TMPFS_DIR(vp);
2207 	startoff = uio->uio_offset;
2208 	cnt = 0;
2209 	if (node->tn_links == 0) {
2210 		error = 0;
2211 		goto out;
2212 	}
2213 
2214 	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOT) {
2215 		error = tmpfs_dir_getdotdent(node, uio);
2216 		if (error != 0) {
2217 			if (error == -1)
2218 				error = 0;
2219 			goto out;
2220 		}
2221 		cnt++;
2222 	}
2223 	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT) {
2224 		error = tmpfs_dir_getdotdotdent(node, uio);
2225 		if (error != 0) {
2226 			if (error == -1)
2227 				error = 0;
2228 			goto out;
2229 		}
2230 		cnt++;
2231 	}
2232 	error = tmpfs_dir_getdents(node, uio, &cnt);
2233 	if (error == -1) {
2234 		error = 0;
2235 	}
2236 	KASSERT(error >= 0);
2237 out:
2238 	if (eofflag != NULL) {
2239 		*eofflag = (!error && uio->uio_offset == TMPFS_DIRCOOKIE_EOF);
2240 	}
2241 	if (error || cookies == NULL || ncookies == NULL) {
2242 		return error;
2243 	}
2244 
2245 	/* Update NFS-related variables, if any. */
2246 	off_t i, off = startoff;
2247 	tmpfs_dirent_t *de = NULL;
2248 
2249 	*cookies = malloc(cnt * sizeof(off_t), M_TEMP, M_WAITOK);
2250 	*ncookies = cnt;
2251 
2252 	for (i = 0; i < cnt; i++) {
2253 		KASSERT(off != TMPFS_DIRCOOKIE_EOF);
2254 		if (off != TMPFS_DIRCOOKIE_DOT) {
2255 			if (off == TMPFS_DIRCOOKIE_DOTDOT) {
2256 				de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir);
2257 			} else if (de != NULL) {
2258 				de = TAILQ_NEXT(de, td_entries);
2259 			} else {
2260 				de = tmpfs_dir_lookupbycookie(node, off);
2261 				KASSERT(de != NULL);
2262 				de = TAILQ_NEXT(de, td_entries);
2263 			}
2264 			if (de == NULL) {
2265 				off = TMPFS_DIRCOOKIE_EOF;
2266 			} else {
2267 				off = tmpfs_dircookie(de);
2268 			}
2269 		} else {
2270 			off = TMPFS_DIRCOOKIE_DOTDOT;
2271 		}
2272 		(*cookies)[i] = off;
2273 	}
2274 	KASSERT(uio->uio_offset == off);
2275 	return error;
2276 }
2277 
2278 int
2279 tmpfs_readlink(void *v)
2280 {
2281 	struct vop_readlink_args /* {
2282 		struct vnode	*a_vp;
2283 		struct uio	*a_uio;
2284 		kauth_cred_t	a_cred;
2285 	} */ *ap = v;
2286 	vnode_t *vp = ap->a_vp;
2287 	struct uio *uio = ap->a_uio;
2288 	tmpfs_node_t *node;
2289 	int error;
2290 
2291 	KASSERT(VOP_ISLOCKED(vp));
2292 	KASSERT(uio->uio_offset == 0);
2293 	KASSERT(vp->v_type == VLNK);
2294 
2295 	node = VP_TO_TMPFS_NODE(vp);
2296 	error = uiomove(node->tn_spec.tn_lnk.tn_link,
2297 	    MIN(node->tn_size, uio->uio_resid), uio);
2298 	node->tn_status |= TMPFS_NODE_ACCESSED;
2299 
2300 	return error;
2301 }
2302 
2303 int
2304 tmpfs_inactive(void *v)
2305 {
2306 	struct vop_inactive_args /* {
2307 		struct vnode *a_vp;
2308 		bool *a_recycle;
2309 	} */ *ap = v;
2310 	vnode_t *vp = ap->a_vp;
2311 	tmpfs_node_t *node;
2312 
2313 	KASSERT(VOP_ISLOCKED(vp));
2314 
2315 	node = VP_TO_TMPFS_NODE(vp);
2316 	*ap->a_recycle = (node->tn_links == 0);
2317 	VOP_UNLOCK(vp);
2318 
2319 	return 0;
2320 }
2321 
2322 int
2323 tmpfs_reclaim(void *v)
2324 {
2325 	struct vop_reclaim_args /* {
2326 		struct vnode *a_vp;
2327 	} */ *ap = v;
2328 	vnode_t *vp = ap->a_vp;
2329 	tmpfs_mount_t *tmp = VFS_TO_TMPFS(vp->v_mount);
2330 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
2331 	bool racing;
2332 
2333 	/* Disassociate inode from vnode. */
2334 	mutex_enter(&node->tn_vlock);
2335 	node->tn_vnode = NULL;
2336 	vp->v_data = NULL;
2337 	/* Check if tmpfs_vnode_get() is racing with us. */
2338 	racing = TMPFS_NODE_RECLAIMING(node);
2339 	mutex_exit(&node->tn_vlock);
2340 
2341 	/*
2342 	 * If inode is not referenced, i.e. no links, then destroy it.
2343 	 * Note: if racing - inode is about to get a new vnode, leave it.
2344 	 */
2345 	if (node->tn_links == 0 && !racing) {
2346 		tmpfs_free_node(tmp, node);
2347 	}
2348 	return 0;
2349 }
2350 
2351 int
2352 tmpfs_pathconf(void *v)
2353 {
2354 	struct vop_pathconf_args /* {
2355 		struct vnode	*a_vp;
2356 		int		a_name;
2357 		register_t	*a_retval;
2358 	} */ *ap = v;
2359 	const int name = ap->a_name;
2360 	register_t *retval = ap->a_retval;
2361 	int error = 0;
2362 
2363 	switch (name) {
2364 	case _PC_LINK_MAX:
2365 		*retval = LINK_MAX;
2366 		break;
2367 	case _PC_NAME_MAX:
2368 		*retval = TMPFS_MAXNAMLEN;
2369 		break;
2370 	case _PC_PATH_MAX:
2371 		*retval = PATH_MAX;
2372 		break;
2373 	case _PC_PIPE_BUF:
2374 		*retval = PIPE_BUF;
2375 		break;
2376 	case _PC_CHOWN_RESTRICTED:
2377 		*retval = 1;
2378 		break;
2379 	case _PC_NO_TRUNC:
2380 		*retval = 1;
2381 		break;
2382 	case _PC_SYNC_IO:
2383 		*retval = 1;
2384 		break;
2385 	case _PC_FILESIZEBITS:
2386 		*retval = sizeof(off_t) * CHAR_BIT;
2387 		break;
2388 	default:
2389 		error = EINVAL;
2390 	}
2391 	return error;
2392 }
2393 
2394 int
2395 tmpfs_advlock(void *v)
2396 {
2397 	struct vop_advlock_args /* {
2398 		struct vnode	*a_vp;
2399 		void *		a_id;
2400 		int		a_op;
2401 		struct flock	*a_fl;
2402 		int		a_flags;
2403 	} */ *ap = v;
2404 	vnode_t *vp = ap->a_vp;
2405 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
2406 
2407 	return lf_advlock(v, &node->tn_lockf, node->tn_size);
2408 }
2409 
2410 int
2411 tmpfs_getpages(void *v)
2412 {
2413 	struct vop_getpages_args /* {
2414 		struct vnode *a_vp;
2415 		voff_t a_offset;
2416 		struct vm_page **a_m;
2417 		int *a_count;
2418 		int a_centeridx;
2419 		vm_prot_t a_access_type;
2420 		int a_advice;
2421 		int a_flags;
2422 	} */ * const ap = v;
2423 	vnode_t *vp = ap->a_vp;
2424 	const voff_t offset = ap->a_offset;
2425 	struct vm_page **pgs = ap->a_m;
2426 	const int centeridx = ap->a_centeridx;
2427 	const vm_prot_t access_type = ap->a_access_type;
2428 	const int advice = ap->a_advice;
2429 	const int flags = ap->a_flags;
2430 	int error, npages = *ap->a_count;
2431 	tmpfs_node_t *node;
2432 	struct uvm_object *uobj;
2433 
2434 	KASSERT(vp->v_type == VREG);
2435 	KASSERT(mutex_owned(vp->v_interlock));
2436 
2437 	node = VP_TO_TMPFS_NODE(vp);
2438 	uobj = node->tn_spec.tn_reg.tn_aobj;
2439 
2440 	/*
2441 	 * Currently, PGO_PASTEOF is not supported.
2442 	 */
2443 	if (vp->v_size <= offset + (centeridx << PAGE_SHIFT)) {
2444 		if ((flags & PGO_LOCKED) == 0)
2445 			mutex_exit(vp->v_interlock);
2446 		return EINVAL;
2447 	}
2448 
2449 	if (vp->v_size < offset + (npages << PAGE_SHIFT)) {
2450 		npages = (round_page(vp->v_size) - offset) >> PAGE_SHIFT;
2451 	}
2452 
2453 	if ((flags & PGO_LOCKED) != 0)
2454 		return EBUSY;
2455 
2456 	if ((flags & PGO_NOTIMESTAMP) == 0) {
2457 		if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
2458 			node->tn_status |= TMPFS_NODE_ACCESSED;
2459 
2460 		if ((access_type & VM_PROT_WRITE) != 0) {
2461 			node->tn_status |= TMPFS_NODE_MODIFIED;
2462 			if (vp->v_mount->mnt_flag & MNT_RELATIME)
2463 				node->tn_status |= TMPFS_NODE_ACCESSED;
2464 		}
2465 	}
2466 
2467 	/*
2468 	 * Invoke the pager.
2469 	 *
2470 	 * Clean the array of pages before.  XXX: PR/32166
2471 	 * Note that vnode lock is shared with underlying UVM object.
2472 	 */
2473 	if (pgs) {
2474 		memset(pgs, 0, sizeof(struct vm_pages *) * npages);
2475 	}
2476 	KASSERT(vp->v_interlock == uobj->vmobjlock);
2477 
2478 	error = (*uobj->pgops->pgo_get)(uobj, offset, pgs, &npages, centeridx,
2479 	    access_type, advice, flags | PGO_ALLPAGES);
2480 
2481 #if defined(DEBUG)
2482 	if (!error && pgs) {
2483 		for (int i = 0; i < npages; i++) {
2484 			KASSERT(pgs[i] != NULL);
2485 		}
2486 	}
2487 #endif
2488 	return error;
2489 }
2490 
2491 int
2492 tmpfs_putpages(void *v)
2493 {
2494 	struct vop_putpages_args /* {
2495 		struct vnode *a_vp;
2496 		voff_t a_offlo;
2497 		voff_t a_offhi;
2498 		int a_flags;
2499 	} */ * const ap = v;
2500 	vnode_t *vp = ap->a_vp;
2501 	const voff_t offlo = ap->a_offlo;
2502 	const voff_t offhi = ap->a_offhi;
2503 	const int flags = ap->a_flags;
2504 	tmpfs_node_t *node;
2505 	struct uvm_object *uobj;
2506 	int error;
2507 
2508 	KASSERT(mutex_owned(vp->v_interlock));
2509 
2510 	if (vp->v_type != VREG) {
2511 		mutex_exit(vp->v_interlock);
2512 		return 0;
2513 	}
2514 
2515 	node = VP_TO_TMPFS_NODE(vp);
2516 	uobj = node->tn_spec.tn_reg.tn_aobj;
2517 
2518 	KASSERT(vp->v_interlock == uobj->vmobjlock);
2519 	error = (*uobj->pgops->pgo_put)(uobj, offlo, offhi, flags);
2520 
2521 	/* XXX mtime */
2522 
2523 	return error;
2524 }
2525 
2526 int
2527 tmpfs_whiteout(void *v)
2528 {
2529 	struct vop_whiteout_args /* {
2530 		struct vnode		*a_dvp;
2531 		struct componentname	*a_cnp;
2532 		int			a_flags;
2533 	} */ *ap = v;
2534 	vnode_t *dvp = ap->a_dvp;
2535 	struct componentname *cnp = ap->a_cnp;
2536 	const int flags = ap->a_flags;
2537 	tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
2538 	tmpfs_dirent_t *de;
2539 	int error;
2540 
2541 	switch (flags) {
2542 	case LOOKUP:
2543 		break;
2544 	case CREATE:
2545 		error = tmpfs_alloc_dirent(tmp, cnp->cn_nameptr,
2546 		    cnp->cn_namelen, &de);
2547 		if (error)
2548 			return error;
2549 		tmpfs_dir_attach(dvp, de, TMPFS_NODE_WHITEOUT);
2550 		break;
2551 	case DELETE:
2552 		cnp->cn_flags &= ~DOWHITEOUT; /* when in doubt, cargo cult */
2553 		de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), cnp);
2554 		if (de == NULL)
2555 			return ENOENT;
2556 		tmpfs_dir_detach(dvp, de);
2557 		tmpfs_free_dirent(tmp, de);
2558 		break;
2559 	}
2560 	return 0;
2561 }
2562 
2563 int
2564 tmpfs_print(void *v)
2565 {
2566 	struct vop_print_args /* {
2567 		struct vnode	*a_vp;
2568 	} */ *ap = v;
2569 	vnode_t *vp = ap->a_vp;
2570 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
2571 
2572 	printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%x, links %d\n"
2573 	    "\tmode 0%o, owner %d, group %d, size %" PRIdMAX ", status 0x%x",
2574 	    node, node->tn_flags, node->tn_links, node->tn_mode, node->tn_uid,
2575 	    node->tn_gid, (uintmax_t)node->tn_size, node->tn_status);
2576 	if (vp->v_type == VFIFO) {
2577 		VOCALL(fifo_vnodeop_p, VOFFSET(vop_print), v);
2578 	}
2579 	printf("\n");
2580 	return 0;
2581 }
2582