xref: /netbsd-src/sys/fs/tmpfs/tmpfs_vnops.c (revision 33ce21e27d1c67d4ec652b9a5aa092ee2684f8d2)
1 /*	$NetBSD: tmpfs_vnops.c,v 1.89 2011/08/18 21:42:18 riastradh Exp $	*/
2 
3 /*
4  * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
9  * 2005 program.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * tmpfs vnode interface.
35  */
36 
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: tmpfs_vnops.c,v 1.89 2011/08/18 21:42:18 riastradh Exp $");
39 
40 #include <sys/param.h>
41 #include <sys/dirent.h>
42 #include <sys/fcntl.h>
43 #include <sys/event.h>
44 #include <sys/malloc.h>
45 #include <sys/namei.h>
46 #include <sys/stat.h>
47 #include <sys/uio.h>
48 #include <sys/unistd.h>
49 #include <sys/vnode.h>
50 #include <sys/lockf.h>
51 #include <sys/kauth.h>
52 
53 #include <uvm/uvm.h>
54 
55 #include <miscfs/fifofs/fifo.h>
56 #include <miscfs/genfs/genfs.h>
57 #include <fs/tmpfs/tmpfs_vnops.h>
58 #include <fs/tmpfs/tmpfs.h>
59 
/*
 * vnode operations vector used for files stored in a tmpfs file system.
 *
 * tmpfs_vnodeop_entries pairs each vnode operation descriptor with the
 * tmpfs routine that implements it.  The first entry installs
 * vn_default_error as the handler for vop_default_desc, and the table is
 * terminated by a { NULL, NULL } sentinel.  The whiteout operation is only
 * part of the table when TMPFS_WHITEOUT is non-zero at compile time.
 */
int (**tmpfs_vnodeop_p)(void *);
const struct vnodeopv_entry_desc tmpfs_vnodeop_entries[] = {
	{ &vop_default_desc,		vn_default_error },
	{ &vop_lookup_desc,		tmpfs_lookup },
	{ &vop_create_desc,		tmpfs_create },
	{ &vop_mknod_desc,		tmpfs_mknod },
	{ &vop_open_desc,		tmpfs_open },
	{ &vop_close_desc,		tmpfs_close },
	{ &vop_access_desc,		tmpfs_access },
	{ &vop_getattr_desc,		tmpfs_getattr },
	{ &vop_setattr_desc,		tmpfs_setattr },
	{ &vop_read_desc,		tmpfs_read },
	{ &vop_write_desc,		tmpfs_write },
	{ &vop_ioctl_desc,		tmpfs_ioctl },
	{ &vop_fcntl_desc,		tmpfs_fcntl },
	{ &vop_poll_desc,		tmpfs_poll },
	{ &vop_kqfilter_desc,		tmpfs_kqfilter },
	{ &vop_revoke_desc,		tmpfs_revoke },
	{ &vop_mmap_desc,		tmpfs_mmap },
	{ &vop_fsync_desc,		tmpfs_fsync },
	{ &vop_seek_desc,		tmpfs_seek },
	{ &vop_remove_desc,		tmpfs_remove },
	{ &vop_link_desc,		tmpfs_link },
	{ &vop_rename_desc,		tmpfs_rename },
	{ &vop_mkdir_desc,		tmpfs_mkdir },
	{ &vop_rmdir_desc,		tmpfs_rmdir },
	{ &vop_symlink_desc,		tmpfs_symlink },
	{ &vop_readdir_desc,		tmpfs_readdir },
	{ &vop_readlink_desc,		tmpfs_readlink },
	{ &vop_abortop_desc,		tmpfs_abortop },
	{ &vop_inactive_desc,		tmpfs_inactive },
	{ &vop_reclaim_desc,		tmpfs_reclaim },
	{ &vop_lock_desc,		tmpfs_lock },
	{ &vop_unlock_desc,		tmpfs_unlock },
	{ &vop_bmap_desc,		tmpfs_bmap },
	{ &vop_strategy_desc,		tmpfs_strategy },
	{ &vop_print_desc,		tmpfs_print },
	{ &vop_pathconf_desc,		tmpfs_pathconf },
	{ &vop_islocked_desc,		tmpfs_islocked },
	{ &vop_advlock_desc,		tmpfs_advlock },
	{ &vop_bwrite_desc,		tmpfs_bwrite },
	{ &vop_getpages_desc,		tmpfs_getpages },
	{ &vop_putpages_desc,		tmpfs_putpages },
#if TMPFS_WHITEOUT
	{ &vop_whiteout_desc,		tmpfs_whiteout },
#endif
	{ NULL, NULL }
};

/*
 * Descriptor tying the operations vector pointer (tmpfs_vnodeop_p) to the
 * table of entries above.
 */
const struct vnodeopv_desc tmpfs_vnodeop_opv_desc = {
	&tmpfs_vnodeop_p, tmpfs_vnodeop_entries
};
115 
/*
 * tmpfs_lookup: path name traversal routine.
 *
 * Arguments: dvp (directory being searched), vpp (result),
 * cnp (component name - path).
 *
 * => Caller holds a reference and lock on dvp.
 * => We return looked-up vnode (vpp) locked, with a reference held.
 *
 * Return values produced directly by this routine:
 *
 * . 0            the entry was found and *vpp is set;
 * . EJUSTRETURN  the entry does not exist, but this is the last component
 *                of a CREATE or RENAME and the directory is writable;
 * . ENOENT       the entry (or the parent, for "..") does not exist;
 * . EROFS        DELETE or RENAME of the last component on a read-only
 *                mount;
 * . EINVAL       RENAME whose last component is "..";
 * . EISDIR       RENAME whose last component is ".";
 * . ENOTDIR      an intermediate component is neither a directory nor a
 *                symbolic link;
 * . EPERM        the sticky-bit check on the directory failed.
 *
 * Errors from VOP_ACCESS(), cache_lookup(), kauth_authorize_vnode() and
 * tmpfs_vnode_get() are passed through unchanged.
 */
int
tmpfs_lookup(void *v)
{
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap = v;
	vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	const bool lastcn = (cnp->cn_flags & ISLASTCN) != 0;
	tmpfs_node_t *dnode, *tnode;
	tmpfs_dirent_t *de;
	int error;

	KASSERT(VOP_ISLOCKED(dvp));

	dnode = VP_TO_TMPFS_DIR(dvp);
	/* Start with no result, so that every error path leaves *vpp NULL. */
	*vpp = NULL;

	/* Check accessibility of directory. */
	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred);
	if (error) {
		goto out;
	}

	/*
	 * If requesting the last path component on a read-only file system
	 * with a write operation, deny it.
	 */
	if (lastcn && (dvp->v_mount->mnt_flag & MNT_RDONLY) != 0 &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
		error = EROFS;
		goto out;
	}

	/*
	 * Avoid doing a linear scan of the directory if the requested
	 * directory/name couple is already in the cache.
	 */
	error = cache_lookup(dvp, vpp, cnp);
	if (error >= 0) {
		/* Both cache-hit or an error case. */
		goto out;
	}
	/* A negative return value is treated as a miss: scan the directory. */

	if (cnp->cn_flags & ISDOTDOT) {
		tmpfs_node_t *pnode;

		/*
		 * Lookup of ".." case.
		 */
		if (lastcn && cnp->cn_nameiop == RENAME) {
			error = EINVAL;
			goto out;
		}
		KASSERT(dnode->tn_type == VDIR);
		pnode = dnode->tn_spec.tn_dir.tn_parent;
		if (pnode == NULL) {
			error = ENOENT;
			goto out;
		}

		/*
		 * Lock the parent tn_vlock before releasing the vnode lock,
		 * and thus prevents parent from disappearing.
		 */
		mutex_enter(&pnode->tn_vlock);
		VOP_UNLOCK(dvp);

		/*
		 * Get a vnode of the '..' entry and re-acquire the lock.
		 * Release the tn_vlock.
		 *
		 * NOTE(review): there is no mutex_exit() here, so tn_vlock is
		 * presumably released inside tmpfs_vnode_get() -- confirm.
		 * The result is not entered into the name cache (we jump to
		 * 'out', not 'done').
		 */
		error = tmpfs_vnode_get(dvp->v_mount, pnode, vpp);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		goto out;

	} else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
		/*
		 * Lookup of "." case.
		 */
		if (lastcn && cnp->cn_nameiop == RENAME) {
			error = EISDIR;
			goto out;
		}
		/* The result is dvp itself; take an extra reference for it. */
		vref(dvp);
		*vpp = dvp;
		error = 0;
		goto done;
	}

	/*
	 * Other lookup cases: perform directory scan.
	 */
	de = tmpfs_dir_lookup(dnode, cnp);
	if (de == NULL || de->td_node == TMPFS_NODE_WHITEOUT) {
		/*
		 * The entry was not found in the directory.  This is valid
		 * if we are creating or renaming an entry and are working
		 * on the last component of the path name.
		 */
		if (lastcn && (cnp->cn_nameiop == CREATE ||
		    cnp->cn_nameiop == RENAME)) {
			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
			if (error) {
				goto out;
			}
			error = EJUSTRETURN;
		} else {
			error = ENOENT;
		}
		/* A whiteout entry counts as "not found", but is flagged. */
		if (de) {
			KASSERT(de->td_node == TMPFS_NODE_WHITEOUT);
			cnp->cn_flags |= ISWHITEOUT;
		}
		goto done;
	}

	tnode = de->td_node;

	/*
	 * If it is not the last path component and found a non-directory
	 * or non-link entry (which may itself be pointing to a directory),
	 * raise an error.
	 */
	if (!lastcn && tnode->tn_type != VDIR && tnode->tn_type != VLNK) {
		error = ENOTDIR;
		goto out;
	}

	/* Check the permissions. */
	if (lastcn && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
		kauth_action_t action = 0;

		/*
		 * This is the file-system's decision.  With the sticky bit
		 * set on the directory, the caller must own either the
		 * directory or the entry; otherwise write access to the
		 * directory decides.  The outcome is only advisory: it is
		 * handed to kauth_authorize_vnode() below as its last
		 * argument.
		 */
		if ((dnode->tn_mode & S_ISTXT) != 0 &&
		    kauth_cred_geteuid(cnp->cn_cred) != dnode->tn_uid &&
		    kauth_cred_geteuid(cnp->cn_cred) != tnode->tn_uid) {
			error = EPERM;
		} else {
			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
		}

		if (cnp->cn_nameiop == DELETE) {
			action |= KAUTH_VNODE_DELETE;
		} else {
			KASSERT(cnp->cn_nameiop == RENAME);
			action |= KAUTH_VNODE_RENAME;
		}
		/*
		 * NOTE(review): *vpp has not been filled in yet at this
		 * point (tmpfs_vnode_get() only runs below), so the vnode
		 * argument passed here is presumably still NULL -- confirm
		 * that this is what the kauth listeners expect.
		 */
		error = kauth_authorize_vnode(cnp->cn_cred,
		    action, *vpp, dvp, error);
		if (error) {
			goto out;
		}
	}

	/* Get a vnode for the matching entry. */
	mutex_enter(&tnode->tn_vlock);
	error = tmpfs_vnode_get(dvp->v_mount, tnode, vpp);
done:
	/*
	 * Cache the result, unless request was for creation (as it does
	 * not improve the performance).
	 *
	 * NOTE(review): this label is also reached by fall-through when
	 * tmpfs_vnode_get() above failed, and from the "not found" branch
	 * where *vpp is NULL -- confirm that entering a NULL vnode into the
	 * cache is intended in the failure case.
	 */
	if ((cnp->cn_flags & MAKEENTRY) != 0 && cnp->cn_nameiop != CREATE) {
		cache_enter(dvp, *vpp, cnp);
	}
out:
	/* Either we failed, or the result vnode is set and locked. */
	KASSERT((*vpp && VOP_ISLOCKED(*vpp)) || error);
	KASSERT(VOP_ISLOCKED(dvp));

	return error;
}
299 
300 int
301 tmpfs_create(void *v)
302 {
303 	struct vop_create_args /* {
304 		struct vnode		*a_dvp;
305 		struct vnode		**a_vpp;
306 		struct componentname	*a_cnp;
307 		struct vattr		*a_vap;
308 	} */ *ap = v;
309 	vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
310 	struct componentname *cnp = ap->a_cnp;
311 	struct vattr *vap = ap->a_vap;
312 
313 	KASSERT(VOP_ISLOCKED(dvp));
314 	KASSERT(vap->va_type == VREG || vap->va_type == VSOCK);
315 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
316 }
317 
318 int
319 tmpfs_mknod(void *v)
320 {
321 	struct vop_mknod_args /* {
322 		struct vnode		*a_dvp;
323 		struct vnode		**a_vpp;
324 		struct componentname	*a_cnp;
325 		struct vattr		*a_vap;
326 	} */ *ap = v;
327 	vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
328 	struct componentname *cnp = ap->a_cnp;
329 	struct vattr *vap = ap->a_vap;
330 	enum vtype vt = vap->va_type;
331 
332 	if (vt != VBLK && vt != VCHR && vt != VFIFO) {
333 		vput(dvp);
334 		return EINVAL;
335 	}
336 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
337 }
338 
339 int
340 tmpfs_open(void *v)
341 {
342 	struct vop_open_args /* {
343 		struct vnode	*a_vp;
344 		int		a_mode;
345 		kauth_cred_t	a_cred;
346 	} */ *ap = v;
347 	vnode_t *vp = ap->a_vp;
348 	mode_t mode = ap->a_mode;
349 	tmpfs_node_t *node;
350 
351 	KASSERT(VOP_ISLOCKED(vp));
352 
353 	node = VP_TO_TMPFS_NODE(vp);
354 	if (node->tn_links < 1) {
355 		/*
356 		 * The file is still active, but all its names have been
357 		 * removed (e.g. by a "rmdir $(pwd)").  It cannot be opened
358 		 * any more, as it is about to be destroyed.
359 		 */
360 		return ENOENT;
361 	}
362 
363 	/* If the file is marked append-only, deny write requests. */
364 	if ((node->tn_flags & APPEND) != 0 &&
365 	    (mode & (FWRITE | O_APPEND)) == FWRITE) {
366 		return EPERM;
367 	}
368 	return 0;
369 }
370 
371 int
372 tmpfs_close(void *v)
373 {
374 	struct vop_close_args /* {
375 		struct vnode	*a_vp;
376 		int		a_fflag;
377 		kauth_cred_t	a_cred;
378 	} */ *ap = v;
379 	vnode_t *vp = ap->a_vp;
380 
381 	KASSERT(VOP_ISLOCKED(vp));
382 
383 	tmpfs_update(vp, NULL, NULL, NULL, UPDATE_CLOSE);
384 	return 0;
385 }
386 
387 static int
388 tmpfs_check_possible(vnode_t *vp, tmpfs_node_t *node, mode_t mode)
389 {
390 	const bool writing = (mode & VWRITE) != 0;
391 
392 	switch (vp->v_type) {
393 	case VDIR:
394 	case VLNK:
395 	case VREG:
396 		if (writing && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) {
397 			return EROFS;
398 		}
399 		break;
400 	case VBLK:
401 	case VCHR:
402 	case VSOCK:
403 	case VFIFO:
404 		break;
405 	default:
406 		return EINVAL;
407 	}
408 	return (writing && (node->tn_flags & IMMUTABLE) != 0) ? EPERM : 0;
409 }
410 
411 static int
412 tmpfs_check_permitted(vnode_t *vp, tmpfs_node_t *node, mode_t mode,
413     kauth_cred_t cred)
414 {
415 
416 	return genfs_can_access(vp->v_type, node->tn_mode, node->tn_uid,
417 	    node->tn_gid, mode, cred);
418 }
419 
420 int
421 tmpfs_access(void *v)
422 {
423 	struct vop_access_args /* {
424 		struct vnode	*a_vp;
425 		int		a_mode;
426 		kauth_cred_t	a_cred;
427 	} */ *ap = v;
428 	vnode_t *vp = ap->a_vp;
429 	mode_t mode = ap->a_mode;
430 	kauth_cred_t cred = ap->a_cred;
431 	tmpfs_node_t *node;
432 	int error;
433 
434 	KASSERT(VOP_ISLOCKED(vp));
435 
436 	node = VP_TO_TMPFS_NODE(vp);
437 	error = tmpfs_check_possible(vp, node, mode);
438 	if (error) {
439 		return error;
440 	}
441 	return kauth_authorize_vnode(cred, kauth_mode_to_action(mode), vp,
442 	    NULL, tmpfs_check_permitted(vp, node, mode, cred));
443 }
444 
445 int
446 tmpfs_getattr(void *v)
447 {
448 	struct vop_getattr_args /* {
449 		struct vnode	*a_vp;
450 		struct vattr	*a_vap;
451 		kauth_cred_t	a_cred;
452 	} */ *ap = v;
453 	vnode_t *vp = ap->a_vp;
454 	struct vattr *vap = ap->a_vap;
455 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
456 
457 	vattr_null(vap);
458 
459 	tmpfs_update(vp, NULL, NULL, NULL, 0);
460 
461 	vap->va_type = vp->v_type;
462 	vap->va_mode = node->tn_mode;
463 	vap->va_nlink = node->tn_links;
464 	vap->va_uid = node->tn_uid;
465 	vap->va_gid = node->tn_gid;
466 	vap->va_fsid = vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
467 	vap->va_fileid = node->tn_id;
468 	vap->va_size = node->tn_size;
469 	vap->va_blocksize = PAGE_SIZE;
470 	vap->va_atime = node->tn_atime;
471 	vap->va_mtime = node->tn_mtime;
472 	vap->va_ctime = node->tn_ctime;
473 	vap->va_birthtime = node->tn_birthtime;
474 	vap->va_gen = TMPFS_NODE_GEN(node);
475 	vap->va_flags = node->tn_flags;
476 	vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
477 	    node->tn_spec.tn_dev.tn_rdev : VNOVAL;
478 	vap->va_bytes = round_page(node->tn_size);
479 	vap->va_filerev = VNOVAL;
480 	vap->va_vaflags = 0;
481 	vap->va_spare = VNOVAL; /* XXX */
482 
483 	return 0;
484 }
485 
486 #define GOODTIME(tv)	((tv)->tv_sec != VNOVAL || (tv)->tv_nsec != VNOVAL)
487 /* XXX Should this operation be atomic?  I think it should, but code in
488  * XXX other places (e.g., ufs) doesn't seem to be... */
489 int
490 tmpfs_setattr(void *v)
491 {
492 	struct vop_setattr_args /* {
493 		struct vnode	*a_vp;
494 		struct vattr	*a_vap;
495 		kauth_cred_t	a_cred;
496 	} */ *ap = v;
497 	vnode_t *vp = ap->a_vp;
498 	struct vattr *vap = ap->a_vap;
499 	kauth_cred_t cred = ap->a_cred;
500 	lwp_t *l = curlwp;
501 	int error = 0;
502 
503 	KASSERT(VOP_ISLOCKED(vp));
504 
505 	/* Abort if any unsettable attribute is given. */
506 	if (vap->va_type != VNON || vap->va_nlink != VNOVAL ||
507 	    vap->va_fsid != VNOVAL || vap->va_fileid != VNOVAL ||
508 	    vap->va_blocksize != VNOVAL || GOODTIME(&vap->va_ctime) ||
509 	    vap->va_gen != VNOVAL || vap->va_rdev != VNOVAL ||
510 	    vap->va_bytes != VNOVAL) {
511 		return EINVAL;
512 	}
513 	if (error == 0 && (vap->va_flags != VNOVAL))
514 		error = tmpfs_chflags(vp, vap->va_flags, cred, l);
515 
516 	if (error == 0 && (vap->va_size != VNOVAL))
517 		error = tmpfs_chsize(vp, vap->va_size, cred, l);
518 
519 	if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL))
520 		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred, l);
521 
522 	if (error == 0 && (vap->va_mode != VNOVAL))
523 		error = tmpfs_chmod(vp, vap->va_mode, cred, l);
524 
525 	if (error == 0 && (GOODTIME(&vap->va_atime) || GOODTIME(&vap->va_mtime)
526 	    || GOODTIME(&vap->va_birthtime))) {
527 		error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime,
528 		    &vap->va_birthtime, vap->va_vaflags, cred, l);
529 		if (error == 0)
530 			return 0;
531 	}
532 	tmpfs_update(vp, NULL, NULL, NULL, 0);
533 	return error;
534 }
535 
536 int
537 tmpfs_read(void *v)
538 {
539 	struct vop_read_args /* {
540 		struct vnode *a_vp;
541 		struct uio *a_uio;
542 		int a_ioflag;
543 		kauth_cred_t a_cred;
544 	} */ *ap = v;
545 	vnode_t *vp = ap->a_vp;
546 	struct uio *uio = ap->a_uio;
547 	const int ioflag = ap->a_ioflag;
548 	tmpfs_node_t *node;
549 	struct uvm_object *uobj;
550 	int error;
551 
552 	KASSERT(VOP_ISLOCKED(vp));
553 
554 	if (vp->v_type != VREG) {
555 		return EISDIR;
556 	}
557 	if (uio->uio_offset < 0) {
558 		return EINVAL;
559 	}
560 
561 	node = VP_TO_TMPFS_NODE(vp);
562 	node->tn_status |= TMPFS_NODE_ACCESSED;
563 	uobj = node->tn_spec.tn_reg.tn_aobj;
564 	error = 0;
565 
566 	while (error == 0 && uio->uio_resid > 0) {
567 		vsize_t len;
568 
569 		if (node->tn_size <= uio->uio_offset) {
570 			break;
571 		}
572 		len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
573 		if (len == 0) {
574 			break;
575 		}
576 		error = ubc_uiomove(uobj, uio, len, IO_ADV_DECODE(ioflag),
577 		    UBC_READ | UBC_PARTIALOK | UBC_UNMAP_FLAG(vp));
578 	}
579 	return error;
580 }
581 
582 int
583 tmpfs_write(void *v)
584 {
585 	struct vop_write_args /* {
586 		struct vnode	*a_vp;
587 		struct uio	*a_uio;
588 		int		a_ioflag;
589 		kauth_cred_t	a_cred;
590 	} */ *ap = v;
591 	vnode_t *vp = ap->a_vp;
592 	struct uio *uio = ap->a_uio;
593 	const int ioflag = ap->a_ioflag;
594 	tmpfs_node_t *node;
595 	struct uvm_object *uobj;
596 	off_t oldsize;
597 	bool extended;
598 	int error;
599 
600 	KASSERT(VOP_ISLOCKED(vp));
601 
602 	node = VP_TO_TMPFS_NODE(vp);
603 	oldsize = node->tn_size;
604 
605 	if (uio->uio_offset < 0 || vp->v_type != VREG) {
606 		error = EINVAL;
607 		goto out;
608 	}
609 	if (uio->uio_resid == 0) {
610 		error = 0;
611 		goto out;
612 	}
613 	if (ioflag & IO_APPEND) {
614 		uio->uio_offset = node->tn_size;
615 	}
616 
617 	extended = uio->uio_offset + uio->uio_resid > node->tn_size;
618 	if (extended) {
619 		error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid);
620 		if (error)
621 			goto out;
622 	}
623 
624 	uobj = node->tn_spec.tn_reg.tn_aobj;
625 	error = 0;
626 	while (error == 0 && uio->uio_resid > 0) {
627 		vsize_t len;
628 
629 		len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
630 		if (len == 0) {
631 			break;
632 		}
633 		error = ubc_uiomove(uobj, uio, len, IO_ADV_DECODE(ioflag),
634 		    UBC_WRITE | UBC_UNMAP_FLAG(vp));
635 	}
636 	if (error) {
637 		(void)tmpfs_reg_resize(vp, oldsize);
638 	}
639 
640 	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED |
641 	    (extended ? TMPFS_NODE_CHANGED : 0);
642 	VN_KNOTE(vp, NOTE_WRITE);
643 out:
644 	if (error) {
645 		KASSERT(oldsize == node->tn_size);
646 	} else {
647 		KASSERT(uio->uio_resid == 0);
648 	}
649 	return error;
650 }
651 
652 int
653 tmpfs_fsync(void *v)
654 {
655 	struct vop_fsync_args /* {
656 		struct vnode *a_vp;
657 		kauth_cred_t a_cred;
658 		int a_flags;
659 		off_t a_offlo;
660 		off_t a_offhi;
661 		struct lwp *a_l;
662 	} */ *ap = v;
663 	vnode_t *vp = ap->a_vp;
664 
665 	/* Nothing to do.  Just update. */
666 	KASSERT(VOP_ISLOCKED(vp));
667 	tmpfs_update(vp, NULL, NULL, NULL, 0);
668 	return 0;
669 }
670 
671 /*
672  * tmpfs_remove: unlink a file.
673  *
674  * => Both directory (dvp) and file (vp) are locked.
675  * => We unlock and drop the reference on both.
676  */
677 int
678 tmpfs_remove(void *v)
679 {
680 	struct vop_remove_args /* {
681 		struct vnode *a_dvp;
682 		struct vnode *a_vp;
683 		struct componentname *a_cnp;
684 	} */ *ap = v;
685 	vnode_t *dvp = ap->a_dvp, *vp = ap->a_vp;
686 	tmpfs_node_t *node;
687 	tmpfs_dirent_t *de;
688 	int error;
689 
690 	KASSERT(VOP_ISLOCKED(dvp));
691 	KASSERT(VOP_ISLOCKED(vp));
692 
693 	if (vp->v_type == VDIR) {
694 		error = EPERM;
695 		goto out;
696 	}
697 	node = VP_TO_TMPFS_NODE(vp);
698 
699 	/* Files marked as immutable or append-only cannot be deleted. */
700 	if (node->tn_flags & (IMMUTABLE | APPEND)) {
701 		error = EPERM;
702 		goto out;
703 	}
704 
705 	/* Lookup the directory entry (check the cached hint first). */
706 	de = tmpfs_dir_cached(node);
707 	if (de == NULL) {
708 		tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
709 		struct componentname *cnp = ap->a_cnp;
710 		de = tmpfs_dir_lookup(dnode, cnp);
711 	}
712 	KASSERT(de && de->td_node == node);
713 
714 	/*
715 	 * Remove the entry from the directory (drops the link count) and
716 	 * destroy it.  Note: the inode referred by it will not be destroyed
717 	 * until the vnode is reclaimed/recycled.
718 	 */
719 	tmpfs_dir_detach(dvp, de);
720 	tmpfs_free_dirent(VFS_TO_TMPFS(vp->v_mount), de);
721 	error = 0;
722 out:
723 	/* Drop the references and unlock the vnodes. */
724 	vput(vp);
725 	if (dvp == vp) {
726 		vrele(dvp);
727 	} else {
728 		vput(dvp);
729 	}
730 	return error;
731 }
732 
733 /*
734  * tmpfs_link: create a hard link.
735  */
736 int
737 tmpfs_link(void *v)
738 {
739 	struct vop_link_args /* {
740 		struct vnode *a_dvp;
741 		struct vnode *a_vp;
742 		struct componentname *a_cnp;
743 	} */ *ap = v;
744 	vnode_t *dvp = ap->a_dvp;
745 	vnode_t *vp = ap->a_vp;
746 	struct componentname *cnp = ap->a_cnp;
747 	tmpfs_node_t *dnode, *node;
748 	tmpfs_dirent_t *de;
749 	int error;
750 
751 	KASSERT(dvp != vp);
752 	KASSERT(VOP_ISLOCKED(dvp));
753 	KASSERT(vp->v_type != VDIR);
754 	KASSERT(dvp->v_mount == vp->v_mount);
755 
756 	dnode = VP_TO_TMPFS_DIR(dvp);
757 	node = VP_TO_TMPFS_NODE(vp);
758 
759 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
760 
761 	/* Check for maximum number of links limit. */
762 	if (node->tn_links == LINK_MAX) {
763 		error = EMLINK;
764 		goto out;
765 	}
766 	KASSERT(node->tn_links < LINK_MAX);
767 
768 	/* We cannot create links of files marked immutable or append-only. */
769 	if (node->tn_flags & (IMMUTABLE | APPEND)) {
770 		error = EPERM;
771 		goto out;
772 	}
773 
774 	/* Allocate a new directory entry to represent the inode. */
775 	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount),
776 	    cnp->cn_nameptr, cnp->cn_namelen, &de);
777 	if (error) {
778 		goto out;
779 	}
780 
781 	/*
782 	 * Insert the entry into the directory.
783 	 * It will increase the inode link count.
784 	 */
785 	tmpfs_dir_attach(dvp, de, node);
786 
787 	/* Update the timestamps and trigger the event. */
788 	if (node->tn_vnode) {
789 		VN_KNOTE(node->tn_vnode, NOTE_LINK);
790 	}
791 	node->tn_status |= TMPFS_NODE_CHANGED;
792 	tmpfs_update(vp, NULL, NULL, NULL, 0);
793 	error = 0;
794 out:
795 	VOP_UNLOCK(vp);
796 	vput(dvp);
797 	return error;
798 }
799 
800 /*
801  * tmpfs_rename: rename routine, the hairiest system call, with the
802  * insane API.
803  *
804  * Arguments: fdvp (from-parent vnode), fvp (from-leaf), tdvp (to-parent)
805  * and tvp (to-leaf), if exists (NULL if not).
806  *
807  * => Caller holds a reference on fdvp and fvp, they are unlocked.
808  *    Note: fdvp and fvp can refer to the same object (i.e. when it is root).
809  *
810  * => Both tdvp and tvp are referenced and locked.  It is our responsibility
811  *    to release the references and unlock them (or destroy).
812  */
813 
/*
 * First, some forward declarations of subroutines.
 *
 * All of these are file-local (static) helpers of the rename
 * implementation.  tmpfs_rename() calls tmpfs_sane_rename(), which in
 * turn uses tmpfs_rename_enter() to lock and look up both ends,
 * tmpfs_do_remove() for the case where source and target are the same
 * object, and tmpfs_rename_check_possible() and
 * tmpfs_rename_check_permitted() to authorize the operation.
 */

static int tmpfs_sane_rename(struct vnode *, struct componentname *,
    struct vnode *, struct componentname *, kauth_cred_t, bool);
static int tmpfs_rename_enter(struct mount *, struct tmpfs_mount *,
    kauth_cred_t,
    struct vnode *, struct tmpfs_node *, struct componentname *,
    struct tmpfs_dirent **, struct vnode **,
    struct vnode *, struct tmpfs_node *, struct componentname *,
    struct tmpfs_dirent **, struct vnode **);
static int tmpfs_rename_enter_common(struct mount *, struct tmpfs_mount *,
    kauth_cred_t,
    struct vnode *, struct tmpfs_node *,
    struct componentname *, struct tmpfs_dirent **, struct vnode **,
    struct componentname *, struct tmpfs_dirent **, struct vnode **);
static int tmpfs_rename_enter_separate(struct mount *, struct tmpfs_mount *,
    kauth_cred_t,
    struct vnode *, struct tmpfs_node *, struct componentname *,
    struct tmpfs_dirent **, struct vnode **,
    struct vnode *, struct tmpfs_node *, struct componentname *,
    struct tmpfs_dirent **, struct vnode **);
static void tmpfs_rename_exit(struct tmpfs_mount *,
    struct vnode *, struct vnode *, struct vnode *, struct vnode *);
static int tmpfs_rename_lock_directory(struct vnode *, struct tmpfs_node *);
static int tmpfs_rename_genealogy(struct tmpfs_node *, struct tmpfs_node *,
    struct tmpfs_node **);
static int tmpfs_rename_lock(struct mount *, kauth_cred_t, int,
    struct vnode *, struct tmpfs_node *, struct componentname *, bool,
    struct tmpfs_dirent **, struct vnode **,
    struct vnode *, struct tmpfs_node *, struct componentname *, bool,
    struct tmpfs_dirent **, struct vnode **);
static void tmpfs_rename_attachdetach(struct tmpfs_mount *,
    struct vnode *, struct tmpfs_dirent *, struct vnode *,
    struct vnode *, struct tmpfs_dirent *, struct vnode *);
static int tmpfs_do_remove(struct tmpfs_mount *, struct vnode *,
    struct tmpfs_node *, struct tmpfs_dirent *, struct vnode *, kauth_cred_t);
static int tmpfs_rename_check_possible(struct tmpfs_node *,
    struct tmpfs_node *, struct tmpfs_node *, struct tmpfs_node *);
static int tmpfs_rename_check_permitted(kauth_cred_t,
    struct tmpfs_node *, struct tmpfs_node *,
    struct tmpfs_node *, struct tmpfs_node *);
static int tmpfs_remove_check_possible(struct tmpfs_node *,
    struct tmpfs_node *);
static int tmpfs_remove_check_permitted(kauth_cred_t,
    struct tmpfs_node *, struct tmpfs_node *);
static int tmpfs_check_sticky(kauth_cred_t,
    struct tmpfs_node *, struct tmpfs_node *);
863 
864 int
865 tmpfs_rename(void *v)
866 {
867 	struct vop_rename_args  /* {
868 		struct vnode		*a_fdvp;
869 		struct vnode		*a_fvp;
870 		struct componentname	*a_fcnp;
871 		struct vnode		*a_tdvp;
872 		struct vnode		*a_tvp;
873 		struct componentname	*a_tcnp;
874 	} */ *ap = v;
875 	struct vnode *fdvp = ap->a_fdvp;
876 	struct vnode *fvp = ap->a_fvp;
877 	struct componentname *fcnp = ap->a_fcnp;
878 	struct vnode *tdvp = ap->a_tdvp;
879 	struct vnode *tvp = ap->a_tvp;
880 	struct componentname *tcnp = ap->a_tcnp;
881 	kauth_cred_t cred;
882 	int error;
883 
884 	KASSERT(fdvp != NULL);
885 	KASSERT(fvp != NULL);
886 	KASSERT(fcnp != NULL);
887 	KASSERT(fcnp->cn_nameptr != NULL);
888 	KASSERT(tdvp != NULL);
889 	KASSERT(tcnp != NULL);
890 	KASSERT(fcnp->cn_nameptr != NULL);
891 	/* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
892 	/* KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
893 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
894 	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
895 	KASSERT(fdvp->v_type == VDIR);
896 	KASSERT(tdvp->v_type == VDIR);
897 
898 	cred = fcnp->cn_cred;
899 	KASSERT(tcnp->cn_cred == cred);
900 
901 	/*
902 	 * Sanitize our world from the VFS insanity.  Unlock the target
903 	 * directory and node, which are locked.  Release the children,
904 	 * which are referenced.  Check for rename("x", "y/."), which
905 	 * it is our responsibility to reject, not the caller's.  (But
906 	 * the caller does reject rename("x/.", "y").  Go figure.)
907 	 */
908 
909 	VOP_UNLOCK(tdvp);
910 	if ((tvp != NULL) && (tvp != tdvp))
911 		VOP_UNLOCK(tvp);
912 
913 	vrele(fvp);
914 	if (tvp != NULL)
915 		vrele(tvp);
916 
917 	if (tvp == tdvp) {
918 		error = EINVAL;
919 		goto out;
920 	}
921 
922 	error = tmpfs_sane_rename(fdvp, fcnp, tdvp, tcnp, cred, false);
923 
924 out:	/*
925 	 * All done, whether with success or failure.  Release the
926 	 * directory nodes now, as the caller expects from the VFS
927 	 * protocol.
928 	 */
929 	vrele(fdvp);
930 	vrele(tdvp);
931 
932 	return error;
933 }
934 
935 /*
936  * tmpfs_sane_rename: rename routine, the hairiest system call, with
937  * the sane API.
938  *
939  * Arguments:
940  *
941  * . fdvp (from directory vnode),
942  * . fcnp (from component name),
943  * . tdvp (to directory vnode), and
944  * . tcnp (to component name).
945  *
946  * fdvp and tdvp must be referenced and unlocked.
947  */
948 static int
949 tmpfs_sane_rename(struct vnode *fdvp, struct componentname *fcnp,
950     struct vnode *tdvp, struct componentname *tcnp, kauth_cred_t cred,
951     bool posixly_correct)
952 {
953 	struct mount *mount;
954 	struct tmpfs_mount *tmpfs;
955 	struct tmpfs_node *fdnode, *tdnode;
956 	struct tmpfs_dirent *fde, *tde;
957 	struct vnode *fvp, *tvp;
958 	char *newname;
959 	int error;
960 
961 	KASSERT(fdvp != NULL);
962 	KASSERT(fcnp != NULL);
963 	KASSERT(tdvp != NULL);
964 	KASSERT(tcnp != NULL);
965 	/* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
966 	/* KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
967 	KASSERT(fdvp->v_type == VDIR);
968 	KASSERT(tdvp->v_type == VDIR);
969 	KASSERT(fdvp->v_mount == tdvp->v_mount);
970 	KASSERT((fcnp->cn_flags & ISDOTDOT) == 0);
971 	KASSERT((tcnp->cn_flags & ISDOTDOT) == 0);
972 	KASSERT((fcnp->cn_namelen != 1) || (fcnp->cn_nameptr[0] != '.'));
973 	KASSERT((tcnp->cn_namelen != 1) || (tcnp->cn_nameptr[0] != '.'));
974 	KASSERT((fcnp->cn_namelen != 2) || (fcnp->cn_nameptr[0] != '.') ||
975 	    (fcnp->cn_nameptr[1] != '.'));
976 	KASSERT((tcnp->cn_namelen != 2) || (tcnp->cn_nameptr[0] != '.') ||
977 	    (tcnp->cn_nameptr[1] != '.'));
978 
979 	/*
980 	 * Pull out the tmpfs data structures.
981 	 */
982 	fdnode = VP_TO_TMPFS_NODE(fdvp);
983 	tdnode = VP_TO_TMPFS_NODE(tdvp);
984 	KASSERT(fdnode != NULL);
985 	KASSERT(tdnode != NULL);
986 	KASSERT(fdnode->tn_vnode == fdvp);
987 	KASSERT(tdnode->tn_vnode == tdvp);
988 	KASSERT(fdnode->tn_type == VDIR);
989 	KASSERT(tdnode->tn_type == VDIR);
990 
991 	mount = fdvp->v_mount;
992 	KASSERT(mount != NULL);
993 	KASSERT(mount == tdvp->v_mount);
994 	/* XXX How can we be sure this stays true?  (Not that you're
995 	 * likely to mount a tmpfs read-only...)  */
996 	KASSERT((mount->mnt_flag & MNT_RDONLY) == 0);
997 	tmpfs = VFS_TO_TMPFS(mount);
998 	KASSERT(tmpfs != NULL);
999 
1000 	/*
1001 	 * Decide whether we need a new name, and allocate memory for
1002 	 * it if so.  Do this before locking anything or taking
1003 	 * destructive actions so that we can back out safely and sleep
1004 	 * safely.  XXX Is sleeping an issue here?  Can this just be
1005 	 * moved into tmpfs_rename_attachdetach?
1006 	 */
1007 	if (tmpfs_strname_neqlen(fcnp, tcnp)) {
1008 		newname = tmpfs_strname_alloc(tmpfs, tcnp->cn_namelen);
1009 		if (newname == NULL) {
1010 			error = ENOSPC;
1011 			goto out_unlocked;
1012 		}
1013 	} else {
1014 		newname = NULL;
1015 	}
1016 
1017 	/*
1018 	 * Lock and look up everything.  GCC is not very clever.
1019 	 */
1020 	fde = tde = NULL;
1021 	fvp = tvp = NULL;
1022 	error = tmpfs_rename_enter(mount, tmpfs, cred,
1023 	    fdvp, fdnode, fcnp, &fde, &fvp,
1024 	    tdvp, tdnode, tcnp, &tde, &tvp);
1025 	if (error)
1026 		goto out_unlocked;
1027 
1028 	/*
1029 	 * Check that everything is locked and looks right.
1030 	 */
1031 	KASSERT(fde != NULL);
1032 	KASSERT(fvp != NULL);
1033 	KASSERT(fde->td_node != NULL);
1034 	KASSERT(fde->td_node->tn_vnode == fvp);
1035 	KASSERT(fde->td_node->tn_type == fvp->v_type);
1036 	KASSERT((tde == NULL) == (tvp == NULL));
1037 	KASSERT((tde == NULL) || (tde->td_node != NULL));
1038 	KASSERT((tde == NULL) || (tde->td_node->tn_vnode == tvp));
1039 	KASSERT((tde == NULL) || (tde->td_node->tn_type == tvp->v_type));
1040 	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
1041 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
1042 	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
1043 	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
1044 
1045 	/*
1046 	 * If the source and destination are the same object, we need
1047 	 * only at most delete the source entry.
1048 	 */
1049 	if (fvp == tvp) {
1050 		KASSERT(tvp != NULL);
1051 		if (fde->td_node->tn_type == VDIR) {
1052 			/* XXX How can this possibly happen?  */
1053 			error = EINVAL;
1054 			goto out_locked;
1055 		}
1056 		if (!posixly_correct && (fde != tde)) {
1057 			/* XXX Doesn't work because of locking.
1058 			 * error = VOP_REMOVE(fdvp, fvp);
1059 			 */
1060 			error = tmpfs_do_remove(tmpfs, fdvp, fdnode, fde, fvp,
1061 			    cred);
1062 			if (error)
1063 				goto out_locked;
1064 		}
1065 		goto success;
1066 	}
1067 	KASSERT(fde != tde);
1068 	KASSERT(fvp != tvp);
1069 
1070 	/*
1071 	 * If the target exists, refuse to rename a directory over a
1072 	 * non-directory or vice versa, or to clobber a non-empty
1073 	 * directory.
1074 	 */
1075 	if (tvp != NULL) {
1076 		KASSERT(tde != NULL);
1077 		KASSERT(tde->td_node != NULL);
1078 		if (fvp->v_type == VDIR && tvp->v_type == VDIR)
1079 			error = ((tde->td_node->tn_size > 0)? ENOTEMPTY : 0);
1080 		else if (fvp->v_type == VDIR && tvp->v_type != VDIR)
1081 			error = ENOTDIR;
1082 		else if (fvp->v_type != VDIR && tvp->v_type == VDIR)
1083 			error = EISDIR;
1084 		else
1085 			error = 0;
1086 		if (error)
1087 			goto out_locked;
1088 		KASSERT((fvp->v_type == VDIR) == (tvp->v_type == VDIR));
1089 	}
1090 
1091 	/*
1092 	 * Authorize the rename.
1093 	 */
1094 	error = tmpfs_rename_check_possible(fdnode, fde->td_node,
1095 	    tdnode, (tde? tde->td_node : NULL));
1096 	if (error)
1097 		goto out_locked;
1098 	error = tmpfs_rename_check_permitted(cred, fdnode, fde->td_node,
1099 	    tdnode, (tde? tde->td_node : NULL));
1100 	error = kauth_authorize_vnode(cred, KAUTH_VNODE_DELETE, fvp, fdvp,
1101 	    error);
1102 	error = kauth_authorize_vnode(cred, KAUTH_VNODE_RENAME, tvp, tdvp,
1103 	    error);
1104 	if (error)
1105 		goto out_locked;
1106 
1107 	/*
1108 	 * Everything is hunky-dory.  Shuffle the directory entries.
1109 	 */
1110 	tmpfs_rename_attachdetach(tmpfs, fdvp, fde, fvp, tdvp, tde, tvp);
1111 
1112 	/*
	 * Update the directory entry's name if necessary, and flag
1114 	 * metadata updates.  A memory allocation failure here is not
1115 	 * OK because we've already committed some changes that we
1116 	 * can't back out at this point, and we have things locked so
1117 	 * we can't sleep, hence the early allocation above.
1118 	 */
1119 	if (newname != NULL) {
1120 		KASSERT(tcnp->cn_namelen <= MAXNAMLEN);
1121 
1122 		tmpfs_strname_free(tmpfs, fde->td_name, fde->td_namelen);
1123 		fde->td_namelen = (uint16_t)tcnp->cn_namelen;
1124 		(void)memcpy(newname, tcnp->cn_nameptr, tcnp->cn_namelen);
1125 		/* Commit newname and don't free it on the way out.  */
1126 		fde->td_name = newname;
1127 		newname = NULL;
1128 
1129 		fde->td_node->tn_status |= TMPFS_NODE_CHANGED;
1130 		tdnode->tn_status |= TMPFS_NODE_MODIFIED;
1131 	}
1132 
1133 success:
1134 	VN_KNOTE(fvp, NOTE_RENAME);
1135 	error = 0;
1136 
1137 out_locked:
1138 	tmpfs_rename_exit(tmpfs, fdvp, fvp, tdvp, tvp);
1139 
1140 out_unlocked:
1141 	/* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
1142 	/* KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
1143 	/* KASSERT((fvp == NULL) || (VOP_ISLOCKED(fvp) != LK_EXCLUSIVE)); */
1144 	/* KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */
1145 
1146 	if (newname != NULL)
1147 		tmpfs_strname_free(tmpfs, newname, tcnp->cn_namelen);
1148 
1149 	return error;
1150 }
1151 
1152 /*
1153  * Look up fcnp in fdnode/fdvp and store its directory entry in fde_ret
1154  * and the associated vnode in fvp_ret; fail if not found.  Look up
1155  * tcnp in tdnode/tdvp and store its directory entry in tde_ret and the
1156  * associated vnode in tvp_ret; store null instead if not found.  Fail
1157  * if anything has been mounted on any of the nodes involved.
1158  *
1159  * fdvp and tdvp must be referenced.
1160  *
1161  * On entry, nothing is locked.
1162  *
1163  * On success, everything is locked, and *fvp_ret, and *tvp_ret if
1164  * nonnull, are referenced.  The only pairs of vnodes that may be
1165  * identical are {fdvp, tdvp} and {fvp, tvp}.
1166  *
1167  * On failure, everything remains as was.
1168  *
1169  * Locking everything including the source and target nodes is
1170  * necessary to make sure that, e.g., link count updates are OK.  The
1171  * locking order is, in general, ancestor-first, matching the order you
1172  * need to use to look up a descendant anyway.
1173  */
1174 static int
1175 tmpfs_rename_enter(struct mount *mount, struct tmpfs_mount *tmpfs,
1176     kauth_cred_t cred,
1177     struct vnode *fdvp, struct tmpfs_node *fdnode, struct componentname *fcnp,
1178     struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
1179     struct vnode *tdvp, struct tmpfs_node *tdnode, struct componentname *tcnp,
1180     struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret)
1181 {
1182 	int error;
1183 
1184 	KASSERT(mount != NULL);
1185 	KASSERT(tmpfs != NULL);
1186 	KASSERT(fdvp != NULL);
1187 	KASSERT(fdnode != NULL);
1188 	KASSERT(fcnp != NULL);
1189 	KASSERT(fde_ret != NULL);
1190 	KASSERT(fvp_ret != NULL);
1191 	KASSERT(tdvp != NULL);
1192 	KASSERT(tdnode != NULL);
1193 	KASSERT(tcnp != NULL);
1194 	KASSERT(tde_ret != NULL);
1195 	KASSERT(tvp_ret != NULL);
1196 	KASSERT(fdnode->tn_vnode == fdvp);
1197 	KASSERT(tdnode->tn_vnode == tdvp);
1198 	KASSERT(fdnode->tn_type == VDIR);
1199 	KASSERT(tdnode->tn_type == VDIR);
1200 
1201 	if (fdvp == tdvp) {
1202 		KASSERT(fdnode == tdnode);
1203 		error = tmpfs_rename_enter_common(mount, tmpfs, cred, fdvp,
1204 		    fdnode, fcnp, fde_ret, fvp_ret, tcnp, tde_ret, tvp_ret);
1205 	} else {
1206 		KASSERT(fdnode != tdnode);
1207 		error = tmpfs_rename_enter_separate(mount, tmpfs, cred,
1208 		    fdvp, fdnode, fcnp, fde_ret, fvp_ret,
1209 		    tdvp, tdnode, tcnp, tde_ret, tvp_ret);
1210 	}
1211 
1212 	if (error)
1213 		return error;
1214 
1215 	KASSERT(*fde_ret != NULL);
1216 	KASSERT(*fvp_ret != NULL);
1217 	KASSERT((*tde_ret == NULL) == (*tvp_ret == NULL));
1218 	KASSERT((*tde_ret == NULL) || ((*tde_ret)->td_node != NULL));
1219 	KASSERT((*tde_ret == NULL) ||
1220 	    ((*tde_ret)->td_node->tn_vnode == *tvp_ret));
1221 	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
1222 	KASSERT(VOP_ISLOCKED(*fvp_ret) == LK_EXCLUSIVE);
1223 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
1224 	KASSERT((*tvp_ret == NULL) ||
1225 	    (VOP_ISLOCKED(*tvp_ret) == LK_EXCLUSIVE));
1226 	KASSERT(*fvp_ret != fdvp);
1227 	KASSERT(*fvp_ret != tdvp);
1228 	KASSERT(*tvp_ret != fdvp);
1229 	KASSERT(*tvp_ret != tdvp);
1230 	return 0;
1231 }
1232 
1233 /*
1234  * Lock and look up with a common source/target directory.
1235  */
1236 static int
1237 tmpfs_rename_enter_common(struct mount *mount, struct tmpfs_mount *tmpfs,
1238     kauth_cred_t cred,
1239     struct vnode *dvp, struct tmpfs_node *dnode,
1240     struct componentname *fcnp,
1241     struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
1242     struct componentname *tcnp,
1243     struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret)
1244 {
1245 	struct tmpfs_dirent *fde, *tde;
1246 	struct vnode *fvp, *tvp;
1247 	int error;
1248 
1249 	error = tmpfs_rename_lock_directory(dvp, dnode);
1250 	if (error)
1251 		goto fail0;
1252 
1253 	/* Did we lose a race with mount?  */
1254 	if (dvp->v_mountedhere != NULL) {
1255 		error = EBUSY;
1256 		goto fail1;
1257 	}
1258 
1259 	/* Make sure the caller may read the directory.  */
1260 	error = VOP_ACCESS(dvp, VEXEC, cred);
1261 	if (error)
1262 		goto fail1;
1263 
1264 	/*
1265 	 * The order in which we lock the source and target nodes is
1266 	 * irrelevant because there can only be one rename on this
1267 	 * directory in flight at a time, and we have it locked.
1268 	 */
1269 
1270 	fde = tmpfs_dir_lookup(dnode, fcnp);
1271 	if (fde == NULL) {
1272 		error = ENOENT;
1273 		goto fail1;
1274 	}
1275 
1276 	KASSERT(fde->td_node != NULL);
1277 	/* We ruled out `.' earlier.  */
1278 	KASSERT(fde->td_node != dnode);
1279 	/* We ruled out `..' earlier.  */
1280 	KASSERT(fde->td_node != dnode->tn_spec.tn_dir.tn_parent);
1281 	mutex_enter(&fde->td_node->tn_vlock);
1282 	error = tmpfs_vnode_get(mount, fde->td_node, &fvp);
1283 	if (error)
1284 		goto fail1;
1285 	KASSERT(fvp != NULL);
1286 	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
1287 	KASSERT(fvp != dvp);
1288 	KASSERT(fvp->v_mount == mount);
1289 
1290 	/* Refuse to rename a mount point.  */
1291 	if ((fvp->v_type == VDIR) && (fvp->v_mountedhere != NULL)) {
1292 		error = EBUSY;
1293 		goto fail2;
1294 	}
1295 
1296 	tde = tmpfs_dir_lookup(dnode, tcnp);
1297 	if (tde == NULL) {
1298 		tvp = NULL;
1299 	} else {
1300 		KASSERT(tde->td_node != NULL);
1301 		/* We ruled out `.' earlier.  */
1302 		KASSERT(tde->td_node != dnode);
1303 		/* We ruled out `..' earlier.  */
1304 		KASSERT(tde->td_node != dnode->tn_spec.tn_dir.tn_parent);
1305 		if (tde->td_node != fde->td_node) {
1306 			mutex_enter(&tde->td_node->tn_vlock);
1307 			error = tmpfs_vnode_get(mount, tde->td_node, &tvp);
1308 			if (error)
1309 				goto fail2;
1310 			KASSERT(tvp->v_mount == mount);
1311 			/* Refuse to rename over a mount point.  */
1312 			if ((tvp->v_type == VDIR) &&
1313 			    (tvp->v_mountedhere != NULL)) {
1314 				error = EBUSY;
1315 				goto fail3;
1316 			}
1317 		} else {
1318 			tvp = fvp;
1319 			vref(tvp);
1320 		}
1321 		KASSERT(tvp != NULL);
1322 		KASSERT(VOP_ISLOCKED(tvp) == LK_EXCLUSIVE);
1323 	}
1324 	KASSERT(tvp != dvp);
1325 
1326 	*fde_ret = fde;
1327 	*fvp_ret = fvp;
1328 	*tde_ret = tde;
1329 	*tvp_ret = tvp;
1330 	return 0;
1331 
1332 fail3:	if (tvp != NULL) {
1333 		if (tvp != fvp)
1334 			vput(tvp);
1335 		else
1336 			vrele(tvp);
1337 	}
1338 
1339 fail2:	vput(fvp);
1340 fail1:	VOP_UNLOCK(dvp);
1341 fail0:	return error;
1342 }
1343 
1344 /*
1345  * Lock and look up with separate source and target directories.
1346  */
1347 static int
1348 tmpfs_rename_enter_separate(struct mount *mount, struct tmpfs_mount *tmpfs,
1349     kauth_cred_t cred,
1350     struct vnode *fdvp, struct tmpfs_node *fdnode, struct componentname *fcnp,
1351     struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
1352     struct vnode *tdvp, struct tmpfs_node *tdnode, struct componentname *tcnp,
1353     struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret)
1354 {
1355 	struct tmpfs_node *intermediate_node;
1356 	struct tmpfs_dirent *fde, *tde;
1357 	struct vnode *fvp, *tvp;
1358 	int error;
1359 
1360 	KASSERT(fdvp != tdvp);
1361 	KASSERT(fdnode != tdnode);
1362 
1363 #if 0				/* XXX */
1364 	mutex_enter(&tmpfs->tm_rename_lock);
1365 #endif
1366 
1367 	error = tmpfs_rename_genealogy(fdnode, tdnode, &intermediate_node);
1368 	if (error)
1369 		goto fail;
1370 
1371 	/*
1372 	 * intermediate_node == NULL means fdnode is not an ancestor of
1373 	 * tdnode.
1374 	 */
1375 	if (intermediate_node == NULL)
1376 		error = tmpfs_rename_lock(mount, cred, ENOTEMPTY,
1377 		    tdvp, tdnode, tcnp, true, &tde, &tvp,
1378 		    fdvp, fdnode, fcnp, false, &fde, &fvp);
1379 	else
1380 		error = tmpfs_rename_lock(mount, cred, EINVAL,
1381 		    fdvp, fdnode, fcnp, false, &fde, &fvp,
1382 		    tdvp, tdnode, tcnp, true, &tde, &tvp);
1383 	if (error)
1384 		goto fail;
1385 
1386 	KASSERT(fde != NULL);
1387 	KASSERT(fde->td_node != NULL);
1388 
1389 	/*
1390 	 * Reject rename("foo/bar", "foo/bar/baz/quux/zot").
1391 	 */
1392 	if (fde->td_node == intermediate_node) {
1393 		tmpfs_rename_exit(tmpfs, fdvp, fvp, tdvp, tvp);
1394 		return EINVAL;
1395 	}
1396 
1397 	*fde_ret = fde;
1398 	*fvp_ret = fvp;
1399 	*tde_ret = tde;
1400 	*tvp_ret = tvp;
1401 	return 0;
1402 
1403 fail:
1404 #if 0				/* XXX */
1405 	mutex_exit(&tmpfs->tm_rename_lock);
1406 #endif
1407 	return error;
1408 }
1409 
1410 /*
1411  * Unlock everything we locked for rename.
1412  *
1413  * fdvp and tdvp must be referenced.
1414  *
1415  * On entry, everything is locked, and fvp and tvp referenced.
1416  *
1417  * On exit, everything is unlocked, and fvp and tvp are released.
1418  */
1419 static void
1420 tmpfs_rename_exit(struct tmpfs_mount *tmpfs,
1421     struct vnode *fdvp, struct vnode *fvp,
1422     struct vnode *tdvp, struct vnode *tvp)
1423 {
1424 
1425 	KASSERT(tmpfs != NULL);
1426 	KASSERT(fdvp != NULL);
1427 	KASSERT(fvp != NULL);
1428 	KASSERT(fdvp != fvp);
1429 	KASSERT(fdvp != tvp);
1430 	KASSERT(tdvp != tvp);
1431 	KASSERT(tdvp != fvp);
1432 	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
1433 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
1434 	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
1435 	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
1436 
1437 	if (tvp != NULL) {
1438 		if (tvp != fvp)
1439 			vput(tvp);
1440 		else
1441 			vrele(tvp);
1442 	}
1443 	VOP_UNLOCK(tdvp);
1444 	vput(fvp);
1445 	if (fdvp != tdvp)
1446 		VOP_UNLOCK(fdvp);
1447 
1448 #if 0				/* XXX */
1449 	if (fdvp != tdvp)
1450 		mutex_exit(&tmpfs->tm_rename_lock);
1451 #endif
1452 }
1453 
1454 /*
1455  * Lock a directory, but fail if it has been rmdir'd.
1456  *
1457  * vp must be referenced.
1458  */
1459 static int
1460 tmpfs_rename_lock_directory(struct vnode *vp, struct tmpfs_node *node)
1461 {
1462 
1463 	KASSERT(vp != NULL);
1464 	KASSERT(node != NULL);
1465 	KASSERT(node->tn_vnode == vp);
1466 	KASSERT(node->tn_type == VDIR);
1467 
1468 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1469 	if (node->tn_spec.tn_dir.tn_parent == NULL) {
1470 		VOP_UNLOCK(vp);
1471 		return ENOENT;
1472 	}
1473 
1474 	return 0;
1475 }
1476 
1477 /*
1478  * Analyze the genealogy of the source and target nodes.
1479  *
1480  * On success, stores in *intermediate_node_ret either the child of
1481  * fdnode of which tdnode is a descendant, or null if tdnode is not a
1482  * descendant of fdnode at all.
1483  *
1484  * fdnode and tdnode must be unlocked and referenced.  The file
1485  * system's rename lock must also be held, to exclude concurrent
1486  * changes to the file system's genealogy other than rmdir.
1487  *
1488  * XXX This causes an extra lock/unlock of tdnode in the case when
1489  * we're just about to lock it again before locking anything else.
1490  * However, changing that requires reorganizing the code to make it
1491  * even more horrifically obscure.
1492  */
1493 static int
1494 tmpfs_rename_genealogy(struct tmpfs_node *fdnode, struct tmpfs_node *tdnode,
1495     struct tmpfs_node **intermediate_node_ret)
1496 {
1497 	struct tmpfs_node *node = tdnode, *parent;
1498 	int error;
1499 
1500 	KASSERT(fdnode != NULL);
1501 	KASSERT(tdnode != NULL);
1502 	KASSERT(fdnode != tdnode);
1503 	KASSERT(intermediate_node_ret != NULL);
1504 
1505 	KASSERT(fdnode->tn_vnode != NULL);
1506 	KASSERT(tdnode->tn_vnode != NULL);
1507 	KASSERT(fdnode->tn_type == VDIR);
1508 	KASSERT(tdnode->tn_type == VDIR);
1509 
1510 	/*
1511 	 * We need to provisionally lock tdnode->tn_vnode to keep rmdir
1512 	 * from deleting it -- or any ancestor -- at an inopportune
1513 	 * moment.
1514 	 */
1515 	error = tmpfs_rename_lock_directory(tdnode->tn_vnode, tdnode);
1516 	if (error)
1517 		return error;
1518 
1519 	for (;;) {
1520 		parent = node->tn_spec.tn_dir.tn_parent;
1521 		KASSERT(parent != NULL);
1522 		KASSERT(parent->tn_type == VDIR);
1523 
1524 		/* Did we hit the root without finding fdnode?  */
1525 		if (parent == node) {
1526 			*intermediate_node_ret = NULL;
1527 			break;
1528 		}
1529 
1530 		/* Did we find that fdnode is an ancestor?  */
1531 		if (parent == fdnode) {
1532 			*intermediate_node_ret = node;
1533 			break;
1534 		}
1535 
1536 		/* Neither -- keep ascending the family tree.  */
1537 		node = parent;
1538 	}
1539 
1540 	VOP_UNLOCK(tdnode->tn_vnode);
1541 	return 0;
1542 }
1543 
/*
 * Lock directories a and b, which must be distinct, and look up and
 * lock nodes a and b.  Do a first and then b.  Directory b may not be
 * an ancestor of directory a, although directory a may be an ancestor
 * of directory b.  Fail with overlap_error if node a is directory b.
 * Neither componentname may be `.' or `..'.
 *
 * Exactly one of a_missing_ok and b_missing_ok must be true (this is
 * asserted).  The entry whose flag is false must exist, or the call
 * fails with ENOENT; the entry whose flag is true may be absent.
 *
 * a_dvp and b_dvp must be referenced.
 *
 * On entry, a_dvp and b_dvp are unlocked.
 *
 * On success, returns 0, and
 * . a_dvp and b_dvp are locked,
 * . *a_dirent_ret is filled either with null (possible only if
 *     a_missing_ok) or with a directory entry whose node's vnode is
 *     locked and referenced,
 * . *a_vp_ret is filled either with null or with the corresponding
 *     vnode,
 * . *b_dirent_ret is filled either with null (possible only if
 *     b_missing_ok) or with a directory entry whose node's vnode is
 *     locked and referenced,
 * . *b_vp_ret is filled either with null or with the corresponding
 *     vnode, and
 * . the only pair of vnodes that may be identical is *a_vp_ret and
 *     *b_vp_ret; in that case the vnode is locked once and
 *     referenced twice.
 *
 * On failure, returns an errno; a_dvp and b_dvp are left unlocked,
 * and *a_dirent_ret, *a_vp_ret, *b_dirent_ret, and *b_vp_ret are left
 * alone.  Besides overlap_error and ENOENT, the visible failures are
 * EBUSY when a directory or a looked-up directory node is a mount
 * point, and whatever tmpfs_rename_lock_directory, VOP_ACCESS or
 * tmpfs_vnode_get report.
 */
static int
tmpfs_rename_lock(struct mount *mount, kauth_cred_t cred, int overlap_error,
    struct vnode *a_dvp, struct tmpfs_node *a_dnode,
    struct componentname *a_cnp, bool a_missing_ok,
    struct tmpfs_dirent **a_dirent_ret, struct vnode **a_vp_ret,
    struct vnode *b_dvp, struct tmpfs_node *b_dnode,
    struct componentname *b_cnp, bool b_missing_ok,
    struct tmpfs_dirent **b_dirent_ret, struct vnode **b_vp_ret)
{
	struct tmpfs_dirent *a_dirent, *b_dirent;
	struct vnode *a_vp, *b_vp;
	int error;

	KASSERT(a_dvp != NULL);
	KASSERT(a_dnode != NULL);
	KASSERT(a_cnp != NULL);
	KASSERT(a_dirent_ret != NULL);
	KASSERT(a_vp_ret != NULL);
	KASSERT(b_dvp != NULL);
	KASSERT(b_dnode != NULL);
	KASSERT(b_cnp != NULL);
	KASSERT(b_dirent_ret != NULL);
	KASSERT(b_vp_ret != NULL);
	KASSERT(a_dvp != b_dvp);
	KASSERT(a_dnode != b_dnode);
	KASSERT(a_dnode->tn_vnode == a_dvp);
	KASSERT(b_dnode->tn_vnode == b_dvp);
	KASSERT(a_dnode->tn_type == VDIR);
	KASSERT(b_dnode->tn_type == VDIR);
	/* Exactly one side is allowed to be missing. */
	KASSERT(a_missing_ok != b_missing_ok);

	/*
	 * Phase 1: directory a, then node a.
	 */
	error = tmpfs_rename_lock_directory(a_dvp, a_dnode);
	if (error)
		goto fail0;

	/* Did we lose a race with mount?  */
	if (a_dvp->v_mountedhere != NULL) {
		error = EBUSY;
		goto fail1;
	}

	/* Make sure the caller may search (VEXEC) the directory.  */
	error = VOP_ACCESS(a_dvp, VEXEC, cred);
	if (error)
		goto fail1;

	a_dirent = tmpfs_dir_lookup(a_dnode, a_cnp);
	if (a_dirent != NULL) {
		KASSERT(a_dirent->td_node != NULL);
		/* We ruled out `.' earlier.  */
		KASSERT(a_dirent->td_node != a_dnode);
		/* We ruled out `..' earlier.  */
		KASSERT(a_dirent->td_node !=
		    a_dnode->tn_spec.tn_dir.tn_parent);
		/* Node a is directory b itself: report the overlap. */
		if (a_dirent->td_node == b_dnode) {
			error = overlap_error;
			goto fail1;
		}
		/*
		 * tmpfs_vnode_get is entered with the node's tn_vlock
		 * held; there is no matching mutex_exit here, so it
		 * presumably drops the mutex itself -- TODO confirm
		 * against its definition.
		 */
		mutex_enter(&a_dirent->td_node->tn_vlock);
		error = tmpfs_vnode_get(mount, a_dirent->td_node, &a_vp);
		if (error)
			goto fail1;
		KASSERT(a_vp->v_mount == mount);
		/* Refuse to rename (over) a mount point.  */
		if ((a_vp->v_type == VDIR) && (a_vp->v_mountedhere != NULL)) {
			error = EBUSY;
			goto fail2;
		}
	} else if (!a_missing_ok) {
		error = ENOENT;
		goto fail1;
	} else {
		a_vp = NULL;
	}
	KASSERT(a_vp != a_dvp);
	KASSERT(a_vp != b_dvp);

	/*
	 * Phase 2: directory b, then node b.  From here on a_vp is
	 * either null or a locked, referenced vnode, which is why the
	 * unwind at fail2 tests it.
	 */
	error = tmpfs_rename_lock_directory(b_dvp, b_dnode);
	if (error)
		goto fail2;

	/* Did we lose a race with mount?  */
	if (b_dvp->v_mountedhere != NULL) {
		error = EBUSY;
		goto fail3;
	}

	/* Make sure the caller may search (VEXEC) the directory.  */
	error = VOP_ACCESS(b_dvp, VEXEC, cred);
	if (error)
		goto fail3;

	b_dirent = tmpfs_dir_lookup(b_dnode, b_cnp);
	if (b_dirent != NULL) {
		KASSERT(b_dirent->td_node != NULL);
		/* We ruled out `.' earlier.  */
		KASSERT(b_dirent->td_node != b_dnode);
		/* We ruled out `..' earlier.  */
		KASSERT(b_dirent->td_node !=
		    b_dnode->tn_spec.tn_dir.tn_parent);
		/* b is not an ancestor of a.  */
		KASSERT(b_dirent->td_node != a_dnode);
		/* But the source and target nodes might be the same.  */
		if ((a_dirent == NULL) ||
		    (a_dirent->td_node != b_dirent->td_node)) {
			/* Distinct node: get and lock its own vnode. */
			mutex_enter(&b_dirent->td_node->tn_vlock);
			error = tmpfs_vnode_get(mount, b_dirent->td_node,
			    &b_vp);
			if (error)
				goto fail3;
			KASSERT(b_vp->v_mount == mount);
			KASSERT(a_vp != b_vp);
			/* Refuse to rename (over) a mount point.  */
			if ((b_vp->v_type == VDIR) &&
			    (b_vp->v_mountedhere != NULL)) {
				error = EBUSY;
				goto fail4;
			}
		} else {
			/*
			 * Same node as a: share a's already-locked
			 * vnode and take an extra reference only.
			 */
			b_vp = a_vp;
			vref(b_vp);
		}
	} else if (!b_missing_ok) {
		error = ENOENT;
		goto fail3;
	} else {
		b_vp = NULL;
	}
	KASSERT(b_vp != a_dvp);
	KASSERT(b_vp != b_dvp);

	/* Verify the postconditions before publishing the results. */
	KASSERT(VOP_ISLOCKED(a_dvp) == LK_EXCLUSIVE);
	KASSERT(VOP_ISLOCKED(b_dvp) == LK_EXCLUSIVE);
	KASSERT(a_missing_ok || (a_dirent != NULL));
	KASSERT(a_missing_ok || (a_dirent->td_node != NULL));
	KASSERT(b_missing_ok || (b_dirent != NULL));
	KASSERT(b_missing_ok || (b_dirent->td_node != NULL));
	KASSERT((a_dirent == NULL) || (a_dirent->td_node != NULL));
	KASSERT((a_dirent == NULL) || (a_dirent->td_node->tn_vnode == a_vp));
	KASSERT((b_dirent == NULL) || (b_dirent->td_node != NULL));
	KASSERT((b_dirent == NULL) || (b_dirent->td_node->tn_vnode == b_vp));
	KASSERT((a_vp == NULL) || (VOP_ISLOCKED(a_vp) == LK_EXCLUSIVE));
	KASSERT((b_vp == NULL) || (VOP_ISLOCKED(b_vp) == LK_EXCLUSIVE));

	*a_dirent_ret = a_dirent;
	*b_dirent_ret = b_dirent;
	*a_vp_ret = a_vp;
	*b_vp_ret = b_vp;
	return 0;

	/*
	 * Unwind ladder: each label releases what was acquired just
	 * before the corresponding failure point, then falls through
	 * to release everything acquired earlier.
	 */

	/* Release node b (or just its extra reference if shared). */
fail4:	if (b_vp != NULL) {
		KASSERT(VOP_ISLOCKED(b_vp) == LK_EXCLUSIVE);
		if (b_vp != a_vp)
			vput(b_vp);
		else
			vrele(a_vp);
	}

	/* Unlock directory b. */
fail3:	KASSERT(VOP_ISLOCKED(b_dvp) == LK_EXCLUSIVE);
	VOP_UNLOCK(b_dvp);

	/* Release node a, if one was found. */
fail2:	if (a_vp != NULL) {
		KASSERT(VOP_ISLOCKED(a_vp) == LK_EXCLUSIVE);
		vput(a_vp);
	}

	/* Unlock directory a. */
fail1:	KASSERT(VOP_ISLOCKED(a_dvp) == LK_EXCLUSIVE);
	VOP_UNLOCK(a_dvp);

fail0:	/* KASSERT(VOP_ISLOCKED(a_dvp) != LK_EXCLUSIVE); */
	/* KASSERT(VOP_ISLOCKED(b_dvp) != LK_EXCLUSIVE); */
	/* KASSERT((a_vp == NULL) || (VOP_ISLOCKED(a_vp) != LK_EXCLUSIVE)); */
	/* KASSERT((b_vp == NULL) || (VOP_ISLOCKED(b_vp) != LK_EXCLUSIVE)); */
	return error;
}
1744 
1745 /*
1746  * Shuffle the directory entries to move fvp from the directory fdvp
1747  * into the directory tdvp.  fde is fvp's directory entry in fdvp.  If
1748  * we are overwriting a target node, it is tvp, and tde is its
1749  * directory entry in tdvp.
1750  *
1751  * fdvp, fvp, tdvp, and tvp must all be locked and referenced.
1752  */
1753 static void
1754 tmpfs_rename_attachdetach(struct tmpfs_mount *tmpfs,
1755     struct vnode *fdvp, struct tmpfs_dirent *fde, struct vnode *fvp,
1756     struct vnode *tdvp, struct tmpfs_dirent *tde, struct vnode *tvp)
1757 {
1758 
1759 	KASSERT(tmpfs != NULL);
1760 	KASSERT(fdvp != NULL);
1761 	KASSERT(fde != NULL);
1762 	KASSERT(fvp != NULL);
1763 	KASSERT(tdvp != NULL);
1764 	KASSERT(fde->td_node != NULL);
1765 	KASSERT(fde->td_node->tn_vnode == fvp);
1766 	KASSERT((tde == NULL) == (tvp == NULL));
1767 	KASSERT((tde == NULL) || (tde->td_node != NULL));
1768 	KASSERT((tde == NULL) || (tde->td_node->tn_vnode == tvp));
1769 	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
1770 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
1771 	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
1772 	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
1773 
1774 	/*
1775 	 * If we are moving from one directory to another, detach the
1776 	 * source entry and reattach it to the target directory.
1777 	 */
1778 	if (fdvp != tdvp) {
1779 		/* tmpfs_dir_detach clobbers fde->td_node, so save it.  */
1780 		struct tmpfs_node *fnode = fde->td_node;
1781 		tmpfs_dir_detach(fdvp, fde);
1782 		tmpfs_dir_attach(tdvp, fde, fnode);
1783 	} else if (tvp == NULL) {
1784 		/*
1785 		 * We are changing the directory.  tmpfs_dir_attach and
1786 		 * tmpfs_dir_detach note the events for us, but for
1787 		 * this case we don't call them, so we must note the
1788 		 * event explicitly.
1789 		 */
1790 		VN_KNOTE(fdvp, NOTE_WRITE);
1791 	}
1792 
1793 	/*
1794 	 * If we are replacing an existing target entry, delete it.
1795 	 */
1796 	if (tde != NULL) {
1797 		KASSERT(tvp != NULL);
1798 		KASSERT(tde->td_node != NULL);
1799 		KASSERT((fvp->v_type == VDIR) == (tvp->v_type == VDIR));
1800 		if (tde->td_node->tn_type == VDIR) {
1801 			KASSERT(tde->td_node->tn_size == 0);
1802 			KASSERT(tde->td_node->tn_links == 2);
1803 			/* Decrement the extra link count for `.' so
1804 			 * the vnode will be recycled when released.  */
1805 			tde->td_node->tn_links--;
1806 		}
1807 		tmpfs_dir_detach(tdvp, tde);
1808 		tmpfs_free_dirent(tmpfs, tde);
1809 	}
1810 }
1811 
1812 /*
1813  * Remove the entry de for the non-directory vp from the directory dvp.
1814  *
1815  * Everything must be locked and referenced.
1816  */
1817 static int
1818 tmpfs_do_remove(struct tmpfs_mount *tmpfs, struct vnode *dvp,
1819     struct tmpfs_node *dnode, struct tmpfs_dirent *de, struct vnode *vp,
1820     kauth_cred_t cred)
1821 {
1822 	int error;
1823 
1824 	KASSERT(tmpfs != NULL);
1825 	KASSERT(dvp != NULL);
1826 	KASSERT(dnode != NULL);
1827 	KASSERT(de != NULL);
1828 	KASSERT(vp != NULL);
1829 	KASSERT(dnode->tn_vnode == dvp);
1830 	KASSERT(de->td_node != NULL);
1831 	KASSERT(de->td_node->tn_vnode == vp);
1832 	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
1833 	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
1834 
1835 	error = tmpfs_remove_check_possible(dnode, de->td_node);
1836 	if (error)
1837 		return error;
1838 
1839 	error = tmpfs_remove_check_permitted(cred, dnode, de->td_node);
1840 	error = kauth_authorize_vnode(cred, KAUTH_VNODE_DELETE, vp, dvp,
1841 	    error);
1842 	if (error)
1843 		return error;
1844 
1845 	tmpfs_dir_detach(dvp, de);
1846 	tmpfs_free_dirent(tmpfs, de);
1847 
1848 	return 0;
1849 }
1850 
1851 /*
1852  * Check whether a rename is possible independent of credentials.
1853  *
1854  * Everything must be locked and referenced.
1855  */
1856 static int
1857 tmpfs_rename_check_possible(
1858     struct tmpfs_node *fdnode, struct tmpfs_node *fnode,
1859     struct tmpfs_node *tdnode, struct tmpfs_node *tnode)
1860 {
1861 
1862 	KASSERT(fdnode != NULL);
1863 	KASSERT(fnode != NULL);
1864 	KASSERT(tdnode != NULL);
1865 	KASSERT(fdnode != fnode);
1866 	KASSERT(tdnode != tnode);
1867 	KASSERT(fnode != tnode);
1868 	KASSERT(fdnode->tn_vnode != NULL);
1869 	KASSERT(fnode->tn_vnode != NULL);
1870 	KASSERT(tdnode->tn_vnode != NULL);
1871 	KASSERT((tnode == NULL) || (tnode->tn_vnode != NULL));
1872 	KASSERT(VOP_ISLOCKED(fdnode->tn_vnode) == LK_EXCLUSIVE);
1873 	KASSERT(VOP_ISLOCKED(fnode->tn_vnode) == LK_EXCLUSIVE);
1874 	KASSERT(VOP_ISLOCKED(tdnode->tn_vnode) == LK_EXCLUSIVE);
1875 	KASSERT((tnode == NULL) ||
1876 	    (VOP_ISLOCKED(tnode->tn_vnode) == LK_EXCLUSIVE));
1877 
1878 	/*
1879 	 * If fdnode is immutable, we can't write to it.  If fdnode is
1880 	 * append-only, the only change we can make is to add entries
1881 	 * to it.  If fnode is immutable, we can't change the links to
1882 	 * it.  If fnode is append-only...well, this is what UFS does.
1883 	 */
1884 	if ((fdnode->tn_flags | fnode->tn_flags) & (IMMUTABLE | APPEND))
1885 		return EPERM;
1886 
1887 	/*
1888 	 * If tdnode is immutable, we can't write to it.  If tdnode is
1889 	 * append-only, we can add entries, but we can't change
1890 	 * existing entries.
1891 	 */
1892 	if (tdnode->tn_flags & (IMMUTABLE | (tnode? APPEND : 0)))
1893 		return EPERM;
1894 
1895 	/*
1896 	 * If tnode is immutable, we can't replace links to it.  If
1897 	 * tnode is append-only...well, this is what UFS does.
1898 	 */
1899 	if (tnode != NULL) {
1900 		KASSERT(tnode != NULL);
1901 		if ((tnode->tn_flags & (IMMUTABLE | APPEND)) != 0)
1902 			return EPERM;
1903 	}
1904 
1905 	return 0;
1906 }
1907 
1908 /*
1909  * Check whether a rename is permitted given our credentials.
1910  *
1911  * Everything must be locked and referenced.
1912  */
1913 static int
1914 tmpfs_rename_check_permitted(kauth_cred_t cred,
1915     struct tmpfs_node *fdnode, struct tmpfs_node *fnode,
1916     struct tmpfs_node *tdnode, struct tmpfs_node *tnode)
1917 {
1918 	int error;
1919 
1920 	KASSERT(fdnode != NULL);
1921 	KASSERT(fnode != NULL);
1922 	KASSERT(tdnode != NULL);
1923 	KASSERT(fdnode != fnode);
1924 	KASSERT(tdnode != tnode);
1925 	KASSERT(fnode != tnode);
1926 	KASSERT(fdnode->tn_vnode != NULL);
1927 	KASSERT(fnode->tn_vnode != NULL);
1928 	KASSERT(tdnode->tn_vnode != NULL);
1929 	KASSERT((tnode == NULL) || (tnode->tn_vnode != NULL));
1930 	KASSERT(VOP_ISLOCKED(fdnode->tn_vnode) == LK_EXCLUSIVE);
1931 	KASSERT(VOP_ISLOCKED(fnode->tn_vnode) == LK_EXCLUSIVE);
1932 	KASSERT(VOP_ISLOCKED(tdnode->tn_vnode) == LK_EXCLUSIVE);
1933 	KASSERT((tnode == NULL) ||
1934 	    (VOP_ISLOCKED(tnode->tn_vnode) == LK_EXCLUSIVE));
1935 
1936 	/*
1937 	 * We need to remove or change an entry in the source directory.
1938 	 */
1939 	error = VOP_ACCESS(fdnode->tn_vnode, VWRITE, cred);
1940 	if (error)
1941 		return error;
1942 
1943 	/*
1944 	 * If we are changing directories, then we need to write to the
1945 	 * target directory to add or change an entry.  Also, if fnode
1946 	 * is a directory, we need to write to it to change its `..'
1947 	 * entry.
1948 	 */
1949 	if (fdnode != tdnode) {
1950 		error = VOP_ACCESS(tdnode->tn_vnode, VWRITE, cred);
1951 		if (error)
1952 			return error;
1953 		if (fnode->tn_type == VDIR) {
1954 			error = VOP_ACCESS(fnode->tn_vnode, VWRITE, cred);
1955 			if (error)
1956 				return error;
1957 		}
1958 	}
1959 
1960 	error = tmpfs_check_sticky(cred, fdnode, fnode);
1961 	if (error)
1962 		return error;
1963 
1964 	error = tmpfs_check_sticky(cred, tdnode, tnode);
1965 	if (error)
1966 		return error;
1967 
1968 	return 0;
1969 }
1970 
1971 /*
1972  * Check whether removing node's entry in dnode is possible independent
1973  * of credentials.
1974  *
1975  * Everything must be locked and referenced.
1976  */
1977 static int
1978 tmpfs_remove_check_possible(struct tmpfs_node *dnode, struct tmpfs_node *node)
1979 {
1980 
1981 	KASSERT(dnode != NULL);
1982 	KASSERT(dnode->tn_vnode != NULL);
1983 	KASSERT(node != NULL);
1984 	KASSERT(dnode != node);
1985 	KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE);
1986 	KASSERT(VOP_ISLOCKED(node->tn_vnode) == LK_EXCLUSIVE);
1987 
1988 	/*
1989 	 * We want to delete the entry.  If dnode is immutable, we
1990 	 * can't write to it to delete the entry.  If dnode is
1991 	 * append-only, the only change we can make is to add entries,
1992 	 * so we can't delete entries.  If node is immutable, we can't
1993 	 * change the links to it, so we can't delete the entry.  If
1994 	 * node is append-only...well, this is what UFS does.
1995 	 */
1996 	if ((dnode->tn_flags | node->tn_flags) & (IMMUTABLE | APPEND))
1997 		return EPERM;
1998 
1999 	return 0;
2000 }
2001 
2002 /*
2003  * Check whether removing node's entry in dnode is permitted given our
2004  * credentials.
2005  *
2006  * Everything must be locked and referenced.
2007  */
2008 static int
2009 tmpfs_remove_check_permitted(kauth_cred_t cred,
2010     struct tmpfs_node *dnode, struct tmpfs_node *node)
2011 {
2012 	int error;
2013 
2014 	KASSERT(dnode != NULL);
2015 	KASSERT(dnode->tn_vnode != NULL);
2016 	KASSERT(node != NULL);
2017 	KASSERT(dnode != node);
2018 	KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE);
2019 	KASSERT(VOP_ISLOCKED(node->tn_vnode) == LK_EXCLUSIVE);
2020 
2021 	/*
2022 	 * Check whether we are permitted to write to the source
2023 	 * directory in order to delete an entry from it.
2024 	 */
2025 	error = VOP_ACCESS(dnode->tn_vnode, VWRITE, cred);
2026 	if (error)
2027 		return error;
2028 
2029 	error = tmpfs_check_sticky(cred, dnode, node);
2030 	if (error)
2031 		return error;
2032 
2033 	return 0;
2034 }
2035 
2036 /*
2037  * Check whether we may change an entry in a sticky directory.  If the
2038  * directory is sticky, the user must own either the directory or, if
2039  * it exists, the node, in order to change the entry.
2040  *
2041  * Everything must be locked and referenced.
2042  */
2043 static int
2044 tmpfs_check_sticky(kauth_cred_t cred,
2045     struct tmpfs_node *dnode, struct tmpfs_node *node)
2046 {
2047 
2048 	KASSERT(dnode != NULL);
2049 	KASSERT(dnode->tn_vnode != NULL);
2050 	KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE);
2051 	KASSERT((node == NULL) || (node->tn_vnode != NULL));
2052 	KASSERT((node == NULL) ||
2053 	    (VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE));
2054 
2055 	if (dnode->tn_mode & S_ISTXT) {
2056 		uid_t euid = kauth_cred_geteuid(cred);
2057 		if (euid == dnode->tn_uid)
2058 			return 0;
2059 		if ((node == NULL) || (euid == node->tn_uid))
2060 			return 0;
2061 		return EPERM;
2062 	}
2063 
2064 	return 0;
2065 }
2066 
2067 int
2068 tmpfs_mkdir(void *v)
2069 {
2070 	struct vop_mkdir_args /* {
2071 		struct vnode		*a_dvp;
2072 		struct vnode		**a_vpp;
2073 		struct componentname	*a_cnp;
2074 		struct vattr		*a_vap;
2075 	} */ *ap = v;
2076 	vnode_t *dvp = ap->a_dvp;
2077 	vnode_t **vpp = ap->a_vpp;
2078 	struct componentname *cnp = ap->a_cnp;
2079 	struct vattr *vap = ap->a_vap;
2080 
2081 	KASSERT(vap->va_type == VDIR);
2082 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
2083 }
2084 
2085 int
2086 tmpfs_rmdir(void *v)
2087 {
2088 	struct vop_rmdir_args /* {
2089 		struct vnode		*a_dvp;
2090 		struct vnode		*a_vp;
2091 		struct componentname	*a_cnp;
2092 	} */ *ap = v;
2093 	vnode_t *dvp = ap->a_dvp;
2094 	vnode_t *vp = ap->a_vp;
2095 	tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
2096 	tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
2097 	tmpfs_node_t *node = VP_TO_TMPFS_DIR(vp);
2098 	tmpfs_dirent_t *de;
2099 	int error = 0;
2100 
2101 	KASSERT(VOP_ISLOCKED(dvp));
2102 	KASSERT(VOP_ISLOCKED(vp));
2103 	KASSERT(node->tn_spec.tn_dir.tn_parent == dnode);
2104 
2105 	/*
2106 	 * Directories with more than two entries ('.' and '..') cannot
2107 	 * be removed.
2108 	 */
2109 	if (node->tn_size > 0) {
2110 		error = ENOTEMPTY;
2111 		goto out;
2112 	}
2113 
2114 	/* Lookup the directory entry (check the cached hint first). */
2115 	de = tmpfs_dir_cached(node);
2116 	if (de == NULL) {
2117 		struct componentname *cnp = ap->a_cnp;
2118 		de = tmpfs_dir_lookup(dnode, cnp);
2119 	}
2120 	KASSERT(de && de->td_node == node);
2121 
2122 	/* Check flags to see if we are allowed to remove the directory. */
2123 	if (dnode->tn_flags & APPEND || node->tn_flags & (IMMUTABLE | APPEND)) {
2124 		error = EPERM;
2125 		goto out;
2126 	}
2127 
2128 	/* Decrement the link count for the virtual '.' entry. */
2129 	node->tn_links--;
2130 	node->tn_status |= TMPFS_NODE_STATUSALL;
2131 
2132 	/* Detach the directory entry from the directory. */
2133 	tmpfs_dir_detach(dvp, de);
2134 
2135 	/* Purge the cache for parent. */
2136 	cache_purge(dvp);
2137 
2138 	/*
2139 	 * Destroy the directory entry.  Note: the inode referred by it
2140 	 * will not be destroyed until the vnode is reclaimed.
2141 	 */
2142 	tmpfs_free_dirent(tmp, de);
2143 	KASSERT(node->tn_links == 0);
2144 out:
2145 	/* Release the nodes. */
2146 	vput(dvp);
2147 	vput(vp);
2148 	return error;
2149 }
2150 
2151 int
2152 tmpfs_symlink(void *v)
2153 {
2154 	struct vop_symlink_args /* {
2155 		struct vnode		*a_dvp;
2156 		struct vnode		**a_vpp;
2157 		struct componentname	*a_cnp;
2158 		struct vattr		*a_vap;
2159 		char			*a_target;
2160 	} */ *ap = v;
2161 	vnode_t *dvp = ap->a_dvp;
2162 	vnode_t **vpp = ap->a_vpp;
2163 	struct componentname *cnp = ap->a_cnp;
2164 	struct vattr *vap = ap->a_vap;
2165 	char *target = ap->a_target;
2166 
2167 	KASSERT(vap->va_type == VLNK);
2168 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, target);
2169 }
2170 
2171 int
2172 tmpfs_readdir(void *v)
2173 {
2174 	struct vop_readdir_args /* {
2175 		struct vnode	*a_vp;
2176 		struct uio	*a_uio;
2177 		kauth_cred_t	a_cred;
2178 		int		*a_eofflag;
2179 		off_t		**a_cookies;
2180 		int		*ncookies;
2181 	} */ *ap = v;
2182 	vnode_t *vp = ap->a_vp;
2183 	struct uio *uio = ap->a_uio;
2184 	int *eofflag = ap->a_eofflag;
2185 	off_t **cookies = ap->a_cookies;
2186 	int *ncookies = ap->a_ncookies;
2187 	off_t startoff, cnt;
2188 	tmpfs_node_t *node;
2189 	int error;
2190 
2191 	KASSERT(VOP_ISLOCKED(vp));
2192 
2193 	/* This operation only makes sense on directory nodes. */
2194 	if (vp->v_type != VDIR) {
2195 		return ENOTDIR;
2196 	}
2197 	node = VP_TO_TMPFS_DIR(vp);
2198 	startoff = uio->uio_offset;
2199 	cnt = 0;
2200 
2201 	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOT) {
2202 		error = tmpfs_dir_getdotdent(node, uio);
2203 		if (error != 0) {
2204 			if (error == -1)
2205 				error = 0;
2206 			goto out;
2207 		}
2208 		cnt++;
2209 	}
2210 	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT) {
2211 		error = tmpfs_dir_getdotdotdent(node, uio);
2212 		if (error != 0) {
2213 			if (error == -1)
2214 				error = 0;
2215 			goto out;
2216 		}
2217 		cnt++;
2218 	}
2219 	error = tmpfs_dir_getdents(node, uio, &cnt);
2220 	if (error == -1) {
2221 		error = 0;
2222 	}
2223 	KASSERT(error >= 0);
2224 out:
2225 	if (eofflag != NULL) {
2226 		*eofflag = (!error && uio->uio_offset == TMPFS_DIRCOOKIE_EOF);
2227 	}
2228 	if (error || cookies == NULL || ncookies == NULL) {
2229 		return error;
2230 	}
2231 
2232 	/* Update NFS-related variables, if any. */
2233 	off_t i, off = startoff;
2234 	tmpfs_dirent_t *de = NULL;
2235 
2236 	*cookies = malloc(cnt * sizeof(off_t), M_TEMP, M_WAITOK);
2237 	*ncookies = cnt;
2238 
2239 	for (i = 0; i < cnt; i++) {
2240 		KASSERT(off != TMPFS_DIRCOOKIE_EOF);
2241 		if (off != TMPFS_DIRCOOKIE_DOT) {
2242 			if (off == TMPFS_DIRCOOKIE_DOTDOT) {
2243 				de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir);
2244 			} else if (de != NULL) {
2245 				de = TAILQ_NEXT(de, td_entries);
2246 			} else {
2247 				de = tmpfs_dir_lookupbycookie(node, off);
2248 				KASSERT(de != NULL);
2249 				de = TAILQ_NEXT(de, td_entries);
2250 			}
2251 			if (de == NULL) {
2252 				off = TMPFS_DIRCOOKIE_EOF;
2253 			} else {
2254 				off = tmpfs_dircookie(de);
2255 			}
2256 		} else {
2257 			off = TMPFS_DIRCOOKIE_DOTDOT;
2258 		}
2259 		(*cookies)[i] = off;
2260 	}
2261 	KASSERT(uio->uio_offset == off);
2262 	return error;
2263 }
2264 
2265 int
2266 tmpfs_readlink(void *v)
2267 {
2268 	struct vop_readlink_args /* {
2269 		struct vnode	*a_vp;
2270 		struct uio	*a_uio;
2271 		kauth_cred_t	a_cred;
2272 	} */ *ap = v;
2273 	vnode_t *vp = ap->a_vp;
2274 	struct uio *uio = ap->a_uio;
2275 	tmpfs_node_t *node;
2276 	int error;
2277 
2278 	KASSERT(VOP_ISLOCKED(vp));
2279 	KASSERT(uio->uio_offset == 0);
2280 	KASSERT(vp->v_type == VLNK);
2281 
2282 	node = VP_TO_TMPFS_NODE(vp);
2283 	error = uiomove(node->tn_spec.tn_lnk.tn_link,
2284 	    MIN(node->tn_size, uio->uio_resid), uio);
2285 	node->tn_status |= TMPFS_NODE_ACCESSED;
2286 
2287 	return error;
2288 }
2289 
2290 int
2291 tmpfs_inactive(void *v)
2292 {
2293 	struct vop_inactive_args /* {
2294 		struct vnode *a_vp;
2295 		bool *a_recycle;
2296 	} */ *ap = v;
2297 	vnode_t *vp = ap->a_vp;
2298 	tmpfs_node_t *node;
2299 
2300 	KASSERT(VOP_ISLOCKED(vp));
2301 
2302 	node = VP_TO_TMPFS_NODE(vp);
2303 	*ap->a_recycle = (node->tn_links == 0);
2304 	VOP_UNLOCK(vp);
2305 
2306 	return 0;
2307 }
2308 
2309 int
2310 tmpfs_reclaim(void *v)
2311 {
2312 	struct vop_reclaim_args /* {
2313 		struct vnode *a_vp;
2314 	} */ *ap = v;
2315 	vnode_t *vp = ap->a_vp;
2316 	tmpfs_mount_t *tmp = VFS_TO_TMPFS(vp->v_mount);
2317 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
2318 	bool racing;
2319 
2320 	/* Disassociate inode from vnode. */
2321 	mutex_enter(&node->tn_vlock);
2322 	node->tn_vnode = NULL;
2323 	vp->v_data = NULL;
2324 	/* Check if tmpfs_vnode_get() is racing with us. */
2325 	racing = TMPFS_NODE_RECLAIMING(node);
2326 	mutex_exit(&node->tn_vlock);
2327 
2328 	/*
2329 	 * If inode is not referenced, i.e. no links, then destroy it.
2330 	 * Note: if racing - inode is about to get a new vnode, leave it.
2331 	 */
2332 	if (node->tn_links == 0 && !racing) {
2333 		tmpfs_free_node(tmp, node);
2334 	}
2335 	return 0;
2336 }
2337 
2338 int
2339 tmpfs_pathconf(void *v)
2340 {
2341 	struct vop_pathconf_args /* {
2342 		struct vnode	*a_vp;
2343 		int		a_name;
2344 		register_t	*a_retval;
2345 	} */ *ap = v;
2346 	const int name = ap->a_name;
2347 	register_t *retval = ap->a_retval;
2348 	int error = 0;
2349 
2350 	switch (name) {
2351 	case _PC_LINK_MAX:
2352 		*retval = LINK_MAX;
2353 		break;
2354 	case _PC_NAME_MAX:
2355 		*retval = NAME_MAX;
2356 		break;
2357 	case _PC_PATH_MAX:
2358 		*retval = PATH_MAX;
2359 		break;
2360 	case _PC_PIPE_BUF:
2361 		*retval = PIPE_BUF;
2362 		break;
2363 	case _PC_CHOWN_RESTRICTED:
2364 		*retval = 1;
2365 		break;
2366 	case _PC_NO_TRUNC:
2367 		*retval = 1;
2368 		break;
2369 	case _PC_SYNC_IO:
2370 		*retval = 1;
2371 		break;
2372 	case _PC_FILESIZEBITS:
2373 		*retval = sizeof(off_t) * CHAR_BIT;
2374 		break;
2375 	default:
2376 		error = EINVAL;
2377 	}
2378 	return error;
2379 }
2380 
2381 int
2382 tmpfs_advlock(void *v)
2383 {
2384 	struct vop_advlock_args /* {
2385 		struct vnode	*a_vp;
2386 		void *		a_id;
2387 		int		a_op;
2388 		struct flock	*a_fl;
2389 		int		a_flags;
2390 	} */ *ap = v;
2391 	vnode_t *vp = ap->a_vp;
2392 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
2393 
2394 	return lf_advlock(v, &node->tn_lockf, node->tn_size);
2395 }
2396 
2397 int
2398 tmpfs_getpages(void *v)
2399 {
2400 	struct vop_getpages_args /* {
2401 		struct vnode *a_vp;
2402 		voff_t a_offset;
2403 		struct vm_page **a_m;
2404 		int *a_count;
2405 		int a_centeridx;
2406 		vm_prot_t a_access_type;
2407 		int a_advice;
2408 		int a_flags;
2409 	} */ * const ap = v;
2410 	vnode_t *vp = ap->a_vp;
2411 	const voff_t offset = ap->a_offset;
2412 	struct vm_page **pgs = ap->a_m;
2413 	const int centeridx = ap->a_centeridx;
2414 	const vm_prot_t access_type = ap->a_access_type;
2415 	const int advice = ap->a_advice;
2416 	const int flags = ap->a_flags;
2417 	int error, npages = *ap->a_count;
2418 	tmpfs_node_t *node;
2419 	struct uvm_object *uobj;
2420 
2421 	KASSERT(vp->v_type == VREG);
2422 	KASSERT(mutex_owned(vp->v_interlock));
2423 
2424 	node = VP_TO_TMPFS_NODE(vp);
2425 	uobj = node->tn_spec.tn_reg.tn_aobj;
2426 
2427 	/*
2428 	 * Currently, PGO_PASTEOF is not supported.
2429 	 */
2430 	if (vp->v_size <= offset + (centeridx << PAGE_SHIFT)) {
2431 		if ((flags & PGO_LOCKED) == 0)
2432 			mutex_exit(vp->v_interlock);
2433 		return EINVAL;
2434 	}
2435 
2436 	if (vp->v_size < offset + (npages << PAGE_SHIFT)) {
2437 		npages = (round_page(vp->v_size) - offset) >> PAGE_SHIFT;
2438 	}
2439 
2440 	if ((flags & PGO_LOCKED) != 0)
2441 		return EBUSY;
2442 
2443 	if ((flags & PGO_NOTIMESTAMP) == 0) {
2444 		if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
2445 			node->tn_status |= TMPFS_NODE_ACCESSED;
2446 
2447 		if ((access_type & VM_PROT_WRITE) != 0)
2448 			node->tn_status |= TMPFS_NODE_MODIFIED;
2449 	}
2450 
2451 	/*
2452 	 * Invoke the pager.
2453 	 *
2454 	 * Clean the array of pages before.  XXX: PR/32166
2455 	 * Note that vnode lock is shared with underlying UVM object.
2456 	 */
2457 	if (pgs) {
2458 		memset(pgs, 0, sizeof(struct vm_pages *) * npages);
2459 	}
2460 	KASSERT(vp->v_interlock == uobj->vmobjlock);
2461 
2462 	error = (*uobj->pgops->pgo_get)(uobj, offset, pgs, &npages, centeridx,
2463 	    access_type, advice, flags | PGO_ALLPAGES);
2464 
2465 #if defined(DEBUG)
2466 	if (!error && pgs) {
2467 		for (int i = 0; i < npages; i++) {
2468 			KASSERT(pgs[i] != NULL);
2469 		}
2470 	}
2471 #endif
2472 	return error;
2473 }
2474 
2475 int
2476 tmpfs_putpages(void *v)
2477 {
2478 	struct vop_putpages_args /* {
2479 		struct vnode *a_vp;
2480 		voff_t a_offlo;
2481 		voff_t a_offhi;
2482 		int a_flags;
2483 	} */ * const ap = v;
2484 	vnode_t *vp = ap->a_vp;
2485 	const voff_t offlo = ap->a_offlo;
2486 	const voff_t offhi = ap->a_offhi;
2487 	const int flags = ap->a_flags;
2488 	tmpfs_node_t *node;
2489 	struct uvm_object *uobj;
2490 	int error;
2491 
2492 	KASSERT(mutex_owned(vp->v_interlock));
2493 
2494 	if (vp->v_type != VREG) {
2495 		mutex_exit(vp->v_interlock);
2496 		return 0;
2497 	}
2498 
2499 	node = VP_TO_TMPFS_NODE(vp);
2500 	uobj = node->tn_spec.tn_reg.tn_aobj;
2501 
2502 	KASSERT(vp->v_interlock == uobj->vmobjlock);
2503 	error = (*uobj->pgops->pgo_put)(uobj, offlo, offhi, flags);
2504 
2505 	/* XXX mtime */
2506 
2507 	return error;
2508 }
2509 
2510 #ifdef TMPFS_WHITEOUT
2511 int
2512 tmpfs_whiteout(void *v)
2513 {
2514 	struct vop_whiteout_args /* {
2515 		struct vnode		*a_dvp;
2516 		struct componentname	*a_cnp;
2517 		int			a_flags;
2518 	} */ *ap = v;
2519 	vnode_t *dvp = ap->a_dvp;
2520 	struct componentname *cnp = ap->a_cnp;
2521 	const int flags = ap->a_flags;
2522 	tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
2523 	tmpfs_dirent_t *de;
2524 	int error;
2525 
2526 	switch (flags) {
2527 	case LOOKUP:
2528 		break;
2529 	case CREATE:
2530 		error = tmpfs_alloc_dirent(tmp, cnp->cn_nameptr,
2531 		    cnp->cn_namelen, &de);
2532 		if (error)
2533 			return error;
2534 		tmpfs_dir_attach(dvp, de, TMPFS_NODE_WHITEOUT);
2535 		break;
2536 	case DELETE:
2537 		cnp->cn_flags &= ~DOWHITEOUT; /* when in doubt, cargo cult */
2538 		de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), cnp);
2539 		if (de == NULL)
2540 			return ENOENT;
2541 		tmpfs_dir_detach(dvp, de);
2542 		tmpfs_free_dirent(tmp, de);
2543 		break;
2544 	}
2545 	return 0;
2546 }
2547 #endif
2548 
2549 int
2550 tmpfs_print(void *v)
2551 {
2552 	struct vop_print_args /* {
2553 		struct vnode	*a_vp;
2554 	} */ *ap = v;
2555 	vnode_t *vp = ap->a_vp;
2556 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
2557 
2558 	printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%x, links %d\n"
2559 	    "\tmode 0%o, owner %d, group %d, size %" PRIdMAX ", status 0x%x",
2560 	    node, node->tn_flags, node->tn_links, node->tn_mode, node->tn_uid,
2561 	    node->tn_gid, (uintmax_t)node->tn_size, node->tn_status);
2562 	if (vp->v_type == VFIFO) {
2563 		VOCALL(fifo_vnodeop_p, VOFFSET(vop_print), v);
2564 	}
2565 	printf("\n");
2566 	return 0;
2567 }
2568