xref: /netbsd-src/sys/fs/tmpfs/tmpfs_vnops.c (revision b757af438b42b93f8c6571f026d8b8ef3eaf5fc9)
1 /*	$NetBSD: tmpfs_vnops.c,v 1.96 2012/03/13 18:40:50 elad Exp $	*/
2 
3 /*
4  * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
9  * 2005 program.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * tmpfs vnode interface.
35  */
36 
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: tmpfs_vnops.c,v 1.96 2012/03/13 18:40:50 elad Exp $");
39 
40 #include <sys/param.h>
41 #include <sys/dirent.h>
42 #include <sys/fcntl.h>
43 #include <sys/event.h>
44 #include <sys/malloc.h>
45 #include <sys/namei.h>
46 #include <sys/stat.h>
47 #include <sys/uio.h>
48 #include <sys/unistd.h>
49 #include <sys/vnode.h>
50 #include <sys/lockf.h>
51 #include <sys/kauth.h>
52 
53 #include <uvm/uvm.h>
54 
55 #include <miscfs/fifofs/fifo.h>
56 #include <miscfs/genfs/genfs.h>
57 #include <fs/tmpfs/tmpfs_vnops.h>
58 #include <fs/tmpfs/tmpfs.h>
59 
60 /*
61  * vnode operations vector used for files stored in a tmpfs file system.
62  */
63 int (**tmpfs_vnodeop_p)(void *);
64 const struct vnodeopv_entry_desc tmpfs_vnodeop_entries[] = {
65 	{ &vop_default_desc,		vn_default_error },
66 	{ &vop_lookup_desc,		tmpfs_lookup },
67 	{ &vop_create_desc,		tmpfs_create },
68 	{ &vop_mknod_desc,		tmpfs_mknod },
69 	{ &vop_open_desc,		tmpfs_open },
70 	{ &vop_close_desc,		tmpfs_close },
71 	{ &vop_access_desc,		tmpfs_access },
72 	{ &vop_getattr_desc,		tmpfs_getattr },
73 	{ &vop_setattr_desc,		tmpfs_setattr },
74 	{ &vop_read_desc,		tmpfs_read },
75 	{ &vop_write_desc,		tmpfs_write },
76 	{ &vop_ioctl_desc,		tmpfs_ioctl },
77 	{ &vop_fcntl_desc,		tmpfs_fcntl },
78 	{ &vop_poll_desc,		tmpfs_poll },
79 	{ &vop_kqfilter_desc,		tmpfs_kqfilter },
80 	{ &vop_revoke_desc,		tmpfs_revoke },
81 	{ &vop_mmap_desc,		tmpfs_mmap },
82 	{ &vop_fsync_desc,		tmpfs_fsync },
83 	{ &vop_seek_desc,		tmpfs_seek },
84 	{ &vop_remove_desc,		tmpfs_remove },
85 	{ &vop_link_desc,		tmpfs_link },
86 	{ &vop_rename_desc,		tmpfs_rename },
87 	{ &vop_mkdir_desc,		tmpfs_mkdir },
88 	{ &vop_rmdir_desc,		tmpfs_rmdir },
89 	{ &vop_symlink_desc,		tmpfs_symlink },
90 	{ &vop_readdir_desc,		tmpfs_readdir },
91 	{ &vop_readlink_desc,		tmpfs_readlink },
92 	{ &vop_abortop_desc,		tmpfs_abortop },
93 	{ &vop_inactive_desc,		tmpfs_inactive },
94 	{ &vop_reclaim_desc,		tmpfs_reclaim },
95 	{ &vop_lock_desc,		tmpfs_lock },
96 	{ &vop_unlock_desc,		tmpfs_unlock },
97 	{ &vop_bmap_desc,		tmpfs_bmap },
98 	{ &vop_strategy_desc,		tmpfs_strategy },
99 	{ &vop_print_desc,		tmpfs_print },
100 	{ &vop_pathconf_desc,		tmpfs_pathconf },
101 	{ &vop_islocked_desc,		tmpfs_islocked },
102 	{ &vop_advlock_desc,		tmpfs_advlock },
103 	{ &vop_bwrite_desc,		tmpfs_bwrite },
104 	{ &vop_getpages_desc,		tmpfs_getpages },
105 	{ &vop_putpages_desc,		tmpfs_putpages },
106 	{ &vop_whiteout_desc,		tmpfs_whiteout },
107 	{ NULL, NULL }
108 };
109 
110 const struct vnodeopv_desc tmpfs_vnodeop_opv_desc = {
111 	&tmpfs_vnodeop_p, tmpfs_vnodeop_entries
112 };
113 
114 /*
115  * tmpfs_lookup: path name traversal routine.
116  *
117  * Arguments: dvp (directory being searched), vpp (result),
118  * cnp (component name - path).
119  *
120  * => Caller holds a reference and lock on dvp.
121  * => We return looked-up vnode (vpp) locked, with a reference held.
122  */
123 int
124 tmpfs_lookup(void *v)
125 {
126 	struct vop_lookup_args /* {
127 		struct vnode *a_dvp;
128 		struct vnode **a_vpp;
129 		struct componentname *a_cnp;
130 	} */ *ap = v;
131 	vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
132 	struct componentname *cnp = ap->a_cnp;
133 	const bool lastcn = (cnp->cn_flags & ISLASTCN) != 0;
134 	tmpfs_node_t *dnode, *tnode;
135 	tmpfs_dirent_t *de;
136 	int error;
137 
138 	KASSERT(VOP_ISLOCKED(dvp));
139 
140 	dnode = VP_TO_TMPFS_DIR(dvp);
141 	*vpp = NULL;
142 
143 	/* Check accessibility of directory. */
144 	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred);
145 	if (error) {
146 		goto out;
147 	}
148 
149 	/*
150 	 * If requesting the last path component on a read-only file system
151 	 * with a write operation, deny it.
152 	 */
153 	if (lastcn && (dvp->v_mount->mnt_flag & MNT_RDONLY) != 0 &&
154 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
155 		error = EROFS;
156 		goto out;
157 	}
158 
159 	/*
160 	 * Avoid doing a linear scan of the directory if the requested
161 	 * directory/name couple is already in the cache.
162 	 */
163 	error = cache_lookup(dvp, vpp, cnp);
164 	if (error >= 0) {
165 		/* Both cache-hit or an error case. */
166 		goto out;
167 	}
168 
169 	if (cnp->cn_flags & ISDOTDOT) {
170 		tmpfs_node_t *pnode;
171 
172 		/*
173 		 * Lookup of ".." case.
174 		 */
175 		if (lastcn && cnp->cn_nameiop == RENAME) {
176 			error = EINVAL;
177 			goto out;
178 		}
179 		KASSERT(dnode->tn_type == VDIR);
180 		pnode = dnode->tn_spec.tn_dir.tn_parent;
181 		if (pnode == NULL) {
182 			error = ENOENT;
183 			goto out;
184 		}
185 
186 		/*
187 		 * Lock the parent tn_vlock before releasing the vnode lock,
188 		 * and thus prevents parent from disappearing.
189 		 */
190 		mutex_enter(&pnode->tn_vlock);
191 		VOP_UNLOCK(dvp);
192 
193 		/*
194 		 * Get a vnode of the '..' entry and re-acquire the lock.
195 		 * Release the tn_vlock.
196 		 */
197 		error = tmpfs_vnode_get(dvp->v_mount, pnode, vpp);
198 		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
199 		goto out;
200 
201 	} else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
202 		/*
203 		 * Lookup of "." case.
204 		 */
205 		if (lastcn && cnp->cn_nameiop == RENAME) {
206 			error = EISDIR;
207 			goto out;
208 		}
209 		vref(dvp);
210 		*vpp = dvp;
211 		error = 0;
212 		goto done;
213 	}
214 
215 	/*
216 	 * Other lookup cases: perform directory scan.
217 	 */
218 	de = tmpfs_dir_lookup(dnode, cnp);
219 	if (de == NULL || de->td_node == TMPFS_NODE_WHITEOUT) {
220 		/*
221 		 * The entry was not found in the directory.  This is valid
222 		 * if we are creating or renaming an entry and are working
223 		 * on the last component of the path name.
224 		 */
225 		if (lastcn && (cnp->cn_nameiop == CREATE ||
226 		    cnp->cn_nameiop == RENAME)) {
227 			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
228 			if (error) {
229 				goto out;
230 			}
231 			error = EJUSTRETURN;
232 		} else {
233 			error = ENOENT;
234 		}
235 		if (de) {
236 			KASSERT(de->td_node == TMPFS_NODE_WHITEOUT);
237 			cnp->cn_flags |= ISWHITEOUT;
238 		}
239 		goto done;
240 	}
241 
242 	tnode = de->td_node;
243 
244 	/*
245 	 * If it is not the last path component and found a non-directory
246 	 * or non-link entry (which may itself be pointing to a directory),
247 	 * raise an error.
248 	 */
249 	if (!lastcn && tnode->tn_type != VDIR && tnode->tn_type != VLNK) {
250 		error = ENOTDIR;
251 		goto out;
252 	}
253 
254 	/* Check the permissions. */
255 	if (lastcn && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
256 		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
257 		if (error)
258 			goto out;
259 
260 		if ((dnode->tn_mode & S_ISTXT) != 0) {
261 			error = kauth_authorize_vnode(cnp->cn_cred,
262 			    KAUTH_VNODE_DELETE, tnode->tn_vnode,
263 			    dnode->tn_vnode, genfs_can_sticky(cnp->cn_cred,
264 			    dnode->tn_uid, tnode->tn_uid));
265 			if (error) {
266 				error = EPERM;
267 				goto out;
268 			}
269 		}
270 	}
271 
272 	/* Get a vnode for the matching entry. */
273 	mutex_enter(&tnode->tn_vlock);
274 	error = tmpfs_vnode_get(dvp->v_mount, tnode, vpp);
275 done:
276 	/*
277 	 * Cache the result, unless request was for creation (as it does
278 	 * not improve the performance).
279 	 */
280 	if ((cnp->cn_flags & MAKEENTRY) != 0 && cnp->cn_nameiop != CREATE) {
281 		cache_enter(dvp, *vpp, cnp);
282 	}
283 out:
284 	KASSERT((*vpp && VOP_ISLOCKED(*vpp)) || error);
285 	KASSERT(VOP_ISLOCKED(dvp));
286 
287 	return error;
288 }
289 
290 int
291 tmpfs_create(void *v)
292 {
293 	struct vop_create_args /* {
294 		struct vnode		*a_dvp;
295 		struct vnode		**a_vpp;
296 		struct componentname	*a_cnp;
297 		struct vattr		*a_vap;
298 	} */ *ap = v;
299 	vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
300 	struct componentname *cnp = ap->a_cnp;
301 	struct vattr *vap = ap->a_vap;
302 
303 	KASSERT(VOP_ISLOCKED(dvp));
304 	KASSERT(vap->va_type == VREG || vap->va_type == VSOCK);
305 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
306 }
307 
308 int
309 tmpfs_mknod(void *v)
310 {
311 	struct vop_mknod_args /* {
312 		struct vnode		*a_dvp;
313 		struct vnode		**a_vpp;
314 		struct componentname	*a_cnp;
315 		struct vattr		*a_vap;
316 	} */ *ap = v;
317 	vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
318 	struct componentname *cnp = ap->a_cnp;
319 	struct vattr *vap = ap->a_vap;
320 	enum vtype vt = vap->va_type;
321 
322 	if (vt != VBLK && vt != VCHR && vt != VFIFO) {
323 		vput(dvp);
324 		return EINVAL;
325 	}
326 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
327 }
328 
329 int
330 tmpfs_open(void *v)
331 {
332 	struct vop_open_args /* {
333 		struct vnode	*a_vp;
334 		int		a_mode;
335 		kauth_cred_t	a_cred;
336 	} */ *ap = v;
337 	vnode_t *vp = ap->a_vp;
338 	mode_t mode = ap->a_mode;
339 	tmpfs_node_t *node;
340 
341 	KASSERT(VOP_ISLOCKED(vp));
342 
343 	node = VP_TO_TMPFS_NODE(vp);
344 	if (node->tn_links < 1) {
345 		/*
346 		 * The file is still active, but all its names have been
347 		 * removed (e.g. by a "rmdir $(pwd)").  It cannot be opened
348 		 * any more, as it is about to be destroyed.
349 		 */
350 		return ENOENT;
351 	}
352 
353 	/* If the file is marked append-only, deny write requests. */
354 	if ((node->tn_flags & APPEND) != 0 &&
355 	    (mode & (FWRITE | O_APPEND)) == FWRITE) {
356 		return EPERM;
357 	}
358 	return 0;
359 }
360 
361 int
362 tmpfs_close(void *v)
363 {
364 	struct vop_close_args /* {
365 		struct vnode	*a_vp;
366 		int		a_fflag;
367 		kauth_cred_t	a_cred;
368 	} */ *ap = v;
369 	vnode_t *vp = ap->a_vp;
370 
371 	KASSERT(VOP_ISLOCKED(vp));
372 
373 	tmpfs_update(vp, NULL, NULL, NULL, UPDATE_CLOSE);
374 	return 0;
375 }
376 
377 int
378 tmpfs_access(void *v)
379 {
380 	struct vop_access_args /* {
381 		struct vnode	*a_vp;
382 		int		a_mode;
383 		kauth_cred_t	a_cred;
384 	} */ *ap = v;
385 	vnode_t *vp = ap->a_vp;
386 	mode_t mode = ap->a_mode;
387 	kauth_cred_t cred = ap->a_cred;
388 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
389 	const bool writing = (mode & VWRITE) != 0;
390 
391 	KASSERT(VOP_ISLOCKED(vp));
392 
393 	/* Possible? */
394 	switch (vp->v_type) {
395 	case VDIR:
396 	case VLNK:
397 	case VREG:
398 		if (writing && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) {
399 			return EROFS;
400 		}
401 		break;
402 	case VBLK:
403 	case VCHR:
404 	case VSOCK:
405 	case VFIFO:
406 		break;
407 	default:
408 		return EINVAL;
409 	}
410 	if (writing && (node->tn_flags & IMMUTABLE) != 0) {
411 		return EPERM;
412 	}
413 
414 	return kauth_authorize_vnode(cred, kauth_access_action(mode,
415 	    vp->v_type, node->tn_mode), vp, NULL, genfs_can_access(vp->v_type,
416 	    node->tn_mode, node->tn_uid, node->tn_gid, mode, cred));
417 }
418 
419 int
420 tmpfs_getattr(void *v)
421 {
422 	struct vop_getattr_args /* {
423 		struct vnode	*a_vp;
424 		struct vattr	*a_vap;
425 		kauth_cred_t	a_cred;
426 	} */ *ap = v;
427 	vnode_t *vp = ap->a_vp;
428 	struct vattr *vap = ap->a_vap;
429 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
430 
431 	vattr_null(vap);
432 
433 	tmpfs_update(vp, NULL, NULL, NULL, 0);
434 
435 	vap->va_type = vp->v_type;
436 	vap->va_mode = node->tn_mode;
437 	vap->va_nlink = node->tn_links;
438 	vap->va_uid = node->tn_uid;
439 	vap->va_gid = node->tn_gid;
440 	vap->va_fsid = vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
441 	vap->va_fileid = node->tn_id;
442 	vap->va_size = node->tn_size;
443 	vap->va_blocksize = PAGE_SIZE;
444 	vap->va_atime = node->tn_atime;
445 	vap->va_mtime = node->tn_mtime;
446 	vap->va_ctime = node->tn_ctime;
447 	vap->va_birthtime = node->tn_birthtime;
448 	vap->va_gen = TMPFS_NODE_GEN(node);
449 	vap->va_flags = node->tn_flags;
450 	vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
451 	    node->tn_spec.tn_dev.tn_rdev : VNOVAL;
452 	vap->va_bytes = round_page(node->tn_size);
453 	vap->va_filerev = VNOVAL;
454 	vap->va_vaflags = 0;
455 	vap->va_spare = VNOVAL; /* XXX */
456 
457 	return 0;
458 }
459 
460 #define GOODTIME(tv)	((tv)->tv_sec != VNOVAL || (tv)->tv_nsec != VNOVAL)
461 /* XXX Should this operation be atomic?  I think it should, but code in
462  * XXX other places (e.g., ufs) doesn't seem to be... */
463 int
464 tmpfs_setattr(void *v)
465 {
466 	struct vop_setattr_args /* {
467 		struct vnode	*a_vp;
468 		struct vattr	*a_vap;
469 		kauth_cred_t	a_cred;
470 	} */ *ap = v;
471 	vnode_t *vp = ap->a_vp;
472 	struct vattr *vap = ap->a_vap;
473 	kauth_cred_t cred = ap->a_cred;
474 	lwp_t *l = curlwp;
475 	int error = 0;
476 
477 	KASSERT(VOP_ISLOCKED(vp));
478 
479 	/* Abort if any unsettable attribute is given. */
480 	if (vap->va_type != VNON || vap->va_nlink != VNOVAL ||
481 	    vap->va_fsid != VNOVAL || vap->va_fileid != VNOVAL ||
482 	    vap->va_blocksize != VNOVAL || GOODTIME(&vap->va_ctime) ||
483 	    vap->va_gen != VNOVAL || vap->va_rdev != VNOVAL ||
484 	    vap->va_bytes != VNOVAL) {
485 		return EINVAL;
486 	}
487 	if (error == 0 && (vap->va_flags != VNOVAL))
488 		error = tmpfs_chflags(vp, vap->va_flags, cred, l);
489 
490 	if (error == 0 && (vap->va_size != VNOVAL))
491 		error = tmpfs_chsize(vp, vap->va_size, cred, l);
492 
493 	if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL))
494 		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred, l);
495 
496 	if (error == 0 && (vap->va_mode != VNOVAL))
497 		error = tmpfs_chmod(vp, vap->va_mode, cred, l);
498 
499 	if (error == 0 && (GOODTIME(&vap->va_atime) || GOODTIME(&vap->va_mtime)
500 	    || GOODTIME(&vap->va_birthtime))) {
501 		error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime,
502 		    &vap->va_birthtime, vap->va_vaflags, cred, l);
503 		if (error == 0)
504 			return 0;
505 	}
506 	tmpfs_update(vp, NULL, NULL, NULL, 0);
507 	return error;
508 }
509 
510 int
511 tmpfs_read(void *v)
512 {
513 	struct vop_read_args /* {
514 		struct vnode *a_vp;
515 		struct uio *a_uio;
516 		int a_ioflag;
517 		kauth_cred_t a_cred;
518 	} */ *ap = v;
519 	vnode_t *vp = ap->a_vp;
520 	struct uio *uio = ap->a_uio;
521 	const int ioflag = ap->a_ioflag;
522 	tmpfs_node_t *node;
523 	struct uvm_object *uobj;
524 	int error;
525 
526 	KASSERT(VOP_ISLOCKED(vp));
527 
528 	if (vp->v_type != VREG) {
529 		return EISDIR;
530 	}
531 	if (uio->uio_offset < 0) {
532 		return EINVAL;
533 	}
534 
535 	node = VP_TO_TMPFS_NODE(vp);
536 	node->tn_status |= TMPFS_NODE_ACCESSED;
537 	uobj = node->tn_spec.tn_reg.tn_aobj;
538 	error = 0;
539 
540 	while (error == 0 && uio->uio_resid > 0) {
541 		vsize_t len;
542 
543 		if (node->tn_size <= uio->uio_offset) {
544 			break;
545 		}
546 		len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
547 		if (len == 0) {
548 			break;
549 		}
550 		error = ubc_uiomove(uobj, uio, len, IO_ADV_DECODE(ioflag),
551 		    UBC_READ | UBC_PARTIALOK | UBC_UNMAP_FLAG(vp));
552 	}
553 	return error;
554 }
555 
556 int
557 tmpfs_write(void *v)
558 {
559 	struct vop_write_args /* {
560 		struct vnode	*a_vp;
561 		struct uio	*a_uio;
562 		int		a_ioflag;
563 		kauth_cred_t	a_cred;
564 	} */ *ap = v;
565 	vnode_t *vp = ap->a_vp;
566 	struct uio *uio = ap->a_uio;
567 	const int ioflag = ap->a_ioflag;
568 	tmpfs_node_t *node;
569 	struct uvm_object *uobj;
570 	off_t oldsize;
571 	bool extended;
572 	int error;
573 
574 	KASSERT(VOP_ISLOCKED(vp));
575 
576 	node = VP_TO_TMPFS_NODE(vp);
577 	oldsize = node->tn_size;
578 
579 	if (uio->uio_offset < 0 || vp->v_type != VREG) {
580 		error = EINVAL;
581 		goto out;
582 	}
583 	if (uio->uio_resid == 0) {
584 		error = 0;
585 		goto out;
586 	}
587 	if (ioflag & IO_APPEND) {
588 		uio->uio_offset = node->tn_size;
589 	}
590 
591 	extended = uio->uio_offset + uio->uio_resid > node->tn_size;
592 	if (extended) {
593 		error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid);
594 		if (error)
595 			goto out;
596 	}
597 
598 	uobj = node->tn_spec.tn_reg.tn_aobj;
599 	error = 0;
600 	while (error == 0 && uio->uio_resid > 0) {
601 		vsize_t len;
602 
603 		len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
604 		if (len == 0) {
605 			break;
606 		}
607 		error = ubc_uiomove(uobj, uio, len, IO_ADV_DECODE(ioflag),
608 		    UBC_WRITE | UBC_UNMAP_FLAG(vp));
609 	}
610 	if (error) {
611 		(void)tmpfs_reg_resize(vp, oldsize);
612 	}
613 
614 	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED |
615 	    (extended ? TMPFS_NODE_CHANGED : 0);
616 	VN_KNOTE(vp, NOTE_WRITE);
617 out:
618 	if (error) {
619 		KASSERT(oldsize == node->tn_size);
620 	} else {
621 		KASSERT(uio->uio_resid == 0);
622 	}
623 	return error;
624 }
625 
626 int
627 tmpfs_fsync(void *v)
628 {
629 	struct vop_fsync_args /* {
630 		struct vnode *a_vp;
631 		kauth_cred_t a_cred;
632 		int a_flags;
633 		off_t a_offlo;
634 		off_t a_offhi;
635 		struct lwp *a_l;
636 	} */ *ap = v;
637 	vnode_t *vp = ap->a_vp;
638 
639 	/* Nothing to do.  Just update. */
640 	KASSERT(VOP_ISLOCKED(vp));
641 	tmpfs_update(vp, NULL, NULL, NULL, 0);
642 	return 0;
643 }
644 
645 /*
646  * tmpfs_remove: unlink a file.
647  *
648  * => Both directory (dvp) and file (vp) are locked.
649  * => We unlock and drop the reference on both.
650  */
651 int
652 tmpfs_remove(void *v)
653 {
654 	struct vop_remove_args /* {
655 		struct vnode *a_dvp;
656 		struct vnode *a_vp;
657 		struct componentname *a_cnp;
658 	} */ *ap = v;
659 	vnode_t *dvp = ap->a_dvp, *vp = ap->a_vp;
660 	tmpfs_node_t *node;
661 	tmpfs_dirent_t *de;
662 	int error;
663 
664 	KASSERT(VOP_ISLOCKED(dvp));
665 	KASSERT(VOP_ISLOCKED(vp));
666 
667 	if (vp->v_type == VDIR) {
668 		error = EPERM;
669 		goto out;
670 	}
671 	node = VP_TO_TMPFS_NODE(vp);
672 
673 	/* Files marked as immutable or append-only cannot be deleted. */
674 	if (node->tn_flags & (IMMUTABLE | APPEND)) {
675 		error = EPERM;
676 		goto out;
677 	}
678 
679 	/* Lookup the directory entry (check the cached hint first). */
680 	de = tmpfs_dir_cached(node);
681 	if (de == NULL) {
682 		tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
683 		struct componentname *cnp = ap->a_cnp;
684 		de = tmpfs_dir_lookup(dnode, cnp);
685 	}
686 	KASSERT(de && de->td_node == node);
687 
688 	/*
689 	 * Remove the entry from the directory (drops the link count) and
690 	 * destroy it or replace it with a whiteout.
691 	 * Note: the inode referred by it will not be destroyed
692 	 * until the vnode is reclaimed/recycled.
693 	 */
694 	tmpfs_dir_detach(dvp, de);
695 	if (ap->a_cnp->cn_flags & DOWHITEOUT)
696 		tmpfs_dir_attach(dvp, de, TMPFS_NODE_WHITEOUT);
697 	else
698 		tmpfs_free_dirent(VFS_TO_TMPFS(vp->v_mount), de);
699 	error = 0;
700 out:
701 	/* Drop the references and unlock the vnodes. */
702 	vput(vp);
703 	if (dvp == vp) {
704 		vrele(dvp);
705 	} else {
706 		vput(dvp);
707 	}
708 	return error;
709 }
710 
711 /*
712  * tmpfs_link: create a hard link.
713  */
714 int
715 tmpfs_link(void *v)
716 {
717 	struct vop_link_args /* {
718 		struct vnode *a_dvp;
719 		struct vnode *a_vp;
720 		struct componentname *a_cnp;
721 	} */ *ap = v;
722 	vnode_t *dvp = ap->a_dvp;
723 	vnode_t *vp = ap->a_vp;
724 	struct componentname *cnp = ap->a_cnp;
725 	tmpfs_node_t *dnode, *node;
726 	tmpfs_dirent_t *de;
727 	int error;
728 
729 	KASSERT(dvp != vp);
730 	KASSERT(VOP_ISLOCKED(dvp));
731 	KASSERT(vp->v_type != VDIR);
732 	KASSERT(dvp->v_mount == vp->v_mount);
733 
734 	dnode = VP_TO_TMPFS_DIR(dvp);
735 	node = VP_TO_TMPFS_NODE(vp);
736 
737 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
738 
739 	/* Check for maximum number of links limit. */
740 	if (node->tn_links == LINK_MAX) {
741 		error = EMLINK;
742 		goto out;
743 	}
744 	KASSERT(node->tn_links < LINK_MAX);
745 
746 	/* We cannot create links of files marked immutable or append-only. */
747 	if (node->tn_flags & (IMMUTABLE | APPEND)) {
748 		error = EPERM;
749 		goto out;
750 	}
751 
752 	/* Allocate a new directory entry to represent the inode. */
753 	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount),
754 	    cnp->cn_nameptr, cnp->cn_namelen, &de);
755 	if (error) {
756 		goto out;
757 	}
758 
759 	/*
760 	 * Insert the entry into the directory.
761 	 * It will increase the inode link count.
762 	 */
763 	tmpfs_dir_attach(dvp, de, node);
764 
765 	/* Update the timestamps and trigger the event. */
766 	if (node->tn_vnode) {
767 		VN_KNOTE(node->tn_vnode, NOTE_LINK);
768 	}
769 	node->tn_status |= TMPFS_NODE_CHANGED;
770 	tmpfs_update(vp, NULL, NULL, NULL, 0);
771 	error = 0;
772 out:
773 	VOP_UNLOCK(vp);
774 	vput(dvp);
775 	return error;
776 }
777 
778 /*
779  * tmpfs_rename: rename routine, the hairiest system call, with the
780  * insane API.
781  *
782  * Arguments: fdvp (from-parent vnode), fvp (from-leaf), tdvp (to-parent)
783  * and tvp (to-leaf), if exists (NULL if not).
784  *
785  * => Caller holds a reference on fdvp and fvp, they are unlocked.
786  *    Note: fdvp and fvp can refer to the same object (i.e. when it is root).
787  *
788  * => Both tdvp and tvp are referenced and locked.  It is our responsibility
789  *    to release the references and unlock them (or destroy).
790  */
791 
792 /*
793  * First, some forward declarations of subroutines.
794  */
795 
796 static int tmpfs_sane_rename(struct vnode *, struct componentname *,
797     struct vnode *, struct componentname *, kauth_cred_t, bool);
798 static int tmpfs_rename_enter(struct mount *, struct tmpfs_mount *,
799     kauth_cred_t,
800     struct vnode *, struct tmpfs_node *, struct componentname *,
801     struct tmpfs_dirent **, struct vnode **,
802     struct vnode *, struct tmpfs_node *, struct componentname *,
803     struct tmpfs_dirent **, struct vnode **);
804 static int tmpfs_rename_enter_common(struct mount *, struct tmpfs_mount *,
805     kauth_cred_t,
806     struct vnode *, struct tmpfs_node *,
807     struct componentname *, struct tmpfs_dirent **, struct vnode **,
808     struct componentname *, struct tmpfs_dirent **, struct vnode **);
809 static int tmpfs_rename_enter_separate(struct mount *, struct tmpfs_mount *,
810     kauth_cred_t,
811     struct vnode *, struct tmpfs_node *, struct componentname *,
812     struct tmpfs_dirent **, struct vnode **,
813     struct vnode *, struct tmpfs_node *, struct componentname *,
814     struct tmpfs_dirent **, struct vnode **);
815 static void tmpfs_rename_exit(struct tmpfs_mount *,
816     struct vnode *, struct vnode *, struct vnode *, struct vnode *);
817 static int tmpfs_rename_lock_directory(struct vnode *, struct tmpfs_node *);
818 static int tmpfs_rename_genealogy(struct tmpfs_node *, struct tmpfs_node *,
819     struct tmpfs_node **);
820 static int tmpfs_rename_lock(struct mount *, kauth_cred_t, int,
821     struct vnode *, struct tmpfs_node *, struct componentname *, bool,
822     struct tmpfs_dirent **, struct vnode **,
823     struct vnode *, struct tmpfs_node *, struct componentname *, bool,
824     struct tmpfs_dirent **, struct vnode **);
825 static void tmpfs_rename_attachdetach(struct tmpfs_mount *,
826     struct vnode *, struct tmpfs_dirent *, struct vnode *,
827     struct vnode *, struct tmpfs_dirent *, struct vnode *);
828 static int tmpfs_do_remove(struct tmpfs_mount *, struct vnode *,
829     struct tmpfs_node *, struct tmpfs_dirent *, struct vnode *, kauth_cred_t);
830 static int tmpfs_rename_check_possible(struct tmpfs_node *,
831     struct tmpfs_node *, struct tmpfs_node *, struct tmpfs_node *);
832 static int tmpfs_rename_check_permitted(kauth_cred_t,
833     struct tmpfs_node *, struct tmpfs_node *,
834     struct tmpfs_node *, struct tmpfs_node *);
835 static int tmpfs_remove_check_possible(struct tmpfs_node *,
836     struct tmpfs_node *);
837 static int tmpfs_remove_check_permitted(kauth_cred_t,
838     struct tmpfs_node *, struct tmpfs_node *);
839 static int tmpfs_check_sticky(kauth_cred_t,
840     struct tmpfs_node *, struct tmpfs_node *);
841 
842 int
843 tmpfs_rename(void *v)
844 {
845 	struct vop_rename_args  /* {
846 		struct vnode		*a_fdvp;
847 		struct vnode		*a_fvp;
848 		struct componentname	*a_fcnp;
849 		struct vnode		*a_tdvp;
850 		struct vnode		*a_tvp;
851 		struct componentname	*a_tcnp;
852 	} */ *ap = v;
853 	struct vnode *fdvp = ap->a_fdvp;
854 	struct vnode *fvp = ap->a_fvp;
855 	struct componentname *fcnp = ap->a_fcnp;
856 	struct vnode *tdvp = ap->a_tdvp;
857 	struct vnode *tvp = ap->a_tvp;
858 	struct componentname *tcnp = ap->a_tcnp;
859 	kauth_cred_t cred;
860 	int error;
861 
862 	KASSERT(fdvp != NULL);
863 	KASSERT(fvp != NULL);
864 	KASSERT(fcnp != NULL);
865 	KASSERT(fcnp->cn_nameptr != NULL);
866 	KASSERT(tdvp != NULL);
867 	KASSERT(tcnp != NULL);
868 	KASSERT(fcnp->cn_nameptr != NULL);
869 	/* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
870 	/* KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
871 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
872 	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
873 	KASSERT(fdvp->v_type == VDIR);
874 	KASSERT(tdvp->v_type == VDIR);
875 
876 	cred = fcnp->cn_cred;
877 	KASSERT(tcnp->cn_cred == cred);
878 
879 	/*
880 	 * Sanitize our world from the VFS insanity.  Unlock the target
881 	 * directory and node, which are locked.  Release the children,
882 	 * which are referenced.  Check for rename("x", "y/."), which
883 	 * it is our responsibility to reject, not the caller's.  (But
884 	 * the caller does reject rename("x/.", "y").  Go figure.)
885 	 */
886 
887 	VOP_UNLOCK(tdvp);
888 	if ((tvp != NULL) && (tvp != tdvp))
889 		VOP_UNLOCK(tvp);
890 
891 	vrele(fvp);
892 	if (tvp != NULL)
893 		vrele(tvp);
894 
895 	if (tvp == tdvp) {
896 		error = EINVAL;
897 		goto out;
898 	}
899 
900 	error = tmpfs_sane_rename(fdvp, fcnp, tdvp, tcnp, cred, false);
901 
902 out:	/*
903 	 * All done, whether with success or failure.  Release the
904 	 * directory nodes now, as the caller expects from the VFS
905 	 * protocol.
906 	 */
907 	vrele(fdvp);
908 	vrele(tdvp);
909 
910 	return error;
911 }
912 
913 /*
914  * tmpfs_sane_rename: rename routine, the hairiest system call, with
915  * the sane API.
916  *
917  * Arguments:
918  *
919  * . fdvp (from directory vnode),
920  * . fcnp (from component name),
921  * . tdvp (to directory vnode), and
922  * . tcnp (to component name).
923  *
924  * fdvp and tdvp must be referenced and unlocked.
925  */
926 static int
927 tmpfs_sane_rename(struct vnode *fdvp, struct componentname *fcnp,
928     struct vnode *tdvp, struct componentname *tcnp, kauth_cred_t cred,
929     bool posixly_correct)
930 {
931 	struct mount *mount;
932 	struct tmpfs_mount *tmpfs;
933 	struct tmpfs_node *fdnode, *tdnode;
934 	struct tmpfs_dirent *fde, *tde;
935 	struct vnode *fvp, *tvp;
936 	char *newname;
937 	int error;
938 
939 	KASSERT(fdvp != NULL);
940 	KASSERT(fcnp != NULL);
941 	KASSERT(tdvp != NULL);
942 	KASSERT(tcnp != NULL);
943 	/* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
944 	/* KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
945 	KASSERT(fdvp->v_type == VDIR);
946 	KASSERT(tdvp->v_type == VDIR);
947 	KASSERT(fdvp->v_mount == tdvp->v_mount);
948 	KASSERT((fcnp->cn_flags & ISDOTDOT) == 0);
949 	KASSERT((tcnp->cn_flags & ISDOTDOT) == 0);
950 	KASSERT((fcnp->cn_namelen != 1) || (fcnp->cn_nameptr[0] != '.'));
951 	KASSERT((tcnp->cn_namelen != 1) || (tcnp->cn_nameptr[0] != '.'));
952 	KASSERT((fcnp->cn_namelen != 2) || (fcnp->cn_nameptr[0] != '.') ||
953 	    (fcnp->cn_nameptr[1] != '.'));
954 	KASSERT((tcnp->cn_namelen != 2) || (tcnp->cn_nameptr[0] != '.') ||
955 	    (tcnp->cn_nameptr[1] != '.'));
956 
957 	/*
958 	 * Pull out the tmpfs data structures.
959 	 */
960 	fdnode = VP_TO_TMPFS_NODE(fdvp);
961 	tdnode = VP_TO_TMPFS_NODE(tdvp);
962 	KASSERT(fdnode != NULL);
963 	KASSERT(tdnode != NULL);
964 	KASSERT(fdnode->tn_vnode == fdvp);
965 	KASSERT(tdnode->tn_vnode == tdvp);
966 	KASSERT(fdnode->tn_type == VDIR);
967 	KASSERT(tdnode->tn_type == VDIR);
968 
969 	mount = fdvp->v_mount;
970 	KASSERT(mount != NULL);
971 	KASSERT(mount == tdvp->v_mount);
972 	/* XXX How can we be sure this stays true?  (Not that you're
973 	 * likely to mount a tmpfs read-only...)  */
974 	KASSERT((mount->mnt_flag & MNT_RDONLY) == 0);
975 	tmpfs = VFS_TO_TMPFS(mount);
976 	KASSERT(tmpfs != NULL);
977 
978 	/*
979 	 * Decide whether we need a new name, and allocate memory for
980 	 * it if so.  Do this before locking anything or taking
981 	 * destructive actions so that we can back out safely and sleep
982 	 * safely.  XXX Is sleeping an issue here?  Can this just be
983 	 * moved into tmpfs_rename_attachdetach?
984 	 */
985 	if (tmpfs_strname_neqlen(fcnp, tcnp)) {
986 		newname = tmpfs_strname_alloc(tmpfs, tcnp->cn_namelen);
987 		if (newname == NULL) {
988 			error = ENOSPC;
989 			goto out_unlocked;
990 		}
991 	} else {
992 		newname = NULL;
993 	}
994 
995 	/*
996 	 * Lock and look up everything.  GCC is not very clever.
997 	 */
998 	fde = tde = NULL;
999 	fvp = tvp = NULL;
1000 	error = tmpfs_rename_enter(mount, tmpfs, cred,
1001 	    fdvp, fdnode, fcnp, &fde, &fvp,
1002 	    tdvp, tdnode, tcnp, &tde, &tvp);
1003 	if (error)
1004 		goto out_unlocked;
1005 
1006 	/*
1007 	 * Check that everything is locked and looks right.
1008 	 */
1009 	KASSERT(fde != NULL);
1010 	KASSERT(fvp != NULL);
1011 	KASSERT(fde->td_node != NULL);
1012 	KASSERT(fde->td_node->tn_vnode == fvp);
1013 	KASSERT(fde->td_node->tn_type == fvp->v_type);
1014 	KASSERT((tde == NULL) == (tvp == NULL));
1015 	KASSERT((tde == NULL) || (tde->td_node != NULL));
1016 	KASSERT((tde == NULL) || (tde->td_node->tn_vnode == tvp));
1017 	KASSERT((tde == NULL) || (tde->td_node->tn_type == tvp->v_type));
1018 	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
1019 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
1020 	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
1021 	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
1022 
1023 	/*
1024 	 * If the source and destination are the same object, we need
1025 	 * only at most delete the source entry.
1026 	 */
1027 	if (fvp == tvp) {
1028 		KASSERT(tvp != NULL);
1029 		if (fde->td_node->tn_type == VDIR) {
1030 			/* XXX How can this possibly happen?  */
1031 			error = EINVAL;
1032 			goto out_locked;
1033 		}
1034 		if (!posixly_correct && (fde != tde)) {
1035 			/* XXX Doesn't work because of locking.
1036 			 * error = VOP_REMOVE(fdvp, fvp);
1037 			 */
1038 			error = tmpfs_do_remove(tmpfs, fdvp, fdnode, fde, fvp,
1039 			    cred);
1040 			if (error)
1041 				goto out_locked;
1042 		}
1043 		goto success;
1044 	}
1045 	KASSERT(fde != tde);
1046 	KASSERT(fvp != tvp);
1047 
1048 	/*
1049 	 * If the target exists, refuse to rename a directory over a
1050 	 * non-directory or vice versa, or to clobber a non-empty
1051 	 * directory.
1052 	 */
1053 	if (tvp != NULL) {
1054 		KASSERT(tde != NULL);
1055 		KASSERT(tde->td_node != NULL);
1056 		if (fvp->v_type == VDIR && tvp->v_type == VDIR)
1057 			error = ((tde->td_node->tn_size > 0)? ENOTEMPTY : 0);
1058 		else if (fvp->v_type == VDIR && tvp->v_type != VDIR)
1059 			error = ENOTDIR;
1060 		else if (fvp->v_type != VDIR && tvp->v_type == VDIR)
1061 			error = EISDIR;
1062 		else
1063 			error = 0;
1064 		if (error)
1065 			goto out_locked;
1066 		KASSERT((fvp->v_type == VDIR) == (tvp->v_type == VDIR));
1067 	}
1068 
1069 	/*
1070 	 * Authorize the rename.
1071 	 */
1072 	error = tmpfs_rename_check_possible(fdnode, fde->td_node,
1073 	    tdnode, (tde? tde->td_node : NULL));
1074 	if (error)
1075 		goto out_locked;
1076 	error = tmpfs_rename_check_permitted(cred, fdnode, fde->td_node,
1077 	    tdnode, (tde? tde->td_node : NULL));
1078 	error = kauth_authorize_vnode(cred, KAUTH_VNODE_DELETE, fvp, fdvp,
1079 	    error);
1080 	error = kauth_authorize_vnode(cred, KAUTH_VNODE_RENAME, tvp, tdvp,
1081 	    error);
1082 	if (error)
1083 		goto out_locked;
1084 
1085 	/*
1086 	 * Everything is hunky-dory.  Shuffle the directory entries.
1087 	 */
1088 	tmpfs_rename_attachdetach(tmpfs, fdvp, fde, fvp, tdvp, tde, tvp);
1089 
1090 	/*
1091 	 * Update the directory entry's name if necessary, and flag
1092 	 * metadata updates.  A memory allocation failure here is not
1093 	 * OK because we've already committed some changes that we
1094 	 * can't back out at this point, and we have things locked so
1095 	 * we can't sleep, hence the early allocation above.
1096 	 */
1097 	if (newname != NULL) {
1098 		KASSERT(tcnp->cn_namelen <= TMPFS_MAXNAMLEN);
1099 
1100 		tmpfs_strname_free(tmpfs, fde->td_name, fde->td_namelen);
1101 		fde->td_namelen = (uint16_t)tcnp->cn_namelen;
1102 		(void)memcpy(newname, tcnp->cn_nameptr, tcnp->cn_namelen);
1103 		/* Commit newname and don't free it on the way out.  */
1104 		fde->td_name = newname;
1105 		newname = NULL;
1106 
1107 		fde->td_node->tn_status |= TMPFS_NODE_CHANGED;
1108 		tdnode->tn_status |= TMPFS_NODE_MODIFIED;
1109 	}
1110 
1111 success:
1112 	VN_KNOTE(fvp, NOTE_RENAME);
1113 	error = 0;
1114 
1115 out_locked:
1116 	tmpfs_rename_exit(tmpfs, fdvp, fvp, tdvp, tvp);
1117 
1118 out_unlocked:
1119 	/* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
1120 	/* KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
1121 	/* KASSERT((fvp == NULL) || (VOP_ISLOCKED(fvp) != LK_EXCLUSIVE)); */
1122 	/* KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */
1123 
1124 	if (newname != NULL)
1125 		tmpfs_strname_free(tmpfs, newname, tcnp->cn_namelen);
1126 
1127 	return error;
1128 }
1129 
1130 /*
1131  * Look up fcnp in fdnode/fdvp and store its directory entry in fde_ret
1132  * and the associated vnode in fvp_ret; fail if not found.  Look up
1133  * tcnp in tdnode/tdvp and store its directory entry in tde_ret and the
1134  * associated vnode in tvp_ret; store null instead if not found.  Fail
1135  * if anything has been mounted on any of the nodes involved.
1136  *
1137  * fdvp and tdvp must be referenced.
1138  *
1139  * On entry, nothing is locked.
1140  *
1141  * On success, everything is locked, and *fvp_ret, and *tvp_ret if
1142  * nonnull, are referenced.  The only pairs of vnodes that may be
1143  * identical are {fdvp, tdvp} and {fvp, tvp}.
1144  *
1145  * On failure, everything remains as was.
1146  *
1147  * Locking everything including the source and target nodes is
1148  * necessary to make sure that, e.g., link count updates are OK.  The
1149  * locking order is, in general, ancestor-first, matching the order you
1150  * need to use to look up a descendant anyway.
1151  */
1152 static int
1153 tmpfs_rename_enter(struct mount *mount, struct tmpfs_mount *tmpfs,
1154     kauth_cred_t cred,
1155     struct vnode *fdvp, struct tmpfs_node *fdnode, struct componentname *fcnp,
1156     struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
1157     struct vnode *tdvp, struct tmpfs_node *tdnode, struct componentname *tcnp,
1158     struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret)
1159 {
1160 	int error;
1161 
1162 	KASSERT(mount != NULL);
1163 	KASSERT(tmpfs != NULL);
1164 	KASSERT(fdvp != NULL);
1165 	KASSERT(fdnode != NULL);
1166 	KASSERT(fcnp != NULL);
1167 	KASSERT(fde_ret != NULL);
1168 	KASSERT(fvp_ret != NULL);
1169 	KASSERT(tdvp != NULL);
1170 	KASSERT(tdnode != NULL);
1171 	KASSERT(tcnp != NULL);
1172 	KASSERT(tde_ret != NULL);
1173 	KASSERT(tvp_ret != NULL);
1174 	KASSERT(fdnode->tn_vnode == fdvp);
1175 	KASSERT(tdnode->tn_vnode == tdvp);
1176 	KASSERT(fdnode->tn_type == VDIR);
1177 	KASSERT(tdnode->tn_type == VDIR);
1178 
1179 	if (fdvp == tdvp) {
1180 		KASSERT(fdnode == tdnode);
1181 		error = tmpfs_rename_enter_common(mount, tmpfs, cred, fdvp,
1182 		    fdnode, fcnp, fde_ret, fvp_ret, tcnp, tde_ret, tvp_ret);
1183 	} else {
1184 		KASSERT(fdnode != tdnode);
1185 		error = tmpfs_rename_enter_separate(mount, tmpfs, cred,
1186 		    fdvp, fdnode, fcnp, fde_ret, fvp_ret,
1187 		    tdvp, tdnode, tcnp, tde_ret, tvp_ret);
1188 	}
1189 
1190 	if (error)
1191 		return error;
1192 
1193 	KASSERT(*fde_ret != NULL);
1194 	KASSERT(*fvp_ret != NULL);
1195 	KASSERT((*tde_ret == NULL) == (*tvp_ret == NULL));
1196 	KASSERT((*tde_ret == NULL) || ((*tde_ret)->td_node != NULL));
1197 	KASSERT((*tde_ret == NULL) ||
1198 	    ((*tde_ret)->td_node->tn_vnode == *tvp_ret));
1199 	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
1200 	KASSERT(VOP_ISLOCKED(*fvp_ret) == LK_EXCLUSIVE);
1201 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
1202 	KASSERT((*tvp_ret == NULL) ||
1203 	    (VOP_ISLOCKED(*tvp_ret) == LK_EXCLUSIVE));
1204 	KASSERT(*fvp_ret != fdvp);
1205 	KASSERT(*fvp_ret != tdvp);
1206 	KASSERT(*tvp_ret != fdvp);
1207 	KASSERT(*tvp_ret != tdvp);
1208 	return 0;
1209 }
1210 
1211 /*
1212  * Lock and look up with a common source/target directory.
1213  */
1214 static int
1215 tmpfs_rename_enter_common(struct mount *mount, struct tmpfs_mount *tmpfs,
1216     kauth_cred_t cred,
1217     struct vnode *dvp, struct tmpfs_node *dnode,
1218     struct componentname *fcnp,
1219     struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
1220     struct componentname *tcnp,
1221     struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret)
1222 {
1223 	struct tmpfs_dirent *fde, *tde;
1224 	struct vnode *fvp, *tvp;
1225 	int error;
1226 
1227 	error = tmpfs_rename_lock_directory(dvp, dnode);
1228 	if (error)
1229 		goto fail0;
1230 
1231 	/* Did we lose a race with mount?  */
1232 	if (dvp->v_mountedhere != NULL) {
1233 		error = EBUSY;
1234 		goto fail1;
1235 	}
1236 
1237 	/* Make sure the caller may read the directory.  */
1238 	error = VOP_ACCESS(dvp, VEXEC, cred);
1239 	if (error)
1240 		goto fail1;
1241 
1242 	/*
1243 	 * The order in which we lock the source and target nodes is
1244 	 * irrelevant because there can only be one rename on this
1245 	 * directory in flight at a time, and we have it locked.
1246 	 */
1247 
1248 	fde = tmpfs_dir_lookup(dnode, fcnp);
1249 	if (fde == NULL) {
1250 		error = ENOENT;
1251 		goto fail1;
1252 	}
1253 
1254 	KASSERT(fde->td_node != NULL);
1255 	/* We ruled out `.' earlier.  */
1256 	KASSERT(fde->td_node != dnode);
1257 	/* We ruled out `..' earlier.  */
1258 	KASSERT(fde->td_node != dnode->tn_spec.tn_dir.tn_parent);
1259 	mutex_enter(&fde->td_node->tn_vlock);
1260 	error = tmpfs_vnode_get(mount, fde->td_node, &fvp);
1261 	if (error)
1262 		goto fail1;
1263 	KASSERT(fvp != NULL);
1264 	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
1265 	KASSERT(fvp != dvp);
1266 	KASSERT(fvp->v_mount == mount);
1267 
1268 	/* Refuse to rename a mount point.  */
1269 	if ((fvp->v_type == VDIR) && (fvp->v_mountedhere != NULL)) {
1270 		error = EBUSY;
1271 		goto fail2;
1272 	}
1273 
1274 	tde = tmpfs_dir_lookup(dnode, tcnp);
1275 	if (tde == NULL) {
1276 		tvp = NULL;
1277 	} else {
1278 		KASSERT(tde->td_node != NULL);
1279 		/* We ruled out `.' earlier.  */
1280 		KASSERT(tde->td_node != dnode);
1281 		/* We ruled out `..' earlier.  */
1282 		KASSERT(tde->td_node != dnode->tn_spec.tn_dir.tn_parent);
1283 		if (tde->td_node != fde->td_node) {
1284 			mutex_enter(&tde->td_node->tn_vlock);
1285 			error = tmpfs_vnode_get(mount, tde->td_node, &tvp);
1286 			if (error)
1287 				goto fail2;
1288 			KASSERT(tvp->v_mount == mount);
1289 			/* Refuse to rename over a mount point.  */
1290 			if ((tvp->v_type == VDIR) &&
1291 			    (tvp->v_mountedhere != NULL)) {
1292 				error = EBUSY;
1293 				goto fail3;
1294 			}
1295 		} else {
1296 			tvp = fvp;
1297 			vref(tvp);
1298 		}
1299 		KASSERT(tvp != NULL);
1300 		KASSERT(VOP_ISLOCKED(tvp) == LK_EXCLUSIVE);
1301 	}
1302 	KASSERT(tvp != dvp);
1303 
1304 	*fde_ret = fde;
1305 	*fvp_ret = fvp;
1306 	*tde_ret = tde;
1307 	*tvp_ret = tvp;
1308 	return 0;
1309 
1310 fail3:	if (tvp != NULL) {
1311 		if (tvp != fvp)
1312 			vput(tvp);
1313 		else
1314 			vrele(tvp);
1315 	}
1316 
1317 fail2:	vput(fvp);
1318 fail1:	VOP_UNLOCK(dvp);
1319 fail0:	return error;
1320 }
1321 
1322 /*
1323  * Lock and look up with separate source and target directories.
1324  */
1325 static int
1326 tmpfs_rename_enter_separate(struct mount *mount, struct tmpfs_mount *tmpfs,
1327     kauth_cred_t cred,
1328     struct vnode *fdvp, struct tmpfs_node *fdnode, struct componentname *fcnp,
1329     struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
1330     struct vnode *tdvp, struct tmpfs_node *tdnode, struct componentname *tcnp,
1331     struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret)
1332 {
1333 	struct tmpfs_node *intermediate_node;
1334 	struct tmpfs_dirent *fde, *tde;
1335 	struct vnode *fvp, *tvp;
1336 	int error;
1337 
1338 	KASSERT(fdvp != tdvp);
1339 	KASSERT(fdnode != tdnode);
1340 
1341 #if 0				/* XXX */
1342 	mutex_enter(&tmpfs->tm_rename_lock);
1343 #endif
1344 
1345 	error = tmpfs_rename_genealogy(fdnode, tdnode, &intermediate_node);
1346 	if (error)
1347 		goto fail;
1348 
1349 	/*
1350 	 * intermediate_node == NULL means fdnode is not an ancestor of
1351 	 * tdnode.
1352 	 */
1353 	if (intermediate_node == NULL)
1354 		error = tmpfs_rename_lock(mount, cred, ENOTEMPTY,
1355 		    tdvp, tdnode, tcnp, true, &tde, &tvp,
1356 		    fdvp, fdnode, fcnp, false, &fde, &fvp);
1357 	else
1358 		error = tmpfs_rename_lock(mount, cred, EINVAL,
1359 		    fdvp, fdnode, fcnp, false, &fde, &fvp,
1360 		    tdvp, tdnode, tcnp, true, &tde, &tvp);
1361 	if (error)
1362 		goto fail;
1363 
1364 	KASSERT(fde != NULL);
1365 	KASSERT(fde->td_node != NULL);
1366 
1367 	/*
1368 	 * Reject rename("foo/bar", "foo/bar/baz/quux/zot").
1369 	 */
1370 	if (fde->td_node == intermediate_node) {
1371 		tmpfs_rename_exit(tmpfs, fdvp, fvp, tdvp, tvp);
1372 		return EINVAL;
1373 	}
1374 
1375 	*fde_ret = fde;
1376 	*fvp_ret = fvp;
1377 	*tde_ret = tde;
1378 	*tvp_ret = tvp;
1379 	return 0;
1380 
1381 fail:
1382 #if 0				/* XXX */
1383 	mutex_exit(&tmpfs->tm_rename_lock);
1384 #endif
1385 	return error;
1386 }
1387 
1388 /*
1389  * Unlock everything we locked for rename.
1390  *
1391  * fdvp and tdvp must be referenced.
1392  *
1393  * On entry, everything is locked, and fvp and tvp referenced.
1394  *
1395  * On exit, everything is unlocked, and fvp and tvp are released.
1396  */
1397 static void
1398 tmpfs_rename_exit(struct tmpfs_mount *tmpfs,
1399     struct vnode *fdvp, struct vnode *fvp,
1400     struct vnode *tdvp, struct vnode *tvp)
1401 {
1402 
1403 	KASSERT(tmpfs != NULL);
1404 	KASSERT(fdvp != NULL);
1405 	KASSERT(fvp != NULL);
1406 	KASSERT(fdvp != fvp);
1407 	KASSERT(fdvp != tvp);
1408 	KASSERT(tdvp != tvp);
1409 	KASSERT(tdvp != fvp);
1410 	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
1411 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
1412 	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
1413 	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
1414 
1415 	if (tvp != NULL) {
1416 		if (tvp != fvp)
1417 			vput(tvp);
1418 		else
1419 			vrele(tvp);
1420 	}
1421 	VOP_UNLOCK(tdvp);
1422 	vput(fvp);
1423 	if (fdvp != tdvp)
1424 		VOP_UNLOCK(fdvp);
1425 
1426 #if 0				/* XXX */
1427 	if (fdvp != tdvp)
1428 		mutex_exit(&tmpfs->tm_rename_lock);
1429 #endif
1430 }
1431 
1432 /*
1433  * Lock a directory, but fail if it has been rmdir'd.
1434  *
1435  * vp must be referenced.
1436  */
1437 static int
1438 tmpfs_rename_lock_directory(struct vnode *vp, struct tmpfs_node *node)
1439 {
1440 
1441 	KASSERT(vp != NULL);
1442 	KASSERT(node != NULL);
1443 	KASSERT(node->tn_vnode == vp);
1444 	KASSERT(node->tn_type == VDIR);
1445 
1446 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1447 	if (node->tn_spec.tn_dir.tn_parent == NULL) {
1448 		VOP_UNLOCK(vp);
1449 		return ENOENT;
1450 	}
1451 
1452 	return 0;
1453 }
1454 
1455 /*
1456  * Analyze the genealogy of the source and target nodes.
1457  *
1458  * On success, stores in *intermediate_node_ret either the child of
1459  * fdnode of which tdnode is a descendant, or null if tdnode is not a
1460  * descendant of fdnode at all.
1461  *
1462  * fdnode and tdnode must be unlocked and referenced.  The file
1463  * system's rename lock must also be held, to exclude concurrent
1464  * changes to the file system's genealogy other than rmdir.
1465  *
1466  * XXX This causes an extra lock/unlock of tdnode in the case when
1467  * we're just about to lock it again before locking anything else.
1468  * However, changing that requires reorganizing the code to make it
1469  * even more horrifically obscure.
1470  */
1471 static int
1472 tmpfs_rename_genealogy(struct tmpfs_node *fdnode, struct tmpfs_node *tdnode,
1473     struct tmpfs_node **intermediate_node_ret)
1474 {
1475 	struct tmpfs_node *node = tdnode, *parent;
1476 	int error;
1477 
1478 	KASSERT(fdnode != NULL);
1479 	KASSERT(tdnode != NULL);
1480 	KASSERT(fdnode != tdnode);
1481 	KASSERT(intermediate_node_ret != NULL);
1482 
1483 	KASSERT(fdnode->tn_vnode != NULL);
1484 	KASSERT(tdnode->tn_vnode != NULL);
1485 	KASSERT(fdnode->tn_type == VDIR);
1486 	KASSERT(tdnode->tn_type == VDIR);
1487 
1488 	/*
1489 	 * We need to provisionally lock tdnode->tn_vnode to keep rmdir
1490 	 * from deleting it -- or any ancestor -- at an inopportune
1491 	 * moment.
1492 	 */
1493 	error = tmpfs_rename_lock_directory(tdnode->tn_vnode, tdnode);
1494 	if (error)
1495 		return error;
1496 
1497 	for (;;) {
1498 		parent = node->tn_spec.tn_dir.tn_parent;
1499 		KASSERT(parent != NULL);
1500 		KASSERT(parent->tn_type == VDIR);
1501 
1502 		/* Did we hit the root without finding fdnode?  */
1503 		if (parent == node) {
1504 			*intermediate_node_ret = NULL;
1505 			break;
1506 		}
1507 
1508 		/* Did we find that fdnode is an ancestor?  */
1509 		if (parent == fdnode) {
1510 			*intermediate_node_ret = node;
1511 			break;
1512 		}
1513 
1514 		/* Neither -- keep ascending the family tree.  */
1515 		node = parent;
1516 	}
1517 
1518 	VOP_UNLOCK(tdnode->tn_vnode);
1519 	return 0;
1520 }
1521 
1522 /*
1523  * Lock directories a and b, which must be distinct, and look up and
1524  * lock nodes a and b.  Do a first and then b.  Directory b may not be
1525  * an ancestor of directory a, although directory a may be an ancestor
1526  * of directory b.  Fail with overlap_error if node a is directory b.
1527  * Neither componentname may be `.' or `..'.
1528  *
1529  * a_dvp and b_dvp must be referenced.
1530  *
1531  * On entry, a_dvp and b_dvp are unlocked.
1532  *
1533  * On success,
1534  * . a_dvp and b_dvp are locked,
1535  * . *a_dirent_ret is filled with a directory entry whose node is
1536  *     locked and referenced,
1537  * . *b_vp_ret is filled with the corresponding vnode,
1538  * . *b_dirent_ret is filled either with null or with a directory entry
1539  *     whose node is locked and referenced,
1540  * . *b_vp is filled either with null or with the corresponding vnode,
1541  *     and
1542  * . the only pair of vnodes that may be identical is a_vp and b_vp.
1543  *
1544  * On failure, a_dvp and b_dvp are left unlocked, and *a_dirent_ret,
1545  * *a_vp, *b_dirent_ret, and *b_vp are left alone.
1546  */
1547 static int
1548 tmpfs_rename_lock(struct mount *mount, kauth_cred_t cred, int overlap_error,
1549     struct vnode *a_dvp, struct tmpfs_node *a_dnode,
1550     struct componentname *a_cnp, bool a_missing_ok,
1551     struct tmpfs_dirent **a_dirent_ret, struct vnode **a_vp_ret,
1552     struct vnode *b_dvp, struct tmpfs_node *b_dnode,
1553     struct componentname *b_cnp, bool b_missing_ok,
1554     struct tmpfs_dirent **b_dirent_ret, struct vnode **b_vp_ret)
1555 {
1556 	struct tmpfs_dirent *a_dirent, *b_dirent;
1557 	struct vnode *a_vp, *b_vp;
1558 	int error;
1559 
1560 	KASSERT(a_dvp != NULL);
1561 	KASSERT(a_dnode != NULL);
1562 	KASSERT(a_cnp != NULL);
1563 	KASSERT(a_dirent_ret != NULL);
1564 	KASSERT(a_vp_ret != NULL);
1565 	KASSERT(b_dvp != NULL);
1566 	KASSERT(b_dnode != NULL);
1567 	KASSERT(b_cnp != NULL);
1568 	KASSERT(b_dirent_ret != NULL);
1569 	KASSERT(b_vp_ret != NULL);
1570 	KASSERT(a_dvp != b_dvp);
1571 	KASSERT(a_dnode != b_dnode);
1572 	KASSERT(a_dnode->tn_vnode == a_dvp);
1573 	KASSERT(b_dnode->tn_vnode == b_dvp);
1574 	KASSERT(a_dnode->tn_type == VDIR);
1575 	KASSERT(b_dnode->tn_type == VDIR);
1576 	KASSERT(a_missing_ok != b_missing_ok);
1577 
1578 	error = tmpfs_rename_lock_directory(a_dvp, a_dnode);
1579 	if (error)
1580 		goto fail0;
1581 
1582 	/* Did we lose a race with mount?  */
1583 	if (a_dvp->v_mountedhere != NULL) {
1584 		error = EBUSY;
1585 		goto fail1;
1586 	}
1587 
1588 	/* Make sure the caller may read the directory.  */
1589 	error = VOP_ACCESS(a_dvp, VEXEC, cred);
1590 	if (error)
1591 		goto fail1;
1592 
1593 	a_dirent = tmpfs_dir_lookup(a_dnode, a_cnp);
1594 	if (a_dirent != NULL) {
1595 		KASSERT(a_dirent->td_node != NULL);
1596 		/* We ruled out `.' earlier.  */
1597 		KASSERT(a_dirent->td_node != a_dnode);
1598 		/* We ruled out `..' earlier.  */
1599 		KASSERT(a_dirent->td_node !=
1600 		    a_dnode->tn_spec.tn_dir.tn_parent);
1601 		if (a_dirent->td_node == b_dnode) {
1602 			error = overlap_error;
1603 			goto fail1;
1604 		}
1605 		mutex_enter(&a_dirent->td_node->tn_vlock);
1606 		error = tmpfs_vnode_get(mount, a_dirent->td_node, &a_vp);
1607 		if (error)
1608 			goto fail1;
1609 		KASSERT(a_vp->v_mount == mount);
1610 		/* Refuse to rename (over) a mount point.  */
1611 		if ((a_vp->v_type == VDIR) && (a_vp->v_mountedhere != NULL)) {
1612 			error = EBUSY;
1613 			goto fail2;
1614 		}
1615 	} else if (!a_missing_ok) {
1616 		error = ENOENT;
1617 		goto fail1;
1618 	} else {
1619 		a_vp = NULL;
1620 	}
1621 	KASSERT(a_vp != a_dvp);
1622 	KASSERT(a_vp != b_dvp);
1623 
1624 	error = tmpfs_rename_lock_directory(b_dvp, b_dnode);
1625 	if (error)
1626 		goto fail2;
1627 
1628 	/* Did we lose a race with mount?  */
1629 	if (b_dvp->v_mountedhere != NULL) {
1630 		error = EBUSY;
1631 		goto fail3;
1632 	}
1633 
1634 	/* Make sure the caller may read the directory.  */
1635 	error = VOP_ACCESS(b_dvp, VEXEC, cred);
1636 	if (error)
1637 		goto fail3;
1638 
1639 	b_dirent = tmpfs_dir_lookup(b_dnode, b_cnp);
1640 	if (b_dirent != NULL) {
1641 		KASSERT(b_dirent->td_node != NULL);
1642 		/* We ruled out `.' earlier.  */
1643 		KASSERT(b_dirent->td_node != b_dnode);
1644 		/* We ruled out `..' earlier.  */
1645 		KASSERT(b_dirent->td_node !=
1646 		    b_dnode->tn_spec.tn_dir.tn_parent);
1647 		/* b is not an ancestor of a.  */
1648 		KASSERT(b_dirent->td_node != a_dnode);
1649 		/* But the source and target nodes might be the same.  */
1650 		if ((a_dirent == NULL) ||
1651 		    (a_dirent->td_node != b_dirent->td_node)) {
1652 			mutex_enter(&b_dirent->td_node->tn_vlock);
1653 			error = tmpfs_vnode_get(mount, b_dirent->td_node,
1654 			    &b_vp);
1655 			if (error)
1656 				goto fail3;
1657 			KASSERT(b_vp->v_mount == mount);
1658 			KASSERT(a_vp != b_vp);
1659 			/* Refuse to rename (over) a mount point.  */
1660 			if ((b_vp->v_type == VDIR) &&
1661 			    (b_vp->v_mountedhere != NULL)) {
1662 				error = EBUSY;
1663 				goto fail4;
1664 			}
1665 		} else {
1666 			b_vp = a_vp;
1667 			vref(b_vp);
1668 		}
1669 	} else if (!b_missing_ok) {
1670 		error = ENOENT;
1671 		goto fail3;
1672 	} else {
1673 		b_vp = NULL;
1674 	}
1675 	KASSERT(b_vp != a_dvp);
1676 	KASSERT(b_vp != b_dvp);
1677 
1678 	KASSERT(VOP_ISLOCKED(a_dvp) == LK_EXCLUSIVE);
1679 	KASSERT(VOP_ISLOCKED(b_dvp) == LK_EXCLUSIVE);
1680 	KASSERT(a_missing_ok || (a_dirent != NULL));
1681 	KASSERT(a_missing_ok || (a_dirent->td_node != NULL));
1682 	KASSERT(b_missing_ok || (b_dirent != NULL));
1683 	KASSERT(b_missing_ok || (b_dirent->td_node != NULL));
1684 	KASSERT((a_dirent == NULL) || (a_dirent->td_node != NULL));
1685 	KASSERT((a_dirent == NULL) || (a_dirent->td_node->tn_vnode == a_vp));
1686 	KASSERT((b_dirent == NULL) || (b_dirent->td_node != NULL));
1687 	KASSERT((b_dirent == NULL) || (b_dirent->td_node->tn_vnode == b_vp));
1688 	KASSERT((a_vp == NULL) || (VOP_ISLOCKED(a_vp) == LK_EXCLUSIVE));
1689 	KASSERT((b_vp == NULL) || (VOP_ISLOCKED(b_vp) == LK_EXCLUSIVE));
1690 
1691 	*a_dirent_ret = a_dirent;
1692 	*b_dirent_ret = b_dirent;
1693 	*a_vp_ret = a_vp;
1694 	*b_vp_ret = b_vp;
1695 	return 0;
1696 
1697 fail4:	if (b_vp != NULL) {
1698 		KASSERT(VOP_ISLOCKED(b_vp) == LK_EXCLUSIVE);
1699 		if (b_vp != a_vp)
1700 			vput(b_vp);
1701 		else
1702 			vrele(a_vp);
1703 	}
1704 
1705 fail3:	KASSERT(VOP_ISLOCKED(b_dvp) == LK_EXCLUSIVE);
1706 	VOP_UNLOCK(b_dvp);
1707 
1708 fail2:	if (a_vp != NULL) {
1709 		KASSERT(VOP_ISLOCKED(a_vp) == LK_EXCLUSIVE);
1710 		vput(a_vp);
1711 	}
1712 
1713 fail1:	KASSERT(VOP_ISLOCKED(a_dvp) == LK_EXCLUSIVE);
1714 	VOP_UNLOCK(a_dvp);
1715 
1716 fail0:	/* KASSERT(VOP_ISLOCKED(a_dvp) != LK_EXCLUSIVE); */
1717 	/* KASSERT(VOP_ISLOCKED(b_dvp) != LK_EXCLUSIVE); */
1718 	/* KASSERT((a_vp == NULL) || (VOP_ISLOCKED(a_vp) != LK_EXCLUSIVE)); */
1719 	/* KASSERT((b_vp == NULL) || (VOP_ISLOCKED(b_vp) != LK_EXCLUSIVE)); */
1720 	return error;
1721 }
1722 
1723 /*
1724  * Shuffle the directory entries to move fvp from the directory fdvp
1725  * into the directory tdvp.  fde is fvp's directory entry in fdvp.  If
1726  * we are overwriting a target node, it is tvp, and tde is its
1727  * directory entry in tdvp.
1728  *
1729  * fdvp, fvp, tdvp, and tvp must all be locked and referenced.
1730  */
1731 static void
1732 tmpfs_rename_attachdetach(struct tmpfs_mount *tmpfs,
1733     struct vnode *fdvp, struct tmpfs_dirent *fde, struct vnode *fvp,
1734     struct vnode *tdvp, struct tmpfs_dirent *tde, struct vnode *tvp)
1735 {
1736 
1737 	KASSERT(tmpfs != NULL);
1738 	KASSERT(fdvp != NULL);
1739 	KASSERT(fde != NULL);
1740 	KASSERT(fvp != NULL);
1741 	KASSERT(tdvp != NULL);
1742 	KASSERT(fde->td_node != NULL);
1743 	KASSERT(fde->td_node->tn_vnode == fvp);
1744 	KASSERT((tde == NULL) == (tvp == NULL));
1745 	KASSERT((tde == NULL) || (tde->td_node != NULL));
1746 	KASSERT((tde == NULL) || (tde->td_node->tn_vnode == tvp));
1747 	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
1748 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
1749 	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
1750 	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
1751 
1752 	/*
1753 	 * If we are moving from one directory to another, detach the
1754 	 * source entry and reattach it to the target directory.
1755 	 */
1756 	if (fdvp != tdvp) {
1757 		/* tmpfs_dir_detach clobbers fde->td_node, so save it.  */
1758 		struct tmpfs_node *fnode = fde->td_node;
1759 		tmpfs_dir_detach(fdvp, fde);
1760 		tmpfs_dir_attach(tdvp, fde, fnode);
1761 	} else if (tvp == NULL) {
1762 		/*
1763 		 * We are changing the directory.  tmpfs_dir_attach and
1764 		 * tmpfs_dir_detach note the events for us, but for
1765 		 * this case we don't call them, so we must note the
1766 		 * event explicitly.
1767 		 */
1768 		VN_KNOTE(fdvp, NOTE_WRITE);
1769 	}
1770 
1771 	/*
1772 	 * If we are replacing an existing target entry, delete it.
1773 	 */
1774 	if (tde != NULL) {
1775 		KASSERT(tvp != NULL);
1776 		KASSERT(tde->td_node != NULL);
1777 		KASSERT((fvp->v_type == VDIR) == (tvp->v_type == VDIR));
1778 		if (tde->td_node->tn_type == VDIR) {
1779 			KASSERT(tde->td_node->tn_size == 0);
1780 			KASSERT(tde->td_node->tn_links == 2);
1781 			/* Decrement the extra link count for `.' so
1782 			 * the vnode will be recycled when released.  */
1783 			tde->td_node->tn_links--;
1784 		}
1785 		tmpfs_dir_detach(tdvp, tde);
1786 		tmpfs_free_dirent(tmpfs, tde);
1787 	}
1788 }
1789 
1790 /*
1791  * Remove the entry de for the non-directory vp from the directory dvp.
1792  *
1793  * Everything must be locked and referenced.
1794  */
1795 static int
1796 tmpfs_do_remove(struct tmpfs_mount *tmpfs, struct vnode *dvp,
1797     struct tmpfs_node *dnode, struct tmpfs_dirent *de, struct vnode *vp,
1798     kauth_cred_t cred)
1799 {
1800 	int error;
1801 
1802 	KASSERT(tmpfs != NULL);
1803 	KASSERT(dvp != NULL);
1804 	KASSERT(dnode != NULL);
1805 	KASSERT(de != NULL);
1806 	KASSERT(vp != NULL);
1807 	KASSERT(dnode->tn_vnode == dvp);
1808 	KASSERT(de->td_node != NULL);
1809 	KASSERT(de->td_node->tn_vnode == vp);
1810 	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
1811 	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
1812 
1813 	error = tmpfs_remove_check_possible(dnode, de->td_node);
1814 	if (error)
1815 		return error;
1816 
1817 	error = tmpfs_remove_check_permitted(cred, dnode, de->td_node);
1818 	error = kauth_authorize_vnode(cred, KAUTH_VNODE_DELETE, vp, dvp,
1819 	    error);
1820 	if (error)
1821 		return error;
1822 
1823 	tmpfs_dir_detach(dvp, de);
1824 	tmpfs_free_dirent(tmpfs, de);
1825 
1826 	return 0;
1827 }
1828 
1829 /*
1830  * Check whether a rename is possible independent of credentials.
1831  *
1832  * Everything must be locked and referenced.
1833  */
1834 static int
1835 tmpfs_rename_check_possible(
1836     struct tmpfs_node *fdnode, struct tmpfs_node *fnode,
1837     struct tmpfs_node *tdnode, struct tmpfs_node *tnode)
1838 {
1839 
1840 	KASSERT(fdnode != NULL);
1841 	KASSERT(fnode != NULL);
1842 	KASSERT(tdnode != NULL);
1843 	KASSERT(fdnode != fnode);
1844 	KASSERT(tdnode != tnode);
1845 	KASSERT(fnode != tnode);
1846 	KASSERT(fdnode->tn_vnode != NULL);
1847 	KASSERT(fnode->tn_vnode != NULL);
1848 	KASSERT(tdnode->tn_vnode != NULL);
1849 	KASSERT((tnode == NULL) || (tnode->tn_vnode != NULL));
1850 	KASSERT(VOP_ISLOCKED(fdnode->tn_vnode) == LK_EXCLUSIVE);
1851 	KASSERT(VOP_ISLOCKED(fnode->tn_vnode) == LK_EXCLUSIVE);
1852 	KASSERT(VOP_ISLOCKED(tdnode->tn_vnode) == LK_EXCLUSIVE);
1853 	KASSERT((tnode == NULL) ||
1854 	    (VOP_ISLOCKED(tnode->tn_vnode) == LK_EXCLUSIVE));
1855 
1856 	/*
1857 	 * If fdnode is immutable, we can't write to it.  If fdnode is
1858 	 * append-only, the only change we can make is to add entries
1859 	 * to it.  If fnode is immutable, we can't change the links to
1860 	 * it.  If fnode is append-only...well, this is what UFS does.
1861 	 */
1862 	if ((fdnode->tn_flags | fnode->tn_flags) & (IMMUTABLE | APPEND))
1863 		return EPERM;
1864 
1865 	/*
1866 	 * If tdnode is immutable, we can't write to it.  If tdnode is
1867 	 * append-only, we can add entries, but we can't change
1868 	 * existing entries.
1869 	 */
1870 	if (tdnode->tn_flags & (IMMUTABLE | (tnode? APPEND : 0)))
1871 		return EPERM;
1872 
1873 	/*
1874 	 * If tnode is immutable, we can't replace links to it.  If
1875 	 * tnode is append-only...well, this is what UFS does.
1876 	 */
1877 	if (tnode != NULL) {
1878 		KASSERT(tnode != NULL);
1879 		if ((tnode->tn_flags & (IMMUTABLE | APPEND)) != 0)
1880 			return EPERM;
1881 	}
1882 
1883 	return 0;
1884 }
1885 
1886 /*
1887  * Check whether a rename is permitted given our credentials.
1888  *
1889  * Everything must be locked and referenced.
1890  */
1891 static int
1892 tmpfs_rename_check_permitted(kauth_cred_t cred,
1893     struct tmpfs_node *fdnode, struct tmpfs_node *fnode,
1894     struct tmpfs_node *tdnode, struct tmpfs_node *tnode)
1895 {
1896 	int error;
1897 
1898 	KASSERT(fdnode != NULL);
1899 	KASSERT(fnode != NULL);
1900 	KASSERT(tdnode != NULL);
1901 	KASSERT(fdnode != fnode);
1902 	KASSERT(tdnode != tnode);
1903 	KASSERT(fnode != tnode);
1904 	KASSERT(fdnode->tn_vnode != NULL);
1905 	KASSERT(fnode->tn_vnode != NULL);
1906 	KASSERT(tdnode->tn_vnode != NULL);
1907 	KASSERT((tnode == NULL) || (tnode->tn_vnode != NULL));
1908 	KASSERT(VOP_ISLOCKED(fdnode->tn_vnode) == LK_EXCLUSIVE);
1909 	KASSERT(VOP_ISLOCKED(fnode->tn_vnode) == LK_EXCLUSIVE);
1910 	KASSERT(VOP_ISLOCKED(tdnode->tn_vnode) == LK_EXCLUSIVE);
1911 	KASSERT((tnode == NULL) ||
1912 	    (VOP_ISLOCKED(tnode->tn_vnode) == LK_EXCLUSIVE));
1913 
1914 	/*
1915 	 * We need to remove or change an entry in the source directory.
1916 	 */
1917 	error = VOP_ACCESS(fdnode->tn_vnode, VWRITE, cred);
1918 	if (error)
1919 		return error;
1920 
1921 	/*
1922 	 * If we are changing directories, then we need to write to the
1923 	 * target directory to add or change an entry.  Also, if fnode
1924 	 * is a directory, we need to write to it to change its `..'
1925 	 * entry.
1926 	 */
1927 	if (fdnode != tdnode) {
1928 		error = VOP_ACCESS(tdnode->tn_vnode, VWRITE, cred);
1929 		if (error)
1930 			return error;
1931 		if (fnode->tn_type == VDIR) {
1932 			error = VOP_ACCESS(fnode->tn_vnode, VWRITE, cred);
1933 			if (error)
1934 				return error;
1935 		}
1936 	}
1937 
1938 	error = tmpfs_check_sticky(cred, fdnode, fnode);
1939 	if (error)
1940 		return error;
1941 
1942 	error = tmpfs_check_sticky(cred, tdnode, tnode);
1943 	if (error)
1944 		return error;
1945 
1946 	return 0;
1947 }
1948 
1949 /*
1950  * Check whether removing node's entry in dnode is possible independent
1951  * of credentials.
1952  *
1953  * Everything must be locked and referenced.
1954  */
1955 static int
1956 tmpfs_remove_check_possible(struct tmpfs_node *dnode, struct tmpfs_node *node)
1957 {
1958 
1959 	KASSERT(dnode != NULL);
1960 	KASSERT(dnode->tn_vnode != NULL);
1961 	KASSERT(node != NULL);
1962 	KASSERT(dnode != node);
1963 	KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE);
1964 	KASSERT(VOP_ISLOCKED(node->tn_vnode) == LK_EXCLUSIVE);
1965 
1966 	/*
1967 	 * We want to delete the entry.  If dnode is immutable, we
1968 	 * can't write to it to delete the entry.  If dnode is
1969 	 * append-only, the only change we can make is to add entries,
1970 	 * so we can't delete entries.  If node is immutable, we can't
1971 	 * change the links to it, so we can't delete the entry.  If
1972 	 * node is append-only...well, this is what UFS does.
1973 	 */
1974 	if ((dnode->tn_flags | node->tn_flags) & (IMMUTABLE | APPEND))
1975 		return EPERM;
1976 
1977 	return 0;
1978 }
1979 
1980 /*
1981  * Check whether removing node's entry in dnode is permitted given our
1982  * credentials.
1983  *
1984  * Everything must be locked and referenced.
1985  */
1986 static int
1987 tmpfs_remove_check_permitted(kauth_cred_t cred,
1988     struct tmpfs_node *dnode, struct tmpfs_node *node)
1989 {
1990 	int error;
1991 
1992 	KASSERT(dnode != NULL);
1993 	KASSERT(dnode->tn_vnode != NULL);
1994 	KASSERT(node != NULL);
1995 	KASSERT(dnode != node);
1996 	KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE);
1997 	KASSERT(VOP_ISLOCKED(node->tn_vnode) == LK_EXCLUSIVE);
1998 
1999 	/*
2000 	 * Check whether we are permitted to write to the source
2001 	 * directory in order to delete an entry from it.
2002 	 */
2003 	error = VOP_ACCESS(dnode->tn_vnode, VWRITE, cred);
2004 	if (error)
2005 		return error;
2006 
2007 	error = tmpfs_check_sticky(cred, dnode, node);
2008 	if (error)
2009 		return error;
2010 
2011 	return 0;
2012 }
2013 
2014 /*
2015  * Check whether we may change an entry in a sticky directory.  If the
2016  * directory is sticky, the user must own either the directory or, if
2017  * it exists, the node, in order to change the entry.
2018  *
2019  * Everything must be locked and referenced.
2020  */
2021 static int
2022 tmpfs_check_sticky(kauth_cred_t cred,
2023     struct tmpfs_node *dnode, struct tmpfs_node *node)
2024 {
2025 
2026 	KASSERT(dnode != NULL);
2027 	KASSERT(dnode->tn_vnode != NULL);
2028 	KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE);
2029 	KASSERT((node == NULL) || (node->tn_vnode != NULL));
2030 	KASSERT((node == NULL) ||
2031 	    (VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE));
2032 
2033 	if (node == NULL)
2034 		return 0;
2035 
2036 	if (dnode->tn_mode & S_ISTXT) {
2037 		if (kauth_authorize_vnode(cred, KAUTH_VNODE_DELETE,
2038 		    node->tn_vnode, dnode->tn_vnode, genfs_can_sticky(cred,
2039 		    dnode->tn_uid, node->tn_uid)) != 0)
2040 			return EPERM;
2041 	}
2042 
2043 	return 0;
2044 }
2045 
2046 int
2047 tmpfs_mkdir(void *v)
2048 {
2049 	struct vop_mkdir_args /* {
2050 		struct vnode		*a_dvp;
2051 		struct vnode		**a_vpp;
2052 		struct componentname	*a_cnp;
2053 		struct vattr		*a_vap;
2054 	} */ *ap = v;
2055 	vnode_t *dvp = ap->a_dvp;
2056 	vnode_t **vpp = ap->a_vpp;
2057 	struct componentname *cnp = ap->a_cnp;
2058 	struct vattr *vap = ap->a_vap;
2059 
2060 	KASSERT(vap->va_type == VDIR);
2061 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
2062 }
2063 
2064 int
2065 tmpfs_rmdir(void *v)
2066 {
2067 	struct vop_rmdir_args /* {
2068 		struct vnode		*a_dvp;
2069 		struct vnode		*a_vp;
2070 		struct componentname	*a_cnp;
2071 	} */ *ap = v;
2072 	vnode_t *dvp = ap->a_dvp;
2073 	vnode_t *vp = ap->a_vp;
2074 	tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
2075 	tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
2076 	tmpfs_node_t *node = VP_TO_TMPFS_DIR(vp);
2077 	tmpfs_dirent_t *de;
2078 	int error = 0;
2079 
2080 	KASSERT(VOP_ISLOCKED(dvp));
2081 	KASSERT(VOP_ISLOCKED(vp));
2082 	KASSERT(node->tn_spec.tn_dir.tn_parent == dnode);
2083 
2084 	/*
2085 	 * Directories with more than two non-whiteout
2086 	 * entries ('.' and '..') cannot be removed.
2087 	 */
2088 	if (node->tn_size > 0) {
2089 		KASSERT(error == 0);
2090 		TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
2091 			if (de->td_node != TMPFS_NODE_WHITEOUT) {
2092 				error = ENOTEMPTY;
2093 				break;
2094 			}
2095 		}
2096 		if (error)
2097 			goto out;
2098 	}
2099 
2100 	/* Lookup the directory entry (check the cached hint first). */
2101 	de = tmpfs_dir_cached(node);
2102 	if (de == NULL) {
2103 		struct componentname *cnp = ap->a_cnp;
2104 		de = tmpfs_dir_lookup(dnode, cnp);
2105 	}
2106 	KASSERT(de && de->td_node == node);
2107 
2108 	/* Check flags to see if we are allowed to remove the directory. */
2109 	if (dnode->tn_flags & APPEND || node->tn_flags & (IMMUTABLE | APPEND)) {
2110 		error = EPERM;
2111 		goto out;
2112 	}
2113 
2114 	/* Decrement the link count for the virtual '.' entry. */
2115 	node->tn_links--;
2116 	node->tn_status |= TMPFS_NODE_STATUSALL;
2117 
2118 	/* Detach the directory entry from the directory. */
2119 	tmpfs_dir_detach(dvp, de);
2120 
2121 	/* Purge the cache for parent. */
2122 	cache_purge(dvp);
2123 
2124 	/*
2125 	 * Destroy the directory entry or replace it with a whiteout.
2126 	 * Note: the inode referred by it will not be destroyed
2127 	 * until the vnode is reclaimed.
2128 	 */
2129 	if (ap->a_cnp->cn_flags & DOWHITEOUT)
2130 		tmpfs_dir_attach(dvp, de, TMPFS_NODE_WHITEOUT);
2131 	else
2132 		tmpfs_free_dirent(tmp, de);
2133 
2134 	/* Destroy the whiteout entries from the node. */
2135 	while ((de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir)) != NULL) {
2136 		KASSERT(de->td_node == TMPFS_NODE_WHITEOUT);
2137 		tmpfs_dir_detach(vp, de);
2138 		tmpfs_free_dirent(tmp, de);
2139 	}
2140 
2141 	KASSERT(node->tn_links == 0);
2142 out:
2143 	/* Release the nodes. */
2144 	vput(dvp);
2145 	vput(vp);
2146 	return error;
2147 }
2148 
2149 int
2150 tmpfs_symlink(void *v)
2151 {
2152 	struct vop_symlink_args /* {
2153 		struct vnode		*a_dvp;
2154 		struct vnode		**a_vpp;
2155 		struct componentname	*a_cnp;
2156 		struct vattr		*a_vap;
2157 		char			*a_target;
2158 	} */ *ap = v;
2159 	vnode_t *dvp = ap->a_dvp;
2160 	vnode_t **vpp = ap->a_vpp;
2161 	struct componentname *cnp = ap->a_cnp;
2162 	struct vattr *vap = ap->a_vap;
2163 	char *target = ap->a_target;
2164 
2165 	KASSERT(vap->va_type == VLNK);
2166 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, target);
2167 }
2168 
2169 int
2170 tmpfs_readdir(void *v)
2171 {
2172 	struct vop_readdir_args /* {
2173 		struct vnode	*a_vp;
2174 		struct uio	*a_uio;
2175 		kauth_cred_t	a_cred;
2176 		int		*a_eofflag;
2177 		off_t		**a_cookies;
2178 		int		*ncookies;
2179 	} */ *ap = v;
2180 	vnode_t *vp = ap->a_vp;
2181 	struct uio *uio = ap->a_uio;
2182 	int *eofflag = ap->a_eofflag;
2183 	off_t **cookies = ap->a_cookies;
2184 	int *ncookies = ap->a_ncookies;
2185 	off_t startoff, cnt;
2186 	tmpfs_node_t *node;
2187 	int error;
2188 
2189 	KASSERT(VOP_ISLOCKED(vp));
2190 
2191 	/* This operation only makes sense on directory nodes. */
2192 	if (vp->v_type != VDIR) {
2193 		return ENOTDIR;
2194 	}
2195 	node = VP_TO_TMPFS_DIR(vp);
2196 	startoff = uio->uio_offset;
2197 	cnt = 0;
2198 	if (node->tn_links == 0) {
2199 		error = 0;
2200 		goto out;
2201 	}
2202 
2203 	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOT) {
2204 		error = tmpfs_dir_getdotdent(node, uio);
2205 		if (error != 0) {
2206 			if (error == -1)
2207 				error = 0;
2208 			goto out;
2209 		}
2210 		cnt++;
2211 	}
2212 	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT) {
2213 		error = tmpfs_dir_getdotdotdent(node, uio);
2214 		if (error != 0) {
2215 			if (error == -1)
2216 				error = 0;
2217 			goto out;
2218 		}
2219 		cnt++;
2220 	}
2221 	error = tmpfs_dir_getdents(node, uio, &cnt);
2222 	if (error == -1) {
2223 		error = 0;
2224 	}
2225 	KASSERT(error >= 0);
2226 out:
2227 	if (eofflag != NULL) {
2228 		*eofflag = (!error && uio->uio_offset == TMPFS_DIRCOOKIE_EOF);
2229 	}
2230 	if (error || cookies == NULL || ncookies == NULL) {
2231 		return error;
2232 	}
2233 
2234 	/* Update NFS-related variables, if any. */
2235 	off_t i, off = startoff;
2236 	tmpfs_dirent_t *de = NULL;
2237 
2238 	*cookies = malloc(cnt * sizeof(off_t), M_TEMP, M_WAITOK);
2239 	*ncookies = cnt;
2240 
2241 	for (i = 0; i < cnt; i++) {
2242 		KASSERT(off != TMPFS_DIRCOOKIE_EOF);
2243 		if (off != TMPFS_DIRCOOKIE_DOT) {
2244 			if (off == TMPFS_DIRCOOKIE_DOTDOT) {
2245 				de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir);
2246 			} else if (de != NULL) {
2247 				de = TAILQ_NEXT(de, td_entries);
2248 			} else {
2249 				de = tmpfs_dir_lookupbycookie(node, off);
2250 				KASSERT(de != NULL);
2251 				de = TAILQ_NEXT(de, td_entries);
2252 			}
2253 			if (de == NULL) {
2254 				off = TMPFS_DIRCOOKIE_EOF;
2255 			} else {
2256 				off = tmpfs_dircookie(de);
2257 			}
2258 		} else {
2259 			off = TMPFS_DIRCOOKIE_DOTDOT;
2260 		}
2261 		(*cookies)[i] = off;
2262 	}
2263 	KASSERT(uio->uio_offset == off);
2264 	return error;
2265 }
2266 
2267 int
2268 tmpfs_readlink(void *v)
2269 {
2270 	struct vop_readlink_args /* {
2271 		struct vnode	*a_vp;
2272 		struct uio	*a_uio;
2273 		kauth_cred_t	a_cred;
2274 	} */ *ap = v;
2275 	vnode_t *vp = ap->a_vp;
2276 	struct uio *uio = ap->a_uio;
2277 	tmpfs_node_t *node;
2278 	int error;
2279 
2280 	KASSERT(VOP_ISLOCKED(vp));
2281 	KASSERT(uio->uio_offset == 0);
2282 	KASSERT(vp->v_type == VLNK);
2283 
2284 	node = VP_TO_TMPFS_NODE(vp);
2285 	error = uiomove(node->tn_spec.tn_lnk.tn_link,
2286 	    MIN(node->tn_size, uio->uio_resid), uio);
2287 	node->tn_status |= TMPFS_NODE_ACCESSED;
2288 
2289 	return error;
2290 }
2291 
2292 int
2293 tmpfs_inactive(void *v)
2294 {
2295 	struct vop_inactive_args /* {
2296 		struct vnode *a_vp;
2297 		bool *a_recycle;
2298 	} */ *ap = v;
2299 	vnode_t *vp = ap->a_vp;
2300 	tmpfs_node_t *node;
2301 
2302 	KASSERT(VOP_ISLOCKED(vp));
2303 
2304 	node = VP_TO_TMPFS_NODE(vp);
2305 	*ap->a_recycle = (node->tn_links == 0);
2306 	VOP_UNLOCK(vp);
2307 
2308 	return 0;
2309 }
2310 
2311 int
2312 tmpfs_reclaim(void *v)
2313 {
2314 	struct vop_reclaim_args /* {
2315 		struct vnode *a_vp;
2316 	} */ *ap = v;
2317 	vnode_t *vp = ap->a_vp;
2318 	tmpfs_mount_t *tmp = VFS_TO_TMPFS(vp->v_mount);
2319 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
2320 	bool racing;
2321 
2322 	/* Disassociate inode from vnode. */
2323 	mutex_enter(&node->tn_vlock);
2324 	node->tn_vnode = NULL;
2325 	vp->v_data = NULL;
2326 	/* Check if tmpfs_vnode_get() is racing with us. */
2327 	racing = TMPFS_NODE_RECLAIMING(node);
2328 	mutex_exit(&node->tn_vlock);
2329 
2330 	/*
2331 	 * If inode is not referenced, i.e. no links, then destroy it.
2332 	 * Note: if racing - inode is about to get a new vnode, leave it.
2333 	 */
2334 	if (node->tn_links == 0 && !racing) {
2335 		tmpfs_free_node(tmp, node);
2336 	}
2337 	return 0;
2338 }
2339 
2340 int
2341 tmpfs_pathconf(void *v)
2342 {
2343 	struct vop_pathconf_args /* {
2344 		struct vnode	*a_vp;
2345 		int		a_name;
2346 		register_t	*a_retval;
2347 	} */ *ap = v;
2348 	const int name = ap->a_name;
2349 	register_t *retval = ap->a_retval;
2350 	int error = 0;
2351 
2352 	switch (name) {
2353 	case _PC_LINK_MAX:
2354 		*retval = LINK_MAX;
2355 		break;
2356 	case _PC_NAME_MAX:
2357 		*retval = TMPFS_MAXNAMLEN;
2358 		break;
2359 	case _PC_PATH_MAX:
2360 		*retval = PATH_MAX;
2361 		break;
2362 	case _PC_PIPE_BUF:
2363 		*retval = PIPE_BUF;
2364 		break;
2365 	case _PC_CHOWN_RESTRICTED:
2366 		*retval = 1;
2367 		break;
2368 	case _PC_NO_TRUNC:
2369 		*retval = 1;
2370 		break;
2371 	case _PC_SYNC_IO:
2372 		*retval = 1;
2373 		break;
2374 	case _PC_FILESIZEBITS:
2375 		*retval = sizeof(off_t) * CHAR_BIT;
2376 		break;
2377 	default:
2378 		error = EINVAL;
2379 	}
2380 	return error;
2381 }
2382 
2383 int
2384 tmpfs_advlock(void *v)
2385 {
2386 	struct vop_advlock_args /* {
2387 		struct vnode	*a_vp;
2388 		void *		a_id;
2389 		int		a_op;
2390 		struct flock	*a_fl;
2391 		int		a_flags;
2392 	} */ *ap = v;
2393 	vnode_t *vp = ap->a_vp;
2394 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
2395 
2396 	return lf_advlock(v, &node->tn_lockf, node->tn_size);
2397 }
2398 
2399 int
2400 tmpfs_getpages(void *v)
2401 {
2402 	struct vop_getpages_args /* {
2403 		struct vnode *a_vp;
2404 		voff_t a_offset;
2405 		struct vm_page **a_m;
2406 		int *a_count;
2407 		int a_centeridx;
2408 		vm_prot_t a_access_type;
2409 		int a_advice;
2410 		int a_flags;
2411 	} */ * const ap = v;
2412 	vnode_t *vp = ap->a_vp;
2413 	const voff_t offset = ap->a_offset;
2414 	struct vm_page **pgs = ap->a_m;
2415 	const int centeridx = ap->a_centeridx;
2416 	const vm_prot_t access_type = ap->a_access_type;
2417 	const int advice = ap->a_advice;
2418 	const int flags = ap->a_flags;
2419 	int error, npages = *ap->a_count;
2420 	tmpfs_node_t *node;
2421 	struct uvm_object *uobj;
2422 
2423 	KASSERT(vp->v_type == VREG);
2424 	KASSERT(mutex_owned(vp->v_interlock));
2425 
2426 	node = VP_TO_TMPFS_NODE(vp);
2427 	uobj = node->tn_spec.tn_reg.tn_aobj;
2428 
2429 	/*
2430 	 * Currently, PGO_PASTEOF is not supported.
2431 	 */
2432 	if (vp->v_size <= offset + (centeridx << PAGE_SHIFT)) {
2433 		if ((flags & PGO_LOCKED) == 0)
2434 			mutex_exit(vp->v_interlock);
2435 		return EINVAL;
2436 	}
2437 
2438 	if (vp->v_size < offset + (npages << PAGE_SHIFT)) {
2439 		npages = (round_page(vp->v_size) - offset) >> PAGE_SHIFT;
2440 	}
2441 
2442 	if ((flags & PGO_LOCKED) != 0)
2443 		return EBUSY;
2444 
2445 	if ((flags & PGO_NOTIMESTAMP) == 0) {
2446 		if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
2447 			node->tn_status |= TMPFS_NODE_ACCESSED;
2448 
2449 		if ((access_type & VM_PROT_WRITE) != 0) {
2450 			node->tn_status |= TMPFS_NODE_MODIFIED;
2451 			if (vp->v_mount->mnt_flag & MNT_RELATIME)
2452 				node->tn_status |= TMPFS_NODE_ACCESSED;
2453 		}
2454 	}
2455 
2456 	/*
2457 	 * Invoke the pager.
2458 	 *
2459 	 * Clean the array of pages before.  XXX: PR/32166
2460 	 * Note that vnode lock is shared with underlying UVM object.
2461 	 */
2462 	if (pgs) {
2463 		memset(pgs, 0, sizeof(struct vm_pages *) * npages);
2464 	}
2465 	KASSERT(vp->v_interlock == uobj->vmobjlock);
2466 
2467 	error = (*uobj->pgops->pgo_get)(uobj, offset, pgs, &npages, centeridx,
2468 	    access_type, advice, flags | PGO_ALLPAGES);
2469 
2470 #if defined(DEBUG)
2471 	if (!error && pgs) {
2472 		for (int i = 0; i < npages; i++) {
2473 			KASSERT(pgs[i] != NULL);
2474 		}
2475 	}
2476 #endif
2477 	return error;
2478 }
2479 
2480 int
2481 tmpfs_putpages(void *v)
2482 {
2483 	struct vop_putpages_args /* {
2484 		struct vnode *a_vp;
2485 		voff_t a_offlo;
2486 		voff_t a_offhi;
2487 		int a_flags;
2488 	} */ * const ap = v;
2489 	vnode_t *vp = ap->a_vp;
2490 	const voff_t offlo = ap->a_offlo;
2491 	const voff_t offhi = ap->a_offhi;
2492 	const int flags = ap->a_flags;
2493 	tmpfs_node_t *node;
2494 	struct uvm_object *uobj;
2495 	int error;
2496 
2497 	KASSERT(mutex_owned(vp->v_interlock));
2498 
2499 	if (vp->v_type != VREG) {
2500 		mutex_exit(vp->v_interlock);
2501 		return 0;
2502 	}
2503 
2504 	node = VP_TO_TMPFS_NODE(vp);
2505 	uobj = node->tn_spec.tn_reg.tn_aobj;
2506 
2507 	KASSERT(vp->v_interlock == uobj->vmobjlock);
2508 	error = (*uobj->pgops->pgo_put)(uobj, offlo, offhi, flags);
2509 
2510 	/* XXX mtime */
2511 
2512 	return error;
2513 }
2514 
2515 int
2516 tmpfs_whiteout(void *v)
2517 {
2518 	struct vop_whiteout_args /* {
2519 		struct vnode		*a_dvp;
2520 		struct componentname	*a_cnp;
2521 		int			a_flags;
2522 	} */ *ap = v;
2523 	vnode_t *dvp = ap->a_dvp;
2524 	struct componentname *cnp = ap->a_cnp;
2525 	const int flags = ap->a_flags;
2526 	tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
2527 	tmpfs_dirent_t *de;
2528 	int error;
2529 
2530 	switch (flags) {
2531 	case LOOKUP:
2532 		break;
2533 	case CREATE:
2534 		error = tmpfs_alloc_dirent(tmp, cnp->cn_nameptr,
2535 		    cnp->cn_namelen, &de);
2536 		if (error)
2537 			return error;
2538 		tmpfs_dir_attach(dvp, de, TMPFS_NODE_WHITEOUT);
2539 		break;
2540 	case DELETE:
2541 		cnp->cn_flags &= ~DOWHITEOUT; /* when in doubt, cargo cult */
2542 		de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), cnp);
2543 		if (de == NULL)
2544 			return ENOENT;
2545 		tmpfs_dir_detach(dvp, de);
2546 		tmpfs_free_dirent(tmp, de);
2547 		break;
2548 	}
2549 	return 0;
2550 }
2551 
2552 int
2553 tmpfs_print(void *v)
2554 {
2555 	struct vop_print_args /* {
2556 		struct vnode	*a_vp;
2557 	} */ *ap = v;
2558 	vnode_t *vp = ap->a_vp;
2559 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
2560 
2561 	printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%x, links %d\n"
2562 	    "\tmode 0%o, owner %d, group %d, size %" PRIdMAX ", status 0x%x",
2563 	    node, node->tn_flags, node->tn_links, node->tn_mode, node->tn_uid,
2564 	    node->tn_gid, (uintmax_t)node->tn_size, node->tn_status);
2565 	if (vp->v_type == VFIFO) {
2566 		VOCALL(fifo_vnodeop_p, VOFFSET(vop_print), v);
2567 	}
2568 	printf("\n");
2569 	return 0;
2570 }
2571