xref: /openbsd-src/sys/tmpfs/tmpfs_vnops.c (revision 505ee9ea3b177e2387d907a91ca7da069f3f14d8)
1 /*	$OpenBSD: tmpfs_vnops.c,v 1.43 2020/07/15 07:27:07 gerhard Exp $	*/
2 /*	$NetBSD: tmpfs_vnops.c,v 1.100 2012/11/05 17:27:39 dholland Exp $	*/
3 
4 /*
5  * Copyright (c) 2005, 2006, 2007, 2012 The NetBSD Foundation, Inc.
6  * Copyright (c) 2013 Pedro Martelletto
7  * All rights reserved.
8  *
9  * This code is derived from software contributed to The NetBSD Foundation
10  * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
11  * 2005 program, and by Taylor R Campbell.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  * POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 /*
36  * tmpfs vnode interface.
37  */
38 
39 #include <sys/param.h>
40 #include <sys/fcntl.h>
41 #include <sys/event.h>
42 #include <sys/namei.h>
43 #include <sys/stat.h>
44 #include <sys/uio.h>
45 #include <sys/unistd.h>
46 #include <sys/vnode.h>
47 #include <sys/lockf.h>
48 #include <sys/poll.h>
49 #include <sys/file.h>
50 
51 #include <miscfs/fifofs/fifo.h>
52 #include <tmpfs/tmpfs_vnops.h>
53 #include <tmpfs/tmpfs.h>
54 
55 int tmpfs_kqfilter(void *v);
56 
57 /*
58  * vnode operations vector used for files stored in a tmpfs file system.
59  */
60 const struct vops tmpfs_vops = {
61 	.vop_lookup	= tmpfs_lookup,
62 	.vop_create	= tmpfs_create,
63 	.vop_mknod	= tmpfs_mknod,
64 	.vop_open	= tmpfs_open,
65 	.vop_close	= tmpfs_close,
66 	.vop_access	= tmpfs_access,
67 	.vop_getattr	= tmpfs_getattr,
68 	.vop_setattr	= tmpfs_setattr,
69 	.vop_read	= tmpfs_read,
70 	.vop_write	= tmpfs_write,
71 	.vop_ioctl	= tmpfs_ioctl,
72 	.vop_poll	= tmpfs_poll,
73 	.vop_kqfilter	= tmpfs_kqfilter,
74 	.vop_revoke	= vop_generic_revoke,
75 	.vop_fsync	= tmpfs_fsync,
76 	.vop_remove	= tmpfs_remove,
77 	.vop_link	= tmpfs_link,
78 	.vop_rename	= tmpfs_rename,
79 	.vop_mkdir	= tmpfs_mkdir,
80 	.vop_rmdir	= tmpfs_rmdir,
81 	.vop_symlink	= tmpfs_symlink,
82 	.vop_readdir	= tmpfs_readdir,
83 	.vop_readlink	= tmpfs_readlink,
84 	.vop_abortop	= vop_generic_abortop,
85 	.vop_inactive	= tmpfs_inactive,
86 	.vop_reclaim	= tmpfs_reclaim,
87 	.vop_lock	= tmpfs_lock,
88 	.vop_unlock	= tmpfs_unlock,
89 	.vop_bmap	= vop_generic_bmap,
90 	.vop_strategy	= tmpfs_strategy,
91 	.vop_print	= tmpfs_print,
92 	.vop_islocked	= tmpfs_islocked,
93 	.vop_pathconf	= tmpfs_pathconf,
94 	.vop_advlock	= tmpfs_advlock,
95 	.vop_bwrite	= tmpfs_bwrite,
96 };
97 
98 /*
99  * tmpfs_lookup: path name traversal routine.
100  *
101  * Arguments: dvp (directory being searched), vpp (result),
102  * cnp (component name - path).
103  *
104  * => Caller holds a reference and lock on dvp.
105  * => We return looked-up vnode (vpp) locked, with a reference held.
106  */
107 int
108 tmpfs_lookup(void *v)
109 {
110 	struct vop_lookup_args /* {
111 		struct vnode *a_dvp;
112 		struct vnode **a_vpp;
113 		struct componentname *a_cnp;
114 	} */ *ap = v;
115 	struct vnode *dvp = ap->a_dvp, **vpp = ap->a_vpp;
116 	struct componentname *cnp = ap->a_cnp;
117 	struct ucred *cred = cnp->cn_cred;
118 	const int lastcn = (cnp->cn_flags & ISLASTCN) != 0;
119 	const int lockparent = (cnp->cn_flags & LOCKPARENT) != 0;
120 	tmpfs_node_t *dnode, *tnode;
121 	tmpfs_dirent_t *de;
122 	int cachefound;
123 	int error;
124 
125 	KASSERT(VOP_ISLOCKED(dvp));
126 
127 	dnode = VP_TO_TMPFS_DIR(dvp);
128 	cnp->cn_flags &= ~PDIRUNLOCK;
129 	*vpp = NULL;
130 
131 	/* Check accessibility of directory. */
132 	error = VOP_ACCESS(dvp, VEXEC, cred, curproc);
133 	if (error) {
134 		goto out;
135 	}
136 
137 	/*
138 	 * If requesting the last path component on a read-only file system
139 	 * with a write operation, deny it.
140 	 */
141 	if (lastcn && (dvp->v_mount->mnt_flag & MNT_RDONLY) != 0 &&
142 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
143 		error = EROFS;
144 		goto out;
145 	}
146 
147 	/*
148 	 * Avoid doing a linear scan of the directory if the requested
149 	 * directory/name couple is already in the cache.
150 	 */
151 	cachefound = cache_lookup(dvp, vpp, cnp);
152 	if (cachefound == ENOENT /* && *vpp == NULLVP */)
153 		return ENOENT; /* Negative cache hit. */
154 	else if (cachefound != -1)
155 		return 0; /* Found in cache. */
156 
157 	if (cnp->cn_flags & ISDOTDOT) {
158 		tmpfs_node_t *pnode;
159 
160 		/*
161 		 * Lookup of ".." case.
162 		 */
163 		if (lastcn) {
164 			if (cnp->cn_nameiop == RENAME) {
165 				error = EINVAL;
166 				goto out;
167 			}
168 			if (cnp->cn_nameiop == DELETE) {
169 				/* Keep the name for tmpfs_rmdir(). */
170 				cnp->cn_flags |= SAVENAME;
171 			}
172 		}
173 		KASSERT(dnode->tn_type == VDIR);
174 		pnode = dnode->tn_spec.tn_dir.tn_parent;
175 		if (pnode == NULL) {
176 			error = ENOENT;
177 			goto out;
178 		}
179 
180 		/*
181 		 * Lock the parent tn_nlock before releasing the vnode lock,
182 		 * and thus prevents parent from disappearing.
183 		 */
184 		rw_enter_write(&pnode->tn_nlock);
185 		VOP_UNLOCK(dvp);
186 
187 		/*
188 		 * Get a vnode of the '..' entry and re-acquire the lock.
189 		 * Release the tn_nlock.
190 		 */
191 		error = tmpfs_vnode_get(dvp->v_mount, pnode, vpp);
192 		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
193 		goto out;
194 
195 	} else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
196 		/*
197 		 * Lookup of "." case.
198 		 */
199 		if (lastcn && cnp->cn_nameiop == RENAME) {
200 			error = EISDIR;
201 			goto out;
202 		}
203 		vref(dvp);
204 		*vpp = dvp;
205 		error = 0;
206 		goto done;
207 	}
208 
209 	/*
210 	 * Other lookup cases: perform directory scan.
211 	 */
212 	de = tmpfs_dir_lookup(dnode, cnp);
213 	if (de == NULL) {
214 		/*
215 		 * The entry was not found in the directory.  This is valid
216 		 * if we are creating or renaming an entry and are working
217 		 * on the last component of the path name.
218 		 */
219 		if (lastcn && (cnp->cn_nameiop == CREATE ||
220 		    cnp->cn_nameiop == RENAME)) {
221 			error = VOP_ACCESS(dvp, VWRITE, cred, curproc);
222 			if (error) {
223 				goto out;
224 			}
225 			/*
226 			 * We are creating an entry in the file system, so
227 			 * save its name for further use by tmpfs_create().
228 			 */
229 			cnp->cn_flags |= SAVENAME;
230 			error = EJUSTRETURN;
231 		} else {
232 			error = ENOENT;
233 		}
234 		goto done;
235 	}
236 
237 	tnode = de->td_node;
238 
239 	/*
240 	 * If it is not the last path component and found a non-directory
241 	 * or non-link entry (which may itself be pointing to a directory),
242 	 * raise an error.
243 	 */
244 	if (!lastcn && tnode->tn_type != VDIR && tnode->tn_type != VLNK) {
245 		error = ENOTDIR;
246 		goto out;
247 	}
248 
249 	/* Check the permissions. */
250 	if (lastcn && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
251 		error = VOP_ACCESS(dvp, VWRITE, cred, curproc);
252 		if (error)
253 			goto out;
254 
255 		/*
256 		 * If not root and directory is sticky, check for permission
257 		 * on directory or on file. This implements append-only
258 		 * directories.
259 		 */
260 		if ((dnode->tn_mode & S_ISTXT) != 0) {
261 			if (cred->cr_uid != 0 &&
262 			    cred->cr_uid != dnode->tn_uid &&
263 			    cred->cr_uid != tnode->tn_uid) {
264 				error = EPERM;
265 				goto out;
266 			}
267 		}
268 
269 		/*
270 		 * XXX pedro: We might need cn_nameptr later in tmpfs_remove()
271 		 * or tmpfs_rmdir() for a tmpfs_dir_lookup(). We should really
272 		 * get rid of SAVENAME at some point.
273 		 */
274 		if (cnp->cn_nameiop == DELETE)
275 			cnp->cn_flags |= SAVENAME;
276 	}
277 
278 	/* Get a vnode for the matching entry. */
279 	rw_enter_write(&tnode->tn_nlock);
280 	error = tmpfs_vnode_get(dvp->v_mount, tnode, vpp);
281 done:
282 	/*
283 	 * Cache the result, unless request was for creation (as it does
284 	 * not improve the performance).
285 	 */
286 	if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE) {
287 		cache_enter(dvp, *vpp, cnp);
288 	}
289 out:
290 	/*
291 	 * If (1) we succeded, (2) found a distinct vnode to return and (3) were
292 	 * either explicitly told to keep the parent locked or are in the
293 	 * middle of a lookup, unlock the parent vnode.
294 	 */
295 	if ((error == 0 || error == EJUSTRETURN) && /* (1) */
296 	    *vpp != dvp &&			    /* (2) */
297 	    (!lockparent || !lastcn)) {		    /* (3) */
298 		VOP_UNLOCK(dvp);
299 		cnp->cn_flags |= PDIRUNLOCK;
300 	} else
301 		KASSERT(VOP_ISLOCKED(dvp));
302 
303 	KASSERT((*vpp && VOP_ISLOCKED(*vpp)) || error);
304 
305 	return error;
306 }
307 
308 int
309 tmpfs_create(void *v)
310 {
311 	struct vop_create_args /* {
312 		struct vnode		*a_dvp;
313 		struct vnode		**a_vpp;
314 		struct componentname	*a_cnp;
315 		struct vattr		*a_vap;
316 	} */ *ap = v;
317 	struct vnode *dvp = ap->a_dvp, **vpp = ap->a_vpp;
318 	struct componentname *cnp = ap->a_cnp;
319 	struct vattr *vap = ap->a_vap;
320 
321 	KASSERT(VOP_ISLOCKED(dvp));
322 	KASSERT(cnp->cn_flags & HASBUF);
323 	KASSERT(vap->va_type == VREG || vap->va_type == VSOCK);
324 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
325 }
326 
327 int
328 tmpfs_mknod(void *v)
329 {
330 	struct vop_mknod_args /* {
331 		struct vnode		*a_dvp;
332 		struct vnode		**a_vpp;
333 		struct componentname	*a_cnp;
334 		struct vattr		*a_vap;
335 	} */ *ap = v;
336 	struct vnode *dvp = ap->a_dvp, **vpp = ap->a_vpp;
337 	struct componentname *cnp = ap->a_cnp;
338 	struct vattr *vap = ap->a_vap;
339 	enum vtype vt = vap->va_type;
340 	int error;
341 
342 	if (vt != VBLK && vt != VCHR && vt != VFIFO)
343 		return EINVAL;
344 
345 	error = tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
346 
347 	if (error == 0)
348 		vput(*vpp);
349 
350 	return error;
351 }
352 
353 int
354 tmpfs_open(void *v)
355 {
356 	struct vop_open_args /* {
357 		struct vnode	*a_vp;
358 		int		a_mode;
359 		kauth_cred_t	a_cred;
360 	} */ *ap = v;
361 	struct vnode *vp = ap->a_vp;
362 	mode_t mode = ap->a_mode;
363 	tmpfs_node_t *node;
364 
365 	KASSERT(VOP_ISLOCKED(vp));
366 
367 	node = VP_TO_TMPFS_NODE(vp);
368 	if (node->tn_links < 1) {
369 		/*
370 		 * The file is still active, but all its names have been
371 		 * removed (e.g. by a "rmdir $(pwd)").  It cannot be opened
372 		 * any more, as it is about to be destroyed.
373 		 */
374 		return ENOENT;
375 	}
376 
377 	/* If the file is marked append-only, deny write requests. */
378 	if ((node->tn_flags & APPEND) != 0 &&
379 	    (mode & (FWRITE | O_APPEND)) == FWRITE) {
380 		return EPERM;
381 	}
382 	return 0;
383 }
384 
/*
 * tmpfs_close: nothing to do on close; always returns 0.
 * DIAGNOSTIC kernels additionally assert that the vnode is locked.
 */
int
tmpfs_close(void *v)
{
#ifdef DIAGNOSTIC
	struct vop_close_args /* {
		struct vnode	*a_vp;
		int		a_fflag;
		kauth_cred_t	a_cred;
	} */ *ap = v;

	KASSERT(VOP_ISLOCKED(ap->a_vp));
#endif
	return 0;
}
400 
401 int
402 tmpfs_access(void *v)
403 {
404 	struct vop_access_args /* {
405 		struct vnode	*a_vp;
406 		int		a_mode;
407 		kauth_cred_t	a_cred;
408 	} */ *ap = v;
409 	struct vnode *vp = ap->a_vp;
410 	mode_t mode = ap->a_mode;
411 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
412 	const int writing = (mode & VWRITE) != 0;
413 
414 	KASSERT(VOP_ISLOCKED(vp));
415 
416 	/* Possible? */
417 	switch (vp->v_type) {
418 	case VDIR:
419 	case VLNK:
420 	case VREG:
421 		if (writing && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) {
422 			return EROFS;
423 		}
424 		break;
425 	case VBLK:
426 	case VCHR:
427 	case VSOCK:
428 	case VFIFO:
429 		break;
430 	default:
431 		return EINVAL;
432 	}
433 	if (writing && (node->tn_flags & IMMUTABLE) != 0) {
434 		return EPERM;
435 	}
436 
437 	return (vaccess(vp->v_type, node->tn_mode, node->tn_uid, node->tn_gid,
438 	    mode, ap->a_cred));
439 }
440 
441 int
442 tmpfs_getattr(void *v)
443 {
444 	struct vop_getattr_args /* {
445 		struct vnode	*a_vp;
446 		struct vattr	*a_vap;
447 		kauth_cred_t	a_cred;
448 	} */ *ap = v;
449 	struct vnode *vp = ap->a_vp;
450 	struct vattr *vap = ap->a_vap;
451 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
452 
453 	vattr_null(vap);
454 
455 	vap->va_type = vp->v_type;
456 	vap->va_mode = node->tn_mode;
457 	vap->va_nlink = node->tn_links;
458 	vap->va_uid = node->tn_uid;
459 	vap->va_gid = node->tn_gid;
460 	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
461 	vap->va_fileid = node->tn_id;
462 	vap->va_size = node->tn_size;
463 	vap->va_blocksize = PAGE_SIZE;
464 	vap->va_atime = node->tn_atime;
465 	vap->va_mtime = node->tn_mtime;
466 	vap->va_ctime = node->tn_ctime;
467 	/* vap->va_birthtime = node->tn_birthtime; */
468 	vap->va_gen = TMPFS_NODE_GEN(node);
469 	vap->va_flags = node->tn_flags;
470 	vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
471 	    node->tn_spec.tn_dev.tn_rdev : VNOVAL;
472 	vap->va_bytes = round_page(node->tn_size);
473 	vap->va_filerev = VNOVAL;
474 	vap->va_vaflags = 0;
475 	vap->va_spare = VNOVAL; /* XXX */
476 
477 	return 0;
478 }
479 
480 #define GOODTIME(tv)	((tv)->tv_nsec != VNOVAL)
481 /* XXX Should this operation be atomic?  I think it should, but code in
482  * XXX other places (e.g., ufs) doesn't seem to be... */
483 int
484 tmpfs_setattr(void *v)
485 {
486 	struct vop_setattr_args /* {
487 		struct vnode	*a_vp;
488 		struct vattr	*a_vap;
489 		kauth_cred_t	a_cred;
490 	} */ *ap = v;
491 	struct vnode *vp = ap->a_vp;
492 	struct vattr *vap = ap->a_vap;
493 	struct ucred *cred = ap->a_cred;
494 	struct proc *p = curproc;
495 	int error = 0;
496 
497 	KASSERT(VOP_ISLOCKED(vp));
498 
499 	/* Abort if any unsettable attribute is given. */
500 	if (vap->va_type != VNON || vap->va_nlink != VNOVAL ||
501 	    vap->va_fsid != VNOVAL || vap->va_fileid != VNOVAL ||
502 	    vap->va_blocksize != VNOVAL || GOODTIME(&vap->va_ctime) ||
503 	    vap->va_gen != VNOVAL || vap->va_rdev != VNOVAL ||
504 	    vap->va_bytes != VNOVAL) {
505 		return EINVAL;
506 	}
507 	if (error == 0 && (vap->va_flags != VNOVAL))
508 		error = tmpfs_chflags(vp, vap->va_flags, cred, p);
509 
510 	if (error == 0 && (vap->va_size != VNOVAL))
511 		error = tmpfs_chsize(vp, vap->va_size, cred, p);
512 
513 	if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL))
514 		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred, p);
515 
516 	if (error == 0 && (vap->va_mode != VNOVAL))
517 		error = tmpfs_chmod(vp, vap->va_mode, cred, p);
518 
519 	if (error == 0 && ((vap->va_vaflags & VA_UTIMES_CHANGE)
520 	    || GOODTIME(&vap->va_atime)
521 	    || GOODTIME(&vap->va_mtime)))
522 		error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime,
523 		    vap->va_vaflags, cred, p);
524 
525 	return error;
526 }
527 
528 int
529 tmpfs_read(void *v)
530 {
531 	struct vop_read_args /* {
532 		struct vnode *a_vp;
533 		struct uio *a_uio;
534 		int a_ioflag;
535 		struct ucred *a_cred;
536 	} */ *ap = v;
537 	struct vnode *vp = ap->a_vp;
538 	struct uio *uio = ap->a_uio;
539 	/* const int ioflag = ap->a_ioflag; */
540 	tmpfs_node_t *node;
541 	int error;
542 
543 	KASSERT(VOP_ISLOCKED(vp));
544 
545 	if (vp->v_type != VREG) {
546 		return EISDIR;
547 	}
548 	if (uio->uio_offset < 0) {
549 		return EINVAL;
550 	}
551 	if (uio->uio_resid == 0)
552 		return 0;
553 
554 	node = VP_TO_TMPFS_NODE(vp);
555 	error = 0;
556 
557 	while (error == 0 && uio->uio_resid > 0) {
558 		vsize_t len;
559 
560 		if (node->tn_size <= uio->uio_offset) {
561 			break;
562 		}
563 		len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
564 		if (len == 0) {
565 			break;
566 		}
567 		error = tmpfs_uiomove(node, uio, len);
568 	}
569 
570 	if (!(vp->v_mount->mnt_flag & MNT_NOATIME))
571 		tmpfs_update(node, TMPFS_NODE_ACCESSED);
572 
573 	return error;
574 }
575 
576 int
577 tmpfs_write(void *v)
578 {
579 	struct vop_write_args /* {
580 		struct vnode	*a_vp;
581 		struct uio	*a_uio;
582 		int		a_ioflag;
583 		kauth_cred_t	a_cred;
584 	} */ *ap = v;
585 	struct vnode *vp = ap->a_vp;
586 	struct uio *uio = ap->a_uio;
587 	const int ioflag = ap->a_ioflag;
588 	tmpfs_node_t *node;
589 	off_t oldsize;
590 	ssize_t overrun;
591 	int extended;
592 	int error;
593 
594 	KASSERT(VOP_ISLOCKED(vp));
595 
596 	node = VP_TO_TMPFS_NODE(vp);
597 	oldsize = node->tn_size;
598 
599 	if (vp->v_type != VREG)
600 		return (EINVAL);
601 
602 	if (uio->uio_resid == 0)
603 		return (0);
604 
605 	if (ioflag & IO_APPEND) {
606 		uio->uio_offset = node->tn_size;
607 	}
608 
609 	if (uio->uio_offset < 0 ||
610 	    (u_int64_t)uio->uio_offset + uio->uio_resid > LLONG_MAX)
611 		return (EFBIG);
612 
613 	/* do the filesize rlimit check */
614 	if ((error = vn_fsizechk(vp, uio, ioflag, &overrun)))
615 		return (error);
616 
617 	extended = uio->uio_offset + uio->uio_resid > node->tn_size;
618 	if (extended) {
619 		error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid);
620 		if (error)
621 			goto out;
622 	}
623 
624 	error = 0;
625 	while (error == 0 && uio->uio_resid > 0) {
626 		vsize_t len;
627 
628 		len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
629 		if (len == 0) {
630 			break;
631 		}
632 		error = tmpfs_uiomove(node, uio, len);
633 	}
634 	if (error) {
635 		(void)tmpfs_reg_resize(vp, oldsize);
636 	}
637 
638 	tmpfs_update(node, TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED);
639 	if (extended)
640 		VN_KNOTE(vp, NOTE_WRITE | NOTE_EXTEND);
641 	else
642 		VN_KNOTE(vp, NOTE_WRITE);
643 out:
644 	if (error) {
645 		KASSERT(oldsize == node->tn_size);
646 	} else {
647 		KASSERT(uio->uio_resid == 0);
648 
649 		/* correct the result for writes clamped by vn_fsizechk() */
650 		uio->uio_resid += overrun;
651 
652 	}
653 	return error;
654 }
655 
/*
 * tmpfs_fsync: nothing to do here; always returns 0.
 * DIAGNOSTIC kernels additionally assert that the vnode is locked.
 */
int
tmpfs_fsync(void *v)
{
#ifdef DIAGNOSTIC
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_flags;
		off_t a_offlo;
		off_t a_offhi;
		struct lwp *a_l;
	} */ *ap = v;

	KASSERT(VOP_ISLOCKED(ap->a_vp));
#endif
	return 0;
}
675 
676 /*
677  * tmpfs_remove: unlink a file.
678  *
679  * => Both directory (dvp) and file (vp) are locked.
680  * => We unlock and drop the reference on both.
681  */
682 int
683 tmpfs_remove(void *v)
684 {
685 	struct vop_remove_args /* {
686 		struct vnode *a_dvp;
687 		struct vnode *a_vp;
688 		struct componentname *a_cnp;
689 	} */ *ap = v;
690 	struct vnode *dvp = ap->a_dvp, *vp = ap->a_vp;
691 	struct componentname *cnp = ap->a_cnp;
692 	tmpfs_node_t *dnode, *node;
693 	tmpfs_dirent_t *de;
694 	int error;
695 
696 	KASSERT(VOP_ISLOCKED(dvp));
697 	KASSERT(VOP_ISLOCKED(vp));
698 	KASSERT(cnp->cn_flags & HASBUF);
699 
700 	if (vp->v_type == VDIR) {
701 		error = EPERM;
702 		goto out;
703 	}
704 
705 	dnode = VP_TO_TMPFS_NODE(dvp);
706 	node = VP_TO_TMPFS_NODE(vp);
707 
708 	/* Files marked as immutable or append-only cannot be deleted. */
709 	if (node->tn_flags & (IMMUTABLE | APPEND)) {
710 		error = EPERM;
711 		goto out;
712 	}
713 
714 	/*
715 	 * Likewise, files residing on directories marked as append-only cannot
716 	 * be deleted.
717 	 */
718 	if (dnode->tn_flags & APPEND) {
719 		error = EPERM;
720 		goto out;
721 	}
722 
723 	/* Lookup the directory entry (check the cached hint first). */
724 	de = tmpfs_dir_cached(node);
725 	if (de == NULL) {
726 		de = tmpfs_dir_lookup(dnode, cnp);
727 	}
728 
729 	KASSERT(de && de->td_node == node);
730 
731 	/*
732 	 * Remove the entry from the directory (drops the link count) and
733 	 * destroy it.
734 	 * Note: the inode referred by it will not be destroyed
735 	 * until the vnode is reclaimed/recycled.
736 	 */
737 	tmpfs_dir_detach(dnode, de);
738 	tmpfs_free_dirent(VFS_TO_TMPFS(vp->v_mount), de);
739 	if (node->tn_links > 0)  {
740 		/* We removed a hard link. */
741 		tmpfs_update(node, TMPFS_NODE_CHANGED);
742 	}
743 	error = 0;
744 out:
745 	pool_put(&namei_pool, cnp->cn_pnbuf);
746 	/* Drop the references and unlock the vnodes. */
747 	vput(vp);
748 	if (dvp == vp) {
749 		vrele(dvp);
750 	} else {
751 		vput(dvp);
752 	}
753 	return error;
754 }
755 
756 /*
757  * tmpfs_link: create a hard link.
758  */
759 int
760 tmpfs_link(void *v)
761 {
762 	struct vop_link_args /* {
763 		struct vnode *a_dvp;
764 		struct vnode *a_vp;
765 		struct componentname *a_cnp;
766 	} */ *ap = v;
767 	struct vnode *dvp = ap->a_dvp;
768 	struct vnode *vp = ap->a_vp;
769 	struct componentname *cnp = ap->a_cnp;
770 	tmpfs_node_t *dnode, *node;
771 	tmpfs_dirent_t *de;
772 	int error;
773 
774 	KASSERT(VOP_ISLOCKED(dvp));
775 
776 	if (vp->v_type == VDIR) {
777 		VOP_ABORTOP(dvp, cnp);
778 		vput(dvp);
779 		return EPERM;
780 	}
781 
782 	KASSERT(dvp != vp);
783 
784 	if (dvp->v_mount != vp->v_mount) {
785 		VOP_ABORTOP(dvp, cnp);
786 		vput(dvp);
787 		return EXDEV;
788 	}
789 
790 	dnode = VP_TO_TMPFS_DIR(dvp);
791 	node = VP_TO_TMPFS_NODE(vp);
792 
793 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
794 
795 	/* Check for maximum number of links limit. */
796 	if (node->tn_links == LINK_MAX) {
797 		error = EMLINK;
798 		goto out;
799 	}
800 	KASSERT(node->tn_links < LINK_MAX);
801 
802 	/* We cannot create links of files marked immutable or append-only. */
803 	if (node->tn_flags & (IMMUTABLE | APPEND)) {
804 		error = EPERM;
805 		goto out;
806 	}
807 
808 	if (TMPFS_DIRSEQ_FULL(dnode)) {
809 		error = ENOSPC;
810 		goto out;
811 	}
812 
813 	/* Allocate a new directory entry to represent the inode. */
814 	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount),
815 	    cnp->cn_nameptr, cnp->cn_namelen, &de);
816 	if (error) {
817 		goto out;
818 	}
819 
820 	/*
821 	 * Insert the entry into the directory.
822 	 * It will increase the inode link count.
823 	 */
824 	tmpfs_dir_attach(dnode, de, node);
825 
826 	/* Update the timestamps and trigger the event. */
827 	if (node->tn_vnode) {
828 		VN_KNOTE(node->tn_vnode, NOTE_LINK);
829 	}
830 	tmpfs_update(node, TMPFS_NODE_CHANGED);
831 	error = 0;
832 out:
833 	pool_put(&namei_pool, cnp->cn_pnbuf);
834 	VOP_UNLOCK(vp);
835 	vput(dvp);
836 	return error;
837 }
838 
839 int
840 tmpfs_mkdir(void *v)
841 {
842 	struct vop_mkdir_args /* {
843 		struct vnode		*a_dvp;
844 		struct vnode		**a_vpp;
845 		struct componentname	*a_cnp;
846 		struct vattr		*a_vap;
847 	} */ *ap = v;
848 	struct vnode *dvp = ap->a_dvp;
849 	struct vnode **vpp = ap->a_vpp;
850 	struct componentname *cnp = ap->a_cnp;
851 	struct vattr *vap = ap->a_vap;
852 	int error;
853 
854 	KASSERT(vap->va_type == VDIR);
855 	error = tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
856 	vput(dvp);
857 	return error;
858 }
859 
860 int
861 tmpfs_rmdir(void *v)
862 {
863 	struct vop_rmdir_args /* {
864 		struct vnode		*a_dvp;
865 		struct vnode		*a_vp;
866 		struct componentname	*a_cnp;
867 	} */ *ap = v;
868 	struct vnode *dvp = ap->a_dvp;
869 	struct vnode *vp = ap->a_vp;
870 	struct componentname *cnp = ap->a_cnp;
871 	tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
872 	tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
873 	tmpfs_node_t *node = VP_TO_TMPFS_DIR(vp);
874 	tmpfs_dirent_t *de;
875 	int error = 0;
876 
877 	KASSERT(VOP_ISLOCKED(dvp));
878 	KASSERT(VOP_ISLOCKED(vp));
879 	KASSERT(cnp->cn_flags & HASBUF);
880 
881 	if (cnp->cn_namelen == 2 && cnp->cn_nameptr[0] == '.' &&
882 	    cnp->cn_nameptr[1] == '.') {
883 		error = ENOTEMPTY;
884 		goto out;
885 	}
886 
887 	KASSERT(node->tn_spec.tn_dir.tn_parent == dnode);
888 
889 	/*
890 	 * Directories with more than two entries ('.' and '..') cannot be
891 	 * removed.
892 	 */
893 	if (node->tn_size > 0) {
894 		KASSERT(error == 0);
895 		TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
896 			error = ENOTEMPTY;
897 			break;
898 		}
899 		if (error)
900 			goto out;
901 	}
902 
903 	/* Lookup the directory entry (check the cached hint first). */
904 	de = tmpfs_dir_cached(node);
905 	if (de == NULL)
906 		de = tmpfs_dir_lookup(dnode, cnp);
907 
908 	KASSERT(de && de->td_node == node);
909 
910 	/* Check flags to see if we are allowed to remove the directory. */
911 	if (dnode->tn_flags & APPEND || node->tn_flags & (IMMUTABLE | APPEND)) {
912 		error = EPERM;
913 		goto out;
914 	}
915 
916 	/* Decrement the link count for the virtual '.' entry. */
917 	node->tn_links--;
918 	tmpfs_update(node, TMPFS_NODE_STATUSALL);
919 
920 	/* Detach the directory entry from the directory. */
921 	tmpfs_dir_detach(dnode, de);
922 
923 	/* Purge the cache for parent. */
924 	cache_purge(dvp);
925 
926 	/*
927 	 * Destroy the directory entry.
928 	 * Note: the inode referred by it will not be destroyed
929 	 * until the vnode is reclaimed.
930 	 */
931 	tmpfs_free_dirent(tmp, de);
932 	KASSERT(TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir) == NULL);
933 
934 	KASSERT(node->tn_links == 0);
935 out:
936 	pool_put(&namei_pool, cnp->cn_pnbuf);
937 	/* Release the nodes. */
938 	vput(dvp);
939 	vput(vp);
940 	return error;
941 }
942 
943 int
944 tmpfs_symlink(void *v)
945 {
946 	struct vop_symlink_args /* {
947 		struct vnode		*a_dvp;
948 		struct vnode		**a_vpp;
949 		struct componentname	*a_cnp;
950 		struct vattr		*a_vap;
951 		char			*a_target;
952 	} */ *ap = v;
953 	struct vnode *dvp = ap->a_dvp;
954 	struct vnode **vpp = ap->a_vpp;
955 	struct componentname *cnp = ap->a_cnp;
956 	struct vattr *vap = ap->a_vap;
957 	char *target = ap->a_target;
958 	int error;
959 
960 	KASSERT(vap->va_type == 0);
961 	vap->va_type = VLNK;
962 
963 	error = tmpfs_alloc_file(dvp, vpp, vap, cnp, target);
964 	vput(dvp);
965 	if (error == 0)
966 		vput(*vpp);
967 
968 	return error;
969 }
970 
971 int
972 tmpfs_readdir(void *v)
973 {
974 	struct vop_readdir_args /* {
975 		struct vnode	*a_vp;
976 		struct uio	*a_uio;
977 		kauth_cred_t	a_cred;
978 		int		*a_eofflag;
979 	} */ *ap = v;
980 	struct vnode *vp = ap->a_vp;
981 	struct uio *uio = ap->a_uio;
982 	int *eofflag = ap->a_eofflag;
983 	tmpfs_node_t *node;
984 	int error;
985 
986 	KASSERT(VOP_ISLOCKED(vp));
987 
988 	/* This operation only makes sense on directory nodes. */
989 	if (vp->v_type != VDIR) {
990 		return ENOTDIR;
991 	}
992 	node = VP_TO_TMPFS_DIR(vp);
993 	/*
994 	 * Retrieve the directory entries, unless it is being destroyed.
995 	 */
996 	if (node->tn_links) {
997 		error = tmpfs_dir_getdents(node, uio);
998 	} else {
999 		error = 0;
1000 	}
1001 
1002 	if (eofflag != NULL) {
1003 		*eofflag = !error && uio->uio_offset == TMPFS_DIRSEQ_EOF;
1004 	}
1005 	return error;
1006 }
1007 
1008 int
1009 tmpfs_readlink(void *v)
1010 {
1011 	struct vop_readlink_args /* {
1012 		struct vnode	*a_vp;
1013 		struct uio	*a_uio;
1014 		kauth_cred_t	a_cred;
1015 	} */ *ap = v;
1016 	struct vnode *vp = ap->a_vp;
1017 	struct uio *uio = ap->a_uio;
1018 	tmpfs_node_t *node;
1019 	int error;
1020 
1021 	KASSERT(VOP_ISLOCKED(vp));
1022 	KASSERT(uio->uio_offset == 0);
1023 	KASSERT(vp->v_type == VLNK);
1024 
1025 	node = VP_TO_TMPFS_NODE(vp);
1026 	error = uiomove(node->tn_spec.tn_lnk.tn_link,
1027 	    MIN((size_t)node->tn_size, uio->uio_resid), uio);
1028 
1029 	if (!(vp->v_mount->mnt_flag & MNT_NOATIME))
1030 		tmpfs_update(node, TMPFS_NODE_ACCESSED);
1031 
1032 	return error;
1033 }
1034 
1035 int
1036 tmpfs_inactive(void *v)
1037 {
1038 	struct vop_inactive_args /* {
1039 		struct vnode *a_vp;
1040 		int *a_recycle;
1041 	} */ *ap = v;
1042 	struct vnode *vp = ap->a_vp;
1043 	tmpfs_node_t *node;
1044 
1045 	KASSERT(VOP_ISLOCKED(vp));
1046 
1047 	node = VP_TO_TMPFS_NODE(vp);
1048 
1049 	if (vp->v_type == VREG && tmpfs_uio_cached(node))
1050 		tmpfs_uio_uncache(node);
1051 
1052 	VOP_UNLOCK(vp);
1053 
1054 	/*
1055 	 * If we are done with the node, reclaim it so that it can be reused
1056 	 * immediately.
1057 	 */
1058 	if (node->tn_links == 0)
1059 		vrecycle(vp, curproc);
1060 
1061 	return 0;
1062 }
1063 
1064 int
1065 tmpfs_reclaim(void *v)
1066 {
1067 	struct vop_reclaim_args /* {
1068 		struct vnode *a_vp;
1069 	} */ *ap = v;
1070 	struct vnode *vp = ap->a_vp;
1071 	tmpfs_mount_t *tmp = VFS_TO_TMPFS(vp->v_mount);
1072 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1073 	int racing;
1074 
1075 	/* Disassociate inode from vnode. */
1076 	rw_enter_write(&node->tn_nlock);
1077 	node->tn_vnode = NULL;
1078 	vp->v_data = NULL;
1079 	/* Check if tmpfs_vnode_get() is racing with us. */
1080 	racing = TMPFS_NODE_RECLAIMING(node);
1081 	rw_exit_write(&node->tn_nlock);
1082 
1083 	cache_purge(vp);
1084 
1085 	/*
1086 	 * If inode is not referenced, i.e. no links, then destroy it.
1087 	 * Note: if racing - inode is about to get a new vnode, leave it.
1088 	 */
1089 	if (node->tn_links == 0 && !racing) {
1090 		tmpfs_free_node(tmp, node);
1091 	}
1092 	return 0;
1093 }
1094 
1095 int
1096 tmpfs_pathconf(void *v)
1097 {
1098 	struct vop_pathconf_args /* {
1099 		struct vnode	*a_vp;
1100 		int		a_name;
1101 		register_t	*a_retval;
1102 	} */ *ap = v;
1103 	const int name = ap->a_name;
1104 	register_t *retval = ap->a_retval;
1105 	int error = 0;
1106 
1107 	switch (name) {
1108 	case _PC_LINK_MAX:
1109 		*retval = LINK_MAX;
1110 		break;
1111 	case _PC_NAME_MAX:
1112 		*retval = TMPFS_MAXNAMLEN;
1113 		break;
1114 	case _PC_CHOWN_RESTRICTED:
1115 		*retval = 1;
1116 		break;
1117 	case _PC_NO_TRUNC:
1118 		*retval = 1;
1119 		break;
1120 	case _PC_FILESIZEBITS:
1121 		*retval = 64;
1122 		break;
1123 	case _PC_TIMESTAMP_RESOLUTION:
1124 		*retval = 1;
1125 		break;
1126 	default:
1127 		error = EINVAL;
1128 	}
1129 	return error;
1130 }
1131 
1132 int
1133 tmpfs_advlock(void *v)
1134 {
1135 	struct vop_advlock_args /* {
1136 		struct vnode	*a_vp;
1137 		void *		a_id;
1138 		int		a_op;
1139 		struct flock	*a_fl;
1140 		int		a_flags;
1141 	} */ *ap = v;
1142 	struct vnode *vp = ap->a_vp;
1143 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1144 
1145 	return lf_advlock(&node->tn_lockf, node->tn_size, ap->a_id, ap->a_op,
1146 	    ap->a_fl, ap->a_flags);
1147 }
1148 
1149 int
1150 tmpfs_print(void *v)
1151 {
1152 	struct vop_print_args /* {
1153 		struct vnode	*a_vp;
1154 	} */ *ap = v;
1155 	struct vnode *vp = ap->a_vp;
1156 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1157 
1158 	printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%x, links %d\n"
1159 	    "\tmode 0%o, owner %d, group %d, size %lld",
1160 	    node, node->tn_flags, node->tn_links, node->tn_mode, node->tn_uid,
1161 	    node->tn_gid, node->tn_size);
1162 #ifdef FIFO
1163 	if (vp->v_type == VFIFO)
1164 		fifo_printinfo(vp);
1165 #endif
1166 	printf("\n");
1167 	return 0;
1168 }
1169 
/*
 * tmpfs_bwrite: a null operation; the argument is ignored and 0 is
 * always returned.
 */
int
tmpfs_bwrite(void *v)
{
	(void)v;

	return 0;
}
1176 
1177 int
1178 tmpfs_poll(void *v)
1179 {
1180 	struct vop_poll_args *ap = v;
1181 	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
1182 }
1183 
/*
 * tmpfs_strategy: not supported; the argument is ignored and
 * EOPNOTSUPP is always returned.
 */
int
tmpfs_strategy(void *v)
{
	(void)v;

	return EOPNOTSUPP;
}
1189 
/*
 * tmpfs_ioctl: no ioctls are handled; the argument is ignored and
 * ENOTTY is always returned.
 */
int
tmpfs_ioctl(void *v)
{
	(void)v;

	return ENOTTY;
}
1195 
1196 int
1197 tmpfs_lock(void *v)
1198 {
1199 	struct vop_lock_args *ap = v;
1200 	tmpfs_node_t *tnp = VP_TO_TMPFS_NODE(ap->a_vp);
1201 
1202 	return rrw_enter(&tnp->tn_vlock, ap->a_flags & LK_RWFLAGS);
1203 }
1204 
1205 int
1206 tmpfs_unlock(void *v)
1207 {
1208 	struct vop_unlock_args *ap = v;
1209 	tmpfs_node_t *tnp = VP_TO_TMPFS_NODE(ap->a_vp);
1210 
1211 	rrw_exit(&tnp->tn_vlock);
1212 	return 0;
1213 }
1214 
1215 int
1216 tmpfs_islocked(void *v)
1217 {
1218 	struct vop_islocked_args *ap = v;
1219 	tmpfs_node_t *tnp = VP_TO_TMPFS_NODE(ap->a_vp);
1220 
1221 	return rrw_status(&tnp->tn_vlock);
1222 }
1223 
1224 /*
1225  * tmpfs_rename: rename routine, the hairiest system call, with the
1226  * insane API.
1227  *
1228  * Arguments: fdvp (from-parent vnode), fvp (from-leaf), tdvp (to-parent)
 * and tvp (to-leaf), if it exists (NULL if it does not).
1230  *
1231  * => Caller holds a reference on fdvp and fvp, they are unlocked.
1232  *    Note: fdvp and fvp can refer to the same object (i.e. when it is root).
1233  *
1234  * => Both tdvp and tvp are referenced and locked.  It is our responsibility
1235  *    to release the references and unlock them (or destroy).
1236  */
1237 
/*
 * First, some forward declarations of subroutines used by the rename
 * code that follows, so that the functions can be defined in reading
 * order (entry point first, helpers afterwards).
 */

int tmpfs_sane_rename(struct vnode *, struct componentname *,
    struct vnode *, struct componentname *, struct ucred *, int);
int tmpfs_rename_enter(struct mount *, struct tmpfs_mount *,
    struct ucred *,
    struct vnode *, struct tmpfs_node *, struct componentname *,
    struct tmpfs_dirent **, struct vnode **,
    struct vnode *, struct tmpfs_node *, struct componentname *,
    struct tmpfs_dirent **, struct vnode **);
int tmpfs_rename_enter_common(struct mount *, struct tmpfs_mount *,
    struct ucred *,
    struct vnode *, struct tmpfs_node *,
    struct componentname *, struct tmpfs_dirent **, struct vnode **,
    struct componentname *, struct tmpfs_dirent **, struct vnode **);
int tmpfs_rename_enter_separate(struct mount *, struct tmpfs_mount *,
    struct ucred *,
    struct vnode *, struct tmpfs_node *, struct componentname *,
    struct tmpfs_dirent **, struct vnode **,
    struct vnode *, struct tmpfs_node *, struct componentname *,
    struct tmpfs_dirent **, struct vnode **);
void tmpfs_rename_exit(struct tmpfs_mount *,
    struct vnode *, struct vnode *, struct vnode *, struct vnode *);
int tmpfs_rename_lock_directory(struct vnode *, struct tmpfs_node *);
int tmpfs_rename_genealogy(struct tmpfs_node *, struct tmpfs_node *,
    struct tmpfs_node **);
int tmpfs_rename_lock(struct mount *, struct ucred *, int,
    struct vnode *, struct tmpfs_node *, struct componentname *, int,
    struct tmpfs_dirent **, struct vnode **,
    struct vnode *, struct tmpfs_node *, struct componentname *, int,
    struct tmpfs_dirent **, struct vnode **);
void tmpfs_rename_attachdetach(struct tmpfs_mount *,
    struct vnode *, struct tmpfs_dirent *, struct vnode *,
    struct vnode *, struct tmpfs_dirent *, struct vnode *);
int tmpfs_do_remove(struct tmpfs_mount *, struct vnode *,
    struct tmpfs_node *, struct tmpfs_dirent *, struct vnode *, struct ucred *);
int tmpfs_rename_check_possible(struct tmpfs_node *,
    struct tmpfs_node *, struct tmpfs_node *, struct tmpfs_node *);
int tmpfs_rename_check_permitted(struct ucred *,
    struct tmpfs_node *, struct tmpfs_node *,
    struct tmpfs_node *, struct tmpfs_node *);
int tmpfs_remove_check_possible(struct tmpfs_node *,
    struct tmpfs_node *);
int tmpfs_remove_check_permitted(struct ucred *,
    struct tmpfs_node *, struct tmpfs_node *);
int tmpfs_check_sticky(struct ucred *,
    struct tmpfs_node *, struct tmpfs_node *);
void tmpfs_rename_cache_purge(struct vnode *, struct vnode *, struct vnode *,
    struct vnode *);
void tmpfs_rename_abort(void *);
1290 
1291 int
1292 tmpfs_rename(void *v)
1293 {
1294 	struct vop_rename_args  /* {
1295 		struct vnode		*a_fdvp;
1296 		struct vnode		*a_fvp;
1297 		struct componentname	*a_fcnp;
1298 		struct vnode		*a_tdvp;
1299 		struct vnode		*a_tvp;
1300 		struct componentname	*a_tcnp;
1301 	} */ *ap = v;
1302 	struct vnode *fdvp = ap->a_fdvp;
1303 	struct vnode *fvp = ap->a_fvp;
1304 	struct componentname *fcnp = ap->a_fcnp;
1305 	struct vnode *tdvp = ap->a_tdvp;
1306 	struct vnode *tvp = ap->a_tvp;
1307 	struct componentname *tcnp = ap->a_tcnp;
1308 	struct ucred *cred;
1309 	int error;
1310 
1311 	KASSERT(fdvp != NULL);
1312 	KASSERT(fvp != NULL);
1313 	KASSERT(fcnp != NULL);
1314 	KASSERT(fcnp->cn_nameptr != NULL);
1315 	KASSERT(tdvp != NULL);
1316 	KASSERT(tcnp != NULL);
1317 	KASSERT(fcnp->cn_nameptr != NULL);
1318 	/* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
1319 	/* KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
1320 	KASSERT(fdvp->v_type == VDIR);
1321 	KASSERT(tdvp->v_type == VDIR);
1322 	KASSERT(fcnp->cn_flags & HASBUF);
1323 	KASSERT(tcnp->cn_flags & HASBUF);
1324 
1325 	cred = fcnp->cn_cred;
1326 	KASSERT(tcnp->cn_cred == cred);
1327 
1328 	/*
1329 	 * Check for cross-device rename.
1330 	 */
1331 	if (fvp->v_mount != tdvp->v_mount ||
1332 	    (tvp != NULL && (fvp->v_mount != tvp->v_mount))) {
1333 	    	tmpfs_rename_abort(v);
1334 		return EXDEV;
1335 	}
1336 
1337 	/*
1338 	 * Can't check the locks on these until we know they're on
1339 	 * the same FS, as not all FS do locking the same way.
1340 	 */
1341 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
1342 	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
1343 
1344 	/*
1345 	 * Reject renaming '.' and '..'.
1346 	 */
1347 	if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
1348 	    (fcnp->cn_namelen == 2 && fcnp->cn_nameptr[0] == '.' &&
1349 	    fcnp->cn_nameptr[1] == '.')) {
1350 		tmpfs_rename_abort(v);
1351 		return EINVAL;
1352 	}
1353 
1354 	/*
1355 	 * Sanitize our world from the VFS insanity.  Unlock the target
1356 	 * directory and node, which are locked.  Release the children,
1357 	 * which are referenced.  Check for rename("x", "y/."), which
1358 	 * it is our responsibility to reject, not the caller's.  (But
1359 	 * the caller does reject rename("x/.", "y").  Go figure.)
1360 	 */
1361 
1362 	VOP_UNLOCK(tdvp);
1363 	if ((tvp != NULL) && (tvp != tdvp))
1364 		VOP_UNLOCK(tvp);
1365 
1366 	vrele(fvp);
1367 	if (tvp != NULL)
1368 		vrele(tvp);
1369 
1370 	if (tvp == tdvp) {
1371 		error = EINVAL;
1372 		goto out;
1373 	}
1374 
1375 	error = tmpfs_sane_rename(fdvp, fcnp, tdvp, tcnp, cred, 0);
1376 
1377 out:	/*
1378 	 * All done, whether with success or failure.  Release the
1379 	 * directory nodes now, as the caller expects from the VFS
1380 	 * protocol.
1381 	 */
1382 	vrele(fdvp);
1383 	vrele(tdvp);
1384 
1385 	return error;
1386 }
1387 
/*
 * tmpfs_sane_rename: rename routine, the hairiest system call, with
 * the sane API.
 *
 * Arguments:
 *
 * . fdvp (from directory vnode),
 * . fcnp (from component name),
 * . tdvp (to directory vnode),
 * . tcnp (to component name),
 * . cred (credentials handed to the lookup, permission-check and
 *   removal helpers), and
 * . posixly_correct (when zero and both names resolve to the same node
 *   through distinct directory entries, the source entry is removed;
 *   when nonzero it is left in place).
 *
 * fdvp and tdvp must be referenced and unlocked.
 *
 * Returns 0 on success, otherwise an errno value: ENOSPC if the new
 * name cannot be allocated, EINVAL if source and target resolve to the
 * same directory, ENOTEMPTY / ENOTDIR / EISDIR for the target
 * conflicts checked below, or whatever the lookup, check and removal
 * helpers return.
 */
int
tmpfs_sane_rename(struct vnode *fdvp, struct componentname *fcnp,
    struct vnode *tdvp, struct componentname *tcnp, struct ucred *cred,
    int posixly_correct)
{
	struct mount *mount;
	struct tmpfs_mount *tmpfs;
	struct tmpfs_node *fdnode, *tdnode;
	struct tmpfs_dirent *fde, *tde;
	struct vnode *fvp, *tvp;
	char *newname;
	int error;

	KASSERT(fdvp != NULL);
	KASSERT(fcnp != NULL);
	KASSERT(tdvp != NULL);
	KASSERT(tcnp != NULL);
	/* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
	/* KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
	KASSERT(fdvp->v_type == VDIR);
	KASSERT(tdvp->v_type == VDIR);
	KASSERT(fdvp->v_mount == tdvp->v_mount);
	KASSERT((fcnp->cn_flags & ISDOTDOT) == 0);
	KASSERT((tcnp->cn_flags & ISDOTDOT) == 0);
	/* Neither name may be `.' ... */
	KASSERT((fcnp->cn_namelen != 1) || (fcnp->cn_nameptr[0] != '.'));
	KASSERT((tcnp->cn_namelen != 1) || (tcnp->cn_nameptr[0] != '.'));
	/* ... nor `..'. */
	KASSERT((fcnp->cn_namelen != 2) || (fcnp->cn_nameptr[0] != '.') ||
	    (fcnp->cn_nameptr[1] != '.'));
	KASSERT((tcnp->cn_namelen != 2) || (tcnp->cn_nameptr[0] != '.') ||
	    (tcnp->cn_nameptr[1] != '.'));

	/*
	 * Pull out the tmpfs data structures.
	 */
	fdnode = VP_TO_TMPFS_NODE(fdvp);
	tdnode = VP_TO_TMPFS_NODE(tdvp);
	KASSERT(fdnode != NULL);
	KASSERT(tdnode != NULL);
	KASSERT(fdnode->tn_vnode == fdvp);
	KASSERT(tdnode->tn_vnode == tdvp);
	KASSERT(fdnode->tn_type == VDIR);
	KASSERT(tdnode->tn_type == VDIR);

	mount = fdvp->v_mount;
	KASSERT(mount != NULL);
	KASSERT(mount == tdvp->v_mount);
	/* XXX How can we be sure this stays true?  (Not that you're
	 * likely to mount a tmpfs read-only...)  */
	KASSERT((mount->mnt_flag & MNT_RDONLY) == 0);
	tmpfs = VFS_TO_TMPFS(mount);
	KASSERT(tmpfs != NULL);

	/*
	 * Decide whether we need a new name, and allocate memory for
	 * it if so.  Do this before locking anything or taking
	 * destructive actions so that we can back out safely and sleep
	 * safely.  XXX Is sleeping an issue here?  Can this just be
	 * moved into tmpfs_rename_attachdetach?
	 */
	if (tmpfs_strname_neqlen(fcnp, tcnp)) {
		newname = tmpfs_strname_alloc(tmpfs, tcnp->cn_namelen);
		if (newname == NULL) {
			error = ENOSPC;
			goto out_unlocked;
		}
	} else {
		newname = NULL;
	}

	/*
	 * Lock and look up everything.  GCC is not very clever.
	 * (The outputs are pre-initialized to NULL before the call.)
	 */
	fde = tde = NULL;
	fvp = tvp = NULL;
	error = tmpfs_rename_enter(mount, tmpfs, cred,
	    fdvp, fdnode, fcnp, &fde, &fvp,
	    tdvp, tdnode, tcnp, &tde, &tvp);
	if (error)
		goto out_unlocked;

	/*
	 * Check that everything is locked and looks right.
	 */
	KASSERT(fde != NULL);
	KASSERT(fvp != NULL);
	KASSERT(fde->td_node != NULL);
	KASSERT(fde->td_node->tn_vnode == fvp);
	KASSERT(fde->td_node->tn_type == fvp->v_type);
	KASSERT((tde == NULL) == (tvp == NULL));
	KASSERT((tde == NULL) || (tde->td_node != NULL));
	KASSERT((tde == NULL) || (tde->td_node->tn_vnode == tvp));
	KASSERT((tde == NULL) || (tde->td_node->tn_type == tvp->v_type));
	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));

	/*
	 * If the source and destination are the same object, we need
	 * only at most delete the source entry.
	 */
	if (fvp == tvp) {
		KASSERT(tvp != NULL);
		if (fde->td_node->tn_type == VDIR) {
			/* XXX How can this possibly happen?  */
			error = EINVAL;
			goto out_locked;
		}
		/*
		 * Same node reached through two different entries:
		 * drop the source entry unless the caller asked for
		 * the POSIX behaviour of leaving both in place.
		 */
		if (!posixly_correct && (fde != tde)) {
			/* XXX Doesn't work because of locking.
			 * error = VOP_REMOVE(fdvp, fvp);
			 */
			error = tmpfs_do_remove(tmpfs, fdvp, fdnode, fde, fvp,
			    cred);
			if (error)
				goto out_locked;
		}
		goto success;
	}
	KASSERT(fde != tde);
	KASSERT(fvp != tvp);

	/*
	 * If the target exists, refuse to rename a directory over a
	 * non-directory or vice versa, or to clobber a non-empty
	 * directory.
	 */
	if (tvp != NULL) {
		KASSERT(tde != NULL);
		KASSERT(tde->td_node != NULL);
		if (fvp->v_type == VDIR && tvp->v_type == VDIR)
			error = ((tde->td_node->tn_size > 0)? ENOTEMPTY : 0);
		else if (fvp->v_type == VDIR && tvp->v_type != VDIR)
			error = ENOTDIR;
		else if (fvp->v_type != VDIR && tvp->v_type == VDIR)
			error = EISDIR;
		else
			error = 0;
		if (error)
			goto out_locked;
		KASSERT((fvp->v_type == VDIR) == (tvp->v_type == VDIR));
	}

	/*
	 * Authorize the rename.
	 */
	error = tmpfs_rename_check_possible(fdnode, fde->td_node,
	    tdnode, (tde? tde->td_node : NULL));
	if (error)
		goto out_locked;
	error = tmpfs_rename_check_permitted(cred, fdnode, fde->td_node,
	    tdnode, (tde? tde->td_node : NULL));
	if (error)
		goto out_locked;

	/*
	 * Everything is hunky-dory.  Shuffle the directory entries.
	 */
	tmpfs_rename_attachdetach(tmpfs, fdvp, fde, fvp, tdvp, tde, tvp);

	/*
	 * Update the directory entry's name if necessary, and flag
	 * metadata updates.  A memory allocation failure here is not
	 * OK because we've already committed some changes that we
	 * can't back out at this point, and we have things locked so
	 * we can't sleep, hence the early allocation above.
	 */
	if (newname != NULL) {
		KASSERT(tcnp->cn_namelen <= TMPFS_MAXNAMLEN);

		/* Swap the old name buffer for the preallocated one. */
		tmpfs_strname_free(tmpfs, fde->td_name, fde->td_namelen);
		fde->td_namelen = (uint16_t)tcnp->cn_namelen;
		(void)memcpy(newname, tcnp->cn_nameptr, tcnp->cn_namelen);
		/* Commit newname and don't free it on the way out.  */
		fde->td_name = newname;
		newname = NULL;

		tmpfs_update(fde->td_node, TMPFS_NODE_CHANGED);
		tmpfs_update(tdnode, TMPFS_NODE_MODIFIED);
	}

success:
	VN_KNOTE(fvp, NOTE_RENAME);
	tmpfs_rename_cache_purge(fdvp, fvp, tdvp, tvp);
	error = 0;

out_locked:
	/* Undo everything tmpfs_rename_enter() locked and referenced. */
	tmpfs_rename_exit(tmpfs, fdvp, fvp, tdvp, tvp);

out_unlocked:
	/* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
	/* KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
	/* KASSERT((fvp == NULL) || (VOP_ISLOCKED(fvp) != LK_EXCLUSIVE)); */
	/* KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */

	/* Still non-NULL only if the name was never committed above. */
	if (newname != NULL)
		tmpfs_strname_free(tmpfs, newname, tcnp->cn_namelen);

	return error;
}
1601 
1602 /*
1603  * Look up fcnp in fdnode/fdvp and store its directory entry in fde_ret
1604  * and the associated vnode in fvp_ret; fail if not found.  Look up
1605  * tcnp in tdnode/tdvp and store its directory entry in tde_ret and the
1606  * associated vnode in tvp_ret; store null instead if not found.  Fail
1607  * if anything has been mounted on any of the nodes involved.
1608  *
1609  * fdvp and tdvp must be referenced.
1610  *
1611  * On entry, nothing is locked.
1612  *
1613  * On success, everything is locked, and *fvp_ret, and *tvp_ret if
1614  * nonnull, are referenced.  The only pairs of vnodes that may be
1615  * identical are {fdvp, tdvp} and {fvp, tvp}.
1616  *
1617  * On failure, everything remains as was.
1618  *
1619  * Locking everything including the source and target nodes is
1620  * necessary to make sure that, e.g., link count updates are OK.  The
1621  * locking order is, in general, ancestor-first, matching the order you
1622  * need to use to look up a descendant anyway.
1623  */
1624 int
1625 tmpfs_rename_enter(struct mount *mount, struct tmpfs_mount *tmpfs,
1626     struct ucred *cred,
1627     struct vnode *fdvp, struct tmpfs_node *fdnode, struct componentname *fcnp,
1628     struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
1629     struct vnode *tdvp, struct tmpfs_node *tdnode, struct componentname *tcnp,
1630     struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret)
1631 {
1632 	int error;
1633 
1634 	KASSERT(mount != NULL);
1635 	KASSERT(tmpfs != NULL);
1636 	KASSERT(fdvp != NULL);
1637 	KASSERT(fdnode != NULL);
1638 	KASSERT(fcnp != NULL);
1639 	KASSERT(fde_ret != NULL);
1640 	KASSERT(fvp_ret != NULL);
1641 	KASSERT(tdvp != NULL);
1642 	KASSERT(tdnode != NULL);
1643 	KASSERT(tcnp != NULL);
1644 	KASSERT(tde_ret != NULL);
1645 	KASSERT(tvp_ret != NULL);
1646 	KASSERT(fdnode->tn_vnode == fdvp);
1647 	KASSERT(tdnode->tn_vnode == tdvp);
1648 	KASSERT(fdnode->tn_type == VDIR);
1649 	KASSERT(tdnode->tn_type == VDIR);
1650 
1651 	if (fdvp == tdvp) {
1652 		KASSERT(fdnode == tdnode);
1653 		error = tmpfs_rename_enter_common(mount, tmpfs, cred, fdvp,
1654 		    fdnode, fcnp, fde_ret, fvp_ret, tcnp, tde_ret, tvp_ret);
1655 	} else {
1656 		KASSERT(fdnode != tdnode);
1657 		error = tmpfs_rename_enter_separate(mount, tmpfs, cred,
1658 		    fdvp, fdnode, fcnp, fde_ret, fvp_ret,
1659 		    tdvp, tdnode, tcnp, tde_ret, tvp_ret);
1660 	}
1661 
1662 	if (error)
1663 		return error;
1664 
1665 	KASSERT(*fde_ret != NULL);
1666 	KASSERT(*fvp_ret != NULL);
1667 	KASSERT((*tde_ret == NULL) == (*tvp_ret == NULL));
1668 	KASSERT((*tde_ret == NULL) || ((*tde_ret)->td_node != NULL));
1669 	KASSERT((*tde_ret == NULL) ||
1670 	    ((*tde_ret)->td_node->tn_vnode == *tvp_ret));
1671 	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
1672 	KASSERT(VOP_ISLOCKED(*fvp_ret) == LK_EXCLUSIVE);
1673 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
1674 	KASSERT((*tvp_ret == NULL) ||
1675 	    (VOP_ISLOCKED(*tvp_ret) == LK_EXCLUSIVE));
1676 	KASSERT(*fvp_ret != fdvp);
1677 	KASSERT(*fvp_ret != tdvp);
1678 	KASSERT(*tvp_ret != fdvp);
1679 	KASSERT(*tvp_ret != tdvp);
1680 	return 0;
1681 }
1682 
/*
 * Lock and look up with a common source/target directory.
 *
 * dvp/dnode is the one directory holding both names.  fcnp must
 * resolve to an entry (ENOENT otherwise); tcnp may be absent, in which
 * case *tde_ret and *tvp_ret are set to null.  If both names resolve
 * to the same node, *tvp_ret is the same vnode as *fvp_ret with one
 * extra reference taken on it.
 *
 * Returns 0 with all four outputs stored and dvp plus the returned
 * vnodes locked, or an errno value: the error from
 * tmpfs_rename_lock_directory(), EBUSY if something is mounted on dvp,
 * the source or the target, the error from VOP_ACCESS(), ENOENT, or
 * the error from tmpfs_vnode_get().  On failure dvp is unlocked again,
 * any vnode obtained here is released, and the outputs are not
 * written.
 *
 * The tmpfs argument is not used by this function.
 */
int
tmpfs_rename_enter_common(struct mount *mount, struct tmpfs_mount *tmpfs,
    struct ucred *cred,
    struct vnode *dvp, struct tmpfs_node *dnode,
    struct componentname *fcnp,
    struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
    struct componentname *tcnp,
    struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret)
{
	struct tmpfs_dirent *fde, *tde;
	struct vnode *fvp, *tvp;
	int error;

	error = tmpfs_rename_lock_directory(dvp, dnode);
	if (error)
		goto fail0;

	/* Did we lose a race with mount?  */
	if (dvp->v_mountedhere != NULL) {
		error = EBUSY;
		goto fail1;
	}

	/* Make sure the caller may read the directory.  */
	error = VOP_ACCESS(dvp, VEXEC, cred, curproc);
	if (error)
		goto fail1;

	/*
	 * The order in which we lock the source and target nodes is
	 * irrelevant because there can only be one rename on this
	 * directory in flight at a time, and we have it locked.
	 */

	fde = tmpfs_dir_lookup(dnode, fcnp);
	if (fde == NULL) {
		error = ENOENT;
		goto fail1;
	}

	KASSERT(fde->td_node != NULL);
	/* We ruled out `.' earlier.  */
	KASSERT(fde->td_node != dnode);
	/* We ruled out `..' earlier.  */
	KASSERT(fde->td_node != dnode->tn_spec.tn_dir.tn_parent);
	/*
	 * NOTE(review): tn_nlock is taken here and never released in
	 * this function; presumably tmpfs_vnode_get() drops it on every
	 * path -- confirm against its definition.
	 */
	rw_enter_write(&fde->td_node->tn_nlock);
	error = tmpfs_vnode_get(mount, fde->td_node, &fvp);
	if (error)
		goto fail1;
	KASSERT(fvp != NULL);
	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
	KASSERT(fvp != dvp);
	KASSERT(fvp->v_mount == mount);

	/* Refuse to rename a mount point.  */
	if ((fvp->v_type == VDIR) && (fvp->v_mountedhere != NULL)) {
		error = EBUSY;
		goto fail2;
	}

	tde = tmpfs_dir_lookup(dnode, tcnp);
	if (tde == NULL) {
		tvp = NULL;
	} else {
		KASSERT(tde->td_node != NULL);
		/* We ruled out `.' earlier.  */
		KASSERT(tde->td_node != dnode);
		/* We ruled out `..' earlier.  */
		KASSERT(tde->td_node != dnode->tn_spec.tn_dir.tn_parent);
		if (tde->td_node != fde->td_node) {
			rw_enter_write(&tde->td_node->tn_nlock);
			error = tmpfs_vnode_get(mount, tde->td_node, &tvp);
			if (error)
				goto fail2;
			KASSERT(tvp->v_mount == mount);
			/* Refuse to rename over a mount point.  */
			if ((tvp->v_type == VDIR) &&
			    (tvp->v_mountedhere != NULL)) {
				error = EBUSY;
				goto fail3;
			}
		} else {
			/*
			 * Source and target are the same node: reuse the
			 * already-locked vnode and take a second reference.
			 */
			tvp = fvp;
			vref(tvp);
		}
		KASSERT(tvp != NULL);
		KASSERT(VOP_ISLOCKED(tvp) == LK_EXCLUSIVE);
	}
	KASSERT(tvp != dvp);

	*fde_ret = fde;
	*fvp_ret = fvp;
	*tde_ret = tde;
	*tvp_ret = tvp;
	return 0;

	/*
	 * Unwind in reverse order of acquisition.  A shared vnode was
	 * only referenced a second time, not locked twice, so it gets
	 * vrele() rather than vput().
	 */
fail3:	if (tvp != NULL) {
		if (tvp != fvp)
			vput(tvp);
		else
			vrele(tvp);
	}

fail2:	vput(fvp);
fail1:	VOP_UNLOCK(dvp);
fail0:	return error;
}
1793 
1794 /*
1795  * Lock and look up with separate source and target directories.
1796  */
1797 int
1798 tmpfs_rename_enter_separate(struct mount *mount, struct tmpfs_mount *tmpfs,
1799     struct ucred *cred,
1800     struct vnode *fdvp, struct tmpfs_node *fdnode, struct componentname *fcnp,
1801     struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
1802     struct vnode *tdvp, struct tmpfs_node *tdnode, struct componentname *tcnp,
1803     struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret)
1804 {
1805 	struct tmpfs_node *intermediate_node;
1806 	struct tmpfs_dirent *fde, *tde;
1807 	struct vnode *fvp, *tvp;
1808 	int error;
1809 
1810 	KASSERT(fdvp != tdvp);
1811 	KASSERT(fdnode != tdnode);
1812 
1813 #if 0				/* XXX */
1814 	mutex_enter(&tmpfs->tm_rename_lock);
1815 #endif
1816 
1817 	error = tmpfs_rename_genealogy(fdnode, tdnode, &intermediate_node);
1818 	if (error)
1819 		goto fail;
1820 
1821 	/*
1822 	 * intermediate_node == NULL means fdnode is not an ancestor of
1823 	 * tdnode.
1824 	 */
1825 	if (intermediate_node == NULL)
1826 		error = tmpfs_rename_lock(mount, cred, ENOTEMPTY,
1827 		    tdvp, tdnode, tcnp, 1, &tde, &tvp,
1828 		    fdvp, fdnode, fcnp, 0, &fde, &fvp);
1829 	else
1830 		error = tmpfs_rename_lock(mount, cred, EINVAL,
1831 		    fdvp, fdnode, fcnp, 0, &fde, &fvp,
1832 		    tdvp, tdnode, tcnp, 1, &tde, &tvp);
1833 	if (error)
1834 		goto fail;
1835 
1836 	KASSERT(fde != NULL);
1837 	KASSERT(fde->td_node != NULL);
1838 
1839 	/*
1840 	 * Reject rename("foo/bar", "foo/bar/baz/quux/zot").
1841 	 */
1842 	if (fde->td_node == intermediate_node) {
1843 		tmpfs_rename_exit(tmpfs, fdvp, fvp, tdvp, tvp);
1844 		return EINVAL;
1845 	}
1846 
1847 	*fde_ret = fde;
1848 	*fvp_ret = fvp;
1849 	*tde_ret = tde;
1850 	*tvp_ret = tvp;
1851 	return 0;
1852 
1853 fail:
1854 #if 0				/* XXX */
1855 	mutex_exit(&tmpfs->tm_rename_lock);
1856 #endif
1857 	return error;
1858 }
1859 
1860 /*
1861  * Unlock everything we locked for rename.
1862  *
1863  * fdvp and tdvp must be referenced.
1864  *
1865  * On entry, everything is locked, and fvp and tvp referenced.
1866  *
1867  * On exit, everything is unlocked, and fvp and tvp are released.
1868  */
1869 void
1870 tmpfs_rename_exit(struct tmpfs_mount *tmpfs,
1871     struct vnode *fdvp, struct vnode *fvp,
1872     struct vnode *tdvp, struct vnode *tvp)
1873 {
1874 
1875 	KASSERT(tmpfs != NULL);
1876 	KASSERT(fdvp != NULL);
1877 	KASSERT(fvp != NULL);
1878 	KASSERT(fdvp != fvp);
1879 	KASSERT(fdvp != tvp);
1880 	KASSERT(tdvp != tvp);
1881 	KASSERT(tdvp != fvp);
1882 	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
1883 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
1884 	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
1885 	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
1886 
1887 	if (tvp != NULL) {
1888 		if (tvp != fvp)
1889 			vput(tvp);
1890 		else
1891 			vrele(tvp);
1892 	}
1893 	VOP_UNLOCK(tdvp);
1894 	vput(fvp);
1895 	if (fdvp != tdvp)
1896 		VOP_UNLOCK(fdvp);
1897 
1898 #if 0				/* XXX */
1899 	if (fdvp != tdvp)
1900 		mutex_exit(&tmpfs->tm_rename_lock);
1901 #endif
1902 }
1903 
1904 /*
1905  * Lock a directory, but fail if it has been rmdir'd.
1906  *
1907  * vp must be referenced.
1908  */
1909 int
1910 tmpfs_rename_lock_directory(struct vnode *vp, struct tmpfs_node *node)
1911 {
1912 
1913 	KASSERT(vp != NULL);
1914 	KASSERT(node != NULL);
1915 	KASSERT(node->tn_vnode == vp);
1916 	KASSERT(node->tn_type == VDIR);
1917 
1918 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1919 	if (node->tn_spec.tn_dir.tn_parent == NULL) {
1920 		VOP_UNLOCK(vp);
1921 		return ENOENT;
1922 	}
1923 
1924 	return 0;
1925 }
1926 
1927 /*
1928  * Analyze the genealogy of the source and target nodes.
1929  *
1930  * On success, stores in *intermediate_node_ret either the child of
1931  * fdnode of which tdnode is a descendant, or null if tdnode is not a
1932  * descendant of fdnode at all.
1933  *
1934  * fdnode and tdnode must be unlocked and referenced.  The file
1935  * system's rename lock must also be held, to exclude concurrent
1936  * changes to the file system's genealogy other than rmdir.
1937  *
1938  * XXX This causes an extra lock/unlock of tdnode in the case when
1939  * we're just about to lock it again before locking anything else.
1940  * However, changing that requires reorganizing the code to make it
1941  * even more horrifically obscure.
1942  */
1943 int
1944 tmpfs_rename_genealogy(struct tmpfs_node *fdnode, struct tmpfs_node *tdnode,
1945     struct tmpfs_node **intermediate_node_ret)
1946 {
1947 	struct tmpfs_node *node = tdnode, *parent;
1948 	int error;
1949 
1950 	KASSERT(fdnode != NULL);
1951 	KASSERT(tdnode != NULL);
1952 	KASSERT(fdnode != tdnode);
1953 	KASSERT(intermediate_node_ret != NULL);
1954 
1955 	KASSERT(fdnode->tn_vnode != NULL);
1956 	KASSERT(tdnode->tn_vnode != NULL);
1957 	KASSERT(fdnode->tn_type == VDIR);
1958 	KASSERT(tdnode->tn_type == VDIR);
1959 
1960 	/*
1961 	 * We need to provisionally lock tdnode->tn_vnode to keep rmdir
1962 	 * from deleting it -- or any ancestor -- at an inopportune
1963 	 * moment.
1964 	 */
1965 	error = tmpfs_rename_lock_directory(tdnode->tn_vnode, tdnode);
1966 	if (error)
1967 		return error;
1968 
1969 	for (;;) {
1970 		parent = node->tn_spec.tn_dir.tn_parent;
1971 		KASSERT(parent != NULL);
1972 		KASSERT(parent->tn_type == VDIR);
1973 
1974 		/* Did we hit the root without finding fdnode?  */
1975 		if (parent == node) {
1976 			*intermediate_node_ret = NULL;
1977 			break;
1978 		}
1979 
1980 		/* Did we find that fdnode is an ancestor?  */
1981 		if (parent == fdnode) {
1982 			*intermediate_node_ret = node;
1983 			break;
1984 		}
1985 
1986 		/* Neither -- keep ascending the family tree.  */
1987 		node = parent;
1988 	}
1989 
1990 	VOP_UNLOCK(tdnode->tn_vnode);
1991 	return 0;
1992 }
1993 
/*
 * Lock directories a and b, which must be distinct, and look up and
 * lock nodes a and b.  Do a first and then b.  Directory b may not be
 * an ancestor of directory a, although directory a may be an ancestor
 * of directory b.  Fail with overlap_error if node a is directory b.
 * Neither componentname may be `.' or `..'.
 *
 * Exactly one of a_missing_ok and b_missing_ok must be set; the side
 * for which it is set may have no directory entry, while a missing
 * entry on the other side fails with ENOENT.
 *
 * a_dvp and b_dvp must be referenced.
 *
 * On entry, a_dvp and b_dvp are unlocked.
 *
 * On success (return value 0),
 * . a_dvp and b_dvp are locked,
 * . *a_dirent_ret is filled either with null or with a directory entry
 *     whose node is locked and referenced,
 * . *a_vp_ret is filled either with null or with the corresponding
 *     vnode,
 * . *b_dirent_ret is filled either with null or with a directory entry
 *     whose node is locked and referenced,
 * . *b_vp_ret is filled either with null or with the corresponding
 *     vnode, and
 * . the only pair of vnodes that may be identical is a_vp and b_vp.
 *
 * On failure (nonzero errno return), a_dvp and b_dvp are left
 * unlocked, and *a_dirent_ret, *a_vp_ret, *b_dirent_ret, and *b_vp_ret
 * are left alone.  Besides overlap_error and ENOENT, the error may be
 * EBUSY (something is mounted on one of the directories or nodes) or
 * come from tmpfs_rename_lock_directory(), VOP_ACCESS() or
 * tmpfs_vnode_get().
 */
int
tmpfs_rename_lock(struct mount *mount, struct ucred *cred, int overlap_error,
    struct vnode *a_dvp, struct tmpfs_node *a_dnode,
    struct componentname *a_cnp, int a_missing_ok,
    struct tmpfs_dirent **a_dirent_ret, struct vnode **a_vp_ret,
    struct vnode *b_dvp, struct tmpfs_node *b_dnode,
    struct componentname *b_cnp, int b_missing_ok,
    struct tmpfs_dirent **b_dirent_ret, struct vnode **b_vp_ret)
{
	struct tmpfs_dirent *a_dirent, *b_dirent;
	struct vnode *a_vp, *b_vp;
	int error;

	KASSERT(a_dvp != NULL);
	KASSERT(a_dnode != NULL);
	KASSERT(a_cnp != NULL);
	KASSERT(a_dirent_ret != NULL);
	KASSERT(a_vp_ret != NULL);
	KASSERT(b_dvp != NULL);
	KASSERT(b_dnode != NULL);
	KASSERT(b_cnp != NULL);
	KASSERT(b_dirent_ret != NULL);
	KASSERT(b_vp_ret != NULL);
	KASSERT(a_dvp != b_dvp);
	KASSERT(a_dnode != b_dnode);
	KASSERT(a_dnode->tn_vnode == a_dvp);
	KASSERT(b_dnode->tn_vnode == b_dvp);
	KASSERT(a_dnode->tn_type == VDIR);
	KASSERT(b_dnode->tn_type == VDIR);
	KASSERT(a_missing_ok != b_missing_ok);

	/* ---- Side a: directory first, then the named node. ---- */
	error = tmpfs_rename_lock_directory(a_dvp, a_dnode);
	if (error)
		goto fail0;

	/* Did we lose a race with mount?  */
	if (a_dvp->v_mountedhere != NULL) {
		error = EBUSY;
		goto fail1;
	}

	/* Make sure the caller may read the directory.  */
	error = VOP_ACCESS(a_dvp, VEXEC, cred, curproc);
	if (error)
		goto fail1;

	a_dirent = tmpfs_dir_lookup(a_dnode, a_cnp);
	if (a_dirent != NULL) {
		KASSERT(a_dirent->td_node != NULL);
		/* We ruled out `.' earlier.  */
		KASSERT(a_dirent->td_node != a_dnode);
		/* We ruled out `..' earlier.  */
		KASSERT(a_dirent->td_node !=
		    a_dnode->tn_spec.tn_dir.tn_parent);
		/* Node a being directory b is the caller-defined overlap. */
		if (a_dirent->td_node == b_dnode) {
			error = overlap_error;
			goto fail1;
		}
		/*
		 * NOTE(review): tn_nlock is taken here and not released
		 * in this function; presumably tmpfs_vnode_get() drops
		 * it on every path -- confirm against its definition.
		 */
		rw_enter_write(&a_dirent->td_node->tn_nlock);
		error = tmpfs_vnode_get(mount, a_dirent->td_node, &a_vp);
		if (error)
			goto fail1;
		KASSERT(a_vp->v_mount == mount);
		/* Refuse to rename (over) a mount point.  */
		if ((a_vp->v_type == VDIR) && (a_vp->v_mountedhere != NULL)) {
			error = EBUSY;
			goto fail2;
		}
	} else if (!a_missing_ok) {
		error = ENOENT;
		goto fail1;
	} else {
		a_vp = NULL;
	}
	KASSERT(a_vp != a_dvp);
	KASSERT(a_vp != b_dvp);

	/* ---- Side b: directory first, then the named node. ---- */
	error = tmpfs_rename_lock_directory(b_dvp, b_dnode);
	if (error)
		goto fail2;

	/* Did we lose a race with mount?  */
	if (b_dvp->v_mountedhere != NULL) {
		error = EBUSY;
		goto fail3;
	}

	/* Make sure the caller may read the directory.  */
	error = VOP_ACCESS(b_dvp, VEXEC, cred, curproc);
	if (error)
		goto fail3;

	b_dirent = tmpfs_dir_lookup(b_dnode, b_cnp);
	if (b_dirent != NULL) {
		KASSERT(b_dirent->td_node != NULL);
		/* We ruled out `.' earlier.  */
		KASSERT(b_dirent->td_node != b_dnode);
		/* We ruled out `..' earlier.  */
		KASSERT(b_dirent->td_node !=
		    b_dnode->tn_spec.tn_dir.tn_parent);
		/* b is not an ancestor of a.  */
		KASSERT(b_dirent->td_node != a_dnode);
		/* But the source and target nodes might be the same.  */
		if ((a_dirent == NULL) ||
		    (a_dirent->td_node != b_dirent->td_node)) {
			rw_enter_write(&b_dirent->td_node->tn_nlock);
			error = tmpfs_vnode_get(mount, b_dirent->td_node,
			    &b_vp);
			if (error)
				goto fail3;
			KASSERT(b_vp->v_mount == mount);
			KASSERT(a_vp != b_vp);
			/* Refuse to rename (over) a mount point.  */
			if ((b_vp->v_type == VDIR) &&
			    (b_vp->v_mountedhere != NULL)) {
				error = EBUSY;
				goto fail4;
			}
		} else {
			/*
			 * Same node on both sides: share the vnode that
			 * is already locked and add a second reference.
			 */
			b_vp = a_vp;
			vref(b_vp);
		}
	} else if (!b_missing_ok) {
		error = ENOENT;
		goto fail3;
	} else {
		b_vp = NULL;
	}
	KASSERT(b_vp != a_dvp);
	KASSERT(b_vp != b_dvp);

	/* Everything is in place; verify the promised post-conditions. */
	KASSERT(VOP_ISLOCKED(a_dvp) == LK_EXCLUSIVE);
	KASSERT(VOP_ISLOCKED(b_dvp) == LK_EXCLUSIVE);
	KASSERT(a_missing_ok || (a_dirent != NULL));
	KASSERT(a_missing_ok || (a_dirent->td_node != NULL));
	KASSERT(b_missing_ok || (b_dirent != NULL));
	KASSERT(b_missing_ok || (b_dirent->td_node != NULL));
	KASSERT((a_dirent == NULL) || (a_dirent->td_node != NULL));
	KASSERT((a_dirent == NULL) || (a_dirent->td_node->tn_vnode == a_vp));
	KASSERT((b_dirent == NULL) || (b_dirent->td_node != NULL));
	KASSERT((b_dirent == NULL) || (b_dirent->td_node->tn_vnode == b_vp));
	KASSERT((a_vp == NULL) || (VOP_ISLOCKED(a_vp) == LK_EXCLUSIVE));
	KASSERT((b_vp == NULL) || (VOP_ISLOCKED(b_vp) == LK_EXCLUSIVE));

	*a_dirent_ret = a_dirent;
	*b_dirent_ret = b_dirent;
	*a_vp_ret = a_vp;
	*b_vp_ret = b_vp;
	return 0;

	/*
	 * Unwind in reverse order of acquisition: node b, directory b,
	 * node a, directory a.
	 */
fail4:	if (b_vp != NULL) {
		KASSERT(VOP_ISLOCKED(b_vp) == LK_EXCLUSIVE);
		if (b_vp != a_vp)
			vput(b_vp);
		else
			vrele(a_vp);
	}

fail3:	KASSERT(VOP_ISLOCKED(b_dvp) == LK_EXCLUSIVE);
	VOP_UNLOCK(b_dvp);

fail2:	if (a_vp != NULL) {
		KASSERT(VOP_ISLOCKED(a_vp) == LK_EXCLUSIVE);
		vput(a_vp);
	}

fail1:	KASSERT(VOP_ISLOCKED(a_dvp) == LK_EXCLUSIVE);
	VOP_UNLOCK(a_dvp);

fail0:	/* KASSERT(VOP_ISLOCKED(a_dvp) != LK_EXCLUSIVE); */
	/* KASSERT(VOP_ISLOCKED(b_dvp) != LK_EXCLUSIVE); */
	/* KASSERT((a_vp == NULL) || (VOP_ISLOCKED(a_vp) != LK_EXCLUSIVE)); */
	/* KASSERT((b_vp == NULL) || (VOP_ISLOCKED(b_vp) != LK_EXCLUSIVE)); */
	return error;
}
2194 
2195 /*
2196  * Shuffle the directory entries to move fvp from the directory fdvp
2197  * into the directory tdvp.  fde is fvp's directory entry in fdvp.  If
2198  * we are overwriting a target node, it is tvp, and tde is its
2199  * directory entry in tdvp.
2200  *
2201  * fdvp, fvp, tdvp, and tvp must all be locked and referenced.
2202  */
2203 void
2204 tmpfs_rename_attachdetach(struct tmpfs_mount *tmpfs,
2205     struct vnode *fdvp, struct tmpfs_dirent *fde, struct vnode *fvp,
2206     struct vnode *tdvp, struct tmpfs_dirent *tde, struct vnode *tvp)
2207 {
2208 
2209 	KASSERT(tmpfs != NULL);
2210 	KASSERT(fdvp != NULL);
2211 	KASSERT(fde != NULL);
2212 	KASSERT(fvp != NULL);
2213 	KASSERT(tdvp != NULL);
2214 	KASSERT(fde->td_node != NULL);
2215 	KASSERT(fde->td_node->tn_vnode == fvp);
2216 	KASSERT((tde == NULL) == (tvp == NULL));
2217 	KASSERT((tde == NULL) || (tde->td_node != NULL));
2218 	KASSERT((tde == NULL) || (tde->td_node->tn_vnode == tvp));
2219 	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
2220 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
2221 	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
2222 	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
2223 
2224 	/*
2225 	 * If we are moving from one directory to another, detach the
2226 	 * source entry and reattach it to the target directory.
2227 	 */
2228 	if (fdvp != tdvp) {
2229 		/* tmpfs_dir_detach clobbers fde->td_node, so save it.  */
2230 		struct tmpfs_node *fnode = fde->td_node;
2231 		tmpfs_node_t *fdnode = VP_TO_TMPFS_DIR(fdvp);
2232 		tmpfs_node_t *tdnode = VP_TO_TMPFS_DIR(tdvp);
2233 		tmpfs_dir_detach(fdnode, fde);
2234 		tmpfs_dir_attach(tdnode, fde, fnode);
2235 	} else if (tvp == NULL) {
2236 		/*
2237 		 * We are changing the directory.  tmpfs_dir_attach and
2238 		 * tmpfs_dir_detach note the events for us, but for
2239 		 * this case we don't call them, so we must note the
2240 		 * event explicitly.
2241 		 */
2242 		VN_KNOTE(fdvp, NOTE_WRITE);
2243 	}
2244 
2245 	/*
2246 	 * If we are replacing an existing target entry, delete it.
2247 	 */
2248 	if (tde != NULL) {
2249 		tmpfs_node_t *tdnode = VP_TO_TMPFS_DIR(tdvp);
2250 		KASSERT(tvp != NULL);
2251 		KASSERT(tde->td_node != NULL);
2252 		KASSERT((fvp->v_type == VDIR) == (tvp->v_type == VDIR));
2253 		if (tde->td_node->tn_type == VDIR) {
2254 			KASSERT(tde->td_node->tn_size == 0);
2255 			KASSERT(tde->td_node->tn_links == 2);
2256 			/* Decrement the extra link count for `.' so
2257 			 * the vnode will be recycled when released.  */
2258 			tde->td_node->tn_links--;
2259 		}
2260 		tmpfs_dir_detach(tdnode, tde);
2261 		tmpfs_free_dirent(tmpfs, tde);
2262 	}
2263 }
2264 
2265 /*
2266  * Remove the entry de for the non-directory vp from the directory dvp.
2267  *
2268  * Everything must be locked and referenced.
2269  */
2270 int
2271 tmpfs_do_remove(struct tmpfs_mount *tmpfs, struct vnode *dvp,
2272     struct tmpfs_node *dnode, struct tmpfs_dirent *de, struct vnode *vp,
2273     struct ucred *cred)
2274 {
2275 	int error;
2276 
2277 	KASSERT(tmpfs != NULL);
2278 	KASSERT(dvp != NULL);
2279 	KASSERT(dnode != NULL);
2280 	KASSERT(de != NULL);
2281 	KASSERT(vp != NULL);
2282 	KASSERT(dnode->tn_vnode == dvp);
2283 	KASSERT(de->td_node != NULL);
2284 	KASSERT(de->td_node->tn_vnode == vp);
2285 	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
2286 	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
2287 
2288 	error = tmpfs_remove_check_possible(dnode, de->td_node);
2289 	if (error)
2290 		return error;
2291 
2292 	error = tmpfs_remove_check_permitted(cred, dnode, de->td_node);
2293 	if (error)
2294 		return error;
2295 
2296 	/*
2297 	 * If not root and directory is sticky, check for permission on
2298 	 * directory or on file. This implements append-only directories.
2299 	 */
2300 	if ((dnode->tn_mode & S_ISTXT) != 0)
2301 		if (cred->cr_uid != 0 && cred->cr_uid != dnode->tn_uid &&
2302 		    cred->cr_uid != de->td_node->tn_uid)
2303 			return EPERM;
2304 
2305 	tmpfs_dir_detach(dnode, de);
2306 	tmpfs_free_dirent(tmpfs, de);
2307 
2308 	return 0;
2309 }
2310 
2311 /*
2312  * Check whether a rename is possible independent of credentials.
2313  *
2314  * Everything must be locked and referenced.
2315  */
2316 int
2317 tmpfs_rename_check_possible(
2318     struct tmpfs_node *fdnode, struct tmpfs_node *fnode,
2319     struct tmpfs_node *tdnode, struct tmpfs_node *tnode)
2320 {
2321 
2322 	KASSERT(fdnode != NULL);
2323 	KASSERT(fnode != NULL);
2324 	KASSERT(tdnode != NULL);
2325 	KASSERT(fdnode != fnode);
2326 	KASSERT(tdnode != tnode);
2327 	KASSERT(fnode != tnode);
2328 	KASSERT(fdnode->tn_vnode != NULL);
2329 	KASSERT(fnode->tn_vnode != NULL);
2330 	KASSERT(tdnode->tn_vnode != NULL);
2331 	KASSERT((tnode == NULL) || (tnode->tn_vnode != NULL));
2332 	KASSERT(VOP_ISLOCKED(fdnode->tn_vnode) == LK_EXCLUSIVE);
2333 	KASSERT(VOP_ISLOCKED(fnode->tn_vnode) == LK_EXCLUSIVE);
2334 	KASSERT(VOP_ISLOCKED(tdnode->tn_vnode) == LK_EXCLUSIVE);
2335 	KASSERT((tnode == NULL) ||
2336 	    (VOP_ISLOCKED(tnode->tn_vnode) == LK_EXCLUSIVE));
2337 
2338 	/*
2339 	 * If fdnode is immutable, we can't write to it.  If fdnode is
2340 	 * append-only, the only change we can make is to add entries
2341 	 * to it.  If fnode is immutable, we can't change the links to
2342 	 * it.  If fnode is append-only...well, this is what UFS does.
2343 	 */
2344 	if ((fdnode->tn_flags | fnode->tn_flags) & (IMMUTABLE | APPEND))
2345 		return EPERM;
2346 
2347 	/*
2348 	 * If tdnode is immutable, we can't write to it.  If tdnode is
2349 	 * append-only, we can add entries, but we can't change
2350 	 * existing entries.
2351 	 */
2352 	if (tdnode->tn_flags & (IMMUTABLE | (tnode? APPEND : 0)))
2353 		return EPERM;
2354 
2355 	/*
2356 	 * If tnode is immutable, we can't replace links to it.  If
2357 	 * tnode is append-only...well, this is what UFS does.
2358 	 */
2359 	if (tnode != NULL) {
2360 		KASSERT(tnode != NULL);
2361 		if ((tnode->tn_flags & (IMMUTABLE | APPEND)) != 0)
2362 			return EPERM;
2363 	}
2364 
2365 	return 0;
2366 }
2367 
2368 /*
2369  * Check whether a rename is permitted given our credentials.
2370  *
2371  * Everything must be locked and referenced.
2372  */
2373 int
2374 tmpfs_rename_check_permitted(struct ucred *cred,
2375     struct tmpfs_node *fdnode, struct tmpfs_node *fnode,
2376     struct tmpfs_node *tdnode, struct tmpfs_node *tnode)
2377 {
2378 	int error;
2379 
2380 	KASSERT(fdnode != NULL);
2381 	KASSERT(fnode != NULL);
2382 	KASSERT(tdnode != NULL);
2383 	KASSERT(fdnode != fnode);
2384 	KASSERT(tdnode != tnode);
2385 	KASSERT(fnode != tnode);
2386 	KASSERT(fdnode->tn_vnode != NULL);
2387 	KASSERT(fnode->tn_vnode != NULL);
2388 	KASSERT(tdnode->tn_vnode != NULL);
2389 	KASSERT((tnode == NULL) || (tnode->tn_vnode != NULL));
2390 	KASSERT(VOP_ISLOCKED(fdnode->tn_vnode) == LK_EXCLUSIVE);
2391 	KASSERT(VOP_ISLOCKED(fnode->tn_vnode) == LK_EXCLUSIVE);
2392 	KASSERT(VOP_ISLOCKED(tdnode->tn_vnode) == LK_EXCLUSIVE);
2393 	KASSERT((tnode == NULL) ||
2394 	    (VOP_ISLOCKED(tnode->tn_vnode) == LK_EXCLUSIVE));
2395 
2396 	/*
2397 	 * We need to remove or change an entry in the source directory.
2398 	 */
2399 	error = VOP_ACCESS(fdnode->tn_vnode, VWRITE, cred, curproc);
2400 	if (error)
2401 		return error;
2402 
2403 	/*
2404 	 * If we are changing directories, then we need to write to the
2405 	 * target directory to add or change an entry.  Also, if fnode
2406 	 * is a directory, we need to write to it to change its `..'
2407 	 * entry.
2408 	 */
2409 	if (fdnode != tdnode) {
2410 		error = VOP_ACCESS(tdnode->tn_vnode, VWRITE, cred, curproc);
2411 		if (error)
2412 			return error;
2413 		if (fnode->tn_type == VDIR) {
2414 			error = VOP_ACCESS(fnode->tn_vnode, VWRITE, cred,
2415 			    curproc);
2416 			if (error)
2417 				return error;
2418 		}
2419 	}
2420 
2421 	error = tmpfs_check_sticky(cred, fdnode, fnode);
2422 	if (error)
2423 		return error;
2424 
2425 	if (TMPFS_DIRSEQ_FULL(tdnode))
2426 		return (ENOSPC);
2427 
2428 	error = tmpfs_check_sticky(cred, tdnode, tnode);
2429 	if (error)
2430 		return error;
2431 
2432 	return 0;
2433 }
2434 
2435 /*
2436  * Check whether removing node's entry in dnode is possible independent
2437  * of credentials.
2438  *
2439  * Everything must be locked and referenced.
2440  */
2441 int
2442 tmpfs_remove_check_possible(struct tmpfs_node *dnode, struct tmpfs_node *node)
2443 {
2444 
2445 	KASSERT(dnode != NULL);
2446 	KASSERT(dnode->tn_vnode != NULL);
2447 	KASSERT(node != NULL);
2448 	KASSERT(dnode != node);
2449 	KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE);
2450 	KASSERT(VOP_ISLOCKED(node->tn_vnode) == LK_EXCLUSIVE);
2451 
2452 	/*
2453 	 * We want to delete the entry.  If dnode is immutable, we
2454 	 * can't write to it to delete the entry.  If dnode is
2455 	 * append-only, the only change we can make is to add entries,
2456 	 * so we can't delete entries.  If node is immutable, we can't
2457 	 * change the links to it, so we can't delete the entry.  If
2458 	 * node is append-only...well, this is what UFS does.
2459 	 */
2460 	if ((dnode->tn_flags | node->tn_flags) & (IMMUTABLE | APPEND))
2461 		return EPERM;
2462 
2463 	return 0;
2464 }
2465 
2466 /*
2467  * Check whether removing node's entry in dnode is permitted given our
2468  * credentials.
2469  *
2470  * Everything must be locked and referenced.
2471  */
2472 int
2473 tmpfs_remove_check_permitted(struct ucred *cred,
2474     struct tmpfs_node *dnode, struct tmpfs_node *node)
2475 {
2476 	int error;
2477 
2478 	KASSERT(dnode != NULL);
2479 	KASSERT(dnode->tn_vnode != NULL);
2480 	KASSERT(node != NULL);
2481 	KASSERT(dnode != node);
2482 	KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE);
2483 	KASSERT(VOP_ISLOCKED(node->tn_vnode) == LK_EXCLUSIVE);
2484 
2485 	/*
2486 	 * Check whether we are permitted to write to the source
2487 	 * directory in order to delete an entry from it.
2488 	 */
2489 	error = VOP_ACCESS(dnode->tn_vnode, VWRITE, cred, curproc);
2490 	if (error)
2491 		return error;
2492 
2493 	error = tmpfs_check_sticky(cred, dnode, node);
2494 	if (error)
2495 		return error;
2496 
2497 	return 0;
2498 }
2499 
2500 /*
2501  * Check whether we may change an entry in a sticky directory.  If the
2502  * directory is sticky, the user must own either the directory or, if
2503  * it exists, the node, in order to change the entry.
2504  *
2505  * Everything must be locked and referenced.
2506  */
2507 int
2508 tmpfs_check_sticky(struct ucred *cred,
2509     struct tmpfs_node *dnode, struct tmpfs_node *node)
2510 {
2511 
2512 	KASSERT(dnode != NULL);
2513 	KASSERT(dnode->tn_vnode != NULL);
2514 	KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE);
2515 	KASSERT((node == NULL) || (node->tn_vnode != NULL));
2516 	KASSERT((node == NULL) ||
2517 	    (VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE));
2518 
2519 	if (node == NULL)
2520 		return 0;
2521 
2522 	if (dnode->tn_mode & S_ISTXT) {
2523 		if (cred->cr_uid != 0 &&
2524 		    cred->cr_uid != dnode->tn_uid &&
2525 		    cred->cr_uid != node->tn_uid)
2526 			return EPERM;
2527 	}
2528 
2529 	return 0;
2530 }
2531 
2532 void
2533 tmpfs_rename_cache_purge(struct vnode *fdvp, struct vnode *fvp,
2534     struct vnode *tdvp, struct vnode *tvp)
2535 {
2536 
2537 	KASSERT(fdvp != NULL);
2538 	KASSERT(fvp != NULL);
2539 	KASSERT(tdvp != NULL);
2540 	KASSERT(fdvp != fvp);
2541 	KASSERT(fdvp != tvp);
2542 	KASSERT(tdvp != fvp);
2543 	KASSERT(tdvp != tvp);
2544 	KASSERT(fvp != tvp);
2545 	KASSERT(fdvp->v_type == VDIR);
2546 	KASSERT(tdvp->v_type == VDIR);
2547 
2548 	/*
2549 	 * XXX What actually needs to be purged?
2550 	 */
2551 
2552 	cache_purge(fdvp);
2553 
2554 	if (fvp->v_type == VDIR)
2555 		cache_purge(fvp);
2556 
2557 	if (tdvp != fdvp)
2558 		cache_purge(tdvp);
2559 
2560 	if ((tvp != NULL) && (tvp->v_type == VDIR))
2561 		cache_purge(tvp);
2562 }
2563 
2564 void
2565 tmpfs_rename_abort(void *v)
2566 {
2567 	struct vop_rename_args *ap = v;
2568 	struct vnode *fdvp = ap->a_fdvp;
2569 	struct vnode *fvp = ap->a_fvp;
2570 	struct componentname *fcnp = ap->a_fcnp;
2571 	struct vnode *tdvp = ap->a_tdvp;
2572 	struct vnode *tvp = ap->a_tvp;
2573 	struct componentname *tcnp = ap->a_tcnp;
2574 
2575 	VOP_ABORTOP(tdvp, tcnp);
2576 	if (tdvp == tvp)
2577 		vrele(tdvp);
2578 	else
2579 		vput(tdvp);
2580 	if (tvp != NULL)
2581 		vput(tvp);
2582 	VOP_ABORTOP(fdvp, fcnp);
2583 	vrele(fdvp);
2584 	vrele(fvp);
2585 }
2586 
2587 void filt_tmpfsdetach(struct knote *kn);
2588 int filt_tmpfsread(struct knote *kn, long hint);
2589 int filt_tmpfswrite(struct knote *kn, long hint);
2590 int filt_tmpfsvnode(struct knote *kn, long hint);
2591 
2592 const struct filterops tmpfsread_filtops = {
2593 	.f_flags	= FILTEROP_ISFD,
2594 	.f_attach	= NULL,
2595 	.f_detach	= filt_tmpfsdetach,
2596 	.f_event	= filt_tmpfsread,
2597 };
2598 
2599 const struct filterops tmpfswrite_filtops = {
2600 	.f_flags	= FILTEROP_ISFD,
2601 	.f_attach	= NULL,
2602 	.f_detach	= filt_tmpfsdetach,
2603 	.f_event	= filt_tmpfswrite,
2604 };
2605 
2606 const struct filterops tmpfsvnode_filtops = {
2607 	.f_flags	= FILTEROP_ISFD,
2608 	.f_attach	= NULL,
2609 	.f_detach	= filt_tmpfsdetach,
2610 	.f_event	= filt_tmpfsvnode,
2611 };
2612 
2613 int
2614 tmpfs_kqfilter(void *v)
2615 {
2616 	struct vop_kqfilter_args *ap = v;
2617 	struct vnode *vp = ap->a_vp;
2618 	struct knote *kn = ap->a_kn;
2619 
2620 	switch (kn->kn_filter) {
2621 	case EVFILT_READ:
2622 		kn->kn_fop = &tmpfsread_filtops;
2623 		break;
2624 	case EVFILT_WRITE:
2625 		kn->kn_fop = &tmpfswrite_filtops;
2626 		break;
2627 	case EVFILT_VNODE:
2628 		kn->kn_fop = &tmpfsvnode_filtops;
2629 		break;
2630 	default:
2631 		return (EINVAL);
2632 	}
2633 
2634 	kn->kn_hook = (caddr_t)vp;
2635 
2636 	klist_insert(&vp->v_selectinfo.si_note, kn);
2637 
2638 	return (0);
2639 }
2640 
2641 void
2642 filt_tmpfsdetach(struct knote *kn)
2643 {
2644 	struct vnode *vp = (struct vnode *)kn->kn_hook;
2645 
2646 	klist_remove(&vp->v_selectinfo.si_note, kn);
2647 }
2648 
2649 int
2650 filt_tmpfsread(struct knote *kn, long hint)
2651 {
2652 	struct vnode *vp = (struct vnode *)kn->kn_hook;
2653 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
2654 
2655 	/*
2656 	 * filesystem is gone, so set the EOF flag and schedule
2657 	 * the knote for deletion.
2658 	 */
2659 	if (hint == NOTE_REVOKE) {
2660 		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
2661 		return (1);
2662 	}
2663 
2664 	kn->kn_data = node->tn_size - foffset(kn->kn_fp);
2665 	if (kn->kn_data == 0 && kn->kn_sfflags & NOTE_EOF) {
2666 		kn->kn_fflags |= NOTE_EOF;
2667 		return (1);
2668 	}
2669 
2670 	if (kn->kn_flags & __EV_POLL)
2671 		return (1);
2672 
2673 	return (kn->kn_data != 0);
2674 }
2675 
2676 int
2677 filt_tmpfswrite(struct knote *kn, long hint)
2678 {
2679 	/*
2680 	 * filesystem is gone, so set the EOF flag and schedule
2681 	 * the knote for deletion.
2682 	 */
2683 	if (hint == NOTE_REVOKE) {
2684 		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
2685 		return (1);
2686 	}
2687 
2688 	kn->kn_data = 0;
2689 	return (1);
2690 }
2691 
2692 int
2693 filt_tmpfsvnode(struct knote *kn, long hint)
2694 {
2695 	if (kn->kn_sfflags & hint)
2696 		kn->kn_fflags |= hint;
2697 	if (hint == NOTE_REVOKE) {
2698 		kn->kn_flags |= EV_EOF;
2699 		return (1);
2700 	}
2701 	return (kn->kn_fflags != 0);
2702 }
2703