xref: /openbsd-src/sys/tmpfs/tmpfs_vnops.c (revision e5157e49389faebcb42b7237d55fbf096d9c2523)
1 /*	$OpenBSD: tmpfs_vnops.c,v 1.18 2014/11/02 03:47:08 tedu Exp $	*/
2 /*	$NetBSD: tmpfs_vnops.c,v 1.100 2012/11/05 17:27:39 dholland Exp $	*/
3 
4 /*
5  * Copyright (c) 2005, 2006, 2007, 2012 The NetBSD Foundation, Inc.
6  * Copyright (c) 2013 Pedro Martelletto
7  * All rights reserved.
8  *
9  * This code is derived from software contributed to The NetBSD Foundation
10  * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
11  * 2005 program, and by Taylor R Campbell.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  * POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 /*
36  * tmpfs vnode interface.
37  */
38 
39 #if 0
40 #include <sys/cdefs.h>
41 __KERNEL_RCSID(0, "$NetBSD: tmpfs_vnops.c,v 1.100 2012/11/05 17:27:39 dholland Exp $");
42 #endif
43 
44 #include <sys/param.h>
45 #include <sys/dirent.h>
46 #include <sys/fcntl.h>
47 #include <sys/event.h>
48 #include <sys/namei.h>
49 #include <sys/stat.h>
50 #include <sys/uio.h>
51 #include <sys/unistd.h>
52 #include <sys/vnode.h>
53 #include <sys/lockf.h>
54 #include <sys/poll.h>
55 #include <sys/file.h>
56 
57 #include <miscfs/fifofs/fifo.h>
58 #include <tmpfs/tmpfs_vnops.h>
59 #include <tmpfs/tmpfs.h>
60 
61 int tmpfs_kqfilter(void *v);
62 
63 /*
64  * vnode operations vector used for files stored in a tmpfs file system.
 *
 * Every slot is bound to a tmpfs_* handler except revoke, abortop and
 * bmap, which use the vop_generic_* implementations.
65  */
66 struct vops tmpfs_vops = {
67 	.vop_lookup	= tmpfs_lookup,
68 	.vop_create	= tmpfs_create,
69 	.vop_mknod	= tmpfs_mknod,
70 	.vop_open	= tmpfs_open,
71 	.vop_close	= tmpfs_close,
72 	.vop_access	= tmpfs_access,
73 	.vop_getattr	= tmpfs_getattr,
74 	.vop_setattr	= tmpfs_setattr,
75 	.vop_read	= tmpfs_read,
76 	.vop_write	= tmpfs_write,
77 	.vop_ioctl	= tmpfs_ioctl,
78 	.vop_poll	= tmpfs_poll,
79 	.vop_kqfilter	= tmpfs_kqfilter,
80 	.vop_revoke	= vop_generic_revoke,
81 	.vop_fsync	= tmpfs_fsync,
82 	.vop_remove	= tmpfs_remove,
83 	.vop_link	= tmpfs_link,
84 	.vop_rename	= tmpfs_rename,
85 	.vop_mkdir	= tmpfs_mkdir,
86 	.vop_rmdir	= tmpfs_rmdir,
87 	.vop_symlink	= tmpfs_symlink,
88 	.vop_readdir	= tmpfs_readdir,
89 	.vop_readlink	= tmpfs_readlink,
90 	.vop_abortop	= vop_generic_abortop,
91 	.vop_inactive	= tmpfs_inactive,
92 	.vop_reclaim	= tmpfs_reclaim,
93 	.vop_lock	= tmpfs_lock,
94 	.vop_unlock	= tmpfs_unlock,
95 	.vop_bmap	= vop_generic_bmap,
96 	.vop_strategy	= tmpfs_strategy,
97 	.vop_print	= tmpfs_print,
98 	.vop_islocked	= tmpfs_islocked,
99 	.vop_pathconf	= tmpfs_pathconf,
100 	.vop_advlock	= tmpfs_advlock,
101 	.vop_bwrite	= tmpfs_bwrite,
102 };
103 
104 /*
105  * tmpfs_lookup: path name traversal routine.
106  *
107  * Arguments: dvp (directory being searched), vpp (result),
108  * cnp (component name - path).
109  *
110  * => Caller holds a reference and lock on dvp.
111  * => We return looked-up vnode (vpp) locked, with a reference held.
 * => Returns 0 on success, EJUSTRETURN when the last component does not
 *    exist but the operation is CREATE or RENAME and dvp is writable,
 *    or an errno value on failure.
 * => PDIRUNLOCK is set in cn_flags whenever dvp is unlocked at 'out'.
112  */
113 int
114 tmpfs_lookup(void *v)
115 {
116 	struct vop_lookup_args /* {
117 		struct vnode *a_dvp;
118 		struct vnode **a_vpp;
119 		struct componentname *a_cnp;
120 	} */ *ap = v;
121 	struct vnode *dvp = ap->a_dvp, **vpp = ap->a_vpp;
122 	struct componentname *cnp = ap->a_cnp;
123 	struct ucred *cred = cnp->cn_cred;
124 	const int lastcn = (cnp->cn_flags & ISLASTCN) != 0;
125 	const int lockparent = (cnp->cn_flags & LOCKPARENT) != 0;
126 	tmpfs_node_t *dnode, *tnode;
127 	tmpfs_dirent_t *de;
128 	int cachefound;
129 	int error;
130 
131 	KASSERT(VOP_ISLOCKED(dvp));
132 
133 	dnode = VP_TO_TMPFS_DIR(dvp);
	/* Start from a known state: parent still locked, no result yet. */
134 	cnp->cn_flags &= ~PDIRUNLOCK;
135 	*vpp = NULL;
136 
137 	/* Check accessibility of directory. */
138 	error = VOP_ACCESS(dvp, VEXEC, cred, curproc);
139 	if (error) {
140 		goto out;
141 	}
142 
143 	/*
144 	 * If requesting the last path component on a read-only file system
145 	 * with a write operation, deny it.
146 	 */
147 	if (lastcn && (dvp->v_mount->mnt_flag & MNT_RDONLY) != 0 &&
148 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
149 		error = EROFS;
150 		goto out;
151 	}
152 
153 	/*
154 	 * Avoid doing a linear scan of the directory if the requested
155 	 * directory/name couple is already in the cache.
	 *
	 * Both cache-hit paths return directly and therefore bypass the
	 * parent-unlock logic and the assertions at 'out'.
	 * NOTE(review): presumably cache_lookup() has already dealt with
	 * locking for these cases -- confirm.
156 	 */
157 	cachefound = cache_lookup(dvp, vpp, cnp);
158 	if (cachefound == ENOENT /* && *vpp == NULLVP */)
159 		return ENOENT; /* Negative cache hit. */
160 	else if (cachefound != -1)
161 		return 0; /* Found in cache. */
162 
163 	if (cnp->cn_flags & ISDOTDOT) {
164 		tmpfs_node_t *pnode;
165 
166 		/*
167 		 * Lookup of ".." case.
168 		 */
169 		if (lastcn) {
170 			if (cnp->cn_nameiop == RENAME) {
171 				error = EINVAL;
172 				goto out;
173 			}
174 			if (cnp->cn_nameiop == DELETE) {
175 				/* Keep the name for tmpfs_rmdir(). */
176 				cnp->cn_flags |= SAVENAME;
177 			}
178 		}
179 		KASSERT(dnode->tn_type == VDIR);
180 		pnode = dnode->tn_spec.tn_dir.tn_parent;
181 		if (pnode == NULL) {
182 			error = ENOENT;
183 			goto out;
184 		}
185 
186 		/*
187 		 * Lock the parent tn_nlock before releasing the vnode lock,
188 		 * and thus prevents parent from disappearing.
189 		 */
190 		rw_enter_write(&pnode->tn_nlock);
191 		VOP_UNLOCK(dvp, 0, curproc);
192 
193 		/*
194 		 * Get a vnode of the '..' entry and re-acquire the lock.
195 		 * Release the tn_nlock.
		 * NOTE(review): no rw_exit_write() is visible here;
		 * presumably tmpfs_vnode_get() drops tn_nlock -- confirm.
		 * This path jumps to 'out', so the result is not entered
		 * into the name cache.
196 		 */
197 		error = tmpfs_vnode_get(dvp->v_mount, pnode, vpp);
198 		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, curproc);
199 		goto out;
200 
201 	} else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
202 		/*
203 		 * Lookup of "." case.
		 * The result is dvp itself with an extra reference.
204 		 */
205 		if (lastcn && cnp->cn_nameiop == RENAME) {
206 			error = EISDIR;
207 			goto out;
208 		}
209 		vref(dvp);
210 		*vpp = dvp;
211 		error = 0;
212 		goto done;
213 	}
214 
215 	/*
216 	 * Other lookup cases: perform directory scan.
217 	 */
218 	de = tmpfs_dir_lookup(dnode, cnp);
219 	if (de == NULL) {
220 		/*
221 		 * The entry was not found in the directory.  This is valid
222 		 * if we are creating or renaming an entry and are working
223 		 * on the last component of the path name.
224 		 */
225 		if (lastcn && (cnp->cn_nameiop == CREATE ||
226 		    cnp->cn_nameiop == RENAME)) {
227 			error = VOP_ACCESS(dvp, VWRITE, cred, curproc);
228 			if (error) {
229 				goto out;
230 			}
231 			/*
232 			 * We are creating an entry in the file system, so
233 			 * save its name for further use by tmpfs_create().
234 			 */
235 			cnp->cn_flags |= SAVENAME;
236 			error = EJUSTRETURN;
237 		} else {
238 			error = ENOENT;
239 		}
		/* *vpp is still NULL here; 'done' may cache that result. */
240 		goto done;
241 	}
242 
243 	tnode = de->td_node;
244 
245 	/*
246 	 * If it is not the last path component and found a non-directory
247 	 * or non-link entry (which may itself be pointing to a directory),
248 	 * raise an error.
249 	 */
250 	if (!lastcn && tnode->tn_type != VDIR && tnode->tn_type != VLNK) {
251 		error = ENOTDIR;
252 		goto out;
253 	}
254 
255 	/* Check the permissions. */
256 	if (lastcn && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
257 		error = VOP_ACCESS(dvp, VWRITE, cred, curproc);
258 		if (error)
259 			goto out;
260 
261 		/*
262 		 * If not root and directory is sticky, check for permission
263 		 * on directory or on file. This implements append-only
264 		 * directories.
265 		 */
266 		if ((dnode->tn_mode & S_ISTXT) != 0) {
267 			if (cred->cr_uid != 0 &&
268 			    cred->cr_uid != dnode->tn_uid &&
269 			    cred->cr_uid != tnode->tn_uid) {
270 				error = EPERM;
271 				goto out;
272 			}
273 		}
274 
275 		/*
276 		 * XXX pedro: We might need cn_nameptr later in tmpfs_remove()
277 		 * or tmpfs_rmdir() for a tmpfs_dir_lookup(). We should really
278 		 * get rid of SAVENAME at some point.
279 		 */
280 		if (cnp->cn_nameiop == DELETE)
281 			cnp->cn_flags |= SAVENAME;
282 	}
283 
284 	/* Get a vnode for the matching entry. */
	/*
	 * NOTE(review): tn_nlock is taken here and not released in this
	 * function; presumably tmpfs_vnode_get() drops it -- confirm.
	 */
285 	rw_enter_write(&tnode->tn_nlock);
286 	error = tmpfs_vnode_get(dvp->v_mount, tnode, vpp);
287 done:
288 	/*
289 	 * Cache the result, unless request was for creation (as it does
290 	 * not improve the performance).
291 	 */
292 	if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE) {
293 		cache_enter(dvp, *vpp, cnp);
294 	}
295 out:
296 	/*
297 	 * If (1) we succeeded, (2) found a distinct vnode to return and (3)
298 	 * were either not told to keep the parent locked or are in the
299 	 * middle of a lookup, unlock the parent vnode.
300 	 */
301 	if ((error == 0 || error == EJUSTRETURN) && /* (1) */
302 	    *vpp != dvp &&			    /* (2) */
303 	    (!lockparent || !lastcn)) {		    /* (3) */
304 		VOP_UNLOCK(dvp, 0, curproc);
305 		cnp->cn_flags |= PDIRUNLOCK;
306 	} else
307 		KASSERT(VOP_ISLOCKED(dvp));
308 
	/* Either an error is reported or a locked vnode is returned. */
309 	KASSERT((*vpp && VOP_ISLOCKED(*vpp)) || error);
310 
311 	return error;
312 }
313 
/*
 * tmpfs_create: create a regular file or a socket in directory dvp.
 *
 * => dvp must be locked and cnp must carry HASBUF (both asserted).
 * => The requested type must be VREG or VSOCK (asserted).
 * => All work is delegated to tmpfs_alloc_file(), called with a NULL
 *    final argument; its result is returned unchanged.
 */
314 int
315 tmpfs_create(void *v)
316 {
317 	struct vop_create_args /* {
318 		struct vnode		*a_dvp;
319 		struct vnode		**a_vpp;
320 		struct componentname	*a_cnp;
321 		struct vattr		*a_vap;
322 	} */ *ap = v;
323 	struct vnode *dvp = ap->a_dvp, **vpp = ap->a_vpp;
324 	struct componentname *cnp = ap->a_cnp;
325 	struct vattr *vap = ap->a_vap;
326 
327 	KASSERT(VOP_ISLOCKED(dvp));
328 	KASSERT(cnp->cn_flags & HASBUF);
329 	KASSERT(vap->va_type == VREG || vap->va_type == VSOCK);
330 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
331 }
332 
/*
 * tmpfs_mknod: create a block device, character device or FIFO node in
 * directory dvp.
 *
 * => Any other requested type is rejected with EINVAL after vput(dvp).
 * => An error from tmpfs_alloc_file() is returned unchanged.
 * => On success the vnode stored in *vpp is released with vput() before
 *    0 is returned.
 */
333 int
334 tmpfs_mknod(void *v)
335 {
336 	struct vop_mknod_args /* {
337 		struct vnode		*a_dvp;
338 		struct vnode		**a_vpp;
339 		struct componentname	*a_cnp;
340 		struct vattr		*a_vap;
341 	} */ *ap = v;
342 	struct vnode *dvp = ap->a_dvp, **vpp = ap->a_vpp;
343 	struct componentname *cnp = ap->a_cnp;
344 	struct vattr *vap = ap->a_vap;
345 	enum vtype vt = vap->va_type;
346 	int error;
347 
348 	if (vt != VBLK && vt != VCHR && vt != VFIFO) {
		/* This early exit must release dvp itself. */
349 		vput(dvp);
350 		return EINVAL;
351 	}
352 
353 	/* tmpfs_alloc_file() will unlock 'dvp'. */
354 	error = tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
355 	if (error)
356 		return error;
357 
358 	vput(*vpp);
359 
360 	return 0;
361 }
362 
/*
 * tmpfs_open: validate an open request against the node's state.
 *
 * => vp must be locked (asserted).
 * => Returns ENOENT when the node has no links left, EPERM when the
 *    node is flagged APPEND and the open asks for FWRITE without
 *    O_APPEND, and 0 otherwise.
 */
363 int
364 tmpfs_open(void *v)
365 {
366 	struct vop_open_args /* {
367 		struct vnode	*a_vp;
368 		int		a_mode;
369 		kauth_cred_t	a_cred;
370 	} */ *ap = v;
371 	struct vnode *vp = ap->a_vp;
372 	mode_t mode = ap->a_mode;
373 	tmpfs_node_t *node;
374 
375 	KASSERT(VOP_ISLOCKED(vp));
376 
377 	node = VP_TO_TMPFS_NODE(vp);
378 	if (node->tn_links < 1) {
379 		/*
380 		 * The file is still active, but all its names have been
381 		 * removed (e.g. by a "rmdir $(pwd)").  It cannot be opened
382 		 * any more, as it is about to be destroyed.
383 		 */
384 		return ENOENT;
385 	}
386 
387 	/* If the file is marked append-only, deny write requests. */
	/* Only FWRITE without O_APPEND is refused; appending is allowed. */
388 	if ((node->tn_flags & APPEND) != 0 &&
389 	    (mode & (FWRITE | O_APPEND)) == FWRITE) {
390 		return EPERM;
391 	}
392 	return 0;
393 }
394 
/*
 * tmpfs_close: nothing to do on close; asserts that vp is locked and
 * always returns 0.
 */
395 int
396 tmpfs_close(void *v)
397 {
398 	struct vop_close_args /* {
399 		struct vnode	*a_vp;
400 		int		a_fflag;
401 		kauth_cred_t	a_cred;
402 	} */ *ap = v;
403 	struct vnode *vp = ap->a_vp;
404 
405 	KASSERT(VOP_ISLOCKED(vp));
406 
407 	return 0;
408 }
409 
/*
 * tmpfs_access: check whether the requested access mode is permitted.
 *
 * => vp must be locked (asserted).
 * => Returns EROFS for write access to a directory, symlink or regular
 *    file on a read-only mount; device, socket and FIFO nodes skip that
 *    check.
 * => Returns EINVAL for any other vnode type.
 * => Returns EPERM for write access to a node flagged IMMUTABLE.
 * => Otherwise returns the result of vaccess() on the node's mode, uid
 *    and gid.
 */
410 int
411 tmpfs_access(void *v)
412 {
413 	struct vop_access_args /* {
414 		struct vnode	*a_vp;
415 		int		a_mode;
416 		kauth_cred_t	a_cred;
417 	} */ *ap = v;
418 	struct vnode *vp = ap->a_vp;
419 	mode_t mode = ap->a_mode;
420 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
421 	const int writing = (mode & VWRITE) != 0;
422 
423 	KASSERT(VOP_ISLOCKED(vp));
424 
425 	/* Possible? */
426 	switch (vp->v_type) {
427 	case VDIR:
428 	case VLNK:
429 	case VREG:
430 		if (writing && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) {
431 			return EROFS;
432 		}
433 		break;
434 	case VBLK:
435 	case VCHR:
436 	case VSOCK:
437 	case VFIFO:
		/* No read-only mount check for these types. */
438 		break;
439 	default:
440 		return EINVAL;
441 	}
442 	if (writing && (node->tn_flags & IMMUTABLE) != 0) {
443 		return EPERM;
444 	}
445 
446 	return (vaccess(vp->v_type, node->tn_mode, node->tn_uid, node->tn_gid,
447 	    mode, ap->a_cred));
448 }
449 
/*
 * tmpfs_getattr: fill *a_vap with the attributes of the tmpfs node
 * behind vp.
 *
 * => The structure is first reset with vattr_null().
 * => Always returns 0.
 */
450 int
451 tmpfs_getattr(void *v)
452 {
453 	struct vop_getattr_args /* {
454 		struct vnode	*a_vp;
455 		struct vattr	*a_vap;
456 		kauth_cred_t	a_cred;
457 	} */ *ap = v;
458 	struct vnode *vp = ap->a_vp;
459 	struct vattr *vap = ap->a_vap;
460 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
461 
462 	vattr_null(vap);
463 
464 	vap->va_type = vp->v_type;
465 	vap->va_mode = node->tn_mode;
466 	vap->va_nlink = node->tn_links;
467 	vap->va_uid = node->tn_uid;
468 	vap->va_gid = node->tn_gid;
469 	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
470 	vap->va_fileid = node->tn_id;
471 	vap->va_size = node->tn_size;
472 	vap->va_blocksize = PAGE_SIZE;
473 	vap->va_atime = node->tn_atime;
474 	vap->va_mtime = node->tn_mtime;
475 	vap->va_ctime = node->tn_ctime;
476 	/* vap->va_birthtime = node->tn_birthtime; */
477 	vap->va_gen = TMPFS_NODE_GEN(node);
478 	vap->va_flags = node->tn_flags;
	/* A device number is reported for block/character devices only. */
479 	vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
480 	    node->tn_spec.tn_dev.tn_rdev : VNOVAL;
	/* Bytes used is the size rounded up by round_page(). */
481 	vap->va_bytes = round_page(node->tn_size);
482 	vap->va_filerev = VNOVAL;
483 	vap->va_vaflags = 0;
484 	vap->va_spare = VNOVAL; /* XXX */
485 
486 	return 0;
487 }
488 
/*
 * GOODTIME: true when either field of the timespec differs from VNOVAL.
 */
489 #define GOODTIME(tv)	((tv)->tv_sec != VNOVAL || (tv)->tv_nsec != VNOVAL)
490 /* XXX Should this operation be atomic?  I think it should, but code in
491  * XXX other places (e.g., ufs) doesn't seem to be... */
/*
 * tmpfs_setattr: apply the attributes set in *a_vap to vp.
 *
 * => vp must be locked (asserted).
 * => Returns EINVAL if any attribute that cannot be set (type, nlink,
 *    fsid, fileid, blocksize, ctime, gen, rdev, bytes) is supplied.
 * => Flags, size, owner/group, mode and times are applied in that order
 *    through the tmpfs_ch*() helpers; the first error stops processing
 *    and is returned.  Changes already applied are not undone here.
 */
492 int
493 tmpfs_setattr(void *v)
494 {
495 	struct vop_setattr_args /* {
496 		struct vnode	*a_vp;
497 		struct vattr	*a_vap;
498 		kauth_cred_t	a_cred;
499 	} */ *ap = v;
500 	struct vnode *vp = ap->a_vp;
501 	struct vattr *vap = ap->a_vap;
502 	struct ucred *cred = ap->a_cred;
503 	struct proc *p = curproc;
504 	int error = 0;
505 
506 	KASSERT(VOP_ISLOCKED(vp));
507 
508 	/* Abort if any unsettable attribute is given. */
509 	if (vap->va_type != VNON || vap->va_nlink != VNOVAL ||
510 	    vap->va_fsid != VNOVAL || vap->va_fileid != VNOVAL ||
511 	    vap->va_blocksize != VNOVAL || GOODTIME(&vap->va_ctime) ||
512 	    vap->va_gen != VNOVAL || vap->va_rdev != VNOVAL ||
513 	    vap->va_bytes != VNOVAL) {
514 		return EINVAL;
515 	}
516 	if (error == 0 && (vap->va_flags != VNOVAL))
517 		error = tmpfs_chflags(vp, vap->va_flags, cred, p);
518 
519 	if (error == 0 && (vap->va_size != VNOVAL))
520 		error = tmpfs_chsize(vp, vap->va_size, cred, p);
521 
522 	if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL))
523 		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred, p);
524 
525 	if (error == 0 && (vap->va_mode != VNOVAL))
526 		error = tmpfs_chmod(vp, vap->va_mode, cred, p);
527 
528 	if (error == 0 && (GOODTIME(&vap->va_atime)
529 	    || GOODTIME(&vap->va_mtime))) {
530 		error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime,
531 		    vap->va_vaflags, cred, p);
		/*
		 * NOTE(review): this early return is redundant; falling
		 * through to "return error" yields the same value.
		 */
532 		if (error == 0)
533 			return 0;
534 	}
535 	return error;
536 }
537 
/*
 * tmpfs_read: copy data from a regular file into the caller's uio.
 *
 * => vp must be locked (asserted).
 * => Returns EISDIR for every vnode type other than VREG and EINVAL for
 *    a negative offset.
 * => The node is marked accessed before any data is copied.
 * => Copying stops at end of file, when the uio is exhausted, or on the
 *    first error from tmpfs_uiomove(), which is returned.
 */
538 int
539 tmpfs_read(void *v)
540 {
541 	struct vop_read_args /* {
542 		struct vnode *a_vp;
543 		struct uio *a_uio;
544 		int a_ioflag;
545 		struct ucred *a_cred;
546 	} */ *ap = v;
547 	struct vnode *vp = ap->a_vp;
548 	struct uio *uio = ap->a_uio;
549 	/* const int ioflag = ap->a_ioflag; */
550 	tmpfs_node_t *node;
551 	int error;
552 
553 	KASSERT(VOP_ISLOCKED(vp));
554 
555 	if (vp->v_type != VREG) {
556 		return EISDIR;
557 	}
558 	if (uio->uio_offset < 0) {
559 		return EINVAL;
560 	}
561 
562 	node = VP_TO_TMPFS_NODE(vp);
563 	tmpfs_update(node, TMPFS_NODE_ACCESSED);
564 	error = 0;
565 
566 	while (error == 0 && uio->uio_resid > 0) {
567 		vsize_t len;
568 
		/* At or past end of file: nothing more to read. */
569 		if (node->tn_size <= uio->uio_offset) {
570 			break;
571 		}
		/* Never read past end of file or beyond the request. */
572 		len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
573 		if (len == 0) {
574 			break;
575 		}
576 		error = tmpfs_uiomove(node, uio, len);
577 	}
578 
579 	return error;
580 }
581 
/*
 * tmpfs_write: copy data from the caller's uio into a regular file,
 * growing the file first when the write ends past the current size.
 *
 * => vp must be locked (asserted).
 * => Returns EINVAL for vnode types other than VREG, 0 for a zero-length
 *    request, EFBIG for a negative offset or a range ending beyond
 *    LLONG_MAX, or any error from vn_fsizechk(), tmpfs_reg_resize() or
 *    tmpfs_uiomove().
 * => With IO_APPEND the write starts at the current end of file.
 * => If the copy fails the file is resized back to its original size.
 * => The byte count clamped off by vn_fsizechk() is added back to
 *    uio_resid on every path that leaves through 'out', on failure as
 *    well as on success, so the caller always sees a residual count
 *    that covers the whole original request.
 */
582 int
583 tmpfs_write(void *v)
584 {
585 	struct vop_write_args /* {
586 		struct vnode	*a_vp;
587 		struct uio	*a_uio;
588 		int		a_ioflag;
589 		kauth_cred_t	a_cred;
590 	} */ *ap = v;
591 	struct vnode *vp = ap->a_vp;
592 	struct uio *uio = ap->a_uio;
593 	const int ioflag = ap->a_ioflag;
594 	tmpfs_node_t *node;
595 	off_t oldsize;
596 	ssize_t overrun;
597 	int extended;
598 	int error;
599 
600 	KASSERT(VOP_ISLOCKED(vp));
601 
602 	node = VP_TO_TMPFS_NODE(vp);
	/* Remembered so that a failed write can be rolled back. */
603 	oldsize = node->tn_size;
604 
605 	if (vp->v_type != VREG)
606 		return (EINVAL);
607 
608 	if (uio->uio_resid == 0)
609 		return (0);
610 
611 	if (ioflag & IO_APPEND) {
612 		uio->uio_offset = node->tn_size;
613 	}
614 
615 	if (uio->uio_offset < 0 ||
616 	    (u_int64_t)uio->uio_offset + uio->uio_resid > LLONG_MAX)
617 		return (EFBIG);
618 
619 	/* do the filesize rlimit check */
	/* From here on 'overrun' is valid and every exit goes via 'out'. */
620 	if ((error = vn_fsizechk(vp, uio, ioflag, &overrun)))
621 		return (error);
622 
623 	extended = uio->uio_offset + uio->uio_resid > node->tn_size;
624 	if (extended) {
625 		error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid);
626 		if (error)
627 			goto out;
628 	}
629 
630 	error = 0;
631 	while (error == 0 && uio->uio_resid > 0) {
632 		vsize_t len;
633 
634 		len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
635 		if (len == 0) {
636 			break;
637 		}
638 		error = tmpfs_uiomove(node, uio, len);
639 	}
640 	if (error) {
		/* Undo any growth performed for this write. */
641 		(void)tmpfs_reg_resize(vp, oldsize);
642 	}
643 
644 	tmpfs_update(node, TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED |
645 	    (extended ? TMPFS_NODE_CHANGED : 0));
646 	if (extended)
647 		VN_KNOTE(vp, NOTE_WRITE | NOTE_EXTEND);
648 	else
649 		VN_KNOTE(vp, NOTE_WRITE);
650 out:
651 	if (error) {
652 		KASSERT(oldsize == node->tn_size);
653 	} else {
654 		KASSERT(uio->uio_resid == 0);
655 	}
656 
657 	/* correct the result for writes clamped by vn_fsizechk() */
658 	uio->uio_resid += overrun;
659 
660 	return error;
661 }
662 
/*
 * tmpfs_fsync: nothing is written anywhere by this handler; it asserts
 * that vp is locked and always returns 0.
 */
663 int
664 tmpfs_fsync(void *v)
665 {
666 	struct vop_fsync_args /* {
667 		struct vnode *a_vp;
668 		struct ucred *a_cred;
669 		int a_flags;
670 		off_t a_offlo;
671 		off_t a_offhi;
672 		struct lwp *a_l;
673 	} */ *ap = v;
674 	struct vnode *vp = ap->a_vp;
675 
676 	/* Nothing to do: only the lock assertion is performed. */
677 	KASSERT(VOP_ISLOCKED(vp));
678 	return 0;
679 }
680 
681 /*
682  * tmpfs_remove: unlink a file.
683  *
684  * => Both directory (dvp) and file (vp) are locked.
685  * => We unlock and drop the reference on both.
 * => cnp must carry HASBUF (asserted); its pathname buffer is returned
 *    to namei_pool on every path.
 * => Returns EPERM if vp is a directory, if the file is flagged
 *    IMMUTABLE or APPEND, or if the directory is flagged APPEND;
 *    returns 0 otherwise.
686  */
687 int
688 tmpfs_remove(void *v)
689 {
690 	struct vop_remove_args /* {
691 		struct vnode *a_dvp;
692 		struct vnode *a_vp;
693 		struct componentname *a_cnp;
694 	} */ *ap = v;
695 	struct vnode *dvp = ap->a_dvp, *vp = ap->a_vp;
696 	struct componentname *cnp = ap->a_cnp;
697 	tmpfs_node_t *dnode, *node;
698 	tmpfs_dirent_t *de;
699 	int error;
700 
701 	KASSERT(VOP_ISLOCKED(dvp));
702 	KASSERT(VOP_ISLOCKED(vp));
703 	KASSERT(cnp->cn_flags & HASBUF);
704 
705 	if (vp->v_type == VDIR) {
706 		error = EPERM;
707 		goto out;
708 	}
709 
710 	dnode = VP_TO_TMPFS_NODE(dvp);
711 	node = VP_TO_TMPFS_NODE(vp);
712 
713 	/* Files marked as immutable or append-only cannot be deleted. */
714 	if (node->tn_flags & (IMMUTABLE | APPEND)) {
715 		error = EPERM;
716 		goto out;
717 	}
718 
719 	/*
720 	 * Likewise, files residing on directories marked as append-only cannot
721 	 * be deleted.
722 	 */
723 	if (dnode->tn_flags & APPEND) {
724 		error = EPERM;
725 		goto out;
726 	}
727 
728 	/* Lookup the directory entry (check the cached hint first). */
729 	de = tmpfs_dir_cached(node);
730 	if (de == NULL) {
731 		de = tmpfs_dir_lookup(dnode, cnp);
732 	}
733 
734 	KASSERT(de && de->td_node == node);
735 
736 	/*
737 	 * Remove the entry from the directory (drops the link count) and
738 	 * destroy it.
739 	 * Note: the inode referred by it will not be destroyed
740 	 * until the vnode is reclaimed/recycled.
741 	 */
742 	tmpfs_dir_detach(dnode, de);
743 	tmpfs_free_dirent(VFS_TO_TMPFS(vp->v_mount), de);
744 	if (node->tn_links > 0)  {
745 		/* We removed a hard link. */
746 		tmpfs_update(node, TMPFS_NODE_CHANGED);
747 	}
748 	error = 0;
749 out:
750 	pool_put(&namei_pool, cnp->cn_pnbuf);
751 	/* Drop the references and unlock the vnodes. */
752 	vput(vp);
	/* If both name the same vnode, vput(vp) above already unlocked it. */
753 	if (dvp == vp) {
754 		vrele(dvp);
755 	} else {
756 		vput(dvp);
757 	}
758 	return error;
759 }
760 
761 /*
762  * tmpfs_link: create a hard link.
 *
 * => dvp must be locked (asserted); vp is locked here for the duration
 *    of the operation and unlocked again before returning.
 * => dvp is released with vput() on every path.
 * => Returns EPERM if vp is a directory or is flagged IMMUTABLE or
 *    APPEND, EXDEV if dvp and vp are on different mounts, EMLINK if the
 *    link count is already LINK_MAX, ENOSPC if TMPFS_DIRSEQ_FULL()
 *    reports the directory full, or an error from tmpfs_alloc_dirent().
763  */
764 int
765 tmpfs_link(void *v)
766 {
767 	struct vop_link_args /* {
768 		struct vnode *a_dvp;
769 		struct vnode *a_vp;
770 		struct componentname *a_cnp;
771 	} */ *ap = v;
772 	struct vnode *dvp = ap->a_dvp;
773 	struct vnode *vp = ap->a_vp;
774 	struct componentname *cnp = ap->a_cnp;
775 	tmpfs_node_t *dnode, *node;
776 	tmpfs_dirent_t *de;
777 	int error;
778 
779 	KASSERT(VOP_ISLOCKED(dvp));
780 
	/* The two early exits below run before vp is locked. */
781 	if (vp->v_type == VDIR) {
782 		VOP_ABORTOP(dvp, cnp);
783 		vput(dvp);
784 		return EPERM;
785 	}
786 
787 	KASSERT(dvp != vp);
788 
789 	if (dvp->v_mount != vp->v_mount) {
790 		VOP_ABORTOP(dvp, cnp);
791 		vput(dvp);
792 		return EXDEV;
793 	}
794 
795 	dnode = VP_TO_TMPFS_DIR(dvp);
796 	node = VP_TO_TMPFS_NODE(vp);
797 
798 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curproc);
799 
800 	/* Check for maximum number of links limit. */
801 	if (node->tn_links == LINK_MAX) {
802 		error = EMLINK;
803 		goto out;
804 	}
805 	KASSERT(node->tn_links < LINK_MAX);
806 
807 	/* We cannot create links of files marked immutable or append-only. */
808 	if (node->tn_flags & (IMMUTABLE | APPEND)) {
809 		error = EPERM;
810 		goto out;
811 	}
812 
813 	if (TMPFS_DIRSEQ_FULL(dnode)) {
814 		error = ENOSPC;
815 		goto out;
816 	}
817 
818 	/* Allocate a new directory entry to represent the inode. */
819 	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount),
820 	    cnp->cn_nameptr, cnp->cn_namelen, &de);
821 	if (error) {
822 		goto out;
823 	}
824 
825 	/*
826 	 * Insert the entry into the directory.
827 	 * It will increase the inode link count.
828 	 */
829 	tmpfs_dir_attach(dnode, de, node);
830 
831 	/* Update the timestamps and trigger the event. */
832 	if (node->tn_vnode) {
833 		VN_KNOTE(node->tn_vnode, NOTE_LINK);
834 	}
835 	tmpfs_update(node, TMPFS_NODE_CHANGED);
836 	error = 0;
837 out:
	/* Common exit once vp is locked: free the name, unlock, release. */
838 	pool_put(&namei_pool, cnp->cn_pnbuf);
839 	VOP_UNLOCK(vp, 0, curproc);
840 	vput(dvp);
841 	return error;
842 }
843 
/*
 * tmpfs_mkdir: create a directory in dvp.
 *
 * => The requested type must be VDIR (asserted).
 * => All work is delegated to tmpfs_alloc_file(), whose result is
 *    returned unchanged.
 */
844 int
845 tmpfs_mkdir(void *v)
846 {
847 	struct vop_mkdir_args /* {
848 		struct vnode		*a_dvp;
849 		struct vnode		**a_vpp;
850 		struct componentname	*a_cnp;
851 		struct vattr		*a_vap;
852 	} */ *ap = v;
853 	struct vnode *dvp = ap->a_dvp;
854 	struct vnode **vpp = ap->a_vpp;
855 	struct componentname *cnp = ap->a_cnp;
856 	struct vattr *vap = ap->a_vap;
857 
858 	KASSERT(vap->va_type == VDIR);
859 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
860 }
861 
/*
 * tmpfs_rmdir: remove the directory vp from its parent dvp.
 *
 * => Both dvp and vp must be locked and cnp must carry HASBUF (all
 *    asserted).
 * => On every path the pathname buffer is returned to namei_pool and
 *    both vnodes are released with vput().
 * => Returns ENOTEMPTY when the name is ".." or the directory still has
 *    entries, EPERM when the parent is flagged APPEND or the directory
 *    is flagged IMMUTABLE or APPEND, and 0 on success.
 */
862 int
863 tmpfs_rmdir(void *v)
864 {
865 	struct vop_rmdir_args /* {
866 		struct vnode		*a_dvp;
867 		struct vnode		*a_vp;
868 		struct componentname	*a_cnp;
869 	} */ *ap = v;
870 	struct vnode *dvp = ap->a_dvp;
871 	struct vnode *vp = ap->a_vp;
872 	struct componentname *cnp = ap->a_cnp;
873 	tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
874 	tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
875 	tmpfs_node_t *node = VP_TO_TMPFS_DIR(vp);
876 	tmpfs_dirent_t *de;
877 	int error = 0;
878 
879 	KASSERT(VOP_ISLOCKED(dvp));
880 	KASSERT(VOP_ISLOCKED(vp));
881 	KASSERT(cnp->cn_flags & HASBUF);
882 
	/* Removing ".." is refused. */
883 	if (cnp->cn_namelen == 2 && cnp->cn_nameptr[0] == '.' &&
884 	    cnp->cn_nameptr[1] == '.') {
885 		error = ENOTEMPTY;
886 		goto out;
887 	}
888 
889 	KASSERT(node->tn_spec.tn_dir.tn_parent == dnode);
890 
891 	/*
892 	 * Directories with more than two entries ('.' and '..') cannot be
893 	 * removed.
894 	 */
895 	if (node->tn_size > 0) {
896 		KASSERT(error == 0);
		/*
		 * The loop body runs at most once: the presence of any
		 * entry means the directory is not empty.
		 */
897 		TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
898 			error = ENOTEMPTY;
899 			break;
900 		}
901 		if (error)
902 			goto out;
903 	}
904 
905 	/* Lookup the directory entry (check the cached hint first). */
906 	de = tmpfs_dir_cached(node);
907 	if (de == NULL)
908 		de = tmpfs_dir_lookup(dnode, cnp);
909 
910 	KASSERT(de && de->td_node == node);
911 
912 	/* Check flags to see if we are allowed to remove the directory. */
913 	if (dnode->tn_flags & APPEND || node->tn_flags & (IMMUTABLE | APPEND)) {
914 		error = EPERM;
915 		goto out;
916 	}
917 
918 	/* Decrement the link count for the virtual '.' entry. */
919 	node->tn_links--;
920 	tmpfs_update(node, TMPFS_NODE_STATUSALL);
921 
922 	/* Detach the directory entry from the directory. */
923 	tmpfs_dir_detach(dnode, de);
924 
925 	/* Purge the cache for parent. */
926 	cache_purge(dvp);
927 
928 	/*
929 	 * Destroy the directory entry.
930 	 * Note: the inode referred by it will not be destroyed
931 	 * until the vnode is reclaimed.
932 	 */
933 	tmpfs_free_dirent(tmp, de);
934 	KASSERT(TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir) == NULL);
935 
936 	KASSERT(node->tn_links == 0);
937 out:
938 	pool_put(&namei_pool, cnp->cn_pnbuf);
939 	/* Release the nodes. */
940 	vput(dvp);
941 	vput(vp);
942 	return error;
943 }
944 
/*
 * tmpfs_symlink: create a symbolic link in dvp pointing at a_target.
 *
 * => The incoming va_type must be 0 (asserted); it is set to VLNK here.
 * => Creation is delegated to tmpfs_alloc_file(), which receives the
 *    target string; its result is returned.
 * => On success the vnode stored in *vpp is released with vput() before
 *    returning.
 */
945 int
946 tmpfs_symlink(void *v)
947 {
948 	struct vop_symlink_args /* {
949 		struct vnode		*a_dvp;
950 		struct vnode		**a_vpp;
951 		struct componentname	*a_cnp;
952 		struct vattr		*a_vap;
953 		char			*a_target;
954 	} */ *ap = v;
955 	struct vnode *dvp = ap->a_dvp;
956 	struct vnode **vpp = ap->a_vpp;
957 	struct componentname *cnp = ap->a_cnp;
958 	struct vattr *vap = ap->a_vap;
959 	char *target = ap->a_target;
960 	int error;
961 
962 	KASSERT(vap->va_type == 0);
963 	vap->va_type = VLNK;
964 
965 	error = tmpfs_alloc_file(dvp, vpp, vap, cnp, target);
966 	if (error == 0)
967 		vput(*vpp);
968 
969 	return error;
970 }
971 
/*
 * tmpfs_readdir: copy directory entries into the caller's uio.
 *
 * => vp must be locked (asserted).
 * => Returns ENOTDIR if vp is not a directory.
 * => A directory with no links left yields no entries and no error.
 * => When a_eofflag is non-NULL it is set to true only if no error
 *    occurred and uio_offset equals TMPFS_DIRSEQ_EOF.
 */
972 int
973 tmpfs_readdir(void *v)
974 {
975 	struct vop_readdir_args /* {
976 		struct vnode	*a_vp;
977 		struct uio	*a_uio;
978 		kauth_cred_t	a_cred;
979 		int		*a_eofflag;
980 	} */ *ap = v;
981 	struct vnode *vp = ap->a_vp;
982 	struct uio *uio = ap->a_uio;
983 	int *eofflag = ap->a_eofflag;
984 	tmpfs_node_t *node;
985 	int error;
986 
987 	KASSERT(VOP_ISLOCKED(vp));
988 
989 	/* This operation only makes sense on directory nodes. */
990 	if (vp->v_type != VDIR) {
991 		return ENOTDIR;
992 	}
993 	node = VP_TO_TMPFS_DIR(vp);
994 	/*
995 	 * Retrieve the directory entries, unless it is being destroyed.
996 	 */
997 	if (node->tn_links) {
998 		error = tmpfs_dir_getdents(node, uio);
999 	} else {
1000 		error = 0;
1001 	}
1002 
1003 	if (eofflag != NULL) {
1004 		*eofflag = !error && uio->uio_offset == TMPFS_DIRSEQ_EOF;
1005 	}
1006 	return error;
1007 }
1008 
/*
 * tmpfs_readlink: copy the target of a symbolic link into the caller's
 * uio.
 *
 * => vp must be locked, be of type VLNK, and the uio offset must be 0
 *    (all asserted).
 * => At most MIN(link size, uio_resid) bytes are copied.
 * => The node is marked accessed even if uiomove() fails; the uiomove()
 *    result is returned.
 */
1009 int
1010 tmpfs_readlink(void *v)
1011 {
1012 	struct vop_readlink_args /* {
1013 		struct vnode	*a_vp;
1014 		struct uio	*a_uio;
1015 		kauth_cred_t	a_cred;
1016 	} */ *ap = v;
1017 	struct vnode *vp = ap->a_vp;
1018 	struct uio *uio = ap->a_uio;
1019 	tmpfs_node_t *node;
1020 	int error;
1021 
1022 	KASSERT(VOP_ISLOCKED(vp));
1023 	KASSERT(uio->uio_offset == 0);
1024 	KASSERT(vp->v_type == VLNK);
1025 
1026 	node = VP_TO_TMPFS_NODE(vp);
1027 	error = uiomove(node->tn_spec.tn_lnk.tn_link,
1028 	    MIN(node->tn_size, uio->uio_resid), uio);
1029 	tmpfs_update(node, TMPFS_NODE_ACCESSED);
1030 
1031 	return error;
1032 }
1033 
/*
 * tmpfs_inactive: called with vp locked (asserted); unlocks it and, if
 * the node has no links left, asks for the vnode to be recycled.
 *
 * => For a regular file, tmpfs_uio_uncache() is called first when
 *    tmpfs_uio_cached() reports true.
 * => Always returns 0.
 */
1034 int
1035 tmpfs_inactive(void *v)
1036 {
1037 	struct vop_inactive_args /* {
1038 		struct vnode *a_vp;
1039 		int *a_recycle;
1040 	} */ *ap = v;
1041 	struct vnode *vp = ap->a_vp;
1042 	tmpfs_node_t *node;
1043 
1044 	KASSERT(VOP_ISLOCKED(vp));
1045 
1046 	node = VP_TO_TMPFS_NODE(vp);
1047 
1048 	if (vp->v_type == VREG && tmpfs_uio_cached(node))
1049 		tmpfs_uio_uncache(node);
1050 
1051 	VOP_UNLOCK(vp, 0, curproc);
1052 
1053 	/*
1054 	 * If we are done with the node, reclaim it so that it can be reused
1055 	 * immediately.
1056 	 */
1057 	if (node->tn_links == 0)
1058 		vrecycle(vp, curproc);
1059 
1060 	return 0;
1061 }
1062 
/*
 * tmpfs_reclaim: break the association between vp and its tmpfs node
 * and free the node if nothing refers to it any more.
 *
 * => The vnode/node pointers are cleared under the node's tn_nlock.
 * => The node is freed only when it has no links and
 *    TMPFS_NODE_RECLAIMING() did not report a racing tmpfs_vnode_get().
 * => Always returns 0.
 */
1063 int
1064 tmpfs_reclaim(void *v)
1065 {
1066 	struct vop_reclaim_args /* {
1067 		struct vnode *a_vp;
1068 	} */ *ap = v;
1069 	struct vnode *vp = ap->a_vp;
1070 	tmpfs_mount_t *tmp = VFS_TO_TMPFS(vp->v_mount);
1071 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1072 	int racing;
1073 
1074 	/* Disassociate inode from vnode. */
1075 	rw_enter_write(&node->tn_nlock);
1076 	node->tn_vnode = NULL;
1077 	vp->v_data = NULL;
1078 	/* Check if tmpfs_vnode_get() is racing with us. */
1079 	racing = TMPFS_NODE_RECLAIMING(node);
1080 	rw_exit_write(&node->tn_nlock);
1081 
1082 	/*
1083 	 * If inode is not referenced, i.e. no links, then destroy it.
1084 	 * Note: if racing - inode is about to get a new vnode, leave it.
	 * The link count is read after tn_nlock has been dropped.
1085 	 */
1086 	if (node->tn_links == 0 && !racing) {
1087 		tmpfs_free_node(tmp, node);
1088 	}
1089 	return 0;
1090 }
1091 
/*
 * tmpfs_pathconf: report a configurable path name limit or option.
 *
 * => Stores the value for a_name in *a_retval and returns 0.
 * => Returns EINVAL for names not handled here, leaving *a_retval
 *    untouched.
 */
1092 int
1093 tmpfs_pathconf(void *v)
1094 {
1095 	struct vop_pathconf_args /* {
1096 		struct vnode	*a_vp;
1097 		int		a_name;
1098 		register_t	*a_retval;
1099 	} */ *ap = v;
1100 	const int name = ap->a_name;
1101 	register_t *retval = ap->a_retval;
1102 	int error = 0;
1103 
1104 	switch (name) {
1105 	case _PC_LINK_MAX:
1106 		*retval = LINK_MAX;
1107 		break;
1108 	case _PC_NAME_MAX:
1109 		*retval = TMPFS_MAXNAMLEN;
1110 		break;
1111 	case _PC_CHOWN_RESTRICTED:
1112 		*retval = 1;
1113 		break;
1114 	case _PC_NO_TRUNC:
1115 		*retval = 1;
1116 		break;
1117 	case _PC_FILESIZEBITS:
1118 		*retval = 64;
1119 		break;
1120 	case _PC_TIMESTAMP_RESOLUTION:
1121 		*retval = 1;
1122 		break;
1123 	default:
1124 		error = EINVAL;
1125 	}
1126 	return error;
1127 }
1128 
/*
 * tmpfs_advlock: advisory locking; forwards the request to lf_advlock()
 * using the node's tn_lockf and current size, and returns its result.
 */
1129 int
1130 tmpfs_advlock(void *v)
1131 {
1132 	struct vop_advlock_args /* {
1133 		struct vnode	*a_vp;
1134 		void *		a_id;
1135 		int		a_op;
1136 		struct flock	*a_fl;
1137 		int		a_flags;
1138 	} */ *ap = v;
1139 	struct vnode *vp = ap->a_vp;
1140 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1141 
1142 	return lf_advlock(&node->tn_lockf, node->tn_size, ap->a_id, ap->a_op,
1143 	    ap->a_fl, ap->a_flags);
1144 }
1145 
/*
 * tmpfs_print: print the node's address, flags, link count, mode,
 * owner, group and size with printf().
 *
 * => When built with FIFO, fifo_printinfo() output is appended for FIFO
 *    vnodes.
 * => Always returns 0.
 */
1146 int
1147 tmpfs_print(void *v)
1148 {
1149 	struct vop_print_args /* {
1150 		struct vnode	*a_vp;
1151 	} */ *ap = v;
1152 	struct vnode *vp = ap->a_vp;
1153 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1154 
1155 	printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%x, links %d\n"
1156 	    "\tmode 0%o, owner %d, group %d, size %lld",
1157 	    node, node->tn_flags, node->tn_links, node->tn_mode, node->tn_uid,
1158 	    node->tn_gid, node->tn_size);
1159 #ifdef FIFO
1160 	if (vp->v_type == VFIFO)
1161 		fifo_printinfo(vp);
1162 #endif
	/* Terminate the line left open by the printf() above. */
1163 	printf("\n");
1164 	return 0;
1165 }
1166 
1167 /* a null op */
/* tmpfs_bwrite: ignores its argument and always returns 0. */
1168 int
1169 tmpfs_bwrite(void *v)
1170 {
1171 	return 0;
1172 }
1173 
/*
 * tmpfs_poll: returns the subset of the requested events that falls
 * within POLLIN, POLLOUT, POLLRDNORM and POLLWRNORM; no file state is
 * examined.
 */
1174 int
1175 tmpfs_poll(void *v)
1176 {
1177 	struct vop_poll_args *ap = v;
1178 	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
1179 }
1180 
/* tmpfs_strategy: not supported; always returns EOPNOTSUPP. */
1181 int
1182 tmpfs_strategy(void *v)
1183 {
1184 	return EOPNOTSUPP;
1185 }
1186 
/* tmpfs_ioctl: no ioctls are handled; always returns ENOTTY. */
1187 int
1188 tmpfs_ioctl(void *v)
1189 {
1190 	return ENOTTY;
1191 }
1192 
1193 int
1194 tmpfs_lock(void *v)
1195 {
1196 	struct vop_lock_args *ap = v;
1197 	tmpfs_node_t *tnp = VP_TO_TMPFS_NODE(ap->a_vp);
1198 
1199 	return lockmgr(&tnp->tn_vlock, ap->a_flags, NULL);
1200 }
1201 
1202 int
1203 tmpfs_unlock(void *v)
1204 {
1205 	struct vop_unlock_args *ap = v;
1206 	tmpfs_node_t *tnp = VP_TO_TMPFS_NODE(ap->a_vp);
1207 
1208 	return lockmgr(&tnp->tn_vlock, ap->a_flags | LK_RELEASE, NULL);
1209 }
1210 
1211 int
1212 tmpfs_islocked(void *v)
1213 {
1214 	struct vop_islocked_args *ap = v;
1215 	tmpfs_node_t *tnp = VP_TO_TMPFS_NODE(ap->a_vp);
1216 
1217 	return lockstatus(&tnp->tn_vlock);
1218 }
1219 
1220 /*
1221  * tmpfs_rename: rename routine, the hairiest system call, with the
1222  * insane API.
1223  *
1224  * Arguments: fdvp (from-parent vnode), fvp (from-leaf), tdvp (to-parent)
1225  * and tvp (to-leaf), if exists (NULL if not).
1226  *
1227  * => Caller holds a reference on fdvp and fvp, they are unlocked.
1228  *    Note: fdvp and fvp can refer to the same object (i.e. when it is root).
1229  *
1230  * => Both tdvp and tvp are referenced and locked.  It is our responsibility
1231  *    to release the references and unlock them (or destroy).
1232  */
1233 
1234 /*
1235  * First, some forward declarations of subroutines.
1236  */
1237 
/*
 * Parameter names in these prototypes are informational only; for the
 * helpers they follow the order in which the rename code passes its
 * arguments (directory before leaf, source before target).
 */
int	tmpfs_sane_rename(struct vnode *fdvp, struct componentname *fcnp,
	    struct vnode *tdvp, struct componentname *tcnp,
	    struct ucred *cred, int posixly_correct);
int	tmpfs_rename_enter(struct mount *mount, struct tmpfs_mount *tmpfs,
	    struct ucred *cred,
	    struct vnode *fdvp, struct tmpfs_node *fdnode,
	    struct componentname *fcnp,
	    struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
	    struct vnode *tdvp, struct tmpfs_node *tdnode,
	    struct componentname *tcnp,
	    struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret);
int	tmpfs_rename_enter_common(struct mount *mount,
	    struct tmpfs_mount *tmpfs, struct ucred *cred,
	    struct vnode *dvp, struct tmpfs_node *dnode,
	    struct componentname *fcnp,
	    struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
	    struct componentname *tcnp,
	    struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret);
int	tmpfs_rename_enter_separate(struct mount *mount,
	    struct tmpfs_mount *tmpfs, struct ucred *cred,
	    struct vnode *fdvp, struct tmpfs_node *fdnode,
	    struct componentname *fcnp,
	    struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
	    struct vnode *tdvp, struct tmpfs_node *tdnode,
	    struct componentname *tcnp,
	    struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret);
void	tmpfs_rename_exit(struct tmpfs_mount *tmpfs,
	    struct vnode *fdvp, struct vnode *fvp,
	    struct vnode *tdvp, struct vnode *tvp);
int	tmpfs_rename_lock_directory(struct vnode *vp,
	    struct tmpfs_node *node);
int	tmpfs_rename_genealogy(struct tmpfs_node *fdnode,
	    struct tmpfs_node *tdnode,
	    struct tmpfs_node **intermediate_node_ret);
int	tmpfs_rename_lock(struct mount *mount, struct ucred *cred,
	    int overlap_error,
	    struct vnode *a_dvp, struct tmpfs_node *a_dnode,
	    struct componentname *a_cnp, int a_missing_ok,
	    struct tmpfs_dirent **a_dirent_ret, struct vnode **a_vp_ret,
	    struct vnode *b_dvp, struct tmpfs_node *b_dnode,
	    struct componentname *b_cnp, int b_missing_ok,
	    struct tmpfs_dirent **b_dirent_ret, struct vnode **b_vp_ret);
void	tmpfs_rename_attachdetach(struct tmpfs_mount *tmpfs,
	    struct vnode *fdvp, struct tmpfs_dirent *fde, struct vnode *fvp,
	    struct vnode *tdvp, struct tmpfs_dirent *tde, struct vnode *tvp);
int	tmpfs_do_remove(struct tmpfs_mount *tmpfs, struct vnode *dvp,
	    struct tmpfs_node *dnode, struct tmpfs_dirent *de,
	    struct vnode *vp, struct ucred *cred);
int	tmpfs_rename_check_possible(struct tmpfs_node *fdnode,
	    struct tmpfs_node *fnode, struct tmpfs_node *tdnode,
	    struct tmpfs_node *tnode);
int	tmpfs_rename_check_permitted(struct ucred *cred,
	    struct tmpfs_node *fdnode, struct tmpfs_node *fnode,
	    struct tmpfs_node *tdnode, struct tmpfs_node *tnode);
int	tmpfs_remove_check_possible(struct tmpfs_node *dnode,
	    struct tmpfs_node *node);
int	tmpfs_remove_check_permitted(struct ucred *cred,
	    struct tmpfs_node *dnode, struct tmpfs_node *node);
int	tmpfs_check_sticky(struct ucred *cred,
	    struct tmpfs_node *dnode, struct tmpfs_node *node);
void	tmpfs_rename_cache_purge(struct vnode *fdvp, struct vnode *fvp,
	    struct vnode *tdvp, struct vnode *tvp);
void	tmpfs_rename_abort(void *v);
1286 
1287 int
1288 tmpfs_rename(void *v)
1289 {
1290 	struct vop_rename_args  /* {
1291 		struct vnode		*a_fdvp;
1292 		struct vnode		*a_fvp;
1293 		struct componentname	*a_fcnp;
1294 		struct vnode		*a_tdvp;
1295 		struct vnode		*a_tvp;
1296 		struct componentname	*a_tcnp;
1297 	} */ *ap = v;
1298 	struct vnode *fdvp = ap->a_fdvp;
1299 	struct vnode *fvp = ap->a_fvp;
1300 	struct componentname *fcnp = ap->a_fcnp;
1301 	struct vnode *tdvp = ap->a_tdvp;
1302 	struct vnode *tvp = ap->a_tvp;
1303 	struct componentname *tcnp = ap->a_tcnp;
1304 	struct ucred *cred;
1305 	int error;
1306 
1307 	KASSERT(fdvp != NULL);
1308 	KASSERT(fvp != NULL);
1309 	KASSERT(fcnp != NULL);
1310 	KASSERT(fcnp->cn_nameptr != NULL);
1311 	KASSERT(tdvp != NULL);
1312 	KASSERT(tcnp != NULL);
1313 	KASSERT(fcnp->cn_nameptr != NULL);
1314 	/* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
1315 	/* KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
1316 	KASSERT(fdvp->v_type == VDIR);
1317 	KASSERT(tdvp->v_type == VDIR);
1318 	KASSERT(fcnp->cn_flags & HASBUF);
1319 	KASSERT(tcnp->cn_flags & HASBUF);
1320 
1321 	cred = fcnp->cn_cred;
1322 	KASSERT(tcnp->cn_cred == cred);
1323 
1324 	/*
1325 	 * Check for cross-device rename.
1326 	 */
1327 	if (fvp->v_mount != tdvp->v_mount ||
1328 	    (tvp != NULL && (fvp->v_mount != tvp->v_mount))) {
1329 	    	tmpfs_rename_abort(v);
1330 		return EXDEV;
1331 	}
1332 
1333 	/*
1334 	 * Can't check the locks on these until we know they're on
1335 	 * the same FS, as not all FS do locking the same way.
1336 	 */
1337 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
1338 	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
1339 
1340 	/*
1341 	 * Reject renaming '.' and '..'.
1342 	 */
1343 	if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
1344 	    (fcnp->cn_namelen == 2 && fcnp->cn_nameptr[0] == '.' &&
1345 	     fcnp->cn_nameptr[1] == '.')) {
1346 	     	tmpfs_rename_abort(v);
1347 	     	return EINVAL;
1348 	}
1349 
1350 	/*
1351 	 * Sanitize our world from the VFS insanity.  Unlock the target
1352 	 * directory and node, which are locked.  Release the children,
1353 	 * which are referenced.  Check for rename("x", "y/."), which
1354 	 * it is our responsibility to reject, not the caller's.  (But
1355 	 * the caller does reject rename("x/.", "y").  Go figure.)
1356 	 */
1357 
1358 	VOP_UNLOCK(tdvp, 0, curproc);
1359 	if ((tvp != NULL) && (tvp != tdvp))
1360 		VOP_UNLOCK(tvp, 0, curproc);
1361 
1362 	vrele(fvp);
1363 	if (tvp != NULL)
1364 		vrele(tvp);
1365 
1366 	if (tvp == tdvp) {
1367 		error = EINVAL;
1368 		goto out;
1369 	}
1370 
1371 	error = tmpfs_sane_rename(fdvp, fcnp, tdvp, tcnp, cred, 0);
1372 
1373 out:	/*
1374 	 * All done, whether with success or failure.  Release the
1375 	 * directory nodes now, as the caller expects from the VFS
1376 	 * protocol.
1377 	 */
1378 	vrele(fdvp);
1379 	vrele(tdvp);
1380 
1381 	return error;
1382 }
1383 
1384 /*
1385  * tmpfs_sane_rename: rename routine, the hairiest system call, with
1386  * the sane API.
1387  *
1388  * Arguments:
1389  *
1390  * . fdvp (from directory vnode),
1391  * . fcnp (from component name),
1392  * . tdvp (to directory vnode), and
1393  * . tcnp (to component name).
1394  *
1395  * fdvp and tdvp must be referenced and unlocked.
1396  */
1397 int
1398 tmpfs_sane_rename(struct vnode *fdvp, struct componentname *fcnp,
1399     struct vnode *tdvp, struct componentname *tcnp, struct ucred *cred,
1400     int posixly_correct)
1401 {
1402 	struct mount *mount;
1403 	struct tmpfs_mount *tmpfs;
1404 	struct tmpfs_node *fdnode, *tdnode;
1405 	struct tmpfs_dirent *fde, *tde;
1406 	struct vnode *fvp, *tvp;
1407 	char *newname;
1408 	int error;
1409 
1410 	KASSERT(fdvp != NULL);
1411 	KASSERT(fcnp != NULL);
1412 	KASSERT(tdvp != NULL);
1413 	KASSERT(tcnp != NULL);
1414 	/* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
1415 	/* KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
1416 	KASSERT(fdvp->v_type == VDIR);
1417 	KASSERT(tdvp->v_type == VDIR);
1418 	KASSERT(fdvp->v_mount == tdvp->v_mount);
1419 	KASSERT((fcnp->cn_flags & ISDOTDOT) == 0);
1420 	KASSERT((tcnp->cn_flags & ISDOTDOT) == 0);
1421 	KASSERT((fcnp->cn_namelen != 1) || (fcnp->cn_nameptr[0] != '.'));
1422 	KASSERT((tcnp->cn_namelen != 1) || (tcnp->cn_nameptr[0] != '.'));
1423 	KASSERT((fcnp->cn_namelen != 2) || (fcnp->cn_nameptr[0] != '.') ||
1424 	    (fcnp->cn_nameptr[1] != '.'));
1425 	KASSERT((tcnp->cn_namelen != 2) || (tcnp->cn_nameptr[0] != '.') ||
1426 	    (tcnp->cn_nameptr[1] != '.'));
1427 
1428 	/*
1429 	 * Pull out the tmpfs data structures.
1430 	 */
1431 	fdnode = VP_TO_TMPFS_NODE(fdvp);
1432 	tdnode = VP_TO_TMPFS_NODE(tdvp);
1433 	KASSERT(fdnode != NULL);
1434 	KASSERT(tdnode != NULL);
1435 	KASSERT(fdnode->tn_vnode == fdvp);
1436 	KASSERT(tdnode->tn_vnode == tdvp);
1437 	KASSERT(fdnode->tn_type == VDIR);
1438 	KASSERT(tdnode->tn_type == VDIR);
1439 
1440 	mount = fdvp->v_mount;
1441 	KASSERT(mount != NULL);
1442 	KASSERT(mount == tdvp->v_mount);
1443 	/* XXX How can we be sure this stays true?  (Not that you're
1444 	 * likely to mount a tmpfs read-only...)  */
1445 	KASSERT((mount->mnt_flag & MNT_RDONLY) == 0);
1446 	tmpfs = VFS_TO_TMPFS(mount);
1447 	KASSERT(tmpfs != NULL);
1448 
1449 	/*
1450 	 * Decide whether we need a new name, and allocate memory for
1451 	 * it if so.  Do this before locking anything or taking
1452 	 * destructive actions so that we can back out safely and sleep
1453 	 * safely.  XXX Is sleeping an issue here?  Can this just be
1454 	 * moved into tmpfs_rename_attachdetach?
1455 	 */
1456 	if (tmpfs_strname_neqlen(fcnp, tcnp)) {
1457 		newname = tmpfs_strname_alloc(tmpfs, tcnp->cn_namelen);
1458 		if (newname == NULL) {
1459 			error = ENOSPC;
1460 			goto out_unlocked;
1461 		}
1462 	} else {
1463 		newname = NULL;
1464 	}
1465 
1466 	/*
1467 	 * Lock and look up everything.  GCC is not very clever.
1468 	 */
1469 	fde = tde = NULL;
1470 	fvp = tvp = NULL;
1471 	error = tmpfs_rename_enter(mount, tmpfs, cred,
1472 	    fdvp, fdnode, fcnp, &fde, &fvp,
1473 	    tdvp, tdnode, tcnp, &tde, &tvp);
1474 	if (error)
1475 		goto out_unlocked;
1476 
1477 	/*
1478 	 * Check that everything is locked and looks right.
1479 	 */
1480 	KASSERT(fde != NULL);
1481 	KASSERT(fvp != NULL);
1482 	KASSERT(fde->td_node != NULL);
1483 	KASSERT(fde->td_node->tn_vnode == fvp);
1484 	KASSERT(fde->td_node->tn_type == fvp->v_type);
1485 	KASSERT((tde == NULL) == (tvp == NULL));
1486 	KASSERT((tde == NULL) || (tde->td_node != NULL));
1487 	KASSERT((tde == NULL) || (tde->td_node->tn_vnode == tvp));
1488 	KASSERT((tde == NULL) || (tde->td_node->tn_type == tvp->v_type));
1489 	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
1490 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
1491 	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
1492 	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
1493 
1494 	/*
1495 	 * If the source and destination are the same object, we need
1496 	 * only at most delete the source entry.
1497 	 */
1498 	if (fvp == tvp) {
1499 		KASSERT(tvp != NULL);
1500 		if (fde->td_node->tn_type == VDIR) {
1501 			/* XXX How can this possibly happen?  */
1502 			error = EINVAL;
1503 			goto out_locked;
1504 		}
1505 		if (!posixly_correct && (fde != tde)) {
1506 			/* XXX Doesn't work because of locking.
1507 			 * error = VOP_REMOVE(fdvp, fvp);
1508 			 */
1509 			error = tmpfs_do_remove(tmpfs, fdvp, fdnode, fde, fvp,
1510 			    cred);
1511 			if (error)
1512 				goto out_locked;
1513 		}
1514 		goto success;
1515 	}
1516 	KASSERT(fde != tde);
1517 	KASSERT(fvp != tvp);
1518 
1519 	/*
1520 	 * If the target exists, refuse to rename a directory over a
1521 	 * non-directory or vice versa, or to clobber a non-empty
1522 	 * directory.
1523 	 */
1524 	if (tvp != NULL) {
1525 		KASSERT(tde != NULL);
1526 		KASSERT(tde->td_node != NULL);
1527 		if (fvp->v_type == VDIR && tvp->v_type == VDIR)
1528 			error = ((tde->td_node->tn_size > 0)? ENOTEMPTY : 0);
1529 		else if (fvp->v_type == VDIR && tvp->v_type != VDIR)
1530 			error = ENOTDIR;
1531 		else if (fvp->v_type != VDIR && tvp->v_type == VDIR)
1532 			error = EISDIR;
1533 		else
1534 			error = 0;
1535 		if (error)
1536 			goto out_locked;
1537 		KASSERT((fvp->v_type == VDIR) == (tvp->v_type == VDIR));
1538 	}
1539 
1540 	/*
1541 	 * Authorize the rename.
1542 	 */
1543 	error = tmpfs_rename_check_possible(fdnode, fde->td_node,
1544 	    tdnode, (tde? tde->td_node : NULL));
1545 	if (error)
1546 		goto out_locked;
1547 	error = tmpfs_rename_check_permitted(cred, fdnode, fde->td_node,
1548 	    tdnode, (tde? tde->td_node : NULL));
1549 	if (error)
1550 		goto out_locked;
1551 
1552 	/*
1553 	 * Everything is hunky-dory.  Shuffle the directory entries.
1554 	 */
1555 	tmpfs_rename_attachdetach(tmpfs, fdvp, fde, fvp, tdvp, tde, tvp);
1556 
1557 	/*
1558 	 * Update the directory entry's name necessary, and flag
1559 	 * metadata updates.  A memory allocation failure here is not
1560 	 * OK because we've already committed some changes that we
1561 	 * can't back out at this point, and we have things locked so
1562 	 * we can't sleep, hence the early allocation above.
1563 	 */
1564 	if (newname != NULL) {
1565 		KASSERT(tcnp->cn_namelen <= TMPFS_MAXNAMLEN);
1566 
1567 		tmpfs_strname_free(tmpfs, fde->td_name, fde->td_namelen);
1568 		fde->td_namelen = (uint16_t)tcnp->cn_namelen;
1569 		(void)memcpy(newname, tcnp->cn_nameptr, tcnp->cn_namelen);
1570 		/* Commit newname and don't free it on the way out.  */
1571 		fde->td_name = newname;
1572 		newname = NULL;
1573 
1574 		tmpfs_update(fde->td_node, TMPFS_NODE_CHANGED);
1575 		tmpfs_update(tdnode, TMPFS_NODE_MODIFIED);
1576 	}
1577 
1578 success:
1579 	VN_KNOTE(fvp, NOTE_RENAME);
1580 	tmpfs_rename_cache_purge(fdvp, fvp, tdvp, tvp);
1581 	error = 0;
1582 
1583 out_locked:
1584 	tmpfs_rename_exit(tmpfs, fdvp, fvp, tdvp, tvp);
1585 
1586 out_unlocked:
1587 	/* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
1588 	/* KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
1589 	/* KASSERT((fvp == NULL) || (VOP_ISLOCKED(fvp) != LK_EXCLUSIVE)); */
1590 	/* KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */
1591 
1592 	if (newname != NULL)
1593 		tmpfs_strname_free(tmpfs, newname, tcnp->cn_namelen);
1594 
1595 	return error;
1596 }
1597 
1598 /*
1599  * Look up fcnp in fdnode/fdvp and store its directory entry in fde_ret
1600  * and the associated vnode in fvp_ret; fail if not found.  Look up
1601  * tcnp in tdnode/tdvp and store its directory entry in tde_ret and the
1602  * associated vnode in tvp_ret; store null instead if not found.  Fail
1603  * if anything has been mounted on any of the nodes involved.
1604  *
1605  * fdvp and tdvp must be referenced.
1606  *
1607  * On entry, nothing is locked.
1608  *
1609  * On success, everything is locked, and *fvp_ret, and *tvp_ret if
1610  * nonnull, are referenced.  The only pairs of vnodes that may be
1611  * identical are {fdvp, tdvp} and {fvp, tvp}.
1612  *
1613  * On failure, everything remains as was.
1614  *
1615  * Locking everything including the source and target nodes is
1616  * necessary to make sure that, e.g., link count updates are OK.  The
1617  * locking order is, in general, ancestor-first, matching the order you
1618  * need to use to look up a descendant anyway.
1619  */
1620 int
1621 tmpfs_rename_enter(struct mount *mount, struct tmpfs_mount *tmpfs,
1622     struct ucred *cred,
1623     struct vnode *fdvp, struct tmpfs_node *fdnode, struct componentname *fcnp,
1624     struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
1625     struct vnode *tdvp, struct tmpfs_node *tdnode, struct componentname *tcnp,
1626     struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret)
1627 {
1628 	int error;
1629 
1630 	KASSERT(mount != NULL);
1631 	KASSERT(tmpfs != NULL);
1632 	KASSERT(fdvp != NULL);
1633 	KASSERT(fdnode != NULL);
1634 	KASSERT(fcnp != NULL);
1635 	KASSERT(fde_ret != NULL);
1636 	KASSERT(fvp_ret != NULL);
1637 	KASSERT(tdvp != NULL);
1638 	KASSERT(tdnode != NULL);
1639 	KASSERT(tcnp != NULL);
1640 	KASSERT(tde_ret != NULL);
1641 	KASSERT(tvp_ret != NULL);
1642 	KASSERT(fdnode->tn_vnode == fdvp);
1643 	KASSERT(tdnode->tn_vnode == tdvp);
1644 	KASSERT(fdnode->tn_type == VDIR);
1645 	KASSERT(tdnode->tn_type == VDIR);
1646 
1647 	if (fdvp == tdvp) {
1648 		KASSERT(fdnode == tdnode);
1649 		error = tmpfs_rename_enter_common(mount, tmpfs, cred, fdvp,
1650 		    fdnode, fcnp, fde_ret, fvp_ret, tcnp, tde_ret, tvp_ret);
1651 	} else {
1652 		KASSERT(fdnode != tdnode);
1653 		error = tmpfs_rename_enter_separate(mount, tmpfs, cred,
1654 		    fdvp, fdnode, fcnp, fde_ret, fvp_ret,
1655 		    tdvp, tdnode, tcnp, tde_ret, tvp_ret);
1656 	}
1657 
1658 	if (error)
1659 		return error;
1660 
1661 	KASSERT(*fde_ret != NULL);
1662 	KASSERT(*fvp_ret != NULL);
1663 	KASSERT((*tde_ret == NULL) == (*tvp_ret == NULL));
1664 	KASSERT((*tde_ret == NULL) || ((*tde_ret)->td_node != NULL));
1665 	KASSERT((*tde_ret == NULL) ||
1666 	    ((*tde_ret)->td_node->tn_vnode == *tvp_ret));
1667 	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
1668 	KASSERT(VOP_ISLOCKED(*fvp_ret) == LK_EXCLUSIVE);
1669 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
1670 	KASSERT((*tvp_ret == NULL) ||
1671 	    (VOP_ISLOCKED(*tvp_ret) == LK_EXCLUSIVE));
1672 	KASSERT(*fvp_ret != fdvp);
1673 	KASSERT(*fvp_ret != tdvp);
1674 	KASSERT(*tvp_ret != fdvp);
1675 	KASSERT(*tvp_ret != tdvp);
1676 	return 0;
1677 }
1678 
1679 /*
1680  * Lock and look up with a common source/target directory.
1681  */
1682 int
1683 tmpfs_rename_enter_common(struct mount *mount, struct tmpfs_mount *tmpfs,
1684     struct ucred *cred,
1685     struct vnode *dvp, struct tmpfs_node *dnode,
1686     struct componentname *fcnp,
1687     struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
1688     struct componentname *tcnp,
1689     struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret)
1690 {
1691 	struct tmpfs_dirent *fde, *tde;
1692 	struct vnode *fvp, *tvp;
1693 	int error;
1694 
1695 	error = tmpfs_rename_lock_directory(dvp, dnode);
1696 	if (error)
1697 		goto fail0;
1698 
1699 	/* Did we lose a race with mount?  */
1700 	if (dvp->v_mountedhere != NULL) {
1701 		error = EBUSY;
1702 		goto fail1;
1703 	}
1704 
1705 	/* Make sure the caller may read the directory.  */
1706 	error = VOP_ACCESS(dvp, VEXEC, cred, curproc);
1707 	if (error)
1708 		goto fail1;
1709 
1710 	/*
1711 	 * The order in which we lock the source and target nodes is
1712 	 * irrelevant because there can only be one rename on this
1713 	 * directory in flight at a time, and we have it locked.
1714 	 */
1715 
1716 	fde = tmpfs_dir_lookup(dnode, fcnp);
1717 	if (fde == NULL) {
1718 		error = ENOENT;
1719 		goto fail1;
1720 	}
1721 
1722 	KASSERT(fde->td_node != NULL);
1723 	/* We ruled out `.' earlier.  */
1724 	KASSERT(fde->td_node != dnode);
1725 	/* We ruled out `..' earlier.  */
1726 	KASSERT(fde->td_node != dnode->tn_spec.tn_dir.tn_parent);
1727 	rw_enter_write(&fde->td_node->tn_nlock);
1728 	error = tmpfs_vnode_get(mount, fde->td_node, &fvp);
1729 	if (error)
1730 		goto fail1;
1731 	KASSERT(fvp != NULL);
1732 	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
1733 	KASSERT(fvp != dvp);
1734 	KASSERT(fvp->v_mount == mount);
1735 
1736 	/* Refuse to rename a mount point.  */
1737 	if ((fvp->v_type == VDIR) && (fvp->v_mountedhere != NULL)) {
1738 		error = EBUSY;
1739 		goto fail2;
1740 	}
1741 
1742 	tde = tmpfs_dir_lookup(dnode, tcnp);
1743 	if (tde == NULL) {
1744 		tvp = NULL;
1745 	} else {
1746 		KASSERT(tde->td_node != NULL);
1747 		/* We ruled out `.' earlier.  */
1748 		KASSERT(tde->td_node != dnode);
1749 		/* We ruled out `..' earlier.  */
1750 		KASSERT(tde->td_node != dnode->tn_spec.tn_dir.tn_parent);
1751 		if (tde->td_node != fde->td_node) {
1752 			rw_enter_write(&tde->td_node->tn_nlock);
1753 			error = tmpfs_vnode_get(mount, tde->td_node, &tvp);
1754 			if (error)
1755 				goto fail2;
1756 			KASSERT(tvp->v_mount == mount);
1757 			/* Refuse to rename over a mount point.  */
1758 			if ((tvp->v_type == VDIR) &&
1759 			    (tvp->v_mountedhere != NULL)) {
1760 				error = EBUSY;
1761 				goto fail3;
1762 			}
1763 		} else {
1764 			tvp = fvp;
1765 			vref(tvp);
1766 		}
1767 		KASSERT(tvp != NULL);
1768 		KASSERT(VOP_ISLOCKED(tvp) == LK_EXCLUSIVE);
1769 	}
1770 	KASSERT(tvp != dvp);
1771 
1772 	*fde_ret = fde;
1773 	*fvp_ret = fvp;
1774 	*tde_ret = tde;
1775 	*tvp_ret = tvp;
1776 	return 0;
1777 
1778 fail3:	if (tvp != NULL) {
1779 		if (tvp != fvp)
1780 			vput(tvp);
1781 		else
1782 			vrele(tvp);
1783 	}
1784 
1785 fail2:	vput(fvp);
1786 fail1:	VOP_UNLOCK(dvp, 0, curproc);
1787 fail0:	return error;
1788 }
1789 
1790 /*
1791  * Lock and look up with separate source and target directories.
1792  */
1793 int
1794 tmpfs_rename_enter_separate(struct mount *mount, struct tmpfs_mount *tmpfs,
1795     struct ucred *cred,
1796     struct vnode *fdvp, struct tmpfs_node *fdnode, struct componentname *fcnp,
1797     struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
1798     struct vnode *tdvp, struct tmpfs_node *tdnode, struct componentname *tcnp,
1799     struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret)
1800 {
1801 	struct tmpfs_node *intermediate_node;
1802 	struct tmpfs_dirent *fde, *tde;
1803 	struct vnode *fvp, *tvp;
1804 	int error;
1805 
1806 	KASSERT(fdvp != tdvp);
1807 	KASSERT(fdnode != tdnode);
1808 
1809 #if 0				/* XXX */
1810 	mutex_enter(&tmpfs->tm_rename_lock);
1811 #endif
1812 
1813 	error = tmpfs_rename_genealogy(fdnode, tdnode, &intermediate_node);
1814 	if (error)
1815 		goto fail;
1816 
1817 	/*
1818 	 * intermediate_node == NULL means fdnode is not an ancestor of
1819 	 * tdnode.
1820 	 */
1821 	if (intermediate_node == NULL)
1822 		error = tmpfs_rename_lock(mount, cred, ENOTEMPTY,
1823 		    tdvp, tdnode, tcnp, 1, &tde, &tvp,
1824 		    fdvp, fdnode, fcnp, 0, &fde, &fvp);
1825 	else
1826 		error = tmpfs_rename_lock(mount, cred, EINVAL,
1827 		    fdvp, fdnode, fcnp, 0, &fde, &fvp,
1828 		    tdvp, tdnode, tcnp, 1, &tde, &tvp);
1829 	if (error)
1830 		goto fail;
1831 
1832 	KASSERT(fde != NULL);
1833 	KASSERT(fde->td_node != NULL);
1834 
1835 	/*
1836 	 * Reject rename("foo/bar", "foo/bar/baz/quux/zot").
1837 	 */
1838 	if (fde->td_node == intermediate_node) {
1839 		tmpfs_rename_exit(tmpfs, fdvp, fvp, tdvp, tvp);
1840 		return EINVAL;
1841 	}
1842 
1843 	*fde_ret = fde;
1844 	*fvp_ret = fvp;
1845 	*tde_ret = tde;
1846 	*tvp_ret = tvp;
1847 	return 0;
1848 
1849 fail:
1850 #if 0				/* XXX */
1851 	mutex_exit(&tmpfs->tm_rename_lock);
1852 #endif
1853 	return error;
1854 }
1855 
1856 /*
1857  * Unlock everything we locked for rename.
1858  *
1859  * fdvp and tdvp must be referenced.
1860  *
1861  * On entry, everything is locked, and fvp and tvp referenced.
1862  *
1863  * On exit, everything is unlocked, and fvp and tvp are released.
1864  */
1865 void
1866 tmpfs_rename_exit(struct tmpfs_mount *tmpfs,
1867     struct vnode *fdvp, struct vnode *fvp,
1868     struct vnode *tdvp, struct vnode *tvp)
1869 {
1870 
1871 	KASSERT(tmpfs != NULL);
1872 	KASSERT(fdvp != NULL);
1873 	KASSERT(fvp != NULL);
1874 	KASSERT(fdvp != fvp);
1875 	KASSERT(fdvp != tvp);
1876 	KASSERT(tdvp != tvp);
1877 	KASSERT(tdvp != fvp);
1878 	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
1879 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
1880 	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
1881 	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
1882 
1883 	if (tvp != NULL) {
1884 		if (tvp != fvp)
1885 			vput(tvp);
1886 		else
1887 			vrele(tvp);
1888 	}
1889 	VOP_UNLOCK(tdvp, 0, curproc);
1890 	vput(fvp);
1891 	if (fdvp != tdvp)
1892 		VOP_UNLOCK(fdvp, 0, curproc);
1893 
1894 #if 0				/* XXX */
1895 	if (fdvp != tdvp)
1896 		mutex_exit(&tmpfs->tm_rename_lock);
1897 #endif
1898 }
1899 
1900 /*
1901  * Lock a directory, but fail if it has been rmdir'd.
1902  *
1903  * vp must be referenced.
1904  */
1905 int
1906 tmpfs_rename_lock_directory(struct vnode *vp, struct tmpfs_node *node)
1907 {
1908 
1909 	KASSERT(vp != NULL);
1910 	KASSERT(node != NULL);
1911 	KASSERT(node->tn_vnode == vp);
1912 	KASSERT(node->tn_type == VDIR);
1913 
1914 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curproc);
1915 	if (node->tn_spec.tn_dir.tn_parent == NULL) {
1916 		VOP_UNLOCK(vp, 0, curproc);
1917 		return ENOENT;
1918 	}
1919 
1920 	return 0;
1921 }
1922 
1923 /*
1924  * Analyze the genealogy of the source and target nodes.
1925  *
1926  * On success, stores in *intermediate_node_ret either the child of
1927  * fdnode of which tdnode is a descendant, or null if tdnode is not a
1928  * descendant of fdnode at all.
1929  *
1930  * fdnode and tdnode must be unlocked and referenced.  The file
1931  * system's rename lock must also be held, to exclude concurrent
1932  * changes to the file system's genealogy other than rmdir.
1933  *
1934  * XXX This causes an extra lock/unlock of tdnode in the case when
1935  * we're just about to lock it again before locking anything else.
1936  * However, changing that requires reorganizing the code to make it
1937  * even more horrifically obscure.
1938  */
1939 int
1940 tmpfs_rename_genealogy(struct tmpfs_node *fdnode, struct tmpfs_node *tdnode,
1941     struct tmpfs_node **intermediate_node_ret)
1942 {
1943 	struct tmpfs_node *node = tdnode, *parent;
1944 	int error;
1945 
1946 	KASSERT(fdnode != NULL);
1947 	KASSERT(tdnode != NULL);
1948 	KASSERT(fdnode != tdnode);
1949 	KASSERT(intermediate_node_ret != NULL);
1950 
1951 	KASSERT(fdnode->tn_vnode != NULL);
1952 	KASSERT(tdnode->tn_vnode != NULL);
1953 	KASSERT(fdnode->tn_type == VDIR);
1954 	KASSERT(tdnode->tn_type == VDIR);
1955 
1956 	/*
1957 	 * We need to provisionally lock tdnode->tn_vnode to keep rmdir
1958 	 * from deleting it -- or any ancestor -- at an inopportune
1959 	 * moment.
1960 	 */
1961 	error = tmpfs_rename_lock_directory(tdnode->tn_vnode, tdnode);
1962 	if (error)
1963 		return error;
1964 
1965 	for (;;) {
1966 		parent = node->tn_spec.tn_dir.tn_parent;
1967 		KASSERT(parent != NULL);
1968 		KASSERT(parent->tn_type == VDIR);
1969 
1970 		/* Did we hit the root without finding fdnode?  */
1971 		if (parent == node) {
1972 			*intermediate_node_ret = NULL;
1973 			break;
1974 		}
1975 
1976 		/* Did we find that fdnode is an ancestor?  */
1977 		if (parent == fdnode) {
1978 			*intermediate_node_ret = node;
1979 			break;
1980 		}
1981 
1982 		/* Neither -- keep ascending the family tree.  */
1983 		node = parent;
1984 	}
1985 
1986 	VOP_UNLOCK(tdnode->tn_vnode, 0, curproc);
1987 	return 0;
1988 }
1989 
1990 /*
1991  * Lock directories a and b, which must be distinct, and look up and
1992  * lock nodes a and b.  Do a first and then b.  Directory b may not be
1993  * an ancestor of directory a, although directory a may be an ancestor
1994  * of directory b.  Fail with overlap_error if node a is directory b.
1995  * Neither componentname may be `.' or `..'.
1996  *
1997  * a_dvp and b_dvp must be referenced.
1998  *
1999  * On entry, a_dvp and b_dvp are unlocked.
2000  *
2001  * On success,
2002  * . a_dvp and b_dvp are locked,
2003  * . *a_dirent_ret is filled with a directory entry whose node is
2004  *     locked and referenced,
2005  * . *b_vp_ret is filled with the corresponding vnode,
2006  * . *b_dirent_ret is filled either with null or with a directory entry
2007  *     whose node is locked and referenced,
2008  * . *b_vp is filled either with null or with the corresponding vnode,
2009  *     and
2010  * . the only pair of vnodes that may be identical is a_vp and b_vp.
2011  *
2012  * On failure, a_dvp and b_dvp are left unlocked, and *a_dirent_ret,
2013  * *a_vp, *b_dirent_ret, and *b_vp are left alone.
2014  */
2015 int
2016 tmpfs_rename_lock(struct mount *mount, struct ucred *cred, int overlap_error,
2017     struct vnode *a_dvp, struct tmpfs_node *a_dnode,
2018     struct componentname *a_cnp, int a_missing_ok,
2019     struct tmpfs_dirent **a_dirent_ret, struct vnode **a_vp_ret,
2020     struct vnode *b_dvp, struct tmpfs_node *b_dnode,
2021     struct componentname *b_cnp, int b_missing_ok,
2022     struct tmpfs_dirent **b_dirent_ret, struct vnode **b_vp_ret)
2023 {
2024 	struct tmpfs_dirent *a_dirent, *b_dirent;
2025 	struct vnode *a_vp, *b_vp;
2026 	int error;
2027 
2028 	KASSERT(a_dvp != NULL);
2029 	KASSERT(a_dnode != NULL);
2030 	KASSERT(a_cnp != NULL);
2031 	KASSERT(a_dirent_ret != NULL);
2032 	KASSERT(a_vp_ret != NULL);
2033 	KASSERT(b_dvp != NULL);
2034 	KASSERT(b_dnode != NULL);
2035 	KASSERT(b_cnp != NULL);
2036 	KASSERT(b_dirent_ret != NULL);
2037 	KASSERT(b_vp_ret != NULL);
2038 	KASSERT(a_dvp != b_dvp);
2039 	KASSERT(a_dnode != b_dnode);
2040 	KASSERT(a_dnode->tn_vnode == a_dvp);
2041 	KASSERT(b_dnode->tn_vnode == b_dvp);
2042 	KASSERT(a_dnode->tn_type == VDIR);
2043 	KASSERT(b_dnode->tn_type == VDIR);
2044 	KASSERT(a_missing_ok != b_missing_ok);
2045 
2046 	error = tmpfs_rename_lock_directory(a_dvp, a_dnode);
2047 	if (error)
2048 		goto fail0;
2049 
2050 	/* Did we lose a race with mount?  */
2051 	if (a_dvp->v_mountedhere != NULL) {
2052 		error = EBUSY;
2053 		goto fail1;
2054 	}
2055 
2056 	/* Make sure the caller may read the directory.  */
2057 	error = VOP_ACCESS(a_dvp, VEXEC, cred, curproc);
2058 	if (error)
2059 		goto fail1;
2060 
2061 	a_dirent = tmpfs_dir_lookup(a_dnode, a_cnp);
2062 	if (a_dirent != NULL) {
2063 		KASSERT(a_dirent->td_node != NULL);
2064 		/* We ruled out `.' earlier.  */
2065 		KASSERT(a_dirent->td_node != a_dnode);
2066 		/* We ruled out `..' earlier.  */
2067 		KASSERT(a_dirent->td_node !=
2068 		    a_dnode->tn_spec.tn_dir.tn_parent);
2069 		if (a_dirent->td_node == b_dnode) {
2070 			error = overlap_error;
2071 			goto fail1;
2072 		}
2073 		rw_enter_write(&a_dirent->td_node->tn_nlock);
2074 		error = tmpfs_vnode_get(mount, a_dirent->td_node, &a_vp);
2075 		if (error)
2076 			goto fail1;
2077 		KASSERT(a_vp->v_mount == mount);
2078 		/* Refuse to rename (over) a mount point.  */
2079 		if ((a_vp->v_type == VDIR) && (a_vp->v_mountedhere != NULL)) {
2080 			error = EBUSY;
2081 			goto fail2;
2082 		}
2083 	} else if (!a_missing_ok) {
2084 		error = ENOENT;
2085 		goto fail1;
2086 	} else {
2087 		a_vp = NULL;
2088 	}
2089 	KASSERT(a_vp != a_dvp);
2090 	KASSERT(a_vp != b_dvp);
2091 
2092 	error = tmpfs_rename_lock_directory(b_dvp, b_dnode);
2093 	if (error)
2094 		goto fail2;
2095 
2096 	/* Did we lose a race with mount?  */
2097 	if (b_dvp->v_mountedhere != NULL) {
2098 		error = EBUSY;
2099 		goto fail3;
2100 	}
2101 
2102 	/* Make sure the caller may read the directory.  */
2103 	error = VOP_ACCESS(b_dvp, VEXEC, cred, curproc);
2104 	if (error)
2105 		goto fail3;
2106 
2107 	b_dirent = tmpfs_dir_lookup(b_dnode, b_cnp);
2108 	if (b_dirent != NULL) {
2109 		KASSERT(b_dirent->td_node != NULL);
2110 		/* We ruled out `.' earlier.  */
2111 		KASSERT(b_dirent->td_node != b_dnode);
2112 		/* We ruled out `..' earlier.  */
2113 		KASSERT(b_dirent->td_node !=
2114 		    b_dnode->tn_spec.tn_dir.tn_parent);
2115 		/* b is not an ancestor of a.  */
2116 		KASSERT(b_dirent->td_node != a_dnode);
2117 		/* But the source and target nodes might be the same.  */
2118 		if ((a_dirent == NULL) ||
2119 		    (a_dirent->td_node != b_dirent->td_node)) {
2120 			rw_enter_write(&b_dirent->td_node->tn_nlock);
2121 			error = tmpfs_vnode_get(mount, b_dirent->td_node,
2122 			    &b_vp);
2123 			if (error)
2124 				goto fail3;
2125 			KASSERT(b_vp->v_mount == mount);
2126 			KASSERT(a_vp != b_vp);
2127 			/* Refuse to rename (over) a mount point.  */
2128 			if ((b_vp->v_type == VDIR) &&
2129 			    (b_vp->v_mountedhere != NULL)) {
2130 				error = EBUSY;
2131 				goto fail4;
2132 			}
2133 		} else {
2134 			b_vp = a_vp;
2135 			vref(b_vp);
2136 		}
2137 	} else if (!b_missing_ok) {
2138 		error = ENOENT;
2139 		goto fail3;
2140 	} else {
2141 		b_vp = NULL;
2142 	}
2143 	KASSERT(b_vp != a_dvp);
2144 	KASSERT(b_vp != b_dvp);
2145 
2146 	KASSERT(VOP_ISLOCKED(a_dvp) == LK_EXCLUSIVE);
2147 	KASSERT(VOP_ISLOCKED(b_dvp) == LK_EXCLUSIVE);
2148 	KASSERT(a_missing_ok || (a_dirent != NULL));
2149 	KASSERT(a_missing_ok || (a_dirent->td_node != NULL));
2150 	KASSERT(b_missing_ok || (b_dirent != NULL));
2151 	KASSERT(b_missing_ok || (b_dirent->td_node != NULL));
2152 	KASSERT((a_dirent == NULL) || (a_dirent->td_node != NULL));
2153 	KASSERT((a_dirent == NULL) || (a_dirent->td_node->tn_vnode == a_vp));
2154 	KASSERT((b_dirent == NULL) || (b_dirent->td_node != NULL));
2155 	KASSERT((b_dirent == NULL) || (b_dirent->td_node->tn_vnode == b_vp));
2156 	KASSERT((a_vp == NULL) || (VOP_ISLOCKED(a_vp) == LK_EXCLUSIVE));
2157 	KASSERT((b_vp == NULL) || (VOP_ISLOCKED(b_vp) == LK_EXCLUSIVE));
2158 
2159 	*a_dirent_ret = a_dirent;
2160 	*b_dirent_ret = b_dirent;
2161 	*a_vp_ret = a_vp;
2162 	*b_vp_ret = b_vp;
2163 	return 0;
2164 
2165 fail4:	if (b_vp != NULL) {
2166 		KASSERT(VOP_ISLOCKED(b_vp) == LK_EXCLUSIVE);
2167 		if (b_vp != a_vp)
2168 			vput(b_vp);
2169 		else
2170 			vrele(a_vp);
2171 	}
2172 
2173 fail3:	KASSERT(VOP_ISLOCKED(b_dvp) == LK_EXCLUSIVE);
2174 	VOP_UNLOCK(b_dvp, 0, curproc);
2175 
2176 fail2:	if (a_vp != NULL) {
2177 		KASSERT(VOP_ISLOCKED(a_vp) == LK_EXCLUSIVE);
2178 		vput(a_vp);
2179 	}
2180 
2181 fail1:	KASSERT(VOP_ISLOCKED(a_dvp) == LK_EXCLUSIVE);
2182 	VOP_UNLOCK(a_dvp, 0, curproc);
2183 
2184 fail0:	/* KASSERT(VOP_ISLOCKED(a_dvp) != LK_EXCLUSIVE); */
2185 	/* KASSERT(VOP_ISLOCKED(b_dvp) != LK_EXCLUSIVE); */
2186 	/* KASSERT((a_vp == NULL) || (VOP_ISLOCKED(a_vp) != LK_EXCLUSIVE)); */
2187 	/* KASSERT((b_vp == NULL) || (VOP_ISLOCKED(b_vp) != LK_EXCLUSIVE)); */
2188 	return error;
2189 }
2190 
2191 /*
2192  * Shuffle the directory entries to move fvp from the directory fdvp
2193  * into the directory tdvp.  fde is fvp's directory entry in fdvp.  If
2194  * we are overwriting a target node, it is tvp, and tde is its
2195  * directory entry in tdvp.
2196  *
2197  * fdvp, fvp, tdvp, and tvp must all be locked and referenced.
2198  */
2199 void
2200 tmpfs_rename_attachdetach(struct tmpfs_mount *tmpfs,
2201     struct vnode *fdvp, struct tmpfs_dirent *fde, struct vnode *fvp,
2202     struct vnode *tdvp, struct tmpfs_dirent *tde, struct vnode *tvp)
2203 {
2204 
2205 	KASSERT(tmpfs != NULL);
2206 	KASSERT(fdvp != NULL);
2207 	KASSERT(fde != NULL);
2208 	KASSERT(fvp != NULL);
2209 	KASSERT(tdvp != NULL);
2210 	KASSERT(fde->td_node != NULL);
2211 	KASSERT(fde->td_node->tn_vnode == fvp);
2212 	KASSERT((tde == NULL) == (tvp == NULL));
2213 	KASSERT((tde == NULL) || (tde->td_node != NULL));
2214 	KASSERT((tde == NULL) || (tde->td_node->tn_vnode == tvp));
2215 	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
2216 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
2217 	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
2218 	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
2219 
2220 	/*
2221 	 * If we are moving from one directory to another, detach the
2222 	 * source entry and reattach it to the target directory.
2223 	 */
2224 	if (fdvp != tdvp) {
2225 		/* tmpfs_dir_detach clobbers fde->td_node, so save it.  */
2226 		struct tmpfs_node *fnode = fde->td_node;
2227 		tmpfs_node_t *fdnode = VP_TO_TMPFS_DIR(fdvp);
2228 		tmpfs_node_t *tdnode = VP_TO_TMPFS_DIR(tdvp);
2229 		tmpfs_dir_detach(fdnode, fde);
2230 		tmpfs_dir_attach(tdnode, fde, fnode);
2231 	} else if (tvp == NULL) {
2232 		/*
2233 		 * We are changing the directory.  tmpfs_dir_attach and
2234 		 * tmpfs_dir_detach note the events for us, but for
2235 		 * this case we don't call them, so we must note the
2236 		 * event explicitly.
2237 		 */
2238 		VN_KNOTE(fdvp, NOTE_WRITE);
2239 	}
2240 
2241 	/*
2242 	 * If we are replacing an existing target entry, delete it.
2243 	 */
2244 	if (tde != NULL) {
2245 		tmpfs_node_t *tdnode = VP_TO_TMPFS_DIR(tdvp);
2246 		KASSERT(tvp != NULL);
2247 		KASSERT(tde->td_node != NULL);
2248 		KASSERT((fvp->v_type == VDIR) == (tvp->v_type == VDIR));
2249 		if (tde->td_node->tn_type == VDIR) {
2250 			KASSERT(tde->td_node->tn_size == 0);
2251 			KASSERT(tde->td_node->tn_links == 2);
2252 			/* Decrement the extra link count for `.' so
2253 			 * the vnode will be recycled when released.  */
2254 			tde->td_node->tn_links--;
2255 		}
2256 		tmpfs_dir_detach(tdnode, tde);
2257 		tmpfs_free_dirent(tmpfs, tde);
2258 	}
2259 }
2260 
2261 /*
2262  * Remove the entry de for the non-directory vp from the directory dvp.
2263  *
2264  * Everything must be locked and referenced.
2265  */
2266 int
2267 tmpfs_do_remove(struct tmpfs_mount *tmpfs, struct vnode *dvp,
2268     struct tmpfs_node *dnode, struct tmpfs_dirent *de, struct vnode *vp,
2269     struct ucred *cred)
2270 {
2271 	int error;
2272 
2273 	KASSERT(tmpfs != NULL);
2274 	KASSERT(dvp != NULL);
2275 	KASSERT(dnode != NULL);
2276 	KASSERT(de != NULL);
2277 	KASSERT(vp != NULL);
2278 	KASSERT(dnode->tn_vnode == dvp);
2279 	KASSERT(de->td_node != NULL);
2280 	KASSERT(de->td_node->tn_vnode == vp);
2281 	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
2282 	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
2283 
2284 	error = tmpfs_remove_check_possible(dnode, de->td_node);
2285 	if (error)
2286 		return error;
2287 
2288 	error = tmpfs_remove_check_permitted(cred, dnode, de->td_node);
2289 	if (error)
2290 		return error;
2291 
2292 	/*
2293 	 * If not root and directory is sticky, check for permission on
2294 	 * directory or on file. This implements append-only directories.
2295 	 */
2296 	if ((dnode->tn_mode & S_ISTXT) != 0)
2297 		if (cred->cr_uid != 0 && cred->cr_uid != dnode->tn_uid &&
2298 		    cred->cr_uid != de->td_node->tn_uid)
2299 			return EPERM;
2300 
2301 	tmpfs_dir_detach(dnode, de);
2302 	tmpfs_free_dirent(tmpfs, de);
2303 
2304 	return 0;
2305 }
2306 
2307 /*
2308  * Check whether a rename is possible independent of credentials.
2309  *
2310  * Everything must be locked and referenced.
2311  */
2312 int
2313 tmpfs_rename_check_possible(
2314     struct tmpfs_node *fdnode, struct tmpfs_node *fnode,
2315     struct tmpfs_node *tdnode, struct tmpfs_node *tnode)
2316 {
2317 
2318 	KASSERT(fdnode != NULL);
2319 	KASSERT(fnode != NULL);
2320 	KASSERT(tdnode != NULL);
2321 	KASSERT(fdnode != fnode);
2322 	KASSERT(tdnode != tnode);
2323 	KASSERT(fnode != tnode);
2324 	KASSERT(fdnode->tn_vnode != NULL);
2325 	KASSERT(fnode->tn_vnode != NULL);
2326 	KASSERT(tdnode->tn_vnode != NULL);
2327 	KASSERT((tnode == NULL) || (tnode->tn_vnode != NULL));
2328 	KASSERT(VOP_ISLOCKED(fdnode->tn_vnode) == LK_EXCLUSIVE);
2329 	KASSERT(VOP_ISLOCKED(fnode->tn_vnode) == LK_EXCLUSIVE);
2330 	KASSERT(VOP_ISLOCKED(tdnode->tn_vnode) == LK_EXCLUSIVE);
2331 	KASSERT((tnode == NULL) ||
2332 	    (VOP_ISLOCKED(tnode->tn_vnode) == LK_EXCLUSIVE));
2333 
2334 	/*
2335 	 * If fdnode is immutable, we can't write to it.  If fdnode is
2336 	 * append-only, the only change we can make is to add entries
2337 	 * to it.  If fnode is immutable, we can't change the links to
2338 	 * it.  If fnode is append-only...well, this is what UFS does.
2339 	 */
2340 	if ((fdnode->tn_flags | fnode->tn_flags) & (IMMUTABLE | APPEND))
2341 		return EPERM;
2342 
2343 	/*
2344 	 * If tdnode is immutable, we can't write to it.  If tdnode is
2345 	 * append-only, we can add entries, but we can't change
2346 	 * existing entries.
2347 	 */
2348 	if (tdnode->tn_flags & (IMMUTABLE | (tnode? APPEND : 0)))
2349 		return EPERM;
2350 
2351 	/*
2352 	 * If tnode is immutable, we can't replace links to it.  If
2353 	 * tnode is append-only...well, this is what UFS does.
2354 	 */
2355 	if (tnode != NULL) {
2356 		KASSERT(tnode != NULL);
2357 		if ((tnode->tn_flags & (IMMUTABLE | APPEND)) != 0)
2358 			return EPERM;
2359 	}
2360 
2361 	return 0;
2362 }
2363 
2364 /*
2365  * Check whether a rename is permitted given our credentials.
2366  *
2367  * Everything must be locked and referenced.
2368  */
2369 int
2370 tmpfs_rename_check_permitted(struct ucred *cred,
2371     struct tmpfs_node *fdnode, struct tmpfs_node *fnode,
2372     struct tmpfs_node *tdnode, struct tmpfs_node *tnode)
2373 {
2374 	int error;
2375 
2376 	KASSERT(fdnode != NULL);
2377 	KASSERT(fnode != NULL);
2378 	KASSERT(tdnode != NULL);
2379 	KASSERT(fdnode != fnode);
2380 	KASSERT(tdnode != tnode);
2381 	KASSERT(fnode != tnode);
2382 	KASSERT(fdnode->tn_vnode != NULL);
2383 	KASSERT(fnode->tn_vnode != NULL);
2384 	KASSERT(tdnode->tn_vnode != NULL);
2385 	KASSERT((tnode == NULL) || (tnode->tn_vnode != NULL));
2386 	KASSERT(VOP_ISLOCKED(fdnode->tn_vnode) == LK_EXCLUSIVE);
2387 	KASSERT(VOP_ISLOCKED(fnode->tn_vnode) == LK_EXCLUSIVE);
2388 	KASSERT(VOP_ISLOCKED(tdnode->tn_vnode) == LK_EXCLUSIVE);
2389 	KASSERT((tnode == NULL) ||
2390 	    (VOP_ISLOCKED(tnode->tn_vnode) == LK_EXCLUSIVE));
2391 
2392 	/*
2393 	 * We need to remove or change an entry in the source directory.
2394 	 */
2395 	error = VOP_ACCESS(fdnode->tn_vnode, VWRITE, cred, curproc);
2396 	if (error)
2397 		return error;
2398 
2399 	/*
2400 	 * If we are changing directories, then we need to write to the
2401 	 * target directory to add or change an entry.  Also, if fnode
2402 	 * is a directory, we need to write to it to change its `..'
2403 	 * entry.
2404 	 */
2405 	if (fdnode != tdnode) {
2406 		error = VOP_ACCESS(tdnode->tn_vnode, VWRITE, cred, curproc);
2407 		if (error)
2408 			return error;
2409 		if (fnode->tn_type == VDIR) {
2410 			error = VOP_ACCESS(fnode->tn_vnode, VWRITE, cred,
2411 			    curproc);
2412 			if (error)
2413 				return error;
2414 		}
2415 	}
2416 
2417 	error = tmpfs_check_sticky(cred, fdnode, fnode);
2418 	if (error)
2419 		return error;
2420 
2421 	if (TMPFS_DIRSEQ_FULL(tdnode))
2422 		return (ENOSPC);
2423 
2424 	error = tmpfs_check_sticky(cred, tdnode, tnode);
2425 	if (error)
2426 		return error;
2427 
2428 	return 0;
2429 }
2430 
2431 /*
2432  * Check whether removing node's entry in dnode is possible independent
2433  * of credentials.
2434  *
2435  * Everything must be locked and referenced.
2436  */
2437 int
2438 tmpfs_remove_check_possible(struct tmpfs_node *dnode, struct tmpfs_node *node)
2439 {
2440 
2441 	KASSERT(dnode != NULL);
2442 	KASSERT(dnode->tn_vnode != NULL);
2443 	KASSERT(node != NULL);
2444 	KASSERT(dnode != node);
2445 	KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE);
2446 	KASSERT(VOP_ISLOCKED(node->tn_vnode) == LK_EXCLUSIVE);
2447 
2448 	/*
2449 	 * We want to delete the entry.  If dnode is immutable, we
2450 	 * can't write to it to delete the entry.  If dnode is
2451 	 * append-only, the only change we can make is to add entries,
2452 	 * so we can't delete entries.  If node is immutable, we can't
2453 	 * change the links to it, so we can't delete the entry.  If
2454 	 * node is append-only...well, this is what UFS does.
2455 	 */
2456 	if ((dnode->tn_flags | node->tn_flags) & (IMMUTABLE | APPEND))
2457 		return EPERM;
2458 
2459 	return 0;
2460 }
2461 
2462 /*
2463  * Check whether removing node's entry in dnode is permitted given our
2464  * credentials.
2465  *
2466  * Everything must be locked and referenced.
2467  */
2468 int
2469 tmpfs_remove_check_permitted(struct ucred *cred,
2470     struct tmpfs_node *dnode, struct tmpfs_node *node)
2471 {
2472 	int error;
2473 
2474 	KASSERT(dnode != NULL);
2475 	KASSERT(dnode->tn_vnode != NULL);
2476 	KASSERT(node != NULL);
2477 	KASSERT(dnode != node);
2478 	KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE);
2479 	KASSERT(VOP_ISLOCKED(node->tn_vnode) == LK_EXCLUSIVE);
2480 
2481 	/*
2482 	 * Check whether we are permitted to write to the source
2483 	 * directory in order to delete an entry from it.
2484 	 */
2485 	error = VOP_ACCESS(dnode->tn_vnode, VWRITE, cred, curproc);
2486 	if (error)
2487 		return error;
2488 
2489 	error = tmpfs_check_sticky(cred, dnode, node);
2490 	if (error)
2491 		return error;
2492 
2493 	return 0;
2494 }
2495 
2496 /*
2497  * Check whether we may change an entry in a sticky directory.  If the
2498  * directory is sticky, the user must own either the directory or, if
2499  * it exists, the node, in order to change the entry.
2500  *
2501  * Everything must be locked and referenced.
2502  */
2503 int
2504 tmpfs_check_sticky(struct ucred *cred,
2505     struct tmpfs_node *dnode, struct tmpfs_node *node)
2506 {
2507 
2508 	KASSERT(dnode != NULL);
2509 	KASSERT(dnode->tn_vnode != NULL);
2510 	KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE);
2511 	KASSERT((node == NULL) || (node->tn_vnode != NULL));
2512 	KASSERT((node == NULL) ||
2513 	    (VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE));
2514 
2515 	if (node == NULL)
2516 		return 0;
2517 
2518 	if (dnode->tn_mode & S_ISTXT) {
2519 		if (cred->cr_uid != 0 &&
2520 		    cred->cr_uid != dnode->tn_uid &&
2521 		    cred->cr_uid != node->tn_uid)
2522 			return EPERM;
2523 	}
2524 
2525 	return 0;
2526 }
2527 
2528 void
2529 tmpfs_rename_cache_purge(struct vnode *fdvp, struct vnode *fvp,
2530     struct vnode *tdvp, struct vnode *tvp)
2531 {
2532 
2533 	KASSERT(fdvp != NULL);
2534 	KASSERT(fvp != NULL);
2535 	KASSERT(tdvp != NULL);
2536 	KASSERT(fdvp != fvp);
2537 	KASSERT(fdvp != tvp);
2538 	KASSERT(tdvp != fvp);
2539 	KASSERT(tdvp != tvp);
2540 	KASSERT(fvp != tvp);
2541 	KASSERT(fdvp->v_type == VDIR);
2542 	KASSERT(tdvp->v_type == VDIR);
2543 
2544 	/*
2545 	 * XXX What actually needs to be purged?
2546 	 */
2547 
2548 	cache_purge(fdvp);
2549 
2550 	if (fvp->v_type == VDIR)
2551 		cache_purge(fvp);
2552 
2553 	if (tdvp != fdvp)
2554 		cache_purge(tdvp);
2555 
2556 	if ((tvp != NULL) && (tvp->v_type == VDIR))
2557 		cache_purge(tvp);
2558 }
2559 
2560 void
2561 tmpfs_rename_abort(void *v)
2562 {
2563 	struct vop_rename_args *ap = v;
2564 	struct vnode *fdvp = ap->a_fdvp;
2565 	struct vnode *fvp = ap->a_fvp;
2566 	struct componentname *fcnp = ap->a_fcnp;
2567 	struct vnode *tdvp = ap->a_tdvp;
2568 	struct vnode *tvp = ap->a_tvp;
2569 	struct componentname *tcnp = ap->a_tcnp;
2570 
2571 	VOP_ABORTOP(tdvp, tcnp);
2572 	if (tdvp == tvp)
2573 		vrele(tdvp);
2574 	else
2575 		vput(tdvp);
2576 	if (tvp != NULL)
2577 		vput(tvp);
2578 	VOP_ABORTOP(fdvp, fcnp);
2579 	vrele(fdvp);
2580 	vrele(fvp);
2581 }
2582 
/*
 * kqueue filter callbacks for tmpfs vnodes.  filt_tmpfsdetach unhooks
 * a knote from its vnode; the other three are the event callbacks for
 * the read, write and vnode filters respectively.
 */
void filt_tmpfsdetach(struct knote *kn);
int filt_tmpfsread(struct knote *kn, long hint);
int filt_tmpfswrite(struct knote *kn, long hint);
int filt_tmpfsvnode(struct knote *kn, long hint);

/*
 * Filter operation tables installed by tmpfs_kqfilter(), one per
 * supported filter type.  All three share filt_tmpfsdetach.
 *
 * NOTE(review): the initializers are positional; presumably the slots
 * are { f_isfd, f_attach, f_detach, f_event } -- confirm against the
 * definition of struct filterops.
 */
struct filterops tmpfsread_filtops =
	{ 1, NULL, filt_tmpfsdetach, filt_tmpfsread };
struct filterops tmpfswrite_filtops =
	{ 1, NULL, filt_tmpfsdetach, filt_tmpfswrite };
struct filterops tmpfsvnode_filtops =
	{ 1, NULL, filt_tmpfsdetach, filt_tmpfsvnode };
2594 
2595 int
2596 tmpfs_kqfilter(void *v)
2597 {
2598 	struct vop_kqfilter_args *ap = v;
2599 	struct vnode *vp = ap->a_vp;
2600 	struct knote *kn = ap->a_kn;
2601 
2602 	switch (kn->kn_filter) {
2603 	case EVFILT_READ:
2604 		kn->kn_fop = &tmpfsread_filtops;
2605 		break;
2606 	case EVFILT_WRITE:
2607 		kn->kn_fop = &tmpfswrite_filtops;
2608 		break;
2609 	case EVFILT_VNODE:
2610 		kn->kn_fop = &tmpfsvnode_filtops;
2611 		break;
2612 	default:
2613 		return (EINVAL);
2614 	}
2615 
2616 	kn->kn_hook = (caddr_t)vp;
2617 
2618 	SLIST_INSERT_HEAD(&vp->v_selectinfo.si_note, kn, kn_selnext);
2619 
2620 	return (0);
2621 }
2622 
2623 void
2624 filt_tmpfsdetach(struct knote *kn)
2625 {
2626 	struct vnode *vp = (struct vnode *)kn->kn_hook;
2627 
2628 	SLIST_REMOVE(&vp->v_selectinfo.si_note, kn, knote, kn_selnext);
2629 }
2630 
2631 int
2632 filt_tmpfsread(struct knote *kn, long hint)
2633 {
2634 	struct vnode *vp = (struct vnode *)kn->kn_hook;
2635 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
2636 
2637 	/*
2638 	 * filesystem is gone, so set the EOF flag and schedule
2639 	 * the knote for deletion.
2640 	 */
2641 	if (hint == NOTE_REVOKE) {
2642 		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
2643 		return (1);
2644 	}
2645 
2646         kn->kn_data = node->tn_size - kn->kn_fp->f_offset;
2647 	if (kn->kn_data == 0 && kn->kn_sfflags & NOTE_EOF) {
2648 		kn->kn_fflags |= NOTE_EOF;
2649 		return (1);
2650 	}
2651 
2652         return (kn->kn_data != 0);
2653 }
2654 
2655 int
2656 filt_tmpfswrite(struct knote *kn, long hint)
2657 {
2658 	/*
2659 	 * filesystem is gone, so set the EOF flag and schedule
2660 	 * the knote for deletion.
2661 	 */
2662 	if (hint == NOTE_REVOKE) {
2663 		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
2664 		return (1);
2665 	}
2666 
2667         kn->kn_data = 0;
2668         return (1);
2669 }
2670 
2671 int
2672 filt_tmpfsvnode(struct knote *kn, long hint)
2673 {
2674 	if (kn->kn_sfflags & hint)
2675 		kn->kn_fflags |= hint;
2676 	if (hint == NOTE_REVOKE) {
2677 		kn->kn_flags |= EV_EOF;
2678 		return (1);
2679 	}
2680 	return (kn->kn_fflags != 0);
2681 }
2682