xref: /openbsd-src/sys/tmpfs/tmpfs_vnops.c (revision 5ad04d351680822078003e2b066cfc9680d6157d)
1 /*	$OpenBSD: tmpfs_vnops.c,v 1.16 2014/03/28 17:57:11 mpi Exp $	*/
2 /*	$NetBSD: tmpfs_vnops.c,v 1.100 2012/11/05 17:27:39 dholland Exp $	*/
3 
4 /*
5  * Copyright (c) 2005, 2006, 2007, 2012 The NetBSD Foundation, Inc.
6  * Copyright (c) 2013 Pedro Martelletto
7  * All rights reserved.
8  *
9  * This code is derived from software contributed to The NetBSD Foundation
10  * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
11  * 2005 program, and by Taylor R Campbell.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  * POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 /*
36  * tmpfs vnode interface.
37  */
38 
39 #if 0
40 #include <sys/cdefs.h>
41 __KERNEL_RCSID(0, "$NetBSD: tmpfs_vnops.c,v 1.100 2012/11/05 17:27:39 dholland Exp $");
42 #endif
43 
44 #include <sys/param.h>
45 #include <sys/dirent.h>
46 #include <sys/fcntl.h>
47 #include <sys/event.h>
48 #include <sys/malloc.h>
49 #include <sys/namei.h>
50 #include <sys/stat.h>
51 #include <sys/uio.h>
52 #include <sys/unistd.h>
53 #include <sys/vnode.h>
54 #include <sys/lockf.h>
55 #include <sys/poll.h>
56 #include <sys/file.h>
57 
58 #include <uvm/uvm_extern.h>
59 
60 #include <miscfs/fifofs/fifo.h>
61 #include <tmpfs/tmpfs_vnops.h>
62 #include <tmpfs/tmpfs.h>
63 
64 int tmpfs_kqfilter(void *v);
65 
66 /*
67  * vnode operations vector used for files stored in a tmpfs file system.
68  */
69 struct vops tmpfs_vops = {
70 	.vop_lookup	= tmpfs_lookup,
71 	.vop_create	= tmpfs_create,
72 	.vop_mknod	= tmpfs_mknod,
73 	.vop_open	= tmpfs_open,
74 	.vop_close	= tmpfs_close,
75 	.vop_access	= tmpfs_access,
76 	.vop_getattr	= tmpfs_getattr,
77 	.vop_setattr	= tmpfs_setattr,
78 	.vop_read	= tmpfs_read,
79 	.vop_write	= tmpfs_write,
80 	.vop_ioctl	= tmpfs_ioctl,
81 	.vop_poll	= tmpfs_poll,
82 	.vop_kqfilter	= tmpfs_kqfilter,
83 	.vop_revoke	= vop_generic_revoke,
84 	.vop_fsync	= tmpfs_fsync,
85 	.vop_remove	= tmpfs_remove,
86 	.vop_link	= tmpfs_link,
87 	.vop_rename	= tmpfs_rename,
88 	.vop_mkdir	= tmpfs_mkdir,
89 	.vop_rmdir	= tmpfs_rmdir,
90 	.vop_symlink	= tmpfs_symlink,
91 	.vop_readdir	= tmpfs_readdir,
92 	.vop_readlink	= tmpfs_readlink,
93 	.vop_abortop	= vop_generic_abortop,
94 	.vop_inactive	= tmpfs_inactive,
95 	.vop_reclaim	= tmpfs_reclaim,
96 	.vop_lock	= tmpfs_lock,
97 	.vop_unlock	= tmpfs_unlock,
98 	.vop_bmap	= vop_generic_bmap,
99 	.vop_strategy	= tmpfs_strategy,
100 	.vop_print	= tmpfs_print,
101 	.vop_islocked	= tmpfs_islocked,
102 	.vop_pathconf	= tmpfs_pathconf,
103 	.vop_advlock	= tmpfs_advlock,
104 	.vop_bwrite	= tmpfs_bwrite,
105 };
106 
107 /*
108  * tmpfs_lookup: path name traversal routine.
109  *
110  * Arguments: dvp (directory being searched), vpp (result),
111  * cnp (component name - path).
112  *
113  * => Caller holds a reference and lock on dvp.
114  * => We return looked-up vnode (vpp) locked, with a reference held.
115  */
116 int
117 tmpfs_lookup(void *v)
118 {
119 	struct vop_lookup_args /* {
120 		struct vnode *a_dvp;
121 		struct vnode **a_vpp;
122 		struct componentname *a_cnp;
123 	} */ *ap = v;
124 	struct vnode *dvp = ap->a_dvp, **vpp = ap->a_vpp;
125 	struct componentname *cnp = ap->a_cnp;
126 	struct ucred *cred = cnp->cn_cred;
127 	const int lastcn = (cnp->cn_flags & ISLASTCN) != 0;
128 	const int lockparent = (cnp->cn_flags & LOCKPARENT) != 0;
129 	tmpfs_node_t *dnode, *tnode;
130 	tmpfs_dirent_t *de;
131 	int cachefound;
132 	int error;
133 
134 	KASSERT(VOP_ISLOCKED(dvp));
135 
136 	dnode = VP_TO_TMPFS_DIR(dvp);
137 	cnp->cn_flags &= ~PDIRUNLOCK;
138 	*vpp = NULL;
139 
140 	/* Check accessibility of directory. */
141 	error = VOP_ACCESS(dvp, VEXEC, cred, curproc);
142 	if (error) {
143 		goto out;
144 	}
145 
146 	/*
147 	 * If requesting the last path component on a read-only file system
148 	 * with a write operation, deny it.
149 	 */
150 	if (lastcn && (dvp->v_mount->mnt_flag & MNT_RDONLY) != 0 &&
151 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
152 		error = EROFS;
153 		goto out;
154 	}
155 
156 	/*
157 	 * Avoid doing a linear scan of the directory if the requested
158 	 * directory/name couple is already in the cache.
159 	 */
160 	cachefound = cache_lookup(dvp, vpp, cnp);
161 	if (cachefound == ENOENT /* && *vpp == NULLVP */)
162 		return ENOENT; /* Negative cache hit. */
163 	else if (cachefound != -1)
164 		return 0; /* Found in cache. */
165 
166 	if (cnp->cn_flags & ISDOTDOT) {
167 		tmpfs_node_t *pnode;
168 
169 		/*
170 		 * Lookup of ".." case.
171 		 */
172 		if (lastcn) {
173 			if (cnp->cn_nameiop == RENAME) {
174 				error = EINVAL;
175 				goto out;
176 			}
177 			if (cnp->cn_nameiop == DELETE) {
178 				/* Keep the name for tmpfs_rmdir(). */
179 				cnp->cn_flags |= SAVENAME;
180 			}
181 		}
182 		KASSERT(dnode->tn_type == VDIR);
183 		pnode = dnode->tn_spec.tn_dir.tn_parent;
184 		if (pnode == NULL) {
185 			error = ENOENT;
186 			goto out;
187 		}
188 
189 		/*
190 		 * Lock the parent tn_nlock before releasing the vnode lock,
191 		 * and thus prevents parent from disappearing.
192 		 */
193 		rw_enter_write(&pnode->tn_nlock);
194 		VOP_UNLOCK(dvp, 0, curproc);
195 
196 		/*
197 		 * Get a vnode of the '..' entry and re-acquire the lock.
198 		 * Release the tn_nlock.
199 		 */
200 		error = tmpfs_vnode_get(dvp->v_mount, pnode, vpp);
201 		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, curproc);
202 		goto out;
203 
204 	} else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
205 		/*
206 		 * Lookup of "." case.
207 		 */
208 		if (lastcn && cnp->cn_nameiop == RENAME) {
209 			error = EISDIR;
210 			goto out;
211 		}
212 		vref(dvp);
213 		*vpp = dvp;
214 		error = 0;
215 		goto done;
216 	}
217 
218 	/*
219 	 * Other lookup cases: perform directory scan.
220 	 */
221 	de = tmpfs_dir_lookup(dnode, cnp);
222 	if (de == NULL) {
223 		/*
224 		 * The entry was not found in the directory.  This is valid
225 		 * if we are creating or renaming an entry and are working
226 		 * on the last component of the path name.
227 		 */
228 		if (lastcn && (cnp->cn_nameiop == CREATE ||
229 		    cnp->cn_nameiop == RENAME)) {
230 			error = VOP_ACCESS(dvp, VWRITE, cred, curproc);
231 			if (error) {
232 				goto out;
233 			}
234 			/*
235 			 * We are creating an entry in the file system, so
236 			 * save its name for further use by tmpfs_create().
237 			 */
238 			cnp->cn_flags |= SAVENAME;
239 			error = EJUSTRETURN;
240 		} else {
241 			error = ENOENT;
242 		}
243 		goto done;
244 	}
245 
246 	tnode = de->td_node;
247 
248 	/*
249 	 * If it is not the last path component and found a non-directory
250 	 * or non-link entry (which may itself be pointing to a directory),
251 	 * raise an error.
252 	 */
253 	if (!lastcn && tnode->tn_type != VDIR && tnode->tn_type != VLNK) {
254 		error = ENOTDIR;
255 		goto out;
256 	}
257 
258 	/* Check the permissions. */
259 	if (lastcn && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
260 		error = VOP_ACCESS(dvp, VWRITE, cred, curproc);
261 		if (error)
262 			goto out;
263 
264 		/*
265 		 * If not root and directory is sticky, check for permission
266 		 * on directory or on file. This implements append-only
267 		 * directories.
268 		 */
269 		if ((dnode->tn_mode & S_ISTXT) != 0) {
270 			if (cred->cr_uid != 0 &&
271 			    cred->cr_uid != dnode->tn_uid &&
272 			    cred->cr_uid != tnode->tn_uid) {
273 				error = EPERM;
274 				goto out;
275 			}
276 		}
277 
278 		/*
279 		 * XXX pedro: We might need cn_nameptr later in tmpfs_remove()
280 		 * or tmpfs_rmdir() for a tmpfs_dir_lookup(). We should really
281 		 * get rid of SAVENAME at some point.
282 		 */
283 		if (cnp->cn_nameiop == DELETE)
284 			cnp->cn_flags |= SAVENAME;
285 	}
286 
287 	/* Get a vnode for the matching entry. */
288 	rw_enter_write(&tnode->tn_nlock);
289 	error = tmpfs_vnode_get(dvp->v_mount, tnode, vpp);
290 done:
291 	/*
292 	 * Cache the result, unless request was for creation (as it does
293 	 * not improve the performance).
294 	 */
295 	if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE) {
296 		cache_enter(dvp, *vpp, cnp);
297 	}
298 out:
299 	/*
300 	 * If (1) we succeded, (2) found a distinct vnode to return and (3) were
301 	 * either explicitely told to keep the parent locked or are in the
302 	 * middle of a lookup, unlock the parent vnode.
303 	 */
304 	if ((error == 0 || error == EJUSTRETURN) && /* (1) */
305 	    *vpp != dvp &&			    /* (2) */
306 	    (!lockparent || !lastcn)) {		    /* (3) */
307 		VOP_UNLOCK(dvp, 0, curproc);
308 		cnp->cn_flags |= PDIRUNLOCK;
309 	} else
310 		KASSERT(VOP_ISLOCKED(dvp));
311 
312 	KASSERT((*vpp && VOP_ISLOCKED(*vpp)) || error);
313 
314 	return error;
315 }
316 
317 int
318 tmpfs_create(void *v)
319 {
320 	struct vop_create_args /* {
321 		struct vnode		*a_dvp;
322 		struct vnode		**a_vpp;
323 		struct componentname	*a_cnp;
324 		struct vattr		*a_vap;
325 	} */ *ap = v;
326 	struct vnode *dvp = ap->a_dvp, **vpp = ap->a_vpp;
327 	struct componentname *cnp = ap->a_cnp;
328 	struct vattr *vap = ap->a_vap;
329 
330 	KASSERT(VOP_ISLOCKED(dvp));
331 	KASSERT(cnp->cn_flags & HASBUF);
332 	KASSERT(vap->va_type == VREG || vap->va_type == VSOCK);
333 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
334 }
335 
336 int
337 tmpfs_mknod(void *v)
338 {
339 	struct vop_mknod_args /* {
340 		struct vnode		*a_dvp;
341 		struct vnode		**a_vpp;
342 		struct componentname	*a_cnp;
343 		struct vattr		*a_vap;
344 	} */ *ap = v;
345 	struct vnode *dvp = ap->a_dvp, **vpp = ap->a_vpp;
346 	struct componentname *cnp = ap->a_cnp;
347 	struct vattr *vap = ap->a_vap;
348 	enum vtype vt = vap->va_type;
349 	int error;
350 
351 	if (vt != VBLK && vt != VCHR && vt != VFIFO) {
352 		vput(dvp);
353 		return EINVAL;
354 	}
355 
356 	/* tmpfs_alloc_file() will unlock 'dvp'. */
357 	error = tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
358 	if (error)
359 		return error;
360 
361 	vput(*vpp);
362 
363 	return 0;
364 }
365 
366 int
367 tmpfs_open(void *v)
368 {
369 	struct vop_open_args /* {
370 		struct vnode	*a_vp;
371 		int		a_mode;
372 		kauth_cred_t	a_cred;
373 	} */ *ap = v;
374 	struct vnode *vp = ap->a_vp;
375 	mode_t mode = ap->a_mode;
376 	tmpfs_node_t *node;
377 
378 	KASSERT(VOP_ISLOCKED(vp));
379 
380 	node = VP_TO_TMPFS_NODE(vp);
381 	if (node->tn_links < 1) {
382 		/*
383 		 * The file is still active, but all its names have been
384 		 * removed (e.g. by a "rmdir $(pwd)").  It cannot be opened
385 		 * any more, as it is about to be destroyed.
386 		 */
387 		return ENOENT;
388 	}
389 
390 	/* If the file is marked append-only, deny write requests. */
391 	if ((node->tn_flags & APPEND) != 0 &&
392 	    (mode & (FWRITE | O_APPEND)) == FWRITE) {
393 		return EPERM;
394 	}
395 	return 0;
396 }
397 
398 int
399 tmpfs_close(void *v)
400 {
401 	struct vop_close_args /* {
402 		struct vnode	*a_vp;
403 		int		a_fflag;
404 		kauth_cred_t	a_cred;
405 	} */ *ap = v;
406 	struct vnode *vp = ap->a_vp;
407 
408 	KASSERT(VOP_ISLOCKED(vp));
409 
410 	return 0;
411 }
412 
413 int
414 tmpfs_access(void *v)
415 {
416 	struct vop_access_args /* {
417 		struct vnode	*a_vp;
418 		int		a_mode;
419 		kauth_cred_t	a_cred;
420 	} */ *ap = v;
421 	struct vnode *vp = ap->a_vp;
422 	mode_t mode = ap->a_mode;
423 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
424 	const int writing = (mode & VWRITE) != 0;
425 
426 	KASSERT(VOP_ISLOCKED(vp));
427 
428 	/* Possible? */
429 	switch (vp->v_type) {
430 	case VDIR:
431 	case VLNK:
432 	case VREG:
433 		if (writing && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) {
434 			return EROFS;
435 		}
436 		break;
437 	case VBLK:
438 	case VCHR:
439 	case VSOCK:
440 	case VFIFO:
441 		break;
442 	default:
443 		return EINVAL;
444 	}
445 	if (writing && (node->tn_flags & IMMUTABLE) != 0) {
446 		return EPERM;
447 	}
448 
449 	return (vaccess(vp->v_type, node->tn_mode, node->tn_uid, node->tn_gid,
450 	    mode, ap->a_cred));
451 }
452 
453 int
454 tmpfs_getattr(void *v)
455 {
456 	struct vop_getattr_args /* {
457 		struct vnode	*a_vp;
458 		struct vattr	*a_vap;
459 		kauth_cred_t	a_cred;
460 	} */ *ap = v;
461 	struct vnode *vp = ap->a_vp;
462 	struct vattr *vap = ap->a_vap;
463 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
464 
465 	vattr_null(vap);
466 
467 	vap->va_type = vp->v_type;
468 	vap->va_mode = node->tn_mode;
469 	vap->va_nlink = node->tn_links;
470 	vap->va_uid = node->tn_uid;
471 	vap->va_gid = node->tn_gid;
472 	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
473 	vap->va_fileid = node->tn_id;
474 	vap->va_size = node->tn_size;
475 	vap->va_blocksize = PAGE_SIZE;
476 	vap->va_atime = node->tn_atime;
477 	vap->va_mtime = node->tn_mtime;
478 	vap->va_ctime = node->tn_ctime;
479 	/* vap->va_birthtime = node->tn_birthtime; */
480 	vap->va_gen = TMPFS_NODE_GEN(node);
481 	vap->va_flags = node->tn_flags;
482 	vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
483 	    node->tn_spec.tn_dev.tn_rdev : VNOVAL;
484 	vap->va_bytes = round_page(node->tn_size);
485 	vap->va_filerev = VNOVAL;
486 	vap->va_vaflags = 0;
487 	vap->va_spare = VNOVAL; /* XXX */
488 
489 	return 0;
490 }
491 
492 #define GOODTIME(tv)	((tv)->tv_sec != VNOVAL || (tv)->tv_nsec != VNOVAL)
493 /* XXX Should this operation be atomic?  I think it should, but code in
494  * XXX other places (e.g., ufs) doesn't seem to be... */
495 int
496 tmpfs_setattr(void *v)
497 {
498 	struct vop_setattr_args /* {
499 		struct vnode	*a_vp;
500 		struct vattr	*a_vap;
501 		kauth_cred_t	a_cred;
502 	} */ *ap = v;
503 	struct vnode *vp = ap->a_vp;
504 	struct vattr *vap = ap->a_vap;
505 	struct ucred *cred = ap->a_cred;
506 	struct proc *p = curproc;
507 	int error = 0;
508 
509 	KASSERT(VOP_ISLOCKED(vp));
510 
511 	/* Abort if any unsettable attribute is given. */
512 	if (vap->va_type != VNON || vap->va_nlink != VNOVAL ||
513 	    vap->va_fsid != VNOVAL || vap->va_fileid != VNOVAL ||
514 	    vap->va_blocksize != VNOVAL || GOODTIME(&vap->va_ctime) ||
515 	    vap->va_gen != VNOVAL || vap->va_rdev != VNOVAL ||
516 	    vap->va_bytes != VNOVAL) {
517 		return EINVAL;
518 	}
519 	if (error == 0 && (vap->va_flags != VNOVAL))
520 		error = tmpfs_chflags(vp, vap->va_flags, cred, p);
521 
522 	if (error == 0 && (vap->va_size != VNOVAL))
523 		error = tmpfs_chsize(vp, vap->va_size, cred, p);
524 
525 	if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL))
526 		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred, p);
527 
528 	if (error == 0 && (vap->va_mode != VNOVAL))
529 		error = tmpfs_chmod(vp, vap->va_mode, cred, p);
530 
531 	if (error == 0 && (GOODTIME(&vap->va_atime)
532 	    || GOODTIME(&vap->va_mtime))) {
533 		error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime,
534 		    vap->va_vaflags, cred, p);
535 		if (error == 0)
536 			return 0;
537 	}
538 	return error;
539 }
540 
541 int
542 tmpfs_read(void *v)
543 {
544 	struct vop_read_args /* {
545 		struct vnode *a_vp;
546 		struct uio *a_uio;
547 		int a_ioflag;
548 		struct ucred *a_cred;
549 	} */ *ap = v;
550 	struct vnode *vp = ap->a_vp;
551 	struct uio *uio = ap->a_uio;
552 	/* const int ioflag = ap->a_ioflag; */
553 	tmpfs_node_t *node;
554 	int error;
555 
556 	KASSERT(VOP_ISLOCKED(vp));
557 
558 	if (vp->v_type != VREG) {
559 		return EISDIR;
560 	}
561 	if (uio->uio_offset < 0) {
562 		return EINVAL;
563 	}
564 
565 	node = VP_TO_TMPFS_NODE(vp);
566 	tmpfs_update(node, TMPFS_NODE_ACCESSED);
567 	error = 0;
568 
569 	while (error == 0 && uio->uio_resid > 0) {
570 		vsize_t len;
571 
572 		if (node->tn_size <= uio->uio_offset) {
573 			break;
574 		}
575 		len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
576 		if (len == 0) {
577 			break;
578 		}
579 		error = tmpfs_uiomove(node, uio, len);
580 	}
581 
582 	return error;
583 }
584 
585 int
586 tmpfs_write(void *v)
587 {
588 	struct vop_write_args /* {
589 		struct vnode	*a_vp;
590 		struct uio	*a_uio;
591 		int		a_ioflag;
592 		kauth_cred_t	a_cred;
593 	} */ *ap = v;
594 	struct vnode *vp = ap->a_vp;
595 	struct uio *uio = ap->a_uio;
596 	const int ioflag = ap->a_ioflag;
597 	tmpfs_node_t *node;
598 	off_t oldsize;
599 	ssize_t overrun;
600 	int extended;
601 	int error;
602 
603 	KASSERT(VOP_ISLOCKED(vp));
604 
605 	node = VP_TO_TMPFS_NODE(vp);
606 	oldsize = node->tn_size;
607 
608 	if (vp->v_type != VREG)
609 		return (EINVAL);
610 
611 	if (uio->uio_resid == 0)
612 		return (0);
613 
614 	if (ioflag & IO_APPEND) {
615 		uio->uio_offset = node->tn_size;
616 	}
617 
618 	if (uio->uio_offset < 0 ||
619 	    (u_int64_t)uio->uio_offset + uio->uio_resid > LLONG_MAX)
620 		return (EFBIG);
621 
622 	/* do the filesize rlimit check */
623 	if ((error = vn_fsizechk(vp, uio, ioflag, &overrun)))
624 		return (error);
625 
626 	extended = uio->uio_offset + uio->uio_resid > node->tn_size;
627 	if (extended) {
628 		error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid);
629 		if (error)
630 			goto out;
631 	}
632 
633 	error = 0;
634 	while (error == 0 && uio->uio_resid > 0) {
635 		vsize_t len;
636 
637 		len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
638 		if (len == 0) {
639 			break;
640 		}
641 		error = tmpfs_uiomove(node, uio, len);
642 	}
643 	if (error) {
644 		(void)tmpfs_reg_resize(vp, oldsize);
645 	}
646 
647 	tmpfs_update(node, TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED |
648 	    (extended ? TMPFS_NODE_CHANGED : 0));
649 	if (extended)
650 		VN_KNOTE(vp, NOTE_WRITE | NOTE_EXTEND);
651 	else
652 		VN_KNOTE(vp, NOTE_WRITE);
653 out:
654 	if (error) {
655 		KASSERT(oldsize == node->tn_size);
656 	} else {
657 		KASSERT(uio->uio_resid == 0);
658 
659 		/* correct the result for writes clamped by vn_fsizechk() */
660 		uio->uio_resid += overrun;
661 
662 	}
663 	return error;
664 }
665 
666 int
667 tmpfs_fsync(void *v)
668 {
669 	struct vop_fsync_args /* {
670 		struct vnode *a_vp;
671 		struct ucred *a_cred;
672 		int a_flags;
673 		off_t a_offlo;
674 		off_t a_offhi;
675 		struct lwp *a_l;
676 	} */ *ap = v;
677 	struct vnode *vp = ap->a_vp;
678 
679 	/* Nothing to do.  Just update. */
680 	KASSERT(VOP_ISLOCKED(vp));
681 	return 0;
682 }
683 
684 /*
685  * tmpfs_remove: unlink a file.
686  *
687  * => Both directory (dvp) and file (vp) are locked.
688  * => We unlock and drop the reference on both.
689  */
690 int
691 tmpfs_remove(void *v)
692 {
693 	struct vop_remove_args /* {
694 		struct vnode *a_dvp;
695 		struct vnode *a_vp;
696 		struct componentname *a_cnp;
697 	} */ *ap = v;
698 	struct vnode *dvp = ap->a_dvp, *vp = ap->a_vp;
699 	struct componentname *cnp = ap->a_cnp;
700 	tmpfs_node_t *dnode, *node;
701 	tmpfs_dirent_t *de;
702 	int error;
703 
704 	KASSERT(VOP_ISLOCKED(dvp));
705 	KASSERT(VOP_ISLOCKED(vp));
706 	KASSERT(cnp->cn_flags & HASBUF);
707 
708 	if (vp->v_type == VDIR) {
709 		error = EPERM;
710 		goto out;
711 	}
712 
713 	dnode = VP_TO_TMPFS_NODE(dvp);
714 	node = VP_TO_TMPFS_NODE(vp);
715 
716 	/* Files marked as immutable or append-only cannot be deleted. */
717 	if (node->tn_flags & (IMMUTABLE | APPEND)) {
718 		error = EPERM;
719 		goto out;
720 	}
721 
722 	/*
723 	 * Likewise, files residing on directories marked as append-only cannot
724 	 * be deleted.
725 	 */
726 	if (dnode->tn_flags & APPEND) {
727 		error = EPERM;
728 		goto out;
729 	}
730 
731 	/* Lookup the directory entry (check the cached hint first). */
732 	de = tmpfs_dir_cached(node);
733 	if (de == NULL) {
734 		de = tmpfs_dir_lookup(dnode, cnp);
735 	}
736 
737 	KASSERT(de && de->td_node == node);
738 
739 	/*
740 	 * Remove the entry from the directory (drops the link count) and
741 	 * destroy it.
742 	 * Note: the inode referred by it will not be destroyed
743 	 * until the vnode is reclaimed/recycled.
744 	 */
745 	tmpfs_dir_detach(dnode, de);
746 	tmpfs_free_dirent(VFS_TO_TMPFS(vp->v_mount), de);
747 	if (node->tn_links > 0)  {
748 		/* We removed a hard link. */
749 		tmpfs_update(node, TMPFS_NODE_CHANGED);
750 	}
751 	error = 0;
752 out:
753 	pool_put(&namei_pool, cnp->cn_pnbuf);
754 	/* Drop the references and unlock the vnodes. */
755 	vput(vp);
756 	if (dvp == vp) {
757 		vrele(dvp);
758 	} else {
759 		vput(dvp);
760 	}
761 	return error;
762 }
763 
764 /*
765  * tmpfs_link: create a hard link.
766  */
767 int
768 tmpfs_link(void *v)
769 {
770 	struct vop_link_args /* {
771 		struct vnode *a_dvp;
772 		struct vnode *a_vp;
773 		struct componentname *a_cnp;
774 	} */ *ap = v;
775 	struct vnode *dvp = ap->a_dvp;
776 	struct vnode *vp = ap->a_vp;
777 	struct componentname *cnp = ap->a_cnp;
778 	tmpfs_node_t *dnode, *node;
779 	tmpfs_dirent_t *de;
780 	int error;
781 
782 	KASSERT(VOP_ISLOCKED(dvp));
783 
784 	if (vp->v_type == VDIR) {
785 		VOP_ABORTOP(dvp, cnp);
786 		vput(dvp);
787 		return EPERM;
788 	}
789 
790 	KASSERT(dvp != vp);
791 
792 	if (dvp->v_mount != vp->v_mount) {
793 		VOP_ABORTOP(dvp, cnp);
794 		vput(dvp);
795 		return EXDEV;
796 	}
797 
798 	dnode = VP_TO_TMPFS_DIR(dvp);
799 	node = VP_TO_TMPFS_NODE(vp);
800 
801 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curproc);
802 
803 	/* Check for maximum number of links limit. */
804 	if (node->tn_links == LINK_MAX) {
805 		error = EMLINK;
806 		goto out;
807 	}
808 	KASSERT(node->tn_links < LINK_MAX);
809 
810 	/* We cannot create links of files marked immutable or append-only. */
811 	if (node->tn_flags & (IMMUTABLE | APPEND)) {
812 		error = EPERM;
813 		goto out;
814 	}
815 
816 	if (TMPFS_DIRSEQ_FULL(dnode)) {
817 		error = ENOSPC;
818 		goto out;
819 	}
820 
821 	/* Allocate a new directory entry to represent the inode. */
822 	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount),
823 	    cnp->cn_nameptr, cnp->cn_namelen, &de);
824 	if (error) {
825 		goto out;
826 	}
827 
828 	/*
829 	 * Insert the entry into the directory.
830 	 * It will increase the inode link count.
831 	 */
832 	tmpfs_dir_attach(dnode, de, node);
833 
834 	/* Update the timestamps and trigger the event. */
835 	if (node->tn_vnode) {
836 		VN_KNOTE(node->tn_vnode, NOTE_LINK);
837 	}
838 	tmpfs_update(node, TMPFS_NODE_CHANGED);
839 	error = 0;
840 out:
841 	pool_put(&namei_pool, cnp->cn_pnbuf);
842 	VOP_UNLOCK(vp, 0, curproc);
843 	vput(dvp);
844 	return error;
845 }
846 
847 int
848 tmpfs_mkdir(void *v)
849 {
850 	struct vop_mkdir_args /* {
851 		struct vnode		*a_dvp;
852 		struct vnode		**a_vpp;
853 		struct componentname	*a_cnp;
854 		struct vattr		*a_vap;
855 	} */ *ap = v;
856 	struct vnode *dvp = ap->a_dvp;
857 	struct vnode **vpp = ap->a_vpp;
858 	struct componentname *cnp = ap->a_cnp;
859 	struct vattr *vap = ap->a_vap;
860 
861 	KASSERT(vap->va_type == VDIR);
862 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
863 }
864 
865 int
866 tmpfs_rmdir(void *v)
867 {
868 	struct vop_rmdir_args /* {
869 		struct vnode		*a_dvp;
870 		struct vnode		*a_vp;
871 		struct componentname	*a_cnp;
872 	} */ *ap = v;
873 	struct vnode *dvp = ap->a_dvp;
874 	struct vnode *vp = ap->a_vp;
875 	struct componentname *cnp = ap->a_cnp;
876 	tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
877 	tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
878 	tmpfs_node_t *node = VP_TO_TMPFS_DIR(vp);
879 	tmpfs_dirent_t *de;
880 	int error = 0;
881 
882 	KASSERT(VOP_ISLOCKED(dvp));
883 	KASSERT(VOP_ISLOCKED(vp));
884 	KASSERT(cnp->cn_flags & HASBUF);
885 
886 	if (cnp->cn_namelen == 2 && cnp->cn_nameptr[0] == '.' &&
887 	    cnp->cn_nameptr[1] == '.') {
888 		error = ENOTEMPTY;
889 		goto out;
890 	}
891 
892 	KASSERT(node->tn_spec.tn_dir.tn_parent == dnode);
893 
894 	/*
895 	 * Directories with more than two entries ('.' and '..') cannot be
896 	 * removed.
897 	 */
898 	if (node->tn_size > 0) {
899 		KASSERT(error == 0);
900 		TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
901 			error = ENOTEMPTY;
902 			break;
903 		}
904 		if (error)
905 			goto out;
906 	}
907 
908 	/* Lookup the directory entry (check the cached hint first). */
909 	de = tmpfs_dir_cached(node);
910 	if (de == NULL)
911 		de = tmpfs_dir_lookup(dnode, cnp);
912 
913 	KASSERT(de && de->td_node == node);
914 
915 	/* Check flags to see if we are allowed to remove the directory. */
916 	if (dnode->tn_flags & APPEND || node->tn_flags & (IMMUTABLE | APPEND)) {
917 		error = EPERM;
918 		goto out;
919 	}
920 
921 	/* Decrement the link count for the virtual '.' entry. */
922 	node->tn_links--;
923 	tmpfs_update(node, TMPFS_NODE_STATUSALL);
924 
925 	/* Detach the directory entry from the directory. */
926 	tmpfs_dir_detach(dnode, de);
927 
928 	/* Purge the cache for parent. */
929 	cache_purge(dvp);
930 
931 	/*
932 	 * Destroy the directory entry.
933 	 * Note: the inode referred by it will not be destroyed
934 	 * until the vnode is reclaimed.
935 	 */
936 	tmpfs_free_dirent(tmp, de);
937 	KASSERT(TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir) == NULL);
938 
939 	KASSERT(node->tn_links == 0);
940 out:
941 	pool_put(&namei_pool, cnp->cn_pnbuf);
942 	/* Release the nodes. */
943 	vput(dvp);
944 	vput(vp);
945 	return error;
946 }
947 
948 int
949 tmpfs_symlink(void *v)
950 {
951 	struct vop_symlink_args /* {
952 		struct vnode		*a_dvp;
953 		struct vnode		**a_vpp;
954 		struct componentname	*a_cnp;
955 		struct vattr		*a_vap;
956 		char			*a_target;
957 	} */ *ap = v;
958 	struct vnode *dvp = ap->a_dvp;
959 	struct vnode **vpp = ap->a_vpp;
960 	struct componentname *cnp = ap->a_cnp;
961 	struct vattr *vap = ap->a_vap;
962 	char *target = ap->a_target;
963 	int error;
964 
965 	KASSERT(vap->va_type == 0);
966 	vap->va_type = VLNK;
967 
968 	error = tmpfs_alloc_file(dvp, vpp, vap, cnp, target);
969 	if (error == 0)
970 		vput(*vpp);
971 
972 	return error;
973 }
974 
975 int
976 tmpfs_readdir(void *v)
977 {
978 	struct vop_readdir_args /* {
979 		struct vnode	*a_vp;
980 		struct uio	*a_uio;
981 		kauth_cred_t	a_cred;
982 		int		*a_eofflag;
983 	} */ *ap = v;
984 	struct vnode *vp = ap->a_vp;
985 	struct uio *uio = ap->a_uio;
986 	int *eofflag = ap->a_eofflag;
987 	tmpfs_node_t *node;
988 	int error;
989 
990 	KASSERT(VOP_ISLOCKED(vp));
991 
992 	/* This operation only makes sense on directory nodes. */
993 	if (vp->v_type != VDIR) {
994 		return ENOTDIR;
995 	}
996 	node = VP_TO_TMPFS_DIR(vp);
997 	/*
998 	 * Retrieve the directory entries, unless it is being destroyed.
999 	 */
1000 	if (node->tn_links) {
1001 		error = tmpfs_dir_getdents(node, uio);
1002 	} else {
1003 		error = 0;
1004 	}
1005 
1006 	if (eofflag != NULL) {
1007 		*eofflag = !error && uio->uio_offset == TMPFS_DIRSEQ_EOF;
1008 	}
1009 	return error;
1010 }
1011 
1012 int
1013 tmpfs_readlink(void *v)
1014 {
1015 	struct vop_readlink_args /* {
1016 		struct vnode	*a_vp;
1017 		struct uio	*a_uio;
1018 		kauth_cred_t	a_cred;
1019 	} */ *ap = v;
1020 	struct vnode *vp = ap->a_vp;
1021 	struct uio *uio = ap->a_uio;
1022 	tmpfs_node_t *node;
1023 	int error;
1024 
1025 	KASSERT(VOP_ISLOCKED(vp));
1026 	KASSERT(uio->uio_offset == 0);
1027 	KASSERT(vp->v_type == VLNK);
1028 
1029 	node = VP_TO_TMPFS_NODE(vp);
1030 	error = uiomove(node->tn_spec.tn_lnk.tn_link,
1031 	    MIN(node->tn_size, uio->uio_resid), uio);
1032 	tmpfs_update(node, TMPFS_NODE_ACCESSED);
1033 
1034 	return error;
1035 }
1036 
1037 int
1038 tmpfs_inactive(void *v)
1039 {
1040 	struct vop_inactive_args /* {
1041 		struct vnode *a_vp;
1042 		int *a_recycle;
1043 	} */ *ap = v;
1044 	struct vnode *vp = ap->a_vp;
1045 	tmpfs_node_t *node;
1046 
1047 	KASSERT(VOP_ISLOCKED(vp));
1048 
1049 	node = VP_TO_TMPFS_NODE(vp);
1050 
1051 	if (vp->v_type == VREG && tmpfs_uio_cached(node))
1052 		tmpfs_uio_uncache(node);
1053 
1054 	VOP_UNLOCK(vp, 0, curproc);
1055 
1056 	/*
1057 	 * If we are done with the node, reclaim it so that it can be reused
1058 	 * immediately.
1059 	 */
1060 	if (node->tn_links == 0)
1061 		vrecycle(vp, curproc);
1062 
1063 	return 0;
1064 }
1065 
1066 int
1067 tmpfs_reclaim(void *v)
1068 {
1069 	struct vop_reclaim_args /* {
1070 		struct vnode *a_vp;
1071 	} */ *ap = v;
1072 	struct vnode *vp = ap->a_vp;
1073 	tmpfs_mount_t *tmp = VFS_TO_TMPFS(vp->v_mount);
1074 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1075 	int racing;
1076 
1077 	/* Disassociate inode from vnode. */
1078 	rw_enter_write(&node->tn_nlock);
1079 	node->tn_vnode = NULL;
1080 	vp->v_data = NULL;
1081 	/* Check if tmpfs_vnode_get() is racing with us. */
1082 	racing = TMPFS_NODE_RECLAIMING(node);
1083 	rw_exit_write(&node->tn_nlock);
1084 
1085 	/*
1086 	 * If inode is not referenced, i.e. no links, then destroy it.
1087 	 * Note: if racing - inode is about to get a new vnode, leave it.
1088 	 */
1089 	if (node->tn_links == 0 && !racing) {
1090 		tmpfs_free_node(tmp, node);
1091 	}
1092 	return 0;
1093 }
1094 
1095 int
1096 tmpfs_pathconf(void *v)
1097 {
1098 	struct vop_pathconf_args /* {
1099 		struct vnode	*a_vp;
1100 		int		a_name;
1101 		register_t	*a_retval;
1102 	} */ *ap = v;
1103 	const int name = ap->a_name;
1104 	register_t *retval = ap->a_retval;
1105 	int error = 0;
1106 
1107 	switch (name) {
1108 	case _PC_LINK_MAX:
1109 		*retval = LINK_MAX;
1110 		break;
1111 	case _PC_NAME_MAX:
1112 		*retval = TMPFS_MAXNAMLEN;
1113 		break;
1114 	case _PC_CHOWN_RESTRICTED:
1115 		*retval = 1;
1116 		break;
1117 	case _PC_NO_TRUNC:
1118 		*retval = 1;
1119 		break;
1120 	case _PC_FILESIZEBITS:
1121 		*retval = 64;
1122 		break;
1123 	case _PC_TIMESTAMP_RESOLUTION:
1124 		*retval = 1;
1125 		break;
1126 	default:
1127 		error = EINVAL;
1128 	}
1129 	return error;
1130 }
1131 
1132 int
1133 tmpfs_advlock(void *v)
1134 {
1135 	struct vop_advlock_args /* {
1136 		struct vnode	*a_vp;
1137 		void *		a_id;
1138 		int		a_op;
1139 		struct flock	*a_fl;
1140 		int		a_flags;
1141 	} */ *ap = v;
1142 	struct vnode *vp = ap->a_vp;
1143 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1144 
1145 	return lf_advlock(&node->tn_lockf, node->tn_size, ap->a_id, ap->a_op,
1146 	    ap->a_fl, ap->a_flags);
1147 }
1148 
1149 int
1150 tmpfs_print(void *v)
1151 {
1152 	struct vop_print_args /* {
1153 		struct vnode	*a_vp;
1154 	} */ *ap = v;
1155 	struct vnode *vp = ap->a_vp;
1156 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1157 
1158 	printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%x, links %d\n"
1159 	    "\tmode 0%o, owner %d, group %d, size %lld",
1160 	    node, node->tn_flags, node->tn_links, node->tn_mode, node->tn_uid,
1161 	    node->tn_gid, node->tn_size);
1162 #ifdef FIFO
1163 	if (vp->v_type == VFIFO)
1164 		fifo_printinfo(vp);
1165 #endif
1166 	printf("\n");
1167 	return 0;
1168 }
1169 
/*
 * tmpfs_bwrite: a null op.  The argument is ignored and 0 is always
 * returned.
 */
int
tmpfs_bwrite(void *v)
{
	(void)v;

	return 0;
}
1176 
1177 int
1178 tmpfs_poll(void *v)
1179 {
1180 	struct vop_poll_args *ap = v;
1181 	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
1182 }
1183 
/*
 * tmpfs_strategy: not supported.  The argument is ignored and
 * EOPNOTSUPP is always returned.
 */
int
tmpfs_strategy(void *v)
{
	(void)v;

	return EOPNOTSUPP;
}
1189 
/*
 * tmpfs_ioctl: no ioctl is handled.  The argument is ignored and
 * ENOTTY is always returned.
 */
int
tmpfs_ioctl(void *v)
{
	(void)v;

	return ENOTTY;
}
1195 
1196 int
1197 tmpfs_lock(void *v)
1198 {
1199 	struct vop_lock_args *ap = v;
1200 	tmpfs_node_t *tnp = VP_TO_TMPFS_NODE(ap->a_vp);
1201 
1202 	return lockmgr(&tnp->tn_vlock, ap->a_flags, NULL);
1203 }
1204 
1205 int
1206 tmpfs_unlock(void *v)
1207 {
1208 	struct vop_unlock_args *ap = v;
1209 	tmpfs_node_t *tnp = VP_TO_TMPFS_NODE(ap->a_vp);
1210 
1211 	return lockmgr(&tnp->tn_vlock, ap->a_flags | LK_RELEASE, NULL);
1212 }
1213 
1214 int
1215 tmpfs_islocked(void *v)
1216 {
1217 	struct vop_islocked_args *ap = v;
1218 	tmpfs_node_t *tnp = VP_TO_TMPFS_NODE(ap->a_vp);
1219 
1220 	return lockstatus(&tnp->tn_vlock);
1221 }
1222 
1223 /*
1224  * tmpfs_rename: rename routine, the hairiest system call, with the
1225  * insane API.
1226  *
1227  * Arguments: fdvp (from-parent vnode), fvp (from-leaf), tdvp (to-parent)
1228  * and tvp (to-leaf), if exists (NULL if not).
1229  *
1230  * => Caller holds a reference on fdvp and fvp, they are unlocked.
1231  *    Note: fdvp and fvp can refer to the same object (i.e. when it is root).
1232  *
1233  * => Both tdvp and tvp are referenced and locked.  It is our responsibility
1234  *    to release the references and unlock them (or destroy).
1235  */
1236 
1237 /*
1238  * First, some forward declarations of subroutines.
1239  */
1240 
/* Rename proper, operating on referenced, unlocked directory vnodes. */
int tmpfs_sane_rename(struct vnode *, struct componentname *,
    struct vnode *, struct componentname *, struct ucred *, int);

/*
 * Lock-and-look-up phase: the dispatcher, then its variants for a shared
 * source/target directory and for two distinct directories.
 */
int tmpfs_rename_enter(struct mount *, struct tmpfs_mount *,
    struct ucred *,
    struct vnode *, struct tmpfs_node *, struct componentname *,
    struct tmpfs_dirent **, struct vnode **,
    struct vnode *, struct tmpfs_node *, struct componentname *,
    struct tmpfs_dirent **, struct vnode **);
int tmpfs_rename_enter_common(struct mount *, struct tmpfs_mount *,
    struct ucred *,
    struct vnode *, struct tmpfs_node *,
    struct componentname *, struct tmpfs_dirent **, struct vnode **,
    struct componentname *, struct tmpfs_dirent **, struct vnode **);
int tmpfs_rename_enter_separate(struct mount *, struct tmpfs_mount *,
    struct ucred *,
    struct vnode *, struct tmpfs_node *, struct componentname *,
    struct tmpfs_dirent **, struct vnode **,
    struct vnode *, struct tmpfs_node *, struct componentname *,
    struct tmpfs_dirent **, struct vnode **);

/* Undo of the lock-and-look-up phase. */
void tmpfs_rename_exit(struct tmpfs_mount *,
    struct vnode *, struct vnode *, struct vnode *, struct vnode *);

/* Helpers used while locking: directory lock, ancestry walk, ordered lock. */
int tmpfs_rename_lock_directory(struct vnode *, struct tmpfs_node *);
int tmpfs_rename_genealogy(struct tmpfs_node *, struct tmpfs_node *,
    struct tmpfs_node **);
int tmpfs_rename_lock(struct mount *, struct ucred *, int,
    struct vnode *, struct tmpfs_node *, struct componentname *, int,
    struct tmpfs_dirent **, struct vnode **,
    struct vnode *, struct tmpfs_node *, struct componentname *, int,
    struct tmpfs_dirent **, struct vnode **);

/* Directory entry shuffling and removal of a source entry. */
void tmpfs_rename_attachdetach(struct tmpfs_mount *,
    struct vnode *, struct tmpfs_dirent *, struct vnode *,
    struct vnode *, struct tmpfs_dirent *, struct vnode *);
int tmpfs_do_remove(struct tmpfs_mount *, struct vnode *,
    struct tmpfs_node *, struct tmpfs_dirent *, struct vnode *, struct ucred *);

/*
 * Authorization checks.  tmpfs_sane_rename() calls the two rename checks
 * before committing; the remove/sticky helpers are defined further down
 * (NOTE(review): presumably used by those checks and by tmpfs_do_remove()
 * -- confirm against their definitions).
 */
int tmpfs_rename_check_possible(struct tmpfs_node *,
    struct tmpfs_node *, struct tmpfs_node *, struct tmpfs_node *);
int tmpfs_rename_check_permitted(struct ucred *,
    struct tmpfs_node *, struct tmpfs_node *,
    struct tmpfs_node *, struct tmpfs_node *);
int tmpfs_remove_check_possible(struct tmpfs_node *,
    struct tmpfs_node *);
int tmpfs_remove_check_permitted(struct ucred *,
    struct tmpfs_node *, struct tmpfs_node *);
int tmpfs_check_sticky(struct ucred *,
    struct tmpfs_node *, struct tmpfs_node *);

/* Name cache purge after a successful rename, and early-error bail-out. */
void tmpfs_rename_cache_purge(struct vnode *, struct vnode *, struct vnode *,
    struct vnode *);
void tmpfs_rename_abort(void *);
1289 
1290 int
1291 tmpfs_rename(void *v)
1292 {
1293 	struct vop_rename_args  /* {
1294 		struct vnode		*a_fdvp;
1295 		struct vnode		*a_fvp;
1296 		struct componentname	*a_fcnp;
1297 		struct vnode		*a_tdvp;
1298 		struct vnode		*a_tvp;
1299 		struct componentname	*a_tcnp;
1300 	} */ *ap = v;
1301 	struct vnode *fdvp = ap->a_fdvp;
1302 	struct vnode *fvp = ap->a_fvp;
1303 	struct componentname *fcnp = ap->a_fcnp;
1304 	struct vnode *tdvp = ap->a_tdvp;
1305 	struct vnode *tvp = ap->a_tvp;
1306 	struct componentname *tcnp = ap->a_tcnp;
1307 	struct ucred *cred;
1308 	int error;
1309 
1310 	KASSERT(fdvp != NULL);
1311 	KASSERT(fvp != NULL);
1312 	KASSERT(fcnp != NULL);
1313 	KASSERT(fcnp->cn_nameptr != NULL);
1314 	KASSERT(tdvp != NULL);
1315 	KASSERT(tcnp != NULL);
1316 	KASSERT(fcnp->cn_nameptr != NULL);
1317 	/* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
1318 	/* KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
1319 	KASSERT(fdvp->v_type == VDIR);
1320 	KASSERT(tdvp->v_type == VDIR);
1321 	KASSERT(fcnp->cn_flags & HASBUF);
1322 	KASSERT(tcnp->cn_flags & HASBUF);
1323 
1324 	cred = fcnp->cn_cred;
1325 	KASSERT(tcnp->cn_cred == cred);
1326 
1327 	/*
1328 	 * Check for cross-device rename.
1329 	 */
1330 	if (fvp->v_mount != tdvp->v_mount ||
1331 	    (tvp != NULL && (fvp->v_mount != tvp->v_mount))) {
1332 	    	tmpfs_rename_abort(v);
1333 		return EXDEV;
1334 	}
1335 
1336 	/*
1337 	 * Can't check the locks on these until we know they're on
1338 	 * the same FS, as not all FS do locking the same way.
1339 	 */
1340 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
1341 	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
1342 
1343 	/*
1344 	 * Reject renaming '.' and '..'.
1345 	 */
1346 	if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
1347 	    (fcnp->cn_namelen == 2 && fcnp->cn_nameptr[0] == '.' &&
1348 	     fcnp->cn_nameptr[1] == '.')) {
1349 	     	tmpfs_rename_abort(v);
1350 	     	return EINVAL;
1351 	}
1352 
1353 	/*
1354 	 * Sanitize our world from the VFS insanity.  Unlock the target
1355 	 * directory and node, which are locked.  Release the children,
1356 	 * which are referenced.  Check for rename("x", "y/."), which
1357 	 * it is our responsibility to reject, not the caller's.  (But
1358 	 * the caller does reject rename("x/.", "y").  Go figure.)
1359 	 */
1360 
1361 	VOP_UNLOCK(tdvp, 0, curproc);
1362 	if ((tvp != NULL) && (tvp != tdvp))
1363 		VOP_UNLOCK(tvp, 0, curproc);
1364 
1365 	vrele(fvp);
1366 	if (tvp != NULL)
1367 		vrele(tvp);
1368 
1369 	if (tvp == tdvp) {
1370 		error = EINVAL;
1371 		goto out;
1372 	}
1373 
1374 	error = tmpfs_sane_rename(fdvp, fcnp, tdvp, tcnp, cred, 0);
1375 
1376 out:	/*
1377 	 * All done, whether with success or failure.  Release the
1378 	 * directory nodes now, as the caller expects from the VFS
1379 	 * protocol.
1380 	 */
1381 	vrele(fdvp);
1382 	vrele(tdvp);
1383 
1384 	return error;
1385 }
1386 
/*
 * tmpfs_sane_rename: rename routine, the hairiest system call, with
 * the sane API.
 *
 * Arguments:
 *
 * . fdvp (from directory vnode),
 * . fcnp (from component name),
 * . tdvp (to directory vnode),
 * . tcnp (to component name),
 * . cred (credentials used for the access and permission checks), and
 * . posixly_correct (if nonzero, renaming one link of a file onto
 *   another link of the same file leaves both entries in place; if
 *   zero, the source entry is removed in that case).
 *
 * fdvp and tdvp must be referenced and unlocked.
 *
 * Returns 0 on success.  Errors produced here are ENOSPC (no memory for
 * the new name), EINVAL (source and target are the same directory
 * object), ENOTEMPTY, ENOTDIR and EISDIR (incompatible or non-empty
 * target); errors from tmpfs_rename_enter(), tmpfs_do_remove() and the
 * two tmpfs_rename_check_*() routines are passed through.
 */
int
tmpfs_sane_rename(struct vnode *fdvp, struct componentname *fcnp,
    struct vnode *tdvp, struct componentname *tcnp, struct ucred *cred,
    int posixly_correct)
{
	struct mount *mount;
	struct tmpfs_mount *tmpfs;
	struct tmpfs_node *fdnode, *tdnode;
	struct tmpfs_dirent *fde, *tde;
	struct vnode *fvp, *tvp;
	char *newname;
	int error;

	KASSERT(fdvp != NULL);
	KASSERT(fcnp != NULL);
	KASSERT(tdvp != NULL);
	KASSERT(tcnp != NULL);
	/* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
	/* KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
	KASSERT(fdvp->v_type == VDIR);
	KASSERT(tdvp->v_type == VDIR);
	KASSERT(fdvp->v_mount == tdvp->v_mount);
	/* Neither name may be `.' or `..'. */
	KASSERT((fcnp->cn_flags & ISDOTDOT) == 0);
	KASSERT((tcnp->cn_flags & ISDOTDOT) == 0);
	KASSERT((fcnp->cn_namelen != 1) || (fcnp->cn_nameptr[0] != '.'));
	KASSERT((tcnp->cn_namelen != 1) || (tcnp->cn_nameptr[0] != '.'));
	KASSERT((fcnp->cn_namelen != 2) || (fcnp->cn_nameptr[0] != '.') ||
	    (fcnp->cn_nameptr[1] != '.'));
	KASSERT((tcnp->cn_namelen != 2) || (tcnp->cn_nameptr[0] != '.') ||
	    (tcnp->cn_nameptr[1] != '.'));

	/*
	 * Pull out the tmpfs data structures.
	 */
	fdnode = VP_TO_TMPFS_NODE(fdvp);
	tdnode = VP_TO_TMPFS_NODE(tdvp);
	KASSERT(fdnode != NULL);
	KASSERT(tdnode != NULL);
	KASSERT(fdnode->tn_vnode == fdvp);
	KASSERT(tdnode->tn_vnode == tdvp);
	KASSERT(fdnode->tn_type == VDIR);
	KASSERT(tdnode->tn_type == VDIR);

	mount = fdvp->v_mount;
	KASSERT(mount != NULL);
	KASSERT(mount == tdvp->v_mount);
	/* XXX How can we be sure this stays true?  (Not that you're
	 * likely to mount a tmpfs read-only...)  */
	KASSERT((mount->mnt_flag & MNT_RDONLY) == 0);
	tmpfs = VFS_TO_TMPFS(mount);
	KASSERT(tmpfs != NULL);

	/*
	 * Decide whether we need a new name, and allocate memory for
	 * it if so.  Do this before locking anything or taking
	 * destructive actions so that we can back out safely and sleep
	 * safely.  XXX Is sleeping an issue here?  Can this just be
	 * moved into tmpfs_rename_attachdetach?
	 */
	if (tmpfs_strname_neqlen(fcnp, tcnp)) {
		newname = tmpfs_strname_alloc(tmpfs, tcnp->cn_namelen);
		if (newname == NULL) {
			error = ENOSPC;
			goto out_unlocked;
		}
	} else {
		/* NULL doubles as the "name is unchanged" marker below. */
		newname = NULL;
	}

	/*
	 * Lock and look up everything.  GCC is not very clever.
	 * (NOTE(review): the NULL initializations presumably only quiet
	 * uninitialized-variable warnings; tmpfs_rename_enter() stores all
	 * four results on success.)
	 */
	fde = tde = NULL;
	fvp = tvp = NULL;
	error = tmpfs_rename_enter(mount, tmpfs, cred,
	    fdvp, fdnode, fcnp, &fde, &fvp,
	    tdvp, tdnode, tcnp, &tde, &tvp);
	if (error)
		goto out_unlocked;

	/*
	 * Check that everything is locked and looks right.
	 */
	KASSERT(fde != NULL);
	KASSERT(fvp != NULL);
	KASSERT(fde->td_node != NULL);
	KASSERT(fde->td_node->tn_vnode == fvp);
	KASSERT(fde->td_node->tn_type == fvp->v_type);
	KASSERT((tde == NULL) == (tvp == NULL));
	KASSERT((tde == NULL) || (tde->td_node != NULL));
	KASSERT((tde == NULL) || (tde->td_node->tn_vnode == tvp));
	KASSERT((tde == NULL) || (tde->td_node->tn_type == tvp->v_type));
	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));

	/*
	 * If the source and destination are the same object, we need
	 * only at most delete the source entry.
	 */
	if (fvp == tvp) {
		KASSERT(tvp != NULL);
		if (fde->td_node->tn_type == VDIR) {
			/* XXX How can this possibly happen?  */
			error = EINVAL;
			goto out_locked;
		}
		/*
		 * Two different entries for one file: drop the source
		 * entry unless POSIX behaviour was requested.  The very
		 * same entry (fde == tde) is left untouched.
		 */
		if (!posixly_correct && (fde != tde)) {
			/* XXX Doesn't work because of locking.
			 * error = VOP_REMOVE(fdvp, fvp);
			 */
			error = tmpfs_do_remove(tmpfs, fdvp, fdnode, fde, fvp,
			    cred);
			if (error)
				goto out_locked;
		}
		goto success;
	}
	KASSERT(fde != tde);
	KASSERT(fvp != tvp);

	/*
	 * If the target exists, refuse to rename a directory over a
	 * non-directory or vice versa, or to clobber a non-empty
	 * directory.
	 */
	if (tvp != NULL) {
		KASSERT(tde != NULL);
		KASSERT(tde->td_node != NULL);
		if (fvp->v_type == VDIR && tvp->v_type == VDIR)
			error = ((tde->td_node->tn_size > 0)? ENOTEMPTY : 0);
		else if (fvp->v_type == VDIR && tvp->v_type != VDIR)
			error = ENOTDIR;
		else if (fvp->v_type != VDIR && tvp->v_type == VDIR)
			error = EISDIR;
		else
			error = 0;
		if (error)
			goto out_locked;
		KASSERT((fvp->v_type == VDIR) == (tvp->v_type == VDIR));
	}

	/*
	 * Authorize the rename.
	 */
	error = tmpfs_rename_check_possible(fdnode, fde->td_node,
	    tdnode, (tde? tde->td_node : NULL));
	if (error)
		goto out_locked;
	error = tmpfs_rename_check_permitted(cred, fdnode, fde->td_node,
	    tdnode, (tde? tde->td_node : NULL));
	if (error)
		goto out_locked;

	/*
	 * Everything is hunky-dory.  Shuffle the directory entries.
	 */
	tmpfs_rename_attachdetach(tmpfs, fdvp, fde, fvp, tdvp, tde, tvp);

	/*
	 * Update the directory entry's name if necessary, and flag
	 * metadata updates.  A memory allocation failure here is not
	 * OK because we've already committed some changes that we
	 * can't back out at this point, and we have things locked so
	 * we can't sleep, hence the early allocation above.
	 */
	if (newname != NULL) {
		KASSERT(tcnp->cn_namelen <= TMPFS_MAXNAMLEN);

		/* Release the old name using its old length first. */
		tmpfs_strname_free(tmpfs, fde->td_name, fde->td_namelen);
		fde->td_namelen = (uint16_t)tcnp->cn_namelen;
		(void)memcpy(newname, tcnp->cn_nameptr, tcnp->cn_namelen);
		/* Commit newname and don't free it on the way out.  */
		fde->td_name = newname;
		newname = NULL;

		tmpfs_update(fde->td_node, TMPFS_NODE_CHANGED);
		tmpfs_update(tdnode, TMPFS_NODE_MODIFIED);
	}

success:
	/* Notify kqueue watchers of the source and purge cached names. */
	VN_KNOTE(fvp, NOTE_RENAME);
	tmpfs_rename_cache_purge(fdvp, fvp, tdvp, tvp);
	error = 0;

out_locked:
	/* Unlock all four vnodes and release fvp/tvp. */
	tmpfs_rename_exit(tmpfs, fdvp, fvp, tdvp, tvp);

out_unlocked:
	/* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
	/* KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
	/* KASSERT((fvp == NULL) || (VOP_ISLOCKED(fvp) != LK_EXCLUSIVE)); */
	/* KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */

	/* Still non-NULL only if the preallocated name was never committed. */
	if (newname != NULL)
		tmpfs_strname_free(tmpfs, newname, tcnp->cn_namelen);

	return error;
}
1600 
1601 /*
1602  * Look up fcnp in fdnode/fdvp and store its directory entry in fde_ret
1603  * and the associated vnode in fvp_ret; fail if not found.  Look up
1604  * tcnp in tdnode/tdvp and store its directory entry in tde_ret and the
1605  * associated vnode in tvp_ret; store null instead if not found.  Fail
1606  * if anything has been mounted on any of the nodes involved.
1607  *
1608  * fdvp and tdvp must be referenced.
1609  *
1610  * On entry, nothing is locked.
1611  *
1612  * On success, everything is locked, and *fvp_ret, and *tvp_ret if
1613  * nonnull, are referenced.  The only pairs of vnodes that may be
1614  * identical are {fdvp, tdvp} and {fvp, tvp}.
1615  *
1616  * On failure, everything remains as was.
1617  *
1618  * Locking everything including the source and target nodes is
1619  * necessary to make sure that, e.g., link count updates are OK.  The
1620  * locking order is, in general, ancestor-first, matching the order you
1621  * need to use to look up a descendant anyway.
1622  */
1623 int
1624 tmpfs_rename_enter(struct mount *mount, struct tmpfs_mount *tmpfs,
1625     struct ucred *cred,
1626     struct vnode *fdvp, struct tmpfs_node *fdnode, struct componentname *fcnp,
1627     struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
1628     struct vnode *tdvp, struct tmpfs_node *tdnode, struct componentname *tcnp,
1629     struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret)
1630 {
1631 	int error;
1632 
1633 	KASSERT(mount != NULL);
1634 	KASSERT(tmpfs != NULL);
1635 	KASSERT(fdvp != NULL);
1636 	KASSERT(fdnode != NULL);
1637 	KASSERT(fcnp != NULL);
1638 	KASSERT(fde_ret != NULL);
1639 	KASSERT(fvp_ret != NULL);
1640 	KASSERT(tdvp != NULL);
1641 	KASSERT(tdnode != NULL);
1642 	KASSERT(tcnp != NULL);
1643 	KASSERT(tde_ret != NULL);
1644 	KASSERT(tvp_ret != NULL);
1645 	KASSERT(fdnode->tn_vnode == fdvp);
1646 	KASSERT(tdnode->tn_vnode == tdvp);
1647 	KASSERT(fdnode->tn_type == VDIR);
1648 	KASSERT(tdnode->tn_type == VDIR);
1649 
1650 	if (fdvp == tdvp) {
1651 		KASSERT(fdnode == tdnode);
1652 		error = tmpfs_rename_enter_common(mount, tmpfs, cred, fdvp,
1653 		    fdnode, fcnp, fde_ret, fvp_ret, tcnp, tde_ret, tvp_ret);
1654 	} else {
1655 		KASSERT(fdnode != tdnode);
1656 		error = tmpfs_rename_enter_separate(mount, tmpfs, cred,
1657 		    fdvp, fdnode, fcnp, fde_ret, fvp_ret,
1658 		    tdvp, tdnode, tcnp, tde_ret, tvp_ret);
1659 	}
1660 
1661 	if (error)
1662 		return error;
1663 
1664 	KASSERT(*fde_ret != NULL);
1665 	KASSERT(*fvp_ret != NULL);
1666 	KASSERT((*tde_ret == NULL) == (*tvp_ret == NULL));
1667 	KASSERT((*tde_ret == NULL) || ((*tde_ret)->td_node != NULL));
1668 	KASSERT((*tde_ret == NULL) ||
1669 	    ((*tde_ret)->td_node->tn_vnode == *tvp_ret));
1670 	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
1671 	KASSERT(VOP_ISLOCKED(*fvp_ret) == LK_EXCLUSIVE);
1672 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
1673 	KASSERT((*tvp_ret == NULL) ||
1674 	    (VOP_ISLOCKED(*tvp_ret) == LK_EXCLUSIVE));
1675 	KASSERT(*fvp_ret != fdvp);
1676 	KASSERT(*fvp_ret != tdvp);
1677 	KASSERT(*tvp_ret != fdvp);
1678 	KASSERT(*tvp_ret != tdvp);
1679 	return 0;
1680 }
1681 
/*
 * Lock and look up with a common source/target directory.
 *
 * dvp/dnode is the single directory holding both names.  On success the
 * directory is locked, *fde_ret/*fvp_ret describe the source entry and
 * its locked, referenced vnode, and *tde_ret/*tvp_ret describe the target
 * entry and vnode, or are both null if the target name does not exist.
 * If both names refer to the same node, *tvp_ret == *fvp_ret with one
 * extra reference taken.
 *
 * Returns 0 on success; ENOENT if the directory has been removed or the
 * source name is missing; EBUSY if the directory, the source or the
 * target is a mount point; or the error from VOP_ACCESS() or
 * tmpfs_vnode_get().  On failure everything acquired here is released
 * again and the output parameters are not written.
 */
int
tmpfs_rename_enter_common(struct mount *mount, struct tmpfs_mount *tmpfs,
    struct ucred *cred,
    struct vnode *dvp, struct tmpfs_node *dnode,
    struct componentname *fcnp,
    struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
    struct componentname *tcnp,
    struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret)
{
	struct tmpfs_dirent *fde, *tde;
	struct vnode *fvp, *tvp;
	int error;

	error = tmpfs_rename_lock_directory(dvp, dnode);
	if (error)
		goto fail0;

	/* Did we lose a race with mount?  */
	if (dvp->v_mountedhere != NULL) {
		error = EBUSY;
		goto fail1;
	}

	/* Make sure the caller may read the directory.  */
	error = VOP_ACCESS(dvp, VEXEC, cred, curproc);
	if (error)
		goto fail1;

	/*
	 * The order in which we lock the source and target nodes is
	 * irrelevant because there can only be one rename on this
	 * directory in flight at a time, and we have it locked.
	 */

	fde = tmpfs_dir_lookup(dnode, fcnp);
	if (fde == NULL) {
		error = ENOENT;
		goto fail1;
	}

	KASSERT(fde->td_node != NULL);
	/* We ruled out `.' earlier.  */
	KASSERT(fde->td_node != dnode);
	/* We ruled out `..' earlier.  */
	KASSERT(fde->td_node != dnode->tn_spec.tn_dir.tn_parent);
	/*
	 * NOTE(review): tn_nlock is taken here but never released in this
	 * function; presumably tmpfs_vnode_get() expects it held and drops
	 * it itself -- confirm against its definition.
	 */
	rw_enter_write(&fde->td_node->tn_nlock);
	error = tmpfs_vnode_get(mount, fde->td_node, &fvp);
	if (error)
		goto fail1;
	KASSERT(fvp != NULL);
	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
	KASSERT(fvp != dvp);
	KASSERT(fvp->v_mount == mount);

	/* Refuse to rename a mount point.  */
	if ((fvp->v_type == VDIR) && (fvp->v_mountedhere != NULL)) {
		error = EBUSY;
		goto fail2;
	}

	tde = tmpfs_dir_lookup(dnode, tcnp);
	if (tde == NULL) {
		/* No existing target: nothing more to lock. */
		tvp = NULL;
	} else {
		KASSERT(tde->td_node != NULL);
		/* We ruled out `.' earlier.  */
		KASSERT(tde->td_node != dnode);
		/* We ruled out `..' earlier.  */
		KASSERT(tde->td_node != dnode->tn_spec.tn_dir.tn_parent);
		if (tde->td_node != fde->td_node) {
			rw_enter_write(&tde->td_node->tn_nlock);
			error = tmpfs_vnode_get(mount, tde->td_node, &tvp);
			if (error)
				goto fail2;
			KASSERT(tvp->v_mount == mount);
			/* Refuse to rename over a mount point.  */
			if ((tvp->v_type == VDIR) &&
			    (tvp->v_mountedhere != NULL)) {
				error = EBUSY;
				goto fail3;
			}
		} else {
			/*
			 * Both names are links to one node: reuse the
			 * already locked vnode and take a second reference.
			 */
			tvp = fvp;
			vref(tvp);
		}
		KASSERT(tvp != NULL);
		KASSERT(VOP_ISLOCKED(tvp) == LK_EXCLUSIVE);
	}
	KASSERT(tvp != dvp);

	*fde_ret = fde;
	*fvp_ret = fvp;
	*tde_ret = tde;
	*tvp_ret = tvp;
	return 0;

	/*
	 * Unwind in reverse order of acquisition.  A target vnode distinct
	 * from the source is unlocked and released; a shared one only
	 * loses its extra reference.
	 */
fail3:	if (tvp != NULL) {
		if (tvp != fvp)
			vput(tvp);
		else
			vrele(tvp);
	}

fail2:	vput(fvp);
fail1:	VOP_UNLOCK(dvp, 0, curproc);
fail0:	return error;
}
1792 
1793 /*
1794  * Lock and look up with separate source and target directories.
1795  */
1796 int
1797 tmpfs_rename_enter_separate(struct mount *mount, struct tmpfs_mount *tmpfs,
1798     struct ucred *cred,
1799     struct vnode *fdvp, struct tmpfs_node *fdnode, struct componentname *fcnp,
1800     struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
1801     struct vnode *tdvp, struct tmpfs_node *tdnode, struct componentname *tcnp,
1802     struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret)
1803 {
1804 	struct tmpfs_node *intermediate_node;
1805 	struct tmpfs_dirent *fde, *tde;
1806 	struct vnode *fvp, *tvp;
1807 	int error;
1808 
1809 	KASSERT(fdvp != tdvp);
1810 	KASSERT(fdnode != tdnode);
1811 
1812 #if 0				/* XXX */
1813 	mutex_enter(&tmpfs->tm_rename_lock);
1814 #endif
1815 
1816 	error = tmpfs_rename_genealogy(fdnode, tdnode, &intermediate_node);
1817 	if (error)
1818 		goto fail;
1819 
1820 	/*
1821 	 * intermediate_node == NULL means fdnode is not an ancestor of
1822 	 * tdnode.
1823 	 */
1824 	if (intermediate_node == NULL)
1825 		error = tmpfs_rename_lock(mount, cred, ENOTEMPTY,
1826 		    tdvp, tdnode, tcnp, 1, &tde, &tvp,
1827 		    fdvp, fdnode, fcnp, 0, &fde, &fvp);
1828 	else
1829 		error = tmpfs_rename_lock(mount, cred, EINVAL,
1830 		    fdvp, fdnode, fcnp, 0, &fde, &fvp,
1831 		    tdvp, tdnode, tcnp, 1, &tde, &tvp);
1832 	if (error)
1833 		goto fail;
1834 
1835 	KASSERT(fde != NULL);
1836 	KASSERT(fde->td_node != NULL);
1837 
1838 	/*
1839 	 * Reject rename("foo/bar", "foo/bar/baz/quux/zot").
1840 	 */
1841 	if (fde->td_node == intermediate_node) {
1842 		tmpfs_rename_exit(tmpfs, fdvp, fvp, tdvp, tvp);
1843 		return EINVAL;
1844 	}
1845 
1846 	*fde_ret = fde;
1847 	*fvp_ret = fvp;
1848 	*tde_ret = tde;
1849 	*tvp_ret = tvp;
1850 	return 0;
1851 
1852 fail:
1853 #if 0				/* XXX */
1854 	mutex_exit(&tmpfs->tm_rename_lock);
1855 #endif
1856 	return error;
1857 }
1858 
1859 /*
1860  * Unlock everything we locked for rename.
1861  *
1862  * fdvp and tdvp must be referenced.
1863  *
1864  * On entry, everything is locked, and fvp and tvp referenced.
1865  *
1866  * On exit, everything is unlocked, and fvp and tvp are released.
1867  */
1868 void
1869 tmpfs_rename_exit(struct tmpfs_mount *tmpfs,
1870     struct vnode *fdvp, struct vnode *fvp,
1871     struct vnode *tdvp, struct vnode *tvp)
1872 {
1873 
1874 	KASSERT(tmpfs != NULL);
1875 	KASSERT(fdvp != NULL);
1876 	KASSERT(fvp != NULL);
1877 	KASSERT(fdvp != fvp);
1878 	KASSERT(fdvp != tvp);
1879 	KASSERT(tdvp != tvp);
1880 	KASSERT(tdvp != fvp);
1881 	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
1882 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
1883 	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
1884 	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
1885 
1886 	if (tvp != NULL) {
1887 		if (tvp != fvp)
1888 			vput(tvp);
1889 		else
1890 			vrele(tvp);
1891 	}
1892 	VOP_UNLOCK(tdvp, 0, curproc);
1893 	vput(fvp);
1894 	if (fdvp != tdvp)
1895 		VOP_UNLOCK(fdvp, 0, curproc);
1896 
1897 #if 0				/* XXX */
1898 	if (fdvp != tdvp)
1899 		mutex_exit(&tmpfs->tm_rename_lock);
1900 #endif
1901 }
1902 
1903 /*
1904  * Lock a directory, but fail if it has been rmdir'd.
1905  *
1906  * vp must be referenced.
1907  */
1908 int
1909 tmpfs_rename_lock_directory(struct vnode *vp, struct tmpfs_node *node)
1910 {
1911 
1912 	KASSERT(vp != NULL);
1913 	KASSERT(node != NULL);
1914 	KASSERT(node->tn_vnode == vp);
1915 	KASSERT(node->tn_type == VDIR);
1916 
1917 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curproc);
1918 	if (node->tn_spec.tn_dir.tn_parent == NULL) {
1919 		VOP_UNLOCK(vp, 0, curproc);
1920 		return ENOENT;
1921 	}
1922 
1923 	return 0;
1924 }
1925 
1926 /*
1927  * Analyze the genealogy of the source and target nodes.
1928  *
1929  * On success, stores in *intermediate_node_ret either the child of
1930  * fdnode of which tdnode is a descendant, or null if tdnode is not a
1931  * descendant of fdnode at all.
1932  *
1933  * fdnode and tdnode must be unlocked and referenced.  The file
1934  * system's rename lock must also be held, to exclude concurrent
1935  * changes to the file system's genealogy other than rmdir.
1936  *
1937  * XXX This causes an extra lock/unlock of tdnode in the case when
1938  * we're just about to lock it again before locking anything else.
1939  * However, changing that requires reorganizing the code to make it
1940  * even more horrifically obscure.
1941  */
1942 int
1943 tmpfs_rename_genealogy(struct tmpfs_node *fdnode, struct tmpfs_node *tdnode,
1944     struct tmpfs_node **intermediate_node_ret)
1945 {
1946 	struct tmpfs_node *node = tdnode, *parent;
1947 	int error;
1948 
1949 	KASSERT(fdnode != NULL);
1950 	KASSERT(tdnode != NULL);
1951 	KASSERT(fdnode != tdnode);
1952 	KASSERT(intermediate_node_ret != NULL);
1953 
1954 	KASSERT(fdnode->tn_vnode != NULL);
1955 	KASSERT(tdnode->tn_vnode != NULL);
1956 	KASSERT(fdnode->tn_type == VDIR);
1957 	KASSERT(tdnode->tn_type == VDIR);
1958 
1959 	/*
1960 	 * We need to provisionally lock tdnode->tn_vnode to keep rmdir
1961 	 * from deleting it -- or any ancestor -- at an inopportune
1962 	 * moment.
1963 	 */
1964 	error = tmpfs_rename_lock_directory(tdnode->tn_vnode, tdnode);
1965 	if (error)
1966 		return error;
1967 
1968 	for (;;) {
1969 		parent = node->tn_spec.tn_dir.tn_parent;
1970 		KASSERT(parent != NULL);
1971 		KASSERT(parent->tn_type == VDIR);
1972 
1973 		/* Did we hit the root without finding fdnode?  */
1974 		if (parent == node) {
1975 			*intermediate_node_ret = NULL;
1976 			break;
1977 		}
1978 
1979 		/* Did we find that fdnode is an ancestor?  */
1980 		if (parent == fdnode) {
1981 			*intermediate_node_ret = node;
1982 			break;
1983 		}
1984 
1985 		/* Neither -- keep ascending the family tree.  */
1986 		node = parent;
1987 	}
1988 
1989 	VOP_UNLOCK(tdnode->tn_vnode, 0, curproc);
1990 	return 0;
1991 }
1992 
/*
 * Lock directories a and b, which must be distinct, and look up and
 * lock nodes a and b.  Do a first and then b.  Directory b may not be
 * an ancestor of directory a, although directory a may be an ancestor
 * of directory b.  Fail with overlap_error if node a is directory b.
 * Neither componentname may be `.' or `..'.
 *
 * Exactly one of a_missing_ok and b_missing_ok is set (this is
 * asserted); it marks the side whose name is allowed not to exist.
 *
 * a_dvp and b_dvp must be referenced.
 *
 * On entry, a_dvp and b_dvp are unlocked.
 *
 * On success,
 * . a_dvp and b_dvp are locked,
 * . *a_dirent_ret is filled either with null (only if a_missing_ok) or
 *     with a directory entry whose node is locked and referenced,
 * . *a_vp_ret is filled either with null or with the corresponding
 *     vnode,
 * . *b_dirent_ret is filled either with null (only if b_missing_ok) or
 *     with a directory entry whose node is locked and referenced,
 * . *b_vp_ret is filled either with null or with the corresponding
 *     vnode, and
 * . the only pair of vnodes that may be identical is a_vp and b_vp.
 *
 * On failure, a_dvp and b_dvp are left unlocked, and *a_dirent_ret,
 * *a_vp_ret, *b_dirent_ret, and *b_vp_ret are left alone.
 *
 * Errors: ENOENT if a directory has been removed or a required name is
 * missing, EBUSY if a directory or node is a mount point, overlap_error
 * as described above, or the error from VOP_ACCESS() or
 * tmpfs_vnode_get().
 */
int
tmpfs_rename_lock(struct mount *mount, struct ucred *cred, int overlap_error,
    struct vnode *a_dvp, struct tmpfs_node *a_dnode,
    struct componentname *a_cnp, int a_missing_ok,
    struct tmpfs_dirent **a_dirent_ret, struct vnode **a_vp_ret,
    struct vnode *b_dvp, struct tmpfs_node *b_dnode,
    struct componentname *b_cnp, int b_missing_ok,
    struct tmpfs_dirent **b_dirent_ret, struct vnode **b_vp_ret)
{
	struct tmpfs_dirent *a_dirent, *b_dirent;
	struct vnode *a_vp, *b_vp;
	int error;

	KASSERT(a_dvp != NULL);
	KASSERT(a_dnode != NULL);
	KASSERT(a_cnp != NULL);
	KASSERT(a_dirent_ret != NULL);
	KASSERT(a_vp_ret != NULL);
	KASSERT(b_dvp != NULL);
	KASSERT(b_dnode != NULL);
	KASSERT(b_cnp != NULL);
	KASSERT(b_dirent_ret != NULL);
	KASSERT(b_vp_ret != NULL);
	KASSERT(a_dvp != b_dvp);
	KASSERT(a_dnode != b_dnode);
	KASSERT(a_dnode->tn_vnode == a_dvp);
	KASSERT(b_dnode->tn_vnode == b_dvp);
	KASSERT(a_dnode->tn_type == VDIR);
	KASSERT(b_dnode->tn_type == VDIR);
	KASSERT(a_missing_ok != b_missing_ok);

	/* Side a: directory first, then the node named in it. */
	error = tmpfs_rename_lock_directory(a_dvp, a_dnode);
	if (error)
		goto fail0;

	/* Did we lose a race with mount?  */
	if (a_dvp->v_mountedhere != NULL) {
		error = EBUSY;
		goto fail1;
	}

	/* Make sure the caller may read the directory.  */
	error = VOP_ACCESS(a_dvp, VEXEC, cred, curproc);
	if (error)
		goto fail1;

	a_dirent = tmpfs_dir_lookup(a_dnode, a_cnp);
	if (a_dirent != NULL) {
		KASSERT(a_dirent->td_node != NULL);
		/* We ruled out `.' earlier.  */
		KASSERT(a_dirent->td_node != a_dnode);
		/* We ruled out `..' earlier.  */
		KASSERT(a_dirent->td_node !=
		    a_dnode->tn_spec.tn_dir.tn_parent);
		/* Node a must not be directory b itself. */
		if (a_dirent->td_node == b_dnode) {
			error = overlap_error;
			goto fail1;
		}
		/*
		 * NOTE(review): tn_nlock is taken here but not released in
		 * this function; presumably tmpfs_vnode_get() drops it --
		 * confirm against its definition.
		 */
		rw_enter_write(&a_dirent->td_node->tn_nlock);
		error = tmpfs_vnode_get(mount, a_dirent->td_node, &a_vp);
		if (error)
			goto fail1;
		KASSERT(a_vp->v_mount == mount);
		/* Refuse to rename (over) a mount point.  */
		if ((a_vp->v_type == VDIR) && (a_vp->v_mountedhere != NULL)) {
			error = EBUSY;
			goto fail2;
		}
	} else if (!a_missing_ok) {
		error = ENOENT;
		goto fail1;
	} else {
		a_vp = NULL;
	}
	KASSERT(a_vp != a_dvp);
	KASSERT(a_vp != b_dvp);

	/* Side b: same sequence, with node a possibly already locked. */
	error = tmpfs_rename_lock_directory(b_dvp, b_dnode);
	if (error)
		goto fail2;

	/* Did we lose a race with mount?  */
	if (b_dvp->v_mountedhere != NULL) {
		error = EBUSY;
		goto fail3;
	}

	/* Make sure the caller may read the directory.  */
	error = VOP_ACCESS(b_dvp, VEXEC, cred, curproc);
	if (error)
		goto fail3;

	b_dirent = tmpfs_dir_lookup(b_dnode, b_cnp);
	if (b_dirent != NULL) {
		KASSERT(b_dirent->td_node != NULL);
		/* We ruled out `.' earlier.  */
		KASSERT(b_dirent->td_node != b_dnode);
		/* We ruled out `..' earlier.  */
		KASSERT(b_dirent->td_node !=
		    b_dnode->tn_spec.tn_dir.tn_parent);
		/* b is not an ancestor of a.  */
		KASSERT(b_dirent->td_node != a_dnode);
		/* But the source and target nodes might be the same.  */
		if ((a_dirent == NULL) ||
		    (a_dirent->td_node != b_dirent->td_node)) {
			rw_enter_write(&b_dirent->td_node->tn_nlock);
			error = tmpfs_vnode_get(mount, b_dirent->td_node,
			    &b_vp);
			if (error)
				goto fail3;
			KASSERT(b_vp->v_mount == mount);
			KASSERT(a_vp != b_vp);
			/* Refuse to rename (over) a mount point.  */
			if ((b_vp->v_type == VDIR) &&
			    (b_vp->v_mountedhere != NULL)) {
				error = EBUSY;
				goto fail4;
			}
		} else {
			/*
			 * Same node on both sides: reuse the locked vnode
			 * and take a second reference on it.
			 */
			b_vp = a_vp;
			vref(b_vp);
		}
	} else if (!b_missing_ok) {
		error = ENOENT;
		goto fail3;
	} else {
		b_vp = NULL;
	}
	KASSERT(b_vp != a_dvp);
	KASSERT(b_vp != b_dvp);

	KASSERT(VOP_ISLOCKED(a_dvp) == LK_EXCLUSIVE);
	KASSERT(VOP_ISLOCKED(b_dvp) == LK_EXCLUSIVE);
	KASSERT(a_missing_ok || (a_dirent != NULL));
	KASSERT(a_missing_ok || (a_dirent->td_node != NULL));
	KASSERT(b_missing_ok || (b_dirent != NULL));
	KASSERT(b_missing_ok || (b_dirent->td_node != NULL));
	KASSERT((a_dirent == NULL) || (a_dirent->td_node != NULL));
	KASSERT((a_dirent == NULL) || (a_dirent->td_node->tn_vnode == a_vp));
	KASSERT((b_dirent == NULL) || (b_dirent->td_node != NULL));
	KASSERT((b_dirent == NULL) || (b_dirent->td_node->tn_vnode == b_vp));
	KASSERT((a_vp == NULL) || (VOP_ISLOCKED(a_vp) == LK_EXCLUSIVE));
	KASSERT((b_vp == NULL) || (VOP_ISLOCKED(b_vp) == LK_EXCLUSIVE));

	*a_dirent_ret = a_dirent;
	*b_dirent_ret = b_dirent;
	*a_vp_ret = a_vp;
	*b_vp_ret = b_vp;
	return 0;

	/*
	 * Unwind in reverse order of acquisition: node b, directory b,
	 * node a, directory a.
	 */
fail4:	if (b_vp != NULL) {
		KASSERT(VOP_ISLOCKED(b_vp) == LK_EXCLUSIVE);
		if (b_vp != a_vp)
			vput(b_vp);
		else
			vrele(a_vp);
	}

fail3:	KASSERT(VOP_ISLOCKED(b_dvp) == LK_EXCLUSIVE);
	VOP_UNLOCK(b_dvp, 0, curproc);

fail2:	if (a_vp != NULL) {
		KASSERT(VOP_ISLOCKED(a_vp) == LK_EXCLUSIVE);
		vput(a_vp);
	}

fail1:	KASSERT(VOP_ISLOCKED(a_dvp) == LK_EXCLUSIVE);
	VOP_UNLOCK(a_dvp, 0, curproc);

fail0:	/* KASSERT(VOP_ISLOCKED(a_dvp) != LK_EXCLUSIVE); */
	/* KASSERT(VOP_ISLOCKED(b_dvp) != LK_EXCLUSIVE); */
	/* KASSERT((a_vp == NULL) || (VOP_ISLOCKED(a_vp) != LK_EXCLUSIVE)); */
	/* KASSERT((b_vp == NULL) || (VOP_ISLOCKED(b_vp) != LK_EXCLUSIVE)); */
	return error;
}
2193 
2194 /*
2195  * Shuffle the directory entries to move fvp from the directory fdvp
2196  * into the directory tdvp.  fde is fvp's directory entry in fdvp.  If
2197  * we are overwriting a target node, it is tvp, and tde is its
2198  * directory entry in tdvp.
2199  *
2200  * fdvp, fvp, tdvp, and tvp must all be locked and referenced.
2201  */
2202 void
2203 tmpfs_rename_attachdetach(struct tmpfs_mount *tmpfs,
2204     struct vnode *fdvp, struct tmpfs_dirent *fde, struct vnode *fvp,
2205     struct vnode *tdvp, struct tmpfs_dirent *tde, struct vnode *tvp)
2206 {
2207 
2208 	KASSERT(tmpfs != NULL);
2209 	KASSERT(fdvp != NULL);
2210 	KASSERT(fde != NULL);
2211 	KASSERT(fvp != NULL);
2212 	KASSERT(tdvp != NULL);
2213 	KASSERT(fde->td_node != NULL);
2214 	KASSERT(fde->td_node->tn_vnode == fvp);
2215 	KASSERT((tde == NULL) == (tvp == NULL));
2216 	KASSERT((tde == NULL) || (tde->td_node != NULL));
2217 	KASSERT((tde == NULL) || (tde->td_node->tn_vnode == tvp));
2218 	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
2219 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
2220 	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
2221 	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
2222 
2223 	/*
2224 	 * If we are moving from one directory to another, detach the
2225 	 * source entry and reattach it to the target directory.
2226 	 */
2227 	if (fdvp != tdvp) {
2228 		/* tmpfs_dir_detach clobbers fde->td_node, so save it.  */
2229 		struct tmpfs_node *fnode = fde->td_node;
2230 		tmpfs_node_t *fdnode = VP_TO_TMPFS_DIR(fdvp);
2231 		tmpfs_node_t *tdnode = VP_TO_TMPFS_DIR(tdvp);
2232 		tmpfs_dir_detach(fdnode, fde);
2233 		tmpfs_dir_attach(tdnode, fde, fnode);
2234 	} else if (tvp == NULL) {
2235 		/*
2236 		 * We are changing the directory.  tmpfs_dir_attach and
2237 		 * tmpfs_dir_detach note the events for us, but for
2238 		 * this case we don't call them, so we must note the
2239 		 * event explicitly.
2240 		 */
2241 		VN_KNOTE(fdvp, NOTE_WRITE);
2242 	}
2243 
2244 	/*
2245 	 * If we are replacing an existing target entry, delete it.
2246 	 */
2247 	if (tde != NULL) {
2248 		tmpfs_node_t *tdnode = VP_TO_TMPFS_DIR(tdvp);
2249 		KASSERT(tvp != NULL);
2250 		KASSERT(tde->td_node != NULL);
2251 		KASSERT((fvp->v_type == VDIR) == (tvp->v_type == VDIR));
2252 		if (tde->td_node->tn_type == VDIR) {
2253 			KASSERT(tde->td_node->tn_size == 0);
2254 			KASSERT(tde->td_node->tn_links == 2);
2255 			/* Decrement the extra link count for `.' so
2256 			 * the vnode will be recycled when released.  */
2257 			tde->td_node->tn_links--;
2258 		}
2259 		tmpfs_dir_detach(tdnode, tde);
2260 		tmpfs_free_dirent(tmpfs, tde);
2261 	}
2262 }
2263 
2264 /*
2265  * Remove the entry de for the non-directory vp from the directory dvp.
2266  *
2267  * Everything must be locked and referenced.
2268  */
2269 int
2270 tmpfs_do_remove(struct tmpfs_mount *tmpfs, struct vnode *dvp,
2271     struct tmpfs_node *dnode, struct tmpfs_dirent *de, struct vnode *vp,
2272     struct ucred *cred)
2273 {
2274 	int error;
2275 
2276 	KASSERT(tmpfs != NULL);
2277 	KASSERT(dvp != NULL);
2278 	KASSERT(dnode != NULL);
2279 	KASSERT(de != NULL);
2280 	KASSERT(vp != NULL);
2281 	KASSERT(dnode->tn_vnode == dvp);
2282 	KASSERT(de->td_node != NULL);
2283 	KASSERT(de->td_node->tn_vnode == vp);
2284 	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
2285 	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
2286 
2287 	error = tmpfs_remove_check_possible(dnode, de->td_node);
2288 	if (error)
2289 		return error;
2290 
2291 	error = tmpfs_remove_check_permitted(cred, dnode, de->td_node);
2292 	if (error)
2293 		return error;
2294 
2295 	/*
2296 	 * If not root and directory is sticky, check for permission on
2297 	 * directory or on file. This implements append-only directories.
2298 	 */
2299 	if ((dnode->tn_mode & S_ISTXT) != 0)
2300 		if (cred->cr_uid != 0 && cred->cr_uid != dnode->tn_uid &&
2301 		    cred->cr_uid != de->td_node->tn_uid)
2302 			return EPERM;
2303 
2304 	tmpfs_dir_detach(dnode, de);
2305 	tmpfs_free_dirent(tmpfs, de);
2306 
2307 	return 0;
2308 }
2309 
2310 /*
2311  * Check whether a rename is possible independent of credentials.
2312  *
2313  * Everything must be locked and referenced.
2314  */
2315 int
2316 tmpfs_rename_check_possible(
2317     struct tmpfs_node *fdnode, struct tmpfs_node *fnode,
2318     struct tmpfs_node *tdnode, struct tmpfs_node *tnode)
2319 {
2320 
2321 	KASSERT(fdnode != NULL);
2322 	KASSERT(fnode != NULL);
2323 	KASSERT(tdnode != NULL);
2324 	KASSERT(fdnode != fnode);
2325 	KASSERT(tdnode != tnode);
2326 	KASSERT(fnode != tnode);
2327 	KASSERT(fdnode->tn_vnode != NULL);
2328 	KASSERT(fnode->tn_vnode != NULL);
2329 	KASSERT(tdnode->tn_vnode != NULL);
2330 	KASSERT((tnode == NULL) || (tnode->tn_vnode != NULL));
2331 	KASSERT(VOP_ISLOCKED(fdnode->tn_vnode) == LK_EXCLUSIVE);
2332 	KASSERT(VOP_ISLOCKED(fnode->tn_vnode) == LK_EXCLUSIVE);
2333 	KASSERT(VOP_ISLOCKED(tdnode->tn_vnode) == LK_EXCLUSIVE);
2334 	KASSERT((tnode == NULL) ||
2335 	    (VOP_ISLOCKED(tnode->tn_vnode) == LK_EXCLUSIVE));
2336 
2337 	/*
2338 	 * If fdnode is immutable, we can't write to it.  If fdnode is
2339 	 * append-only, the only change we can make is to add entries
2340 	 * to it.  If fnode is immutable, we can't change the links to
2341 	 * it.  If fnode is append-only...well, this is what UFS does.
2342 	 */
2343 	if ((fdnode->tn_flags | fnode->tn_flags) & (IMMUTABLE | APPEND))
2344 		return EPERM;
2345 
2346 	/*
2347 	 * If tdnode is immutable, we can't write to it.  If tdnode is
2348 	 * append-only, we can add entries, but we can't change
2349 	 * existing entries.
2350 	 */
2351 	if (tdnode->tn_flags & (IMMUTABLE | (tnode? APPEND : 0)))
2352 		return EPERM;
2353 
2354 	/*
2355 	 * If tnode is immutable, we can't replace links to it.  If
2356 	 * tnode is append-only...well, this is what UFS does.
2357 	 */
2358 	if (tnode != NULL) {
2359 		KASSERT(tnode != NULL);
2360 		if ((tnode->tn_flags & (IMMUTABLE | APPEND)) != 0)
2361 			return EPERM;
2362 	}
2363 
2364 	return 0;
2365 }
2366 
2367 /*
2368  * Check whether a rename is permitted given our credentials.
2369  *
2370  * Everything must be locked and referenced.
2371  */
2372 int
2373 tmpfs_rename_check_permitted(struct ucred *cred,
2374     struct tmpfs_node *fdnode, struct tmpfs_node *fnode,
2375     struct tmpfs_node *tdnode, struct tmpfs_node *tnode)
2376 {
2377 	int error;
2378 
2379 	KASSERT(fdnode != NULL);
2380 	KASSERT(fnode != NULL);
2381 	KASSERT(tdnode != NULL);
2382 	KASSERT(fdnode != fnode);
2383 	KASSERT(tdnode != tnode);
2384 	KASSERT(fnode != tnode);
2385 	KASSERT(fdnode->tn_vnode != NULL);
2386 	KASSERT(fnode->tn_vnode != NULL);
2387 	KASSERT(tdnode->tn_vnode != NULL);
2388 	KASSERT((tnode == NULL) || (tnode->tn_vnode != NULL));
2389 	KASSERT(VOP_ISLOCKED(fdnode->tn_vnode) == LK_EXCLUSIVE);
2390 	KASSERT(VOP_ISLOCKED(fnode->tn_vnode) == LK_EXCLUSIVE);
2391 	KASSERT(VOP_ISLOCKED(tdnode->tn_vnode) == LK_EXCLUSIVE);
2392 	KASSERT((tnode == NULL) ||
2393 	    (VOP_ISLOCKED(tnode->tn_vnode) == LK_EXCLUSIVE));
2394 
2395 	/*
2396 	 * We need to remove or change an entry in the source directory.
2397 	 */
2398 	error = VOP_ACCESS(fdnode->tn_vnode, VWRITE, cred, curproc);
2399 	if (error)
2400 		return error;
2401 
2402 	/*
2403 	 * If we are changing directories, then we need to write to the
2404 	 * target directory to add or change an entry.  Also, if fnode
2405 	 * is a directory, we need to write to it to change its `..'
2406 	 * entry.
2407 	 */
2408 	if (fdnode != tdnode) {
2409 		error = VOP_ACCESS(tdnode->tn_vnode, VWRITE, cred, curproc);
2410 		if (error)
2411 			return error;
2412 		if (fnode->tn_type == VDIR) {
2413 			error = VOP_ACCESS(fnode->tn_vnode, VWRITE, cred,
2414 			    curproc);
2415 			if (error)
2416 				return error;
2417 		}
2418 	}
2419 
2420 	error = tmpfs_check_sticky(cred, fdnode, fnode);
2421 	if (error)
2422 		return error;
2423 
2424 	if (TMPFS_DIRSEQ_FULL(tdnode))
2425 		return (ENOSPC);
2426 
2427 	error = tmpfs_check_sticky(cred, tdnode, tnode);
2428 	if (error)
2429 		return error;
2430 
2431 	return 0;
2432 }
2433 
2434 /*
2435  * Check whether removing node's entry in dnode is possible independent
2436  * of credentials.
2437  *
2438  * Everything must be locked and referenced.
2439  */
2440 int
2441 tmpfs_remove_check_possible(struct tmpfs_node *dnode, struct tmpfs_node *node)
2442 {
2443 
2444 	KASSERT(dnode != NULL);
2445 	KASSERT(dnode->tn_vnode != NULL);
2446 	KASSERT(node != NULL);
2447 	KASSERT(dnode != node);
2448 	KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE);
2449 	KASSERT(VOP_ISLOCKED(node->tn_vnode) == LK_EXCLUSIVE);
2450 
2451 	/*
2452 	 * We want to delete the entry.  If dnode is immutable, we
2453 	 * can't write to it to delete the entry.  If dnode is
2454 	 * append-only, the only change we can make is to add entries,
2455 	 * so we can't delete entries.  If node is immutable, we can't
2456 	 * change the links to it, so we can't delete the entry.  If
2457 	 * node is append-only...well, this is what UFS does.
2458 	 */
2459 	if ((dnode->tn_flags | node->tn_flags) & (IMMUTABLE | APPEND))
2460 		return EPERM;
2461 
2462 	return 0;
2463 }
2464 
2465 /*
2466  * Check whether removing node's entry in dnode is permitted given our
2467  * credentials.
2468  *
2469  * Everything must be locked and referenced.
2470  */
2471 int
2472 tmpfs_remove_check_permitted(struct ucred *cred,
2473     struct tmpfs_node *dnode, struct tmpfs_node *node)
2474 {
2475 	int error;
2476 
2477 	KASSERT(dnode != NULL);
2478 	KASSERT(dnode->tn_vnode != NULL);
2479 	KASSERT(node != NULL);
2480 	KASSERT(dnode != node);
2481 	KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE);
2482 	KASSERT(VOP_ISLOCKED(node->tn_vnode) == LK_EXCLUSIVE);
2483 
2484 	/*
2485 	 * Check whether we are permitted to write to the source
2486 	 * directory in order to delete an entry from it.
2487 	 */
2488 	error = VOP_ACCESS(dnode->tn_vnode, VWRITE, cred, curproc);
2489 	if (error)
2490 		return error;
2491 
2492 	error = tmpfs_check_sticky(cred, dnode, node);
2493 	if (error)
2494 		return error;
2495 
2496 	return 0;
2497 }
2498 
2499 /*
2500  * Check whether we may change an entry in a sticky directory.  If the
2501  * directory is sticky, the user must own either the directory or, if
2502  * it exists, the node, in order to change the entry.
2503  *
2504  * Everything must be locked and referenced.
2505  */
2506 int
2507 tmpfs_check_sticky(struct ucred *cred,
2508     struct tmpfs_node *dnode, struct tmpfs_node *node)
2509 {
2510 
2511 	KASSERT(dnode != NULL);
2512 	KASSERT(dnode->tn_vnode != NULL);
2513 	KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE);
2514 	KASSERT((node == NULL) || (node->tn_vnode != NULL));
2515 	KASSERT((node == NULL) ||
2516 	    (VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE));
2517 
2518 	if (node == NULL)
2519 		return 0;
2520 
2521 	if (dnode->tn_mode & S_ISTXT) {
2522 		if (cred->cr_uid != 0 &&
2523 		    cred->cr_uid != dnode->tn_uid &&
2524 		    cred->cr_uid != node->tn_uid)
2525 			return EPERM;
2526 	}
2527 
2528 	return 0;
2529 }
2530 
2531 void
2532 tmpfs_rename_cache_purge(struct vnode *fdvp, struct vnode *fvp,
2533     struct vnode *tdvp, struct vnode *tvp)
2534 {
2535 
2536 	KASSERT(fdvp != NULL);
2537 	KASSERT(fvp != NULL);
2538 	KASSERT(tdvp != NULL);
2539 	KASSERT(fdvp != fvp);
2540 	KASSERT(fdvp != tvp);
2541 	KASSERT(tdvp != fvp);
2542 	KASSERT(tdvp != tvp);
2543 	KASSERT(fvp != tvp);
2544 	KASSERT(fdvp->v_type == VDIR);
2545 	KASSERT(tdvp->v_type == VDIR);
2546 
2547 	/*
2548 	 * XXX What actually needs to be purged?
2549 	 */
2550 
2551 	cache_purge(fdvp);
2552 
2553 	if (fvp->v_type == VDIR)
2554 		cache_purge(fvp);
2555 
2556 	if (tdvp != fdvp)
2557 		cache_purge(tdvp);
2558 
2559 	if ((tvp != NULL) && (tvp->v_type == VDIR))
2560 		cache_purge(tvp);
2561 }
2562 
2563 void
2564 tmpfs_rename_abort(void *v)
2565 {
2566 	struct vop_rename_args *ap = v;
2567 	struct vnode *fdvp = ap->a_fdvp;
2568 	struct vnode *fvp = ap->a_fvp;
2569 	struct componentname *fcnp = ap->a_fcnp;
2570 	struct vnode *tdvp = ap->a_tdvp;
2571 	struct vnode *tvp = ap->a_tvp;
2572 	struct componentname *tcnp = ap->a_tcnp;
2573 
2574 	VOP_ABORTOP(tdvp, tcnp);
2575 	if (tdvp == tvp)
2576 		vrele(tdvp);
2577 	else
2578 		vput(tdvp);
2579 	if (tvp != NULL)
2580 		vput(tvp);
2581 	VOP_ABORTOP(fdvp, fcnp);
2582 	vrele(fdvp);
2583 	vrele(fvp);
2584 }
2585 
2586 void filt_tmpfsdetach(struct knote *kn);
2587 int filt_tmpfsread(struct knote *kn, long hint);
2588 int filt_tmpfswrite(struct knote *kn, long hint);
2589 int filt_tmpfsvnode(struct knote *kn, long hint);
2590 
2591 struct filterops tmpfsread_filtops =
2592 	{ 1, NULL, filt_tmpfsdetach, filt_tmpfsread };
2593 struct filterops tmpfswrite_filtops =
2594 	{ 1, NULL, filt_tmpfsdetach, filt_tmpfswrite };
2595 struct filterops tmpfsvnode_filtops =
2596 	{ 1, NULL, filt_tmpfsdetach, filt_tmpfsvnode };
2597 
2598 int
2599 tmpfs_kqfilter(void *v)
2600 {
2601 	struct vop_kqfilter_args *ap = v;
2602 	struct vnode *vp = ap->a_vp;
2603 	struct knote *kn = ap->a_kn;
2604 
2605 	switch (kn->kn_filter) {
2606 	case EVFILT_READ:
2607 		kn->kn_fop = &tmpfsread_filtops;
2608 		break;
2609 	case EVFILT_WRITE:
2610 		kn->kn_fop = &tmpfswrite_filtops;
2611 		break;
2612 	case EVFILT_VNODE:
2613 		kn->kn_fop = &tmpfsvnode_filtops;
2614 		break;
2615 	default:
2616 		return (EINVAL);
2617 	}
2618 
2619 	kn->kn_hook = (caddr_t)vp;
2620 
2621 	SLIST_INSERT_HEAD(&vp->v_selectinfo.si_note, kn, kn_selnext);
2622 
2623 	return (0);
2624 }
2625 
2626 void
2627 filt_tmpfsdetach(struct knote *kn)
2628 {
2629 	struct vnode *vp = (struct vnode *)kn->kn_hook;
2630 
2631 	SLIST_REMOVE(&vp->v_selectinfo.si_note, kn, knote, kn_selnext);
2632 }
2633 
2634 int
2635 filt_tmpfsread(struct knote *kn, long hint)
2636 {
2637 	struct vnode *vp = (struct vnode *)kn->kn_hook;
2638 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
2639 
2640 	/*
2641 	 * filesystem is gone, so set the EOF flag and schedule
2642 	 * the knote for deletion.
2643 	 */
2644 	if (hint == NOTE_REVOKE) {
2645 		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
2646 		return (1);
2647 	}
2648 
2649         kn->kn_data = node->tn_size - kn->kn_fp->f_offset;
2650 	if (kn->kn_data == 0 && kn->kn_sfflags & NOTE_EOF) {
2651 		kn->kn_fflags |= NOTE_EOF;
2652 		return (1);
2653 	}
2654 
2655         return (kn->kn_data != 0);
2656 }
2657 
2658 int
2659 filt_tmpfswrite(struct knote *kn, long hint)
2660 {
2661 	/*
2662 	 * filesystem is gone, so set the EOF flag and schedule
2663 	 * the knote for deletion.
2664 	 */
2665 	if (hint == NOTE_REVOKE) {
2666 		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
2667 		return (1);
2668 	}
2669 
2670         kn->kn_data = 0;
2671         return (1);
2672 }
2673 
2674 int
2675 filt_tmpfsvnode(struct knote *kn, long hint)
2676 {
2677 	if (kn->kn_sfflags & hint)
2678 		kn->kn_fflags |= hint;
2679 	if (hint == NOTE_REVOKE) {
2680 		kn->kn_flags |= EV_EOF;
2681 		return (1);
2682 	}
2683 	return (kn->kn_fflags != 0);
2684 }
2685