xref: /openbsd-src/sys/tmpfs/tmpfs_vnops.c (revision 0b7734b3d77bb9b21afec6f4621cae6c805dbd45)
1 /*	$OpenBSD: tmpfs_vnops.c,v 1.27 2016/06/19 11:54:33 natano Exp $	*/
2 /*	$NetBSD: tmpfs_vnops.c,v 1.100 2012/11/05 17:27:39 dholland Exp $	*/
3 
4 /*
5  * Copyright (c) 2005, 2006, 2007, 2012 The NetBSD Foundation, Inc.
6  * Copyright (c) 2013 Pedro Martelletto
7  * All rights reserved.
8  *
9  * This code is derived from software contributed to The NetBSD Foundation
10  * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
11  * 2005 program, and by Taylor R Campbell.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  * POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 /*
36  * tmpfs vnode interface.
37  */
38 
39 #include <sys/param.h>
40 #include <sys/fcntl.h>
41 #include <sys/event.h>
42 #include <sys/namei.h>
43 #include <sys/stat.h>
44 #include <sys/uio.h>
45 #include <sys/unistd.h>
46 #include <sys/vnode.h>
47 #include <sys/lockf.h>
48 #include <sys/poll.h>
49 #include <sys/file.h>
50 
51 #include <miscfs/fifofs/fifo.h>
52 #include <tmpfs/tmpfs_vnops.h>
53 #include <tmpfs/tmpfs.h>
54 
/*
 * Forward declaration: tmpfs_kqfilter is referenced by the operations
 * vector below before any definition is visible at this point.
 */
int tmpfs_kqfilter(void *v);

/*
 * vnode operations vector used for files stored in a tmpfs file system.
 *
 * Most slots point at tmpfs_* handlers; the revoke, abortop and bmap slots
 * use the vop_generic_* implementations instead.
 */
struct vops tmpfs_vops = {
	.vop_lookup	= tmpfs_lookup,
	.vop_create	= tmpfs_create,
	.vop_mknod	= tmpfs_mknod,
	.vop_open	= tmpfs_open,
	.vop_close	= tmpfs_close,
	.vop_access	= tmpfs_access,
	.vop_getattr	= tmpfs_getattr,
	.vop_setattr	= tmpfs_setattr,
	.vop_read	= tmpfs_read,
	.vop_write	= tmpfs_write,
	.vop_ioctl	= tmpfs_ioctl,
	.vop_poll	= tmpfs_poll,
	.vop_kqfilter	= tmpfs_kqfilter,
	.vop_revoke	= vop_generic_revoke,
	.vop_fsync	= tmpfs_fsync,
	.vop_remove	= tmpfs_remove,
	.vop_link	= tmpfs_link,
	.vop_rename	= tmpfs_rename,
	.vop_mkdir	= tmpfs_mkdir,
	.vop_rmdir	= tmpfs_rmdir,
	.vop_symlink	= tmpfs_symlink,
	.vop_readdir	= tmpfs_readdir,
	.vop_readlink	= tmpfs_readlink,
	.vop_abortop	= vop_generic_abortop,
	.vop_inactive	= tmpfs_inactive,
	.vop_reclaim	= tmpfs_reclaim,
	.vop_lock	= tmpfs_lock,
	.vop_unlock	= tmpfs_unlock,
	.vop_bmap	= vop_generic_bmap,
	.vop_strategy	= tmpfs_strategy,
	.vop_print	= tmpfs_print,
	.vop_islocked	= tmpfs_islocked,
	.vop_pathconf	= tmpfs_pathconf,
	.vop_advlock	= tmpfs_advlock,
	.vop_bwrite	= tmpfs_bwrite,
};
97 
/*
 * tmpfs_lookup: path name traversal routine.
 *
 * Arguments: dvp (directory being searched), vpp (result),
 * cnp (component name - path).
 *
 * => Caller holds a reference and lock on dvp.
 * => We return looked-up vnode (vpp) locked, with a reference held.
 * => On the non-cached paths, PDIRUNLOCK is set in cnp->cn_flags when
 *    dvp has been unlocked before returning.
 *
 * Returns 0 when the entry was found, EJUSTRETURN when the last component
 * does not exist but the operation is CREATE or RENAME, ENOENT when the
 * name does not exist, or another errno produced below (EROFS, EINVAL,
 * EISDIR, ENOTDIR, EPERM) or by VOP_ACCESS()/tmpfs_vnode_get().
 */
int
tmpfs_lookup(void *v)
{
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap = v;
	struct vnode *dvp = ap->a_dvp, **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	struct ucred *cred = cnp->cn_cred;
	const int lastcn = (cnp->cn_flags & ISLASTCN) != 0;
	const int lockparent = (cnp->cn_flags & LOCKPARENT) != 0;
	tmpfs_node_t *dnode, *tnode;
	tmpfs_dirent_t *de;
	int cachefound;
	int error;

	KASSERT(VOP_ISLOCKED(dvp));

	dnode = VP_TO_TMPFS_DIR(dvp);
	/* Start from a known state: parent still locked, no result yet. */
	cnp->cn_flags &= ~PDIRUNLOCK;
	*vpp = NULL;

	/* Check accessibility of directory. */
	error = VOP_ACCESS(dvp, VEXEC, cred, curproc);
	if (error) {
		goto out;
	}

	/*
	 * If requesting the last path component on a read-only file system
	 * with a write operation, deny it.
	 */
	if (lastcn && (dvp->v_mount->mnt_flag & MNT_RDONLY) != 0 &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
		error = EROFS;
		goto out;
	}

	/*
	 * Avoid doing a linear scan of the directory if the requested
	 * directory/name couple is already in the cache.
	 *
	 * Note that both cache-hit returns below bypass the "out" label, so
	 * the parent-unlock logic there is not run for them; presumably
	 * cache_lookup() takes care of the locking state itself - confirm.
	 */
	cachefound = cache_lookup(dvp, vpp, cnp);
	if (cachefound == ENOENT /* && *vpp == NULLVP */)
		return ENOENT; /* Negative cache hit. */
	else if (cachefound != -1)
		return 0; /* Found in cache. */

	if (cnp->cn_flags & ISDOTDOT) {
		tmpfs_node_t *pnode;

		/*
		 * Lookup of ".." case.
		 */
		if (lastcn) {
			if (cnp->cn_nameiop == RENAME) {
				error = EINVAL;
				goto out;
			}
			if (cnp->cn_nameiop == DELETE) {
				/* Keep the name for tmpfs_rmdir(). */
				cnp->cn_flags |= SAVENAME;
			}
		}
		KASSERT(dnode->tn_type == VDIR);
		pnode = dnode->tn_spec.tn_dir.tn_parent;
		if (pnode == NULL) {
			error = ENOENT;
			goto out;
		}

		/*
		 * Lock the parent tn_nlock before releasing the vnode lock,
		 * and thus prevent the parent from disappearing.
		 */
		rw_enter_write(&pnode->tn_nlock);
		VOP_UNLOCK(dvp, curproc);

		/*
		 * Get a vnode of the '..' entry and re-acquire the lock.
		 * The tn_nlock is not released explicitly here; presumably
		 * tmpfs_vnode_get() releases it - confirm.
		 * This path jumps to "out", so the result is not entered
		 * into the name cache.
		 */
		error = tmpfs_vnode_get(dvp->v_mount, pnode, vpp);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, curproc);
		goto out;

	} else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
		/*
		 * Lookup of "." case: return dvp itself with an extra
		 * reference.
		 */
		if (lastcn && cnp->cn_nameiop == RENAME) {
			error = EISDIR;
			goto out;
		}
		vref(dvp);
		*vpp = dvp;
		error = 0;
		goto done;
	}

	/*
	 * Other lookup cases: perform directory scan.
	 */
	de = tmpfs_dir_lookup(dnode, cnp);
	if (de == NULL) {
		/*
		 * The entry was not found in the directory.  This is valid
		 * if we are creating or renaming an entry and are working
		 * on the last component of the path name.
		 */
		if (lastcn && (cnp->cn_nameiop == CREATE ||
		    cnp->cn_nameiop == RENAME)) {
			error = VOP_ACCESS(dvp, VWRITE, cred, curproc);
			if (error) {
				goto out;
			}
			/*
			 * We are creating an entry in the file system, so
			 * save its name for further use by tmpfs_create().
			 */
			cnp->cn_flags |= SAVENAME;
			error = EJUSTRETURN;
		} else {
			error = ENOENT;
		}
		goto done;
	}

	tnode = de->td_node;

	/*
	 * If it is not the last path component and found a non-directory
	 * or non-link entry (which may itself be pointing to a directory),
	 * raise an error.
	 */
	if (!lastcn && tnode->tn_type != VDIR && tnode->tn_type != VLNK) {
		error = ENOTDIR;
		goto out;
	}

	/* Check the permissions. */
	if (lastcn && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
		error = VOP_ACCESS(dvp, VWRITE, cred, curproc);
		if (error)
			goto out;

		/*
		 * If the directory is sticky, only root, the owner of the
		 * directory or the owner of the file may delete or rename
		 * the entry.
		 */
		if ((dnode->tn_mode & S_ISTXT) != 0) {
			if (cred->cr_uid != 0 &&
			    cred->cr_uid != dnode->tn_uid &&
			    cred->cr_uid != tnode->tn_uid) {
				error = EPERM;
				goto out;
			}
		}

		/*
		 * XXX pedro: We might need cn_nameptr later in tmpfs_remove()
		 * or tmpfs_rmdir() for a tmpfs_dir_lookup(). We should really
		 * get rid of SAVENAME at some point.
		 */
		if (cnp->cn_nameiop == DELETE)
			cnp->cn_flags |= SAVENAME;
	}

	/*
	 * Get a vnode for the matching entry.  As in the ".." case, the
	 * tn_nlock taken here is not released in this function; presumably
	 * tmpfs_vnode_get() does so - confirm.
	 */
	rw_enter_write(&tnode->tn_nlock);
	error = tmpfs_vnode_get(dvp->v_mount, tnode, vpp);
done:
	/*
	 * Cache the result, unless request was for creation (as it does
	 * not improve the performance).
	 *
	 * NOTE(review): this is also reached when tmpfs_vnode_get() above
	 * failed; if *vpp is still NULL in that case, this looks like it
	 * would enter a negative entry for a name that exists - confirm.
	 */
	if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE) {
		cache_enter(dvp, *vpp, cnp);
	}
out:
	/*
	 * If (1) we succeeded, (2) found a distinct vnode to return and (3)
	 * were either not asked to keep the parent locked or are in the
	 * middle of a lookup (not at the last component), unlock the parent
	 * vnode and tell the caller via PDIRUNLOCK.
	 */
	if ((error == 0 || error == EJUSTRETURN) && /* (1) */
	    *vpp != dvp &&			    /* (2) */
	    (!lockparent || !lastcn)) {		    /* (3) */
		VOP_UNLOCK(dvp, curproc);
		cnp->cn_flags |= PDIRUNLOCK;
	} else
		KASSERT(VOP_ISLOCKED(dvp));

	/* Either we failed, or we return a locked vnode. */
	KASSERT((*vpp && VOP_ISLOCKED(*vpp)) || error);

	return error;
}
307 
308 int
309 tmpfs_create(void *v)
310 {
311 	struct vop_create_args /* {
312 		struct vnode		*a_dvp;
313 		struct vnode		**a_vpp;
314 		struct componentname	*a_cnp;
315 		struct vattr		*a_vap;
316 	} */ *ap = v;
317 	struct vnode *dvp = ap->a_dvp, **vpp = ap->a_vpp;
318 	struct componentname *cnp = ap->a_cnp;
319 	struct vattr *vap = ap->a_vap;
320 
321 	KASSERT(VOP_ISLOCKED(dvp));
322 	KASSERT(cnp->cn_flags & HASBUF);
323 	KASSERT(vap->va_type == VREG || vap->va_type == VSOCK);
324 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
325 }
326 
327 int
328 tmpfs_mknod(void *v)
329 {
330 	struct vop_mknod_args /* {
331 		struct vnode		*a_dvp;
332 		struct vnode		**a_vpp;
333 		struct componentname	*a_cnp;
334 		struct vattr		*a_vap;
335 	} */ *ap = v;
336 	struct vnode *dvp = ap->a_dvp, **vpp = ap->a_vpp;
337 	struct componentname *cnp = ap->a_cnp;
338 	struct vattr *vap = ap->a_vap;
339 	enum vtype vt = vap->va_type;
340 	int error;
341 
342 	if (vt != VBLK && vt != VCHR && vt != VFIFO) {
343 		vput(dvp);
344 		return EINVAL;
345 	}
346 
347 	/* tmpfs_alloc_file() will unlock 'dvp'. */
348 	error = tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
349 	if (error)
350 		return error;
351 
352 	vput(*vpp);
353 
354 	return 0;
355 }
356 
357 int
358 tmpfs_open(void *v)
359 {
360 	struct vop_open_args /* {
361 		struct vnode	*a_vp;
362 		int		a_mode;
363 		kauth_cred_t	a_cred;
364 	} */ *ap = v;
365 	struct vnode *vp = ap->a_vp;
366 	mode_t mode = ap->a_mode;
367 	tmpfs_node_t *node;
368 
369 	KASSERT(VOP_ISLOCKED(vp));
370 
371 	node = VP_TO_TMPFS_NODE(vp);
372 	if (node->tn_links < 1) {
373 		/*
374 		 * The file is still active, but all its names have been
375 		 * removed (e.g. by a "rmdir $(pwd)").  It cannot be opened
376 		 * any more, as it is about to be destroyed.
377 		 */
378 		return ENOENT;
379 	}
380 
381 	/* If the file is marked append-only, deny write requests. */
382 	if ((node->tn_flags & APPEND) != 0 &&
383 	    (mode & (FWRITE | O_APPEND)) == FWRITE) {
384 		return EPERM;
385 	}
386 	return 0;
387 }
388 
389 int
390 tmpfs_close(void *v)
391 {
392 	struct vop_close_args /* {
393 		struct vnode	*a_vp;
394 		int		a_fflag;
395 		kauth_cred_t	a_cred;
396 	} */ *ap = v;
397 	struct vnode *vp = ap->a_vp;
398 
399 	KASSERT(VOP_ISLOCKED(vp));
400 
401 	return 0;
402 }
403 
404 int
405 tmpfs_access(void *v)
406 {
407 	struct vop_access_args /* {
408 		struct vnode	*a_vp;
409 		int		a_mode;
410 		kauth_cred_t	a_cred;
411 	} */ *ap = v;
412 	struct vnode *vp = ap->a_vp;
413 	mode_t mode = ap->a_mode;
414 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
415 	const int writing = (mode & VWRITE) != 0;
416 
417 	KASSERT(VOP_ISLOCKED(vp));
418 
419 	/* Possible? */
420 	switch (vp->v_type) {
421 	case VDIR:
422 	case VLNK:
423 	case VREG:
424 		if (writing && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) {
425 			return EROFS;
426 		}
427 		break;
428 	case VBLK:
429 	case VCHR:
430 	case VSOCK:
431 	case VFIFO:
432 		break;
433 	default:
434 		return EINVAL;
435 	}
436 	if (writing && (node->tn_flags & IMMUTABLE) != 0) {
437 		return EPERM;
438 	}
439 
440 	return (vaccess(vp->v_type, node->tn_mode, node->tn_uid, node->tn_gid,
441 	    mode, ap->a_cred));
442 }
443 
444 int
445 tmpfs_getattr(void *v)
446 {
447 	struct vop_getattr_args /* {
448 		struct vnode	*a_vp;
449 		struct vattr	*a_vap;
450 		kauth_cred_t	a_cred;
451 	} */ *ap = v;
452 	struct vnode *vp = ap->a_vp;
453 	struct vattr *vap = ap->a_vap;
454 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
455 
456 	vattr_null(vap);
457 
458 	vap->va_type = vp->v_type;
459 	vap->va_mode = node->tn_mode;
460 	vap->va_nlink = node->tn_links;
461 	vap->va_uid = node->tn_uid;
462 	vap->va_gid = node->tn_gid;
463 	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
464 	vap->va_fileid = node->tn_id;
465 	vap->va_size = node->tn_size;
466 	vap->va_blocksize = PAGE_SIZE;
467 	vap->va_atime = node->tn_atime;
468 	vap->va_mtime = node->tn_mtime;
469 	vap->va_ctime = node->tn_ctime;
470 	/* vap->va_birthtime = node->tn_birthtime; */
471 	vap->va_gen = TMPFS_NODE_GEN(node);
472 	vap->va_flags = node->tn_flags;
473 	vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
474 	    node->tn_spec.tn_dev.tn_rdev : VNOVAL;
475 	vap->va_bytes = round_page(node->tn_size);
476 	vap->va_filerev = VNOVAL;
477 	vap->va_vaflags = 0;
478 	vap->va_spare = VNOVAL; /* XXX */
479 
480 	return 0;
481 }
482 
483 #define GOODTIME(tv)	((tv)->tv_nsec != VNOVAL)
484 /* XXX Should this operation be atomic?  I think it should, but code in
485  * XXX other places (e.g., ufs) doesn't seem to be... */
486 int
487 tmpfs_setattr(void *v)
488 {
489 	struct vop_setattr_args /* {
490 		struct vnode	*a_vp;
491 		struct vattr	*a_vap;
492 		kauth_cred_t	a_cred;
493 	} */ *ap = v;
494 	struct vnode *vp = ap->a_vp;
495 	struct vattr *vap = ap->a_vap;
496 	struct ucred *cred = ap->a_cred;
497 	struct proc *p = curproc;
498 	int error = 0;
499 
500 	KASSERT(VOP_ISLOCKED(vp));
501 
502 	/* Abort if any unsettable attribute is given. */
503 	if (vap->va_type != VNON || vap->va_nlink != VNOVAL ||
504 	    vap->va_fsid != VNOVAL || vap->va_fileid != VNOVAL ||
505 	    vap->va_blocksize != VNOVAL || GOODTIME(&vap->va_ctime) ||
506 	    vap->va_gen != VNOVAL || vap->va_rdev != VNOVAL ||
507 	    vap->va_bytes != VNOVAL) {
508 		return EINVAL;
509 	}
510 	if (error == 0 && (vap->va_flags != VNOVAL))
511 		error = tmpfs_chflags(vp, vap->va_flags, cred, p);
512 
513 	if (error == 0 && (vap->va_size != VNOVAL))
514 		error = tmpfs_chsize(vp, vap->va_size, cred, p);
515 
516 	if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL))
517 		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred, p);
518 
519 	if (error == 0 && (vap->va_mode != VNOVAL))
520 		error = tmpfs_chmod(vp, vap->va_mode, cred, p);
521 
522 	if (error == 0 && ((vap->va_vaflags & VA_UTIMES_CHANGE)
523 	    || GOODTIME(&vap->va_atime)
524 	    || GOODTIME(&vap->va_mtime)))
525 		error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime,
526 		    vap->va_vaflags, cred, p);
527 
528 	return error;
529 }
530 
531 int
532 tmpfs_read(void *v)
533 {
534 	struct vop_read_args /* {
535 		struct vnode *a_vp;
536 		struct uio *a_uio;
537 		int a_ioflag;
538 		struct ucred *a_cred;
539 	} */ *ap = v;
540 	struct vnode *vp = ap->a_vp;
541 	struct uio *uio = ap->a_uio;
542 	/* const int ioflag = ap->a_ioflag; */
543 	tmpfs_node_t *node;
544 	int error;
545 
546 	KASSERT(VOP_ISLOCKED(vp));
547 
548 	if (vp->v_type != VREG) {
549 		return EISDIR;
550 	}
551 	if (uio->uio_offset < 0) {
552 		return EINVAL;
553 	}
554 	if (uio->uio_resid == 0)
555 		return 0;
556 
557 	node = VP_TO_TMPFS_NODE(vp);
558 	error = 0;
559 
560 	while (error == 0 && uio->uio_resid > 0) {
561 		vsize_t len;
562 
563 		if (node->tn_size <= uio->uio_offset) {
564 			break;
565 		}
566 		len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
567 		if (len == 0) {
568 			break;
569 		}
570 		error = tmpfs_uiomove(node, uio, len);
571 	}
572 
573 	if (!(vp->v_mount->mnt_flag & MNT_NOATIME))
574 		tmpfs_update(node, TMPFS_NODE_ACCESSED);
575 
576 	return error;
577 }
578 
579 int
580 tmpfs_write(void *v)
581 {
582 	struct vop_write_args /* {
583 		struct vnode	*a_vp;
584 		struct uio	*a_uio;
585 		int		a_ioflag;
586 		kauth_cred_t	a_cred;
587 	} */ *ap = v;
588 	struct vnode *vp = ap->a_vp;
589 	struct uio *uio = ap->a_uio;
590 	const int ioflag = ap->a_ioflag;
591 	tmpfs_node_t *node;
592 	off_t oldsize;
593 	ssize_t overrun;
594 	int extended;
595 	int error;
596 
597 	KASSERT(VOP_ISLOCKED(vp));
598 
599 	node = VP_TO_TMPFS_NODE(vp);
600 	oldsize = node->tn_size;
601 
602 	if (vp->v_type != VREG)
603 		return (EINVAL);
604 
605 	if (uio->uio_resid == 0)
606 		return (0);
607 
608 	if (ioflag & IO_APPEND) {
609 		uio->uio_offset = node->tn_size;
610 	}
611 
612 	if (uio->uio_offset < 0 ||
613 	    (u_int64_t)uio->uio_offset + uio->uio_resid > LLONG_MAX)
614 		return (EFBIG);
615 
616 	/* do the filesize rlimit check */
617 	if ((error = vn_fsizechk(vp, uio, ioflag, &overrun)))
618 		return (error);
619 
620 	extended = uio->uio_offset + uio->uio_resid > node->tn_size;
621 	if (extended) {
622 		error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid);
623 		if (error)
624 			goto out;
625 	}
626 
627 	error = 0;
628 	while (error == 0 && uio->uio_resid > 0) {
629 		vsize_t len;
630 
631 		len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
632 		if (len == 0) {
633 			break;
634 		}
635 		error = tmpfs_uiomove(node, uio, len);
636 	}
637 	if (error) {
638 		(void)tmpfs_reg_resize(vp, oldsize);
639 	}
640 
641 	tmpfs_update(node, TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED);
642 	if (extended)
643 		VN_KNOTE(vp, NOTE_WRITE | NOTE_EXTEND);
644 	else
645 		VN_KNOTE(vp, NOTE_WRITE);
646 out:
647 	if (error) {
648 		KASSERT(oldsize == node->tn_size);
649 	} else {
650 		KASSERT(uio->uio_resid == 0);
651 
652 		/* correct the result for writes clamped by vn_fsizechk() */
653 		uio->uio_resid += overrun;
654 
655 	}
656 	return error;
657 }
658 
659 int
660 tmpfs_fsync(void *v)
661 {
662 	struct vop_fsync_args /* {
663 		struct vnode *a_vp;
664 		struct ucred *a_cred;
665 		int a_flags;
666 		off_t a_offlo;
667 		off_t a_offhi;
668 		struct lwp *a_l;
669 	} */ *ap = v;
670 	struct vnode *vp = ap->a_vp;
671 
672 	/* Nothing to do.  Just update. */
673 	KASSERT(VOP_ISLOCKED(vp));
674 	return 0;
675 }
676 
677 /*
678  * tmpfs_remove: unlink a file.
679  *
680  * => Both directory (dvp) and file (vp) are locked.
681  * => We unlock and drop the reference on both.
682  */
683 int
684 tmpfs_remove(void *v)
685 {
686 	struct vop_remove_args /* {
687 		struct vnode *a_dvp;
688 		struct vnode *a_vp;
689 		struct componentname *a_cnp;
690 	} */ *ap = v;
691 	struct vnode *dvp = ap->a_dvp, *vp = ap->a_vp;
692 	struct componentname *cnp = ap->a_cnp;
693 	tmpfs_node_t *dnode, *node;
694 	tmpfs_dirent_t *de;
695 	int error;
696 
697 	KASSERT(VOP_ISLOCKED(dvp));
698 	KASSERT(VOP_ISLOCKED(vp));
699 	KASSERT(cnp->cn_flags & HASBUF);
700 
701 	if (vp->v_type == VDIR) {
702 		error = EPERM;
703 		goto out;
704 	}
705 
706 	dnode = VP_TO_TMPFS_NODE(dvp);
707 	node = VP_TO_TMPFS_NODE(vp);
708 
709 	/* Files marked as immutable or append-only cannot be deleted. */
710 	if (node->tn_flags & (IMMUTABLE | APPEND)) {
711 		error = EPERM;
712 		goto out;
713 	}
714 
715 	/*
716 	 * Likewise, files residing on directories marked as append-only cannot
717 	 * be deleted.
718 	 */
719 	if (dnode->tn_flags & APPEND) {
720 		error = EPERM;
721 		goto out;
722 	}
723 
724 	/* Lookup the directory entry (check the cached hint first). */
725 	de = tmpfs_dir_cached(node);
726 	if (de == NULL) {
727 		de = tmpfs_dir_lookup(dnode, cnp);
728 	}
729 
730 	KASSERT(de && de->td_node == node);
731 
732 	/*
733 	 * Remove the entry from the directory (drops the link count) and
734 	 * destroy it.
735 	 * Note: the inode referred by it will not be destroyed
736 	 * until the vnode is reclaimed/recycled.
737 	 */
738 	tmpfs_dir_detach(dnode, de);
739 	tmpfs_free_dirent(VFS_TO_TMPFS(vp->v_mount), de);
740 	if (node->tn_links > 0)  {
741 		/* We removed a hard link. */
742 		tmpfs_update(node, TMPFS_NODE_CHANGED);
743 	}
744 	error = 0;
745 out:
746 	pool_put(&namei_pool, cnp->cn_pnbuf);
747 	/* Drop the references and unlock the vnodes. */
748 	vput(vp);
749 	if (dvp == vp) {
750 		vrele(dvp);
751 	} else {
752 		vput(dvp);
753 	}
754 	return error;
755 }
756 
757 /*
758  * tmpfs_link: create a hard link.
759  */
760 int
761 tmpfs_link(void *v)
762 {
763 	struct vop_link_args /* {
764 		struct vnode *a_dvp;
765 		struct vnode *a_vp;
766 		struct componentname *a_cnp;
767 	} */ *ap = v;
768 	struct vnode *dvp = ap->a_dvp;
769 	struct vnode *vp = ap->a_vp;
770 	struct componentname *cnp = ap->a_cnp;
771 	tmpfs_node_t *dnode, *node;
772 	tmpfs_dirent_t *de;
773 	int error;
774 
775 	KASSERT(VOP_ISLOCKED(dvp));
776 
777 	if (vp->v_type == VDIR) {
778 		VOP_ABORTOP(dvp, cnp);
779 		vput(dvp);
780 		return EPERM;
781 	}
782 
783 	KASSERT(dvp != vp);
784 
785 	if (dvp->v_mount != vp->v_mount) {
786 		VOP_ABORTOP(dvp, cnp);
787 		vput(dvp);
788 		return EXDEV;
789 	}
790 
791 	dnode = VP_TO_TMPFS_DIR(dvp);
792 	node = VP_TO_TMPFS_NODE(vp);
793 
794 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curproc);
795 
796 	/* Check for maximum number of links limit. */
797 	if (node->tn_links == LINK_MAX) {
798 		error = EMLINK;
799 		goto out;
800 	}
801 	KASSERT(node->tn_links < LINK_MAX);
802 
803 	/* We cannot create links of files marked immutable or append-only. */
804 	if (node->tn_flags & (IMMUTABLE | APPEND)) {
805 		error = EPERM;
806 		goto out;
807 	}
808 
809 	if (TMPFS_DIRSEQ_FULL(dnode)) {
810 		error = ENOSPC;
811 		goto out;
812 	}
813 
814 	/* Allocate a new directory entry to represent the inode. */
815 	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount),
816 	    cnp->cn_nameptr, cnp->cn_namelen, &de);
817 	if (error) {
818 		goto out;
819 	}
820 
821 	/*
822 	 * Insert the entry into the directory.
823 	 * It will increase the inode link count.
824 	 */
825 	tmpfs_dir_attach(dnode, de, node);
826 
827 	/* Update the timestamps and trigger the event. */
828 	if (node->tn_vnode) {
829 		VN_KNOTE(node->tn_vnode, NOTE_LINK);
830 	}
831 	tmpfs_update(node, TMPFS_NODE_CHANGED);
832 	error = 0;
833 out:
834 	pool_put(&namei_pool, cnp->cn_pnbuf);
835 	VOP_UNLOCK(vp, curproc);
836 	vput(dvp);
837 	return error;
838 }
839 
840 int
841 tmpfs_mkdir(void *v)
842 {
843 	struct vop_mkdir_args /* {
844 		struct vnode		*a_dvp;
845 		struct vnode		**a_vpp;
846 		struct componentname	*a_cnp;
847 		struct vattr		*a_vap;
848 	} */ *ap = v;
849 	struct vnode *dvp = ap->a_dvp;
850 	struct vnode **vpp = ap->a_vpp;
851 	struct componentname *cnp = ap->a_cnp;
852 	struct vattr *vap = ap->a_vap;
853 
854 	KASSERT(vap->va_type == VDIR);
855 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
856 }
857 
858 int
859 tmpfs_rmdir(void *v)
860 {
861 	struct vop_rmdir_args /* {
862 		struct vnode		*a_dvp;
863 		struct vnode		*a_vp;
864 		struct componentname	*a_cnp;
865 	} */ *ap = v;
866 	struct vnode *dvp = ap->a_dvp;
867 	struct vnode *vp = ap->a_vp;
868 	struct componentname *cnp = ap->a_cnp;
869 	tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
870 	tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
871 	tmpfs_node_t *node = VP_TO_TMPFS_DIR(vp);
872 	tmpfs_dirent_t *de;
873 	int error = 0;
874 
875 	KASSERT(VOP_ISLOCKED(dvp));
876 	KASSERT(VOP_ISLOCKED(vp));
877 	KASSERT(cnp->cn_flags & HASBUF);
878 
879 	if (cnp->cn_namelen == 2 && cnp->cn_nameptr[0] == '.' &&
880 	    cnp->cn_nameptr[1] == '.') {
881 		error = ENOTEMPTY;
882 		goto out;
883 	}
884 
885 	KASSERT(node->tn_spec.tn_dir.tn_parent == dnode);
886 
887 	/*
888 	 * Directories with more than two entries ('.' and '..') cannot be
889 	 * removed.
890 	 */
891 	if (node->tn_size > 0) {
892 		KASSERT(error == 0);
893 		TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
894 			error = ENOTEMPTY;
895 			break;
896 		}
897 		if (error)
898 			goto out;
899 	}
900 
901 	/* Lookup the directory entry (check the cached hint first). */
902 	de = tmpfs_dir_cached(node);
903 	if (de == NULL)
904 		de = tmpfs_dir_lookup(dnode, cnp);
905 
906 	KASSERT(de && de->td_node == node);
907 
908 	/* Check flags to see if we are allowed to remove the directory. */
909 	if (dnode->tn_flags & APPEND || node->tn_flags & (IMMUTABLE | APPEND)) {
910 		error = EPERM;
911 		goto out;
912 	}
913 
914 	/* Decrement the link count for the virtual '.' entry. */
915 	node->tn_links--;
916 	tmpfs_update(node, TMPFS_NODE_STATUSALL);
917 
918 	/* Detach the directory entry from the directory. */
919 	tmpfs_dir_detach(dnode, de);
920 
921 	/* Purge the cache for parent. */
922 	cache_purge(dvp);
923 
924 	/*
925 	 * Destroy the directory entry.
926 	 * Note: the inode referred by it will not be destroyed
927 	 * until the vnode is reclaimed.
928 	 */
929 	tmpfs_free_dirent(tmp, de);
930 	KASSERT(TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir) == NULL);
931 
932 	KASSERT(node->tn_links == 0);
933 out:
934 	pool_put(&namei_pool, cnp->cn_pnbuf);
935 	/* Release the nodes. */
936 	vput(dvp);
937 	vput(vp);
938 	return error;
939 }
940 
941 int
942 tmpfs_symlink(void *v)
943 {
944 	struct vop_symlink_args /* {
945 		struct vnode		*a_dvp;
946 		struct vnode		**a_vpp;
947 		struct componentname	*a_cnp;
948 		struct vattr		*a_vap;
949 		char			*a_target;
950 	} */ *ap = v;
951 	struct vnode *dvp = ap->a_dvp;
952 	struct vnode **vpp = ap->a_vpp;
953 	struct componentname *cnp = ap->a_cnp;
954 	struct vattr *vap = ap->a_vap;
955 	char *target = ap->a_target;
956 	int error;
957 
958 	KASSERT(vap->va_type == 0);
959 	vap->va_type = VLNK;
960 
961 	error = tmpfs_alloc_file(dvp, vpp, vap, cnp, target);
962 	if (error == 0)
963 		vput(*vpp);
964 
965 	return error;
966 }
967 
968 int
969 tmpfs_readdir(void *v)
970 {
971 	struct vop_readdir_args /* {
972 		struct vnode	*a_vp;
973 		struct uio	*a_uio;
974 		kauth_cred_t	a_cred;
975 		int		*a_eofflag;
976 	} */ *ap = v;
977 	struct vnode *vp = ap->a_vp;
978 	struct uio *uio = ap->a_uio;
979 	int *eofflag = ap->a_eofflag;
980 	tmpfs_node_t *node;
981 	int error;
982 
983 	KASSERT(VOP_ISLOCKED(vp));
984 
985 	/* This operation only makes sense on directory nodes. */
986 	if (vp->v_type != VDIR) {
987 		return ENOTDIR;
988 	}
989 	node = VP_TO_TMPFS_DIR(vp);
990 	/*
991 	 * Retrieve the directory entries, unless it is being destroyed.
992 	 */
993 	if (node->tn_links) {
994 		error = tmpfs_dir_getdents(node, uio);
995 	} else {
996 		error = 0;
997 	}
998 
999 	if (eofflag != NULL) {
1000 		*eofflag = !error && uio->uio_offset == TMPFS_DIRSEQ_EOF;
1001 	}
1002 	return error;
1003 }
1004 
1005 int
1006 tmpfs_readlink(void *v)
1007 {
1008 	struct vop_readlink_args /* {
1009 		struct vnode	*a_vp;
1010 		struct uio	*a_uio;
1011 		kauth_cred_t	a_cred;
1012 	} */ *ap = v;
1013 	struct vnode *vp = ap->a_vp;
1014 	struct uio *uio = ap->a_uio;
1015 	tmpfs_node_t *node;
1016 	int error;
1017 
1018 	KASSERT(VOP_ISLOCKED(vp));
1019 	KASSERT(uio->uio_offset == 0);
1020 	KASSERT(vp->v_type == VLNK);
1021 
1022 	node = VP_TO_TMPFS_NODE(vp);
1023 	error = uiomove(node->tn_spec.tn_lnk.tn_link,
1024 	    MIN((size_t)node->tn_size, uio->uio_resid), uio);
1025 
1026 	if (!(vp->v_mount->mnt_flag & MNT_NOATIME))
1027 		tmpfs_update(node, TMPFS_NODE_ACCESSED);
1028 
1029 	return error;
1030 }
1031 
1032 int
1033 tmpfs_inactive(void *v)
1034 {
1035 	struct vop_inactive_args /* {
1036 		struct vnode *a_vp;
1037 		int *a_recycle;
1038 	} */ *ap = v;
1039 	struct vnode *vp = ap->a_vp;
1040 	tmpfs_node_t *node;
1041 
1042 	KASSERT(VOP_ISLOCKED(vp));
1043 
1044 	node = VP_TO_TMPFS_NODE(vp);
1045 
1046 	if (vp->v_type == VREG && tmpfs_uio_cached(node))
1047 		tmpfs_uio_uncache(node);
1048 
1049 	VOP_UNLOCK(vp, curproc);
1050 
1051 	/*
1052 	 * If we are done with the node, reclaim it so that it can be reused
1053 	 * immediately.
1054 	 */
1055 	if (node->tn_links == 0)
1056 		vrecycle(vp, curproc);
1057 
1058 	return 0;
1059 }
1060 
1061 int
1062 tmpfs_reclaim(void *v)
1063 {
1064 	struct vop_reclaim_args /* {
1065 		struct vnode *a_vp;
1066 	} */ *ap = v;
1067 	struct vnode *vp = ap->a_vp;
1068 	tmpfs_mount_t *tmp = VFS_TO_TMPFS(vp->v_mount);
1069 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1070 	int racing;
1071 
1072 	/* Disassociate inode from vnode. */
1073 	rw_enter_write(&node->tn_nlock);
1074 	node->tn_vnode = NULL;
1075 	vp->v_data = NULL;
1076 	/* Check if tmpfs_vnode_get() is racing with us. */
1077 	racing = TMPFS_NODE_RECLAIMING(node);
1078 	rw_exit_write(&node->tn_nlock);
1079 
1080 	/*
1081 	 * If inode is not referenced, i.e. no links, then destroy it.
1082 	 * Note: if racing - inode is about to get a new vnode, leave it.
1083 	 */
1084 	if (node->tn_links == 0 && !racing) {
1085 		tmpfs_free_node(tmp, node);
1086 	}
1087 	return 0;
1088 }
1089 
1090 int
1091 tmpfs_pathconf(void *v)
1092 {
1093 	struct vop_pathconf_args /* {
1094 		struct vnode	*a_vp;
1095 		int		a_name;
1096 		register_t	*a_retval;
1097 	} */ *ap = v;
1098 	const int name = ap->a_name;
1099 	register_t *retval = ap->a_retval;
1100 	int error = 0;
1101 
1102 	switch (name) {
1103 	case _PC_LINK_MAX:
1104 		*retval = LINK_MAX;
1105 		break;
1106 	case _PC_NAME_MAX:
1107 		*retval = TMPFS_MAXNAMLEN;
1108 		break;
1109 	case _PC_CHOWN_RESTRICTED:
1110 		*retval = 1;
1111 		break;
1112 	case _PC_NO_TRUNC:
1113 		*retval = 1;
1114 		break;
1115 	case _PC_FILESIZEBITS:
1116 		*retval = 64;
1117 		break;
1118 	case _PC_TIMESTAMP_RESOLUTION:
1119 		*retval = 1;
1120 		break;
1121 	default:
1122 		error = EINVAL;
1123 	}
1124 	return error;
1125 }
1126 
1127 int
1128 tmpfs_advlock(void *v)
1129 {
1130 	struct vop_advlock_args /* {
1131 		struct vnode	*a_vp;
1132 		void *		a_id;
1133 		int		a_op;
1134 		struct flock	*a_fl;
1135 		int		a_flags;
1136 	} */ *ap = v;
1137 	struct vnode *vp = ap->a_vp;
1138 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1139 
1140 	return lf_advlock(&node->tn_lockf, node->tn_size, ap->a_id, ap->a_op,
1141 	    ap->a_fl, ap->a_flags);
1142 }
1143 
1144 int
1145 tmpfs_print(void *v)
1146 {
1147 	struct vop_print_args /* {
1148 		struct vnode	*a_vp;
1149 	} */ *ap = v;
1150 	struct vnode *vp = ap->a_vp;
1151 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1152 
1153 	printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%x, links %d\n"
1154 	    "\tmode 0%o, owner %d, group %d, size %lld",
1155 	    node, node->tn_flags, node->tn_links, node->tn_mode, node->tn_uid,
1156 	    node->tn_gid, node->tn_size);
1157 #ifdef FIFO
1158 	if (vp->v_type == VFIFO)
1159 		fifo_printinfo(vp);
1160 #endif
1161 	printf("\n");
1162 	return 0;
1163 }
1164 
/*
 * tmpfs_bwrite: a null op.  The argument is ignored and 0 is always
 * returned.
 */
int
tmpfs_bwrite(void *v)
{
	(void)v;	/* intentionally unused */

	return (0);
}
1171 
1172 int
1173 tmpfs_poll(void *v)
1174 {
1175 	struct vop_poll_args *ap = v;
1176 	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
1177 }
1178 
/*
 * tmpfs_strategy: strategy hook.  The argument is ignored and
 * EOPNOTSUPP is always returned.
 */
int
tmpfs_strategy(void *v)
{
	(void)v;	/* intentionally unused */

	return (EOPNOTSUPP);
}
1184 
/*
 * tmpfs_ioctl: ioctl hook.  The argument is ignored and ENOTTY is
 * always returned.
 */
int
tmpfs_ioctl(void *v)
{
	(void)v;	/* intentionally unused */

	return (ENOTTY);
}
1190 
1191 int
1192 tmpfs_lock(void *v)
1193 {
1194 	struct vop_lock_args *ap = v;
1195 	tmpfs_node_t *tnp = VP_TO_TMPFS_NODE(ap->a_vp);
1196 
1197 	return rrw_enter(&tnp->tn_vlock, ap->a_flags & LK_RWFLAGS);
1198 }
1199 
1200 int
1201 tmpfs_unlock(void *v)
1202 {
1203 	struct vop_unlock_args *ap = v;
1204 	tmpfs_node_t *tnp = VP_TO_TMPFS_NODE(ap->a_vp);
1205 
1206 	rrw_exit(&tnp->tn_vlock);
1207 	return 0;
1208 }
1209 
1210 int
1211 tmpfs_islocked(void *v)
1212 {
1213 	struct vop_islocked_args *ap = v;
1214 	tmpfs_node_t *tnp = VP_TO_TMPFS_NODE(ap->a_vp);
1215 
1216 	return rrw_status(&tnp->tn_vlock);
1217 }
1218 
1219 /*
1220  * tmpfs_rename: rename routine, the hairiest system call, with the
1221  * insane API.
1222  *
1223  * Arguments: fdvp (from-parent vnode), fvp (from-leaf), tdvp (to-parent)
1224  * and tvp (to-leaf), if exists (NULL if not).
1225  *
1226  * => Caller holds a reference on fdvp and fvp, they are unlocked.
1227  *    Note: fdvp and fvp can refer to the same object (i.e. when it is root).
1228  *
1229  * => Both tdvp and tvp are referenced and locked.  It is our responsibility
1230  *    to release the references and unlock them (or destroy).
1231  */
1232 
/*
 * Forward declarations of the rename subroutines.  Parameter names are
 * spelled out for readability only; they carry no meaning to the
 * compiler.
 */

int tmpfs_sane_rename(struct vnode *fdvp, struct componentname *fcnp,
    struct vnode *tdvp, struct componentname *tcnp, struct ucred *cred,
    int posixly_correct);
int tmpfs_rename_enter(struct mount *mount, struct tmpfs_mount *tmpfs,
    struct ucred *cred,
    struct vnode *fdvp, struct tmpfs_node *fdnode, struct componentname *fcnp,
    struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
    struct vnode *tdvp, struct tmpfs_node *tdnode, struct componentname *tcnp,
    struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret);
int tmpfs_rename_enter_common(struct mount *mount, struct tmpfs_mount *tmpfs,
    struct ucred *cred,
    struct vnode *dvp, struct tmpfs_node *dnode,
    struct componentname *fcnp,
    struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
    struct componentname *tcnp,
    struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret);
int tmpfs_rename_enter_separate(struct mount *mount,
    struct tmpfs_mount *tmpfs, struct ucred *cred,
    struct vnode *fdvp, struct tmpfs_node *fdnode, struct componentname *fcnp,
    struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
    struct vnode *tdvp, struct tmpfs_node *tdnode, struct componentname *tcnp,
    struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret);
void tmpfs_rename_exit(struct tmpfs_mount *tmpfs,
    struct vnode *fdvp, struct vnode *fvp,
    struct vnode *tdvp, struct vnode *tvp);
int tmpfs_rename_lock_directory(struct vnode *vp, struct tmpfs_node *node);
int tmpfs_rename_genealogy(struct tmpfs_node *fdnode,
    struct tmpfs_node *tdnode, struct tmpfs_node **intermediate_node_ret);
int tmpfs_rename_lock(struct mount *mount, struct ucred *cred,
    int overlap_error,
    struct vnode *a_dvp, struct tmpfs_node *a_dnode,
    struct componentname *a_cnp, int a_missing_ok,
    struct tmpfs_dirent **a_dirent_ret, struct vnode **a_vp_ret,
    struct vnode *b_dvp, struct tmpfs_node *b_dnode,
    struct componentname *b_cnp, int b_missing_ok,
    struct tmpfs_dirent **b_dirent_ret, struct vnode **b_vp_ret);
void tmpfs_rename_attachdetach(struct tmpfs_mount *tmpfs,
    struct vnode *fdvp, struct tmpfs_dirent *fde, struct vnode *fvp,
    struct vnode *tdvp, struct tmpfs_dirent *tde, struct vnode *tvp);
int tmpfs_do_remove(struct tmpfs_mount *tmpfs, struct vnode *dvp,
    struct tmpfs_node *dnode, struct tmpfs_dirent *de, struct vnode *vp,
    struct ucred *cred);
int tmpfs_rename_check_possible(struct tmpfs_node *fdnode,
    struct tmpfs_node *fnode, struct tmpfs_node *tdnode,
    struct tmpfs_node *tnode);
int tmpfs_rename_check_permitted(struct ucred *cred,
    struct tmpfs_node *fdnode, struct tmpfs_node *fnode,
    struct tmpfs_node *tdnode, struct tmpfs_node *tnode);
int tmpfs_remove_check_possible(struct tmpfs_node *dnode,
    struct tmpfs_node *node);
int tmpfs_remove_check_permitted(struct ucred *cred,
    struct tmpfs_node *dnode, struct tmpfs_node *node);
int tmpfs_check_sticky(struct ucred *cred,
    struct tmpfs_node *dnode, struct tmpfs_node *node);
void tmpfs_rename_cache_purge(struct vnode *fdvp, struct vnode *fvp,
    struct vnode *tdvp, struct vnode *tvp);
void tmpfs_rename_abort(void *v);
1285 
1286 int
1287 tmpfs_rename(void *v)
1288 {
1289 	struct vop_rename_args  /* {
1290 		struct vnode		*a_fdvp;
1291 		struct vnode		*a_fvp;
1292 		struct componentname	*a_fcnp;
1293 		struct vnode		*a_tdvp;
1294 		struct vnode		*a_tvp;
1295 		struct componentname	*a_tcnp;
1296 	} */ *ap = v;
1297 	struct vnode *fdvp = ap->a_fdvp;
1298 	struct vnode *fvp = ap->a_fvp;
1299 	struct componentname *fcnp = ap->a_fcnp;
1300 	struct vnode *tdvp = ap->a_tdvp;
1301 	struct vnode *tvp = ap->a_tvp;
1302 	struct componentname *tcnp = ap->a_tcnp;
1303 	struct ucred *cred;
1304 	int error;
1305 
1306 	KASSERT(fdvp != NULL);
1307 	KASSERT(fvp != NULL);
1308 	KASSERT(fcnp != NULL);
1309 	KASSERT(fcnp->cn_nameptr != NULL);
1310 	KASSERT(tdvp != NULL);
1311 	KASSERT(tcnp != NULL);
1312 	KASSERT(fcnp->cn_nameptr != NULL);
1313 	/* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
1314 	/* KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
1315 	KASSERT(fdvp->v_type == VDIR);
1316 	KASSERT(tdvp->v_type == VDIR);
1317 	KASSERT(fcnp->cn_flags & HASBUF);
1318 	KASSERT(tcnp->cn_flags & HASBUF);
1319 
1320 	cred = fcnp->cn_cred;
1321 	KASSERT(tcnp->cn_cred == cred);
1322 
1323 	/*
1324 	 * Check for cross-device rename.
1325 	 */
1326 	if (fvp->v_mount != tdvp->v_mount ||
1327 	    (tvp != NULL && (fvp->v_mount != tvp->v_mount))) {
1328 	    	tmpfs_rename_abort(v);
1329 		return EXDEV;
1330 	}
1331 
1332 	/*
1333 	 * Can't check the locks on these until we know they're on
1334 	 * the same FS, as not all FS do locking the same way.
1335 	 */
1336 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
1337 	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
1338 
1339 	/*
1340 	 * Reject renaming '.' and '..'.
1341 	 */
1342 	if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
1343 	    (fcnp->cn_namelen == 2 && fcnp->cn_nameptr[0] == '.' &&
1344 	    fcnp->cn_nameptr[1] == '.')) {
1345 		tmpfs_rename_abort(v);
1346 		return EINVAL;
1347 	}
1348 
1349 	/*
1350 	 * Sanitize our world from the VFS insanity.  Unlock the target
1351 	 * directory and node, which are locked.  Release the children,
1352 	 * which are referenced.  Check for rename("x", "y/."), which
1353 	 * it is our responsibility to reject, not the caller's.  (But
1354 	 * the caller does reject rename("x/.", "y").  Go figure.)
1355 	 */
1356 
1357 	VOP_UNLOCK(tdvp, curproc);
1358 	if ((tvp != NULL) && (tvp != tdvp))
1359 		VOP_UNLOCK(tvp, curproc);
1360 
1361 	vrele(fvp);
1362 	if (tvp != NULL)
1363 		vrele(tvp);
1364 
1365 	if (tvp == tdvp) {
1366 		error = EINVAL;
1367 		goto out;
1368 	}
1369 
1370 	error = tmpfs_sane_rename(fdvp, fcnp, tdvp, tcnp, cred, 0);
1371 
1372 out:	/*
1373 	 * All done, whether with success or failure.  Release the
1374 	 * directory nodes now, as the caller expects from the VFS
1375 	 * protocol.
1376 	 */
1377 	vrele(fdvp);
1378 	vrele(tdvp);
1379 
1380 	return error;
1381 }
1382 
1383 /*
1384  * tmpfs_sane_rename: rename routine, the hairiest system call, with
1385  * the sane API.
1386  *
1387  * Arguments:
1388  *
1389  * . fdvp (from directory vnode),
1390  * . fcnp (from component name),
1391  * . tdvp (to directory vnode), and
1392  * . tcnp (to component name).
1393  *
1394  * fdvp and tdvp must be referenced and unlocked.
1395  */
1396 int
1397 tmpfs_sane_rename(struct vnode *fdvp, struct componentname *fcnp,
1398     struct vnode *tdvp, struct componentname *tcnp, struct ucred *cred,
1399     int posixly_correct)
1400 {
1401 	struct mount *mount;
1402 	struct tmpfs_mount *tmpfs;
1403 	struct tmpfs_node *fdnode, *tdnode;
1404 	struct tmpfs_dirent *fde, *tde;
1405 	struct vnode *fvp, *tvp;
1406 	char *newname;
1407 	int error;
1408 
1409 	KASSERT(fdvp != NULL);
1410 	KASSERT(fcnp != NULL);
1411 	KASSERT(tdvp != NULL);
1412 	KASSERT(tcnp != NULL);
1413 	/* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
1414 	/* KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
1415 	KASSERT(fdvp->v_type == VDIR);
1416 	KASSERT(tdvp->v_type == VDIR);
1417 	KASSERT(fdvp->v_mount == tdvp->v_mount);
1418 	KASSERT((fcnp->cn_flags & ISDOTDOT) == 0);
1419 	KASSERT((tcnp->cn_flags & ISDOTDOT) == 0);
1420 	KASSERT((fcnp->cn_namelen != 1) || (fcnp->cn_nameptr[0] != '.'));
1421 	KASSERT((tcnp->cn_namelen != 1) || (tcnp->cn_nameptr[0] != '.'));
1422 	KASSERT((fcnp->cn_namelen != 2) || (fcnp->cn_nameptr[0] != '.') ||
1423 	    (fcnp->cn_nameptr[1] != '.'));
1424 	KASSERT((tcnp->cn_namelen != 2) || (tcnp->cn_nameptr[0] != '.') ||
1425 	    (tcnp->cn_nameptr[1] != '.'));
1426 
1427 	/*
1428 	 * Pull out the tmpfs data structures.
1429 	 */
1430 	fdnode = VP_TO_TMPFS_NODE(fdvp);
1431 	tdnode = VP_TO_TMPFS_NODE(tdvp);
1432 	KASSERT(fdnode != NULL);
1433 	KASSERT(tdnode != NULL);
1434 	KASSERT(fdnode->tn_vnode == fdvp);
1435 	KASSERT(tdnode->tn_vnode == tdvp);
1436 	KASSERT(fdnode->tn_type == VDIR);
1437 	KASSERT(tdnode->tn_type == VDIR);
1438 
1439 	mount = fdvp->v_mount;
1440 	KASSERT(mount != NULL);
1441 	KASSERT(mount == tdvp->v_mount);
1442 	/* XXX How can we be sure this stays true?  (Not that you're
1443 	 * likely to mount a tmpfs read-only...)  */
1444 	KASSERT((mount->mnt_flag & MNT_RDONLY) == 0);
1445 	tmpfs = VFS_TO_TMPFS(mount);
1446 	KASSERT(tmpfs != NULL);
1447 
1448 	/*
1449 	 * Decide whether we need a new name, and allocate memory for
1450 	 * it if so.  Do this before locking anything or taking
1451 	 * destructive actions so that we can back out safely and sleep
1452 	 * safely.  XXX Is sleeping an issue here?  Can this just be
1453 	 * moved into tmpfs_rename_attachdetach?
1454 	 */
1455 	if (tmpfs_strname_neqlen(fcnp, tcnp)) {
1456 		newname = tmpfs_strname_alloc(tmpfs, tcnp->cn_namelen);
1457 		if (newname == NULL) {
1458 			error = ENOSPC;
1459 			goto out_unlocked;
1460 		}
1461 	} else {
1462 		newname = NULL;
1463 	}
1464 
1465 	/*
1466 	 * Lock and look up everything.  GCC is not very clever.
1467 	 */
1468 	fde = tde = NULL;
1469 	fvp = tvp = NULL;
1470 	error = tmpfs_rename_enter(mount, tmpfs, cred,
1471 	    fdvp, fdnode, fcnp, &fde, &fvp,
1472 	    tdvp, tdnode, tcnp, &tde, &tvp);
1473 	if (error)
1474 		goto out_unlocked;
1475 
1476 	/*
1477 	 * Check that everything is locked and looks right.
1478 	 */
1479 	KASSERT(fde != NULL);
1480 	KASSERT(fvp != NULL);
1481 	KASSERT(fde->td_node != NULL);
1482 	KASSERT(fde->td_node->tn_vnode == fvp);
1483 	KASSERT(fde->td_node->tn_type == fvp->v_type);
1484 	KASSERT((tde == NULL) == (tvp == NULL));
1485 	KASSERT((tde == NULL) || (tde->td_node != NULL));
1486 	KASSERT((tde == NULL) || (tde->td_node->tn_vnode == tvp));
1487 	KASSERT((tde == NULL) || (tde->td_node->tn_type == tvp->v_type));
1488 	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
1489 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
1490 	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
1491 	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
1492 
1493 	/*
1494 	 * If the source and destination are the same object, we need
1495 	 * only at most delete the source entry.
1496 	 */
1497 	if (fvp == tvp) {
1498 		KASSERT(tvp != NULL);
1499 		if (fde->td_node->tn_type == VDIR) {
1500 			/* XXX How can this possibly happen?  */
1501 			error = EINVAL;
1502 			goto out_locked;
1503 		}
1504 		if (!posixly_correct && (fde != tde)) {
1505 			/* XXX Doesn't work because of locking.
1506 			 * error = VOP_REMOVE(fdvp, fvp);
1507 			 */
1508 			error = tmpfs_do_remove(tmpfs, fdvp, fdnode, fde, fvp,
1509 			    cred);
1510 			if (error)
1511 				goto out_locked;
1512 		}
1513 		goto success;
1514 	}
1515 	KASSERT(fde != tde);
1516 	KASSERT(fvp != tvp);
1517 
1518 	/*
1519 	 * If the target exists, refuse to rename a directory over a
1520 	 * non-directory or vice versa, or to clobber a non-empty
1521 	 * directory.
1522 	 */
1523 	if (tvp != NULL) {
1524 		KASSERT(tde != NULL);
1525 		KASSERT(tde->td_node != NULL);
1526 		if (fvp->v_type == VDIR && tvp->v_type == VDIR)
1527 			error = ((tde->td_node->tn_size > 0)? ENOTEMPTY : 0);
1528 		else if (fvp->v_type == VDIR && tvp->v_type != VDIR)
1529 			error = ENOTDIR;
1530 		else if (fvp->v_type != VDIR && tvp->v_type == VDIR)
1531 			error = EISDIR;
1532 		else
1533 			error = 0;
1534 		if (error)
1535 			goto out_locked;
1536 		KASSERT((fvp->v_type == VDIR) == (tvp->v_type == VDIR));
1537 	}
1538 
1539 	/*
1540 	 * Authorize the rename.
1541 	 */
1542 	error = tmpfs_rename_check_possible(fdnode, fde->td_node,
1543 	    tdnode, (tde? tde->td_node : NULL));
1544 	if (error)
1545 		goto out_locked;
1546 	error = tmpfs_rename_check_permitted(cred, fdnode, fde->td_node,
1547 	    tdnode, (tde? tde->td_node : NULL));
1548 	if (error)
1549 		goto out_locked;
1550 
1551 	/*
1552 	 * Everything is hunky-dory.  Shuffle the directory entries.
1553 	 */
1554 	tmpfs_rename_attachdetach(tmpfs, fdvp, fde, fvp, tdvp, tde, tvp);
1555 
1556 	/*
1557 	 * Update the directory entry's name necessary, and flag
1558 	 * metadata updates.  A memory allocation failure here is not
1559 	 * OK because we've already committed some changes that we
1560 	 * can't back out at this point, and we have things locked so
1561 	 * we can't sleep, hence the early allocation above.
1562 	 */
1563 	if (newname != NULL) {
1564 		KASSERT(tcnp->cn_namelen <= TMPFS_MAXNAMLEN);
1565 
1566 		tmpfs_strname_free(tmpfs, fde->td_name, fde->td_namelen);
1567 		fde->td_namelen = (uint16_t)tcnp->cn_namelen;
1568 		(void)memcpy(newname, tcnp->cn_nameptr, tcnp->cn_namelen);
1569 		/* Commit newname and don't free it on the way out.  */
1570 		fde->td_name = newname;
1571 		newname = NULL;
1572 
1573 		tmpfs_update(fde->td_node, TMPFS_NODE_CHANGED);
1574 		tmpfs_update(tdnode, TMPFS_NODE_MODIFIED);
1575 	}
1576 
1577 success:
1578 	VN_KNOTE(fvp, NOTE_RENAME);
1579 	tmpfs_rename_cache_purge(fdvp, fvp, tdvp, tvp);
1580 	error = 0;
1581 
1582 out_locked:
1583 	tmpfs_rename_exit(tmpfs, fdvp, fvp, tdvp, tvp);
1584 
1585 out_unlocked:
1586 	/* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
1587 	/* KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
1588 	/* KASSERT((fvp == NULL) || (VOP_ISLOCKED(fvp) != LK_EXCLUSIVE)); */
1589 	/* KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */
1590 
1591 	if (newname != NULL)
1592 		tmpfs_strname_free(tmpfs, newname, tcnp->cn_namelen);
1593 
1594 	return error;
1595 }
1596 
1597 /*
1598  * Look up fcnp in fdnode/fdvp and store its directory entry in fde_ret
1599  * and the associated vnode in fvp_ret; fail if not found.  Look up
1600  * tcnp in tdnode/tdvp and store its directory entry in tde_ret and the
1601  * associated vnode in tvp_ret; store null instead if not found.  Fail
1602  * if anything has been mounted on any of the nodes involved.
1603  *
1604  * fdvp and tdvp must be referenced.
1605  *
1606  * On entry, nothing is locked.
1607  *
1608  * On success, everything is locked, and *fvp_ret, and *tvp_ret if
1609  * nonnull, are referenced.  The only pairs of vnodes that may be
1610  * identical are {fdvp, tdvp} and {fvp, tvp}.
1611  *
1612  * On failure, everything remains as was.
1613  *
1614  * Locking everything including the source and target nodes is
1615  * necessary to make sure that, e.g., link count updates are OK.  The
1616  * locking order is, in general, ancestor-first, matching the order you
1617  * need to use to look up a descendant anyway.
1618  */
1619 int
1620 tmpfs_rename_enter(struct mount *mount, struct tmpfs_mount *tmpfs,
1621     struct ucred *cred,
1622     struct vnode *fdvp, struct tmpfs_node *fdnode, struct componentname *fcnp,
1623     struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
1624     struct vnode *tdvp, struct tmpfs_node *tdnode, struct componentname *tcnp,
1625     struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret)
1626 {
1627 	int error;
1628 
1629 	KASSERT(mount != NULL);
1630 	KASSERT(tmpfs != NULL);
1631 	KASSERT(fdvp != NULL);
1632 	KASSERT(fdnode != NULL);
1633 	KASSERT(fcnp != NULL);
1634 	KASSERT(fde_ret != NULL);
1635 	KASSERT(fvp_ret != NULL);
1636 	KASSERT(tdvp != NULL);
1637 	KASSERT(tdnode != NULL);
1638 	KASSERT(tcnp != NULL);
1639 	KASSERT(tde_ret != NULL);
1640 	KASSERT(tvp_ret != NULL);
1641 	KASSERT(fdnode->tn_vnode == fdvp);
1642 	KASSERT(tdnode->tn_vnode == tdvp);
1643 	KASSERT(fdnode->tn_type == VDIR);
1644 	KASSERT(tdnode->tn_type == VDIR);
1645 
1646 	if (fdvp == tdvp) {
1647 		KASSERT(fdnode == tdnode);
1648 		error = tmpfs_rename_enter_common(mount, tmpfs, cred, fdvp,
1649 		    fdnode, fcnp, fde_ret, fvp_ret, tcnp, tde_ret, tvp_ret);
1650 	} else {
1651 		KASSERT(fdnode != tdnode);
1652 		error = tmpfs_rename_enter_separate(mount, tmpfs, cred,
1653 		    fdvp, fdnode, fcnp, fde_ret, fvp_ret,
1654 		    tdvp, tdnode, tcnp, tde_ret, tvp_ret);
1655 	}
1656 
1657 	if (error)
1658 		return error;
1659 
1660 	KASSERT(*fde_ret != NULL);
1661 	KASSERT(*fvp_ret != NULL);
1662 	KASSERT((*tde_ret == NULL) == (*tvp_ret == NULL));
1663 	KASSERT((*tde_ret == NULL) || ((*tde_ret)->td_node != NULL));
1664 	KASSERT((*tde_ret == NULL) ||
1665 	    ((*tde_ret)->td_node->tn_vnode == *tvp_ret));
1666 	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
1667 	KASSERT(VOP_ISLOCKED(*fvp_ret) == LK_EXCLUSIVE);
1668 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
1669 	KASSERT((*tvp_ret == NULL) ||
1670 	    (VOP_ISLOCKED(*tvp_ret) == LK_EXCLUSIVE));
1671 	KASSERT(*fvp_ret != fdvp);
1672 	KASSERT(*fvp_ret != tdvp);
1673 	KASSERT(*tvp_ret != fdvp);
1674 	KASSERT(*tvp_ret != tdvp);
1675 	return 0;
1676 }
1677 
1678 /*
1679  * Lock and look up with a common source/target directory.
1680  */
1681 int
1682 tmpfs_rename_enter_common(struct mount *mount, struct tmpfs_mount *tmpfs,
1683     struct ucred *cred,
1684     struct vnode *dvp, struct tmpfs_node *dnode,
1685     struct componentname *fcnp,
1686     struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
1687     struct componentname *tcnp,
1688     struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret)
1689 {
1690 	struct tmpfs_dirent *fde, *tde;
1691 	struct vnode *fvp, *tvp;
1692 	int error;
1693 
1694 	error = tmpfs_rename_lock_directory(dvp, dnode);
1695 	if (error)
1696 		goto fail0;
1697 
1698 	/* Did we lose a race with mount?  */
1699 	if (dvp->v_mountedhere != NULL) {
1700 		error = EBUSY;
1701 		goto fail1;
1702 	}
1703 
1704 	/* Make sure the caller may read the directory.  */
1705 	error = VOP_ACCESS(dvp, VEXEC, cred, curproc);
1706 	if (error)
1707 		goto fail1;
1708 
1709 	/*
1710 	 * The order in which we lock the source and target nodes is
1711 	 * irrelevant because there can only be one rename on this
1712 	 * directory in flight at a time, and we have it locked.
1713 	 */
1714 
1715 	fde = tmpfs_dir_lookup(dnode, fcnp);
1716 	if (fde == NULL) {
1717 		error = ENOENT;
1718 		goto fail1;
1719 	}
1720 
1721 	KASSERT(fde->td_node != NULL);
1722 	/* We ruled out `.' earlier.  */
1723 	KASSERT(fde->td_node != dnode);
1724 	/* We ruled out `..' earlier.  */
1725 	KASSERT(fde->td_node != dnode->tn_spec.tn_dir.tn_parent);
1726 	rw_enter_write(&fde->td_node->tn_nlock);
1727 	error = tmpfs_vnode_get(mount, fde->td_node, &fvp);
1728 	if (error)
1729 		goto fail1;
1730 	KASSERT(fvp != NULL);
1731 	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
1732 	KASSERT(fvp != dvp);
1733 	KASSERT(fvp->v_mount == mount);
1734 
1735 	/* Refuse to rename a mount point.  */
1736 	if ((fvp->v_type == VDIR) && (fvp->v_mountedhere != NULL)) {
1737 		error = EBUSY;
1738 		goto fail2;
1739 	}
1740 
1741 	tde = tmpfs_dir_lookup(dnode, tcnp);
1742 	if (tde == NULL) {
1743 		tvp = NULL;
1744 	} else {
1745 		KASSERT(tde->td_node != NULL);
1746 		/* We ruled out `.' earlier.  */
1747 		KASSERT(tde->td_node != dnode);
1748 		/* We ruled out `..' earlier.  */
1749 		KASSERT(tde->td_node != dnode->tn_spec.tn_dir.tn_parent);
1750 		if (tde->td_node != fde->td_node) {
1751 			rw_enter_write(&tde->td_node->tn_nlock);
1752 			error = tmpfs_vnode_get(mount, tde->td_node, &tvp);
1753 			if (error)
1754 				goto fail2;
1755 			KASSERT(tvp->v_mount == mount);
1756 			/* Refuse to rename over a mount point.  */
1757 			if ((tvp->v_type == VDIR) &&
1758 			    (tvp->v_mountedhere != NULL)) {
1759 				error = EBUSY;
1760 				goto fail3;
1761 			}
1762 		} else {
1763 			tvp = fvp;
1764 			vref(tvp);
1765 		}
1766 		KASSERT(tvp != NULL);
1767 		KASSERT(VOP_ISLOCKED(tvp) == LK_EXCLUSIVE);
1768 	}
1769 	KASSERT(tvp != dvp);
1770 
1771 	*fde_ret = fde;
1772 	*fvp_ret = fvp;
1773 	*tde_ret = tde;
1774 	*tvp_ret = tvp;
1775 	return 0;
1776 
1777 fail3:	if (tvp != NULL) {
1778 		if (tvp != fvp)
1779 			vput(tvp);
1780 		else
1781 			vrele(tvp);
1782 	}
1783 
1784 fail2:	vput(fvp);
1785 fail1:	VOP_UNLOCK(dvp, curproc);
1786 fail0:	return error;
1787 }
1788 
1789 /*
1790  * Lock and look up with separate source and target directories.
1791  */
1792 int
1793 tmpfs_rename_enter_separate(struct mount *mount, struct tmpfs_mount *tmpfs,
1794     struct ucred *cred,
1795     struct vnode *fdvp, struct tmpfs_node *fdnode, struct componentname *fcnp,
1796     struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
1797     struct vnode *tdvp, struct tmpfs_node *tdnode, struct componentname *tcnp,
1798     struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret)
1799 {
1800 	struct tmpfs_node *intermediate_node;
1801 	struct tmpfs_dirent *fde, *tde;
1802 	struct vnode *fvp, *tvp;
1803 	int error;
1804 
1805 	KASSERT(fdvp != tdvp);
1806 	KASSERT(fdnode != tdnode);
1807 
1808 #if 0				/* XXX */
1809 	mutex_enter(&tmpfs->tm_rename_lock);
1810 #endif
1811 
1812 	error = tmpfs_rename_genealogy(fdnode, tdnode, &intermediate_node);
1813 	if (error)
1814 		goto fail;
1815 
1816 	/*
1817 	 * intermediate_node == NULL means fdnode is not an ancestor of
1818 	 * tdnode.
1819 	 */
1820 	if (intermediate_node == NULL)
1821 		error = tmpfs_rename_lock(mount, cred, ENOTEMPTY,
1822 		    tdvp, tdnode, tcnp, 1, &tde, &tvp,
1823 		    fdvp, fdnode, fcnp, 0, &fde, &fvp);
1824 	else
1825 		error = tmpfs_rename_lock(mount, cred, EINVAL,
1826 		    fdvp, fdnode, fcnp, 0, &fde, &fvp,
1827 		    tdvp, tdnode, tcnp, 1, &tde, &tvp);
1828 	if (error)
1829 		goto fail;
1830 
1831 	KASSERT(fde != NULL);
1832 	KASSERT(fde->td_node != NULL);
1833 
1834 	/*
1835 	 * Reject rename("foo/bar", "foo/bar/baz/quux/zot").
1836 	 */
1837 	if (fde->td_node == intermediate_node) {
1838 		tmpfs_rename_exit(tmpfs, fdvp, fvp, tdvp, tvp);
1839 		return EINVAL;
1840 	}
1841 
1842 	*fde_ret = fde;
1843 	*fvp_ret = fvp;
1844 	*tde_ret = tde;
1845 	*tvp_ret = tvp;
1846 	return 0;
1847 
1848 fail:
1849 #if 0				/* XXX */
1850 	mutex_exit(&tmpfs->tm_rename_lock);
1851 #endif
1852 	return error;
1853 }
1854 
1855 /*
1856  * Unlock everything we locked for rename.
1857  *
1858  * fdvp and tdvp must be referenced.
1859  *
1860  * On entry, everything is locked, and fvp and tvp referenced.
1861  *
1862  * On exit, everything is unlocked, and fvp and tvp are released.
1863  */
1864 void
1865 tmpfs_rename_exit(struct tmpfs_mount *tmpfs,
1866     struct vnode *fdvp, struct vnode *fvp,
1867     struct vnode *tdvp, struct vnode *tvp)
1868 {
1869 
1870 	KASSERT(tmpfs != NULL);
1871 	KASSERT(fdvp != NULL);
1872 	KASSERT(fvp != NULL);
1873 	KASSERT(fdvp != fvp);
1874 	KASSERT(fdvp != tvp);
1875 	KASSERT(tdvp != tvp);
1876 	KASSERT(tdvp != fvp);
1877 	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
1878 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
1879 	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
1880 	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
1881 
1882 	if (tvp != NULL) {
1883 		if (tvp != fvp)
1884 			vput(tvp);
1885 		else
1886 			vrele(tvp);
1887 	}
1888 	VOP_UNLOCK(tdvp, curproc);
1889 	vput(fvp);
1890 	if (fdvp != tdvp)
1891 		VOP_UNLOCK(fdvp, curproc);
1892 
1893 #if 0				/* XXX */
1894 	if (fdvp != tdvp)
1895 		mutex_exit(&tmpfs->tm_rename_lock);
1896 #endif
1897 }
1898 
1899 /*
1900  * Lock a directory, but fail if it has been rmdir'd.
1901  *
1902  * vp must be referenced.
1903  */
1904 int
1905 tmpfs_rename_lock_directory(struct vnode *vp, struct tmpfs_node *node)
1906 {
1907 
1908 	KASSERT(vp != NULL);
1909 	KASSERT(node != NULL);
1910 	KASSERT(node->tn_vnode == vp);
1911 	KASSERT(node->tn_type == VDIR);
1912 
1913 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curproc);
1914 	if (node->tn_spec.tn_dir.tn_parent == NULL) {
1915 		VOP_UNLOCK(vp, curproc);
1916 		return ENOENT;
1917 	}
1918 
1919 	return 0;
1920 }
1921 
1922 /*
1923  * Analyze the genealogy of the source and target nodes.
1924  *
1925  * On success, stores in *intermediate_node_ret either the child of
1926  * fdnode of which tdnode is a descendant, or null if tdnode is not a
1927  * descendant of fdnode at all.
1928  *
1929  * fdnode and tdnode must be unlocked and referenced.  The file
1930  * system's rename lock must also be held, to exclude concurrent
1931  * changes to the file system's genealogy other than rmdir.
1932  *
1933  * XXX This causes an extra lock/unlock of tdnode in the case when
1934  * we're just about to lock it again before locking anything else.
1935  * However, changing that requires reorganizing the code to make it
1936  * even more horrifically obscure.
1937  */
1938 int
1939 tmpfs_rename_genealogy(struct tmpfs_node *fdnode, struct tmpfs_node *tdnode,
1940     struct tmpfs_node **intermediate_node_ret)
1941 {
1942 	struct tmpfs_node *node = tdnode, *parent;
1943 	int error;
1944 
1945 	KASSERT(fdnode != NULL);
1946 	KASSERT(tdnode != NULL);
1947 	KASSERT(fdnode != tdnode);
1948 	KASSERT(intermediate_node_ret != NULL);
1949 
1950 	KASSERT(fdnode->tn_vnode != NULL);
1951 	KASSERT(tdnode->tn_vnode != NULL);
1952 	KASSERT(fdnode->tn_type == VDIR);
1953 	KASSERT(tdnode->tn_type == VDIR);
1954 
1955 	/*
1956 	 * We need to provisionally lock tdnode->tn_vnode to keep rmdir
1957 	 * from deleting it -- or any ancestor -- at an inopportune
1958 	 * moment.
1959 	 */
1960 	error = tmpfs_rename_lock_directory(tdnode->tn_vnode, tdnode);
1961 	if (error)
1962 		return error;
1963 
1964 	for (;;) {
1965 		parent = node->tn_spec.tn_dir.tn_parent;
1966 		KASSERT(parent != NULL);
1967 		KASSERT(parent->tn_type == VDIR);
1968 
1969 		/* Did we hit the root without finding fdnode?  */
1970 		if (parent == node) {
1971 			*intermediate_node_ret = NULL;
1972 			break;
1973 		}
1974 
1975 		/* Did we find that fdnode is an ancestor?  */
1976 		if (parent == fdnode) {
1977 			*intermediate_node_ret = node;
1978 			break;
1979 		}
1980 
1981 		/* Neither -- keep ascending the family tree.  */
1982 		node = parent;
1983 	}
1984 
1985 	VOP_UNLOCK(tdnode->tn_vnode, curproc);
1986 	return 0;
1987 }
1988 
1989 /*
1990  * Lock directories a and b, which must be distinct, and look up and
1991  * lock nodes a and b.  Do a first and then b.  Directory b may not be
1992  * an ancestor of directory a, although directory a may be an ancestor
1993  * of directory b.  Fail with overlap_error if node a is directory b.
1994  * Neither componentname may be `.' or `..'.
1995  *
1996  * a_dvp and b_dvp must be referenced.
1997  *
1998  * On entry, a_dvp and b_dvp are unlocked.
1999  *
2000  * On success,
2001  * . a_dvp and b_dvp are locked,
2002  * . *a_dirent_ret is filled with a directory entry whose node is
2003  *     locked and referenced,
2004  * . *b_vp_ret is filled with the corresponding vnode,
2005  * . *b_dirent_ret is filled either with null or with a directory entry
2006  *     whose node is locked and referenced,
2007  * . *b_vp is filled either with null or with the corresponding vnode,
2008  *     and
2009  * . the only pair of vnodes that may be identical is a_vp and b_vp.
2010  *
2011  * On failure, a_dvp and b_dvp are left unlocked, and *a_dirent_ret,
2012  * *a_vp, *b_dirent_ret, and *b_vp are left alone.
2013  */
2014 int
2015 tmpfs_rename_lock(struct mount *mount, struct ucred *cred, int overlap_error,
2016     struct vnode *a_dvp, struct tmpfs_node *a_dnode,
2017     struct componentname *a_cnp, int a_missing_ok,
2018     struct tmpfs_dirent **a_dirent_ret, struct vnode **a_vp_ret,
2019     struct vnode *b_dvp, struct tmpfs_node *b_dnode,
2020     struct componentname *b_cnp, int b_missing_ok,
2021     struct tmpfs_dirent **b_dirent_ret, struct vnode **b_vp_ret)
2022 {
2023 	struct tmpfs_dirent *a_dirent, *b_dirent;
2024 	struct vnode *a_vp, *b_vp;
2025 	int error;
2026 
2027 	KASSERT(a_dvp != NULL);
2028 	KASSERT(a_dnode != NULL);
2029 	KASSERT(a_cnp != NULL);
2030 	KASSERT(a_dirent_ret != NULL);
2031 	KASSERT(a_vp_ret != NULL);
2032 	KASSERT(b_dvp != NULL);
2033 	KASSERT(b_dnode != NULL);
2034 	KASSERT(b_cnp != NULL);
2035 	KASSERT(b_dirent_ret != NULL);
2036 	KASSERT(b_vp_ret != NULL);
2037 	KASSERT(a_dvp != b_dvp);
2038 	KASSERT(a_dnode != b_dnode);
2039 	KASSERT(a_dnode->tn_vnode == a_dvp);
2040 	KASSERT(b_dnode->tn_vnode == b_dvp);
2041 	KASSERT(a_dnode->tn_type == VDIR);
2042 	KASSERT(b_dnode->tn_type == VDIR);
2043 	KASSERT(a_missing_ok != b_missing_ok);
2044 
2045 	error = tmpfs_rename_lock_directory(a_dvp, a_dnode);
2046 	if (error)
2047 		goto fail0;
2048 
2049 	/* Did we lose a race with mount?  */
2050 	if (a_dvp->v_mountedhere != NULL) {
2051 		error = EBUSY;
2052 		goto fail1;
2053 	}
2054 
2055 	/* Make sure the caller may read the directory.  */
2056 	error = VOP_ACCESS(a_dvp, VEXEC, cred, curproc);
2057 	if (error)
2058 		goto fail1;
2059 
2060 	a_dirent = tmpfs_dir_lookup(a_dnode, a_cnp);
2061 	if (a_dirent != NULL) {
2062 		KASSERT(a_dirent->td_node != NULL);
2063 		/* We ruled out `.' earlier.  */
2064 		KASSERT(a_dirent->td_node != a_dnode);
2065 		/* We ruled out `..' earlier.  */
2066 		KASSERT(a_dirent->td_node !=
2067 		    a_dnode->tn_spec.tn_dir.tn_parent);
2068 		if (a_dirent->td_node == b_dnode) {
2069 			error = overlap_error;
2070 			goto fail1;
2071 		}
2072 		rw_enter_write(&a_dirent->td_node->tn_nlock);
2073 		error = tmpfs_vnode_get(mount, a_dirent->td_node, &a_vp);
2074 		if (error)
2075 			goto fail1;
2076 		KASSERT(a_vp->v_mount == mount);
2077 		/* Refuse to rename (over) a mount point.  */
2078 		if ((a_vp->v_type == VDIR) && (a_vp->v_mountedhere != NULL)) {
2079 			error = EBUSY;
2080 			goto fail2;
2081 		}
2082 	} else if (!a_missing_ok) {
2083 		error = ENOENT;
2084 		goto fail1;
2085 	} else {
2086 		a_vp = NULL;
2087 	}
2088 	KASSERT(a_vp != a_dvp);
2089 	KASSERT(a_vp != b_dvp);
2090 
2091 	error = tmpfs_rename_lock_directory(b_dvp, b_dnode);
2092 	if (error)
2093 		goto fail2;
2094 
2095 	/* Did we lose a race with mount?  */
2096 	if (b_dvp->v_mountedhere != NULL) {
2097 		error = EBUSY;
2098 		goto fail3;
2099 	}
2100 
2101 	/* Make sure the caller may read the directory.  */
2102 	error = VOP_ACCESS(b_dvp, VEXEC, cred, curproc);
2103 	if (error)
2104 		goto fail3;
2105 
2106 	b_dirent = tmpfs_dir_lookup(b_dnode, b_cnp);
2107 	if (b_dirent != NULL) {
2108 		KASSERT(b_dirent->td_node != NULL);
2109 		/* We ruled out `.' earlier.  */
2110 		KASSERT(b_dirent->td_node != b_dnode);
2111 		/* We ruled out `..' earlier.  */
2112 		KASSERT(b_dirent->td_node !=
2113 		    b_dnode->tn_spec.tn_dir.tn_parent);
2114 		/* b is not an ancestor of a.  */
2115 		KASSERT(b_dirent->td_node != a_dnode);
2116 		/* But the source and target nodes might be the same.  */
2117 		if ((a_dirent == NULL) ||
2118 		    (a_dirent->td_node != b_dirent->td_node)) {
2119 			rw_enter_write(&b_dirent->td_node->tn_nlock);
2120 			error = tmpfs_vnode_get(mount, b_dirent->td_node,
2121 			    &b_vp);
2122 			if (error)
2123 				goto fail3;
2124 			KASSERT(b_vp->v_mount == mount);
2125 			KASSERT(a_vp != b_vp);
2126 			/* Refuse to rename (over) a mount point.  */
2127 			if ((b_vp->v_type == VDIR) &&
2128 			    (b_vp->v_mountedhere != NULL)) {
2129 				error = EBUSY;
2130 				goto fail4;
2131 			}
2132 		} else {
2133 			b_vp = a_vp;
2134 			vref(b_vp);
2135 		}
2136 	} else if (!b_missing_ok) {
2137 		error = ENOENT;
2138 		goto fail3;
2139 	} else {
2140 		b_vp = NULL;
2141 	}
2142 	KASSERT(b_vp != a_dvp);
2143 	KASSERT(b_vp != b_dvp);
2144 
2145 	KASSERT(VOP_ISLOCKED(a_dvp) == LK_EXCLUSIVE);
2146 	KASSERT(VOP_ISLOCKED(b_dvp) == LK_EXCLUSIVE);
2147 	KASSERT(a_missing_ok || (a_dirent != NULL));
2148 	KASSERT(a_missing_ok || (a_dirent->td_node != NULL));
2149 	KASSERT(b_missing_ok || (b_dirent != NULL));
2150 	KASSERT(b_missing_ok || (b_dirent->td_node != NULL));
2151 	KASSERT((a_dirent == NULL) || (a_dirent->td_node != NULL));
2152 	KASSERT((a_dirent == NULL) || (a_dirent->td_node->tn_vnode == a_vp));
2153 	KASSERT((b_dirent == NULL) || (b_dirent->td_node != NULL));
2154 	KASSERT((b_dirent == NULL) || (b_dirent->td_node->tn_vnode == b_vp));
2155 	KASSERT((a_vp == NULL) || (VOP_ISLOCKED(a_vp) == LK_EXCLUSIVE));
2156 	KASSERT((b_vp == NULL) || (VOP_ISLOCKED(b_vp) == LK_EXCLUSIVE));
2157 
2158 	*a_dirent_ret = a_dirent;
2159 	*b_dirent_ret = b_dirent;
2160 	*a_vp_ret = a_vp;
2161 	*b_vp_ret = b_vp;
2162 	return 0;
2163 
2164 fail4:	if (b_vp != NULL) {
2165 		KASSERT(VOP_ISLOCKED(b_vp) == LK_EXCLUSIVE);
2166 		if (b_vp != a_vp)
2167 			vput(b_vp);
2168 		else
2169 			vrele(a_vp);
2170 	}
2171 
2172 fail3:	KASSERT(VOP_ISLOCKED(b_dvp) == LK_EXCLUSIVE);
2173 	VOP_UNLOCK(b_dvp, curproc);
2174 
2175 fail2:	if (a_vp != NULL) {
2176 		KASSERT(VOP_ISLOCKED(a_vp) == LK_EXCLUSIVE);
2177 		vput(a_vp);
2178 	}
2179 
2180 fail1:	KASSERT(VOP_ISLOCKED(a_dvp) == LK_EXCLUSIVE);
2181 	VOP_UNLOCK(a_dvp, curproc);
2182 
2183 fail0:	/* KASSERT(VOP_ISLOCKED(a_dvp) != LK_EXCLUSIVE); */
2184 	/* KASSERT(VOP_ISLOCKED(b_dvp) != LK_EXCLUSIVE); */
2185 	/* KASSERT((a_vp == NULL) || (VOP_ISLOCKED(a_vp) != LK_EXCLUSIVE)); */
2186 	/* KASSERT((b_vp == NULL) || (VOP_ISLOCKED(b_vp) != LK_EXCLUSIVE)); */
2187 	return error;
2188 }
2189 
2190 /*
2191  * Shuffle the directory entries to move fvp from the directory fdvp
2192  * into the directory tdvp.  fde is fvp's directory entry in fdvp.  If
2193  * we are overwriting a target node, it is tvp, and tde is its
2194  * directory entry in tdvp.
2195  *
2196  * fdvp, fvp, tdvp, and tvp must all be locked and referenced.
2197  */
2198 void
2199 tmpfs_rename_attachdetach(struct tmpfs_mount *tmpfs,
2200     struct vnode *fdvp, struct tmpfs_dirent *fde, struct vnode *fvp,
2201     struct vnode *tdvp, struct tmpfs_dirent *tde, struct vnode *tvp)
2202 {
2203 
2204 	KASSERT(tmpfs != NULL);
2205 	KASSERT(fdvp != NULL);
2206 	KASSERT(fde != NULL);
2207 	KASSERT(fvp != NULL);
2208 	KASSERT(tdvp != NULL);
2209 	KASSERT(fde->td_node != NULL);
2210 	KASSERT(fde->td_node->tn_vnode == fvp);
2211 	KASSERT((tde == NULL) == (tvp == NULL));
2212 	KASSERT((tde == NULL) || (tde->td_node != NULL));
2213 	KASSERT((tde == NULL) || (tde->td_node->tn_vnode == tvp));
2214 	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
2215 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
2216 	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
2217 	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
2218 
2219 	/*
2220 	 * If we are moving from one directory to another, detach the
2221 	 * source entry and reattach it to the target directory.
2222 	 */
2223 	if (fdvp != tdvp) {
2224 		/* tmpfs_dir_detach clobbers fde->td_node, so save it.  */
2225 		struct tmpfs_node *fnode = fde->td_node;
2226 		tmpfs_node_t *fdnode = VP_TO_TMPFS_DIR(fdvp);
2227 		tmpfs_node_t *tdnode = VP_TO_TMPFS_DIR(tdvp);
2228 		tmpfs_dir_detach(fdnode, fde);
2229 		tmpfs_dir_attach(tdnode, fde, fnode);
2230 	} else if (tvp == NULL) {
2231 		/*
2232 		 * We are changing the directory.  tmpfs_dir_attach and
2233 		 * tmpfs_dir_detach note the events for us, but for
2234 		 * this case we don't call them, so we must note the
2235 		 * event explicitly.
2236 		 */
2237 		VN_KNOTE(fdvp, NOTE_WRITE);
2238 	}
2239 
2240 	/*
2241 	 * If we are replacing an existing target entry, delete it.
2242 	 */
2243 	if (tde != NULL) {
2244 		tmpfs_node_t *tdnode = VP_TO_TMPFS_DIR(tdvp);
2245 		KASSERT(tvp != NULL);
2246 		KASSERT(tde->td_node != NULL);
2247 		KASSERT((fvp->v_type == VDIR) == (tvp->v_type == VDIR));
2248 		if (tde->td_node->tn_type == VDIR) {
2249 			KASSERT(tde->td_node->tn_size == 0);
2250 			KASSERT(tde->td_node->tn_links == 2);
2251 			/* Decrement the extra link count for `.' so
2252 			 * the vnode will be recycled when released.  */
2253 			tde->td_node->tn_links--;
2254 		}
2255 		tmpfs_dir_detach(tdnode, tde);
2256 		tmpfs_free_dirent(tmpfs, tde);
2257 	}
2258 }
2259 
2260 /*
2261  * Remove the entry de for the non-directory vp from the directory dvp.
2262  *
2263  * Everything must be locked and referenced.
2264  */
2265 int
2266 tmpfs_do_remove(struct tmpfs_mount *tmpfs, struct vnode *dvp,
2267     struct tmpfs_node *dnode, struct tmpfs_dirent *de, struct vnode *vp,
2268     struct ucred *cred)
2269 {
2270 	int error;
2271 
2272 	KASSERT(tmpfs != NULL);
2273 	KASSERT(dvp != NULL);
2274 	KASSERT(dnode != NULL);
2275 	KASSERT(de != NULL);
2276 	KASSERT(vp != NULL);
2277 	KASSERT(dnode->tn_vnode == dvp);
2278 	KASSERT(de->td_node != NULL);
2279 	KASSERT(de->td_node->tn_vnode == vp);
2280 	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
2281 	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
2282 
2283 	error = tmpfs_remove_check_possible(dnode, de->td_node);
2284 	if (error)
2285 		return error;
2286 
2287 	error = tmpfs_remove_check_permitted(cred, dnode, de->td_node);
2288 	if (error)
2289 		return error;
2290 
2291 	/*
2292 	 * If not root and directory is sticky, check for permission on
2293 	 * directory or on file. This implements append-only directories.
2294 	 */
2295 	if ((dnode->tn_mode & S_ISTXT) != 0)
2296 		if (cred->cr_uid != 0 && cred->cr_uid != dnode->tn_uid &&
2297 		    cred->cr_uid != de->td_node->tn_uid)
2298 			return EPERM;
2299 
2300 	tmpfs_dir_detach(dnode, de);
2301 	tmpfs_free_dirent(tmpfs, de);
2302 
2303 	return 0;
2304 }
2305 
2306 /*
2307  * Check whether a rename is possible independent of credentials.
2308  *
2309  * Everything must be locked and referenced.
2310  */
2311 int
2312 tmpfs_rename_check_possible(
2313     struct tmpfs_node *fdnode, struct tmpfs_node *fnode,
2314     struct tmpfs_node *tdnode, struct tmpfs_node *tnode)
2315 {
2316 
2317 	KASSERT(fdnode != NULL);
2318 	KASSERT(fnode != NULL);
2319 	KASSERT(tdnode != NULL);
2320 	KASSERT(fdnode != fnode);
2321 	KASSERT(tdnode != tnode);
2322 	KASSERT(fnode != tnode);
2323 	KASSERT(fdnode->tn_vnode != NULL);
2324 	KASSERT(fnode->tn_vnode != NULL);
2325 	KASSERT(tdnode->tn_vnode != NULL);
2326 	KASSERT((tnode == NULL) || (tnode->tn_vnode != NULL));
2327 	KASSERT(VOP_ISLOCKED(fdnode->tn_vnode) == LK_EXCLUSIVE);
2328 	KASSERT(VOP_ISLOCKED(fnode->tn_vnode) == LK_EXCLUSIVE);
2329 	KASSERT(VOP_ISLOCKED(tdnode->tn_vnode) == LK_EXCLUSIVE);
2330 	KASSERT((tnode == NULL) ||
2331 	    (VOP_ISLOCKED(tnode->tn_vnode) == LK_EXCLUSIVE));
2332 
2333 	/*
2334 	 * If fdnode is immutable, we can't write to it.  If fdnode is
2335 	 * append-only, the only change we can make is to add entries
2336 	 * to it.  If fnode is immutable, we can't change the links to
2337 	 * it.  If fnode is append-only...well, this is what UFS does.
2338 	 */
2339 	if ((fdnode->tn_flags | fnode->tn_flags) & (IMMUTABLE | APPEND))
2340 		return EPERM;
2341 
2342 	/*
2343 	 * If tdnode is immutable, we can't write to it.  If tdnode is
2344 	 * append-only, we can add entries, but we can't change
2345 	 * existing entries.
2346 	 */
2347 	if (tdnode->tn_flags & (IMMUTABLE | (tnode? APPEND : 0)))
2348 		return EPERM;
2349 
2350 	/*
2351 	 * If tnode is immutable, we can't replace links to it.  If
2352 	 * tnode is append-only...well, this is what UFS does.
2353 	 */
2354 	if (tnode != NULL) {
2355 		KASSERT(tnode != NULL);
2356 		if ((tnode->tn_flags & (IMMUTABLE | APPEND)) != 0)
2357 			return EPERM;
2358 	}
2359 
2360 	return 0;
2361 }
2362 
2363 /*
2364  * Check whether a rename is permitted given our credentials.
2365  *
2366  * Everything must be locked and referenced.
2367  */
2368 int
2369 tmpfs_rename_check_permitted(struct ucred *cred,
2370     struct tmpfs_node *fdnode, struct tmpfs_node *fnode,
2371     struct tmpfs_node *tdnode, struct tmpfs_node *tnode)
2372 {
2373 	int error;
2374 
2375 	KASSERT(fdnode != NULL);
2376 	KASSERT(fnode != NULL);
2377 	KASSERT(tdnode != NULL);
2378 	KASSERT(fdnode != fnode);
2379 	KASSERT(tdnode != tnode);
2380 	KASSERT(fnode != tnode);
2381 	KASSERT(fdnode->tn_vnode != NULL);
2382 	KASSERT(fnode->tn_vnode != NULL);
2383 	KASSERT(tdnode->tn_vnode != NULL);
2384 	KASSERT((tnode == NULL) || (tnode->tn_vnode != NULL));
2385 	KASSERT(VOP_ISLOCKED(fdnode->tn_vnode) == LK_EXCLUSIVE);
2386 	KASSERT(VOP_ISLOCKED(fnode->tn_vnode) == LK_EXCLUSIVE);
2387 	KASSERT(VOP_ISLOCKED(tdnode->tn_vnode) == LK_EXCLUSIVE);
2388 	KASSERT((tnode == NULL) ||
2389 	    (VOP_ISLOCKED(tnode->tn_vnode) == LK_EXCLUSIVE));
2390 
2391 	/*
2392 	 * We need to remove or change an entry in the source directory.
2393 	 */
2394 	error = VOP_ACCESS(fdnode->tn_vnode, VWRITE, cred, curproc);
2395 	if (error)
2396 		return error;
2397 
2398 	/*
2399 	 * If we are changing directories, then we need to write to the
2400 	 * target directory to add or change an entry.  Also, if fnode
2401 	 * is a directory, we need to write to it to change its `..'
2402 	 * entry.
2403 	 */
2404 	if (fdnode != tdnode) {
2405 		error = VOP_ACCESS(tdnode->tn_vnode, VWRITE, cred, curproc);
2406 		if (error)
2407 			return error;
2408 		if (fnode->tn_type == VDIR) {
2409 			error = VOP_ACCESS(fnode->tn_vnode, VWRITE, cred,
2410 			    curproc);
2411 			if (error)
2412 				return error;
2413 		}
2414 	}
2415 
2416 	error = tmpfs_check_sticky(cred, fdnode, fnode);
2417 	if (error)
2418 		return error;
2419 
2420 	if (TMPFS_DIRSEQ_FULL(tdnode))
2421 		return (ENOSPC);
2422 
2423 	error = tmpfs_check_sticky(cred, tdnode, tnode);
2424 	if (error)
2425 		return error;
2426 
2427 	return 0;
2428 }
2429 
2430 /*
2431  * Check whether removing node's entry in dnode is possible independent
2432  * of credentials.
2433  *
2434  * Everything must be locked and referenced.
2435  */
2436 int
2437 tmpfs_remove_check_possible(struct tmpfs_node *dnode, struct tmpfs_node *node)
2438 {
2439 
2440 	KASSERT(dnode != NULL);
2441 	KASSERT(dnode->tn_vnode != NULL);
2442 	KASSERT(node != NULL);
2443 	KASSERT(dnode != node);
2444 	KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE);
2445 	KASSERT(VOP_ISLOCKED(node->tn_vnode) == LK_EXCLUSIVE);
2446 
2447 	/*
2448 	 * We want to delete the entry.  If dnode is immutable, we
2449 	 * can't write to it to delete the entry.  If dnode is
2450 	 * append-only, the only change we can make is to add entries,
2451 	 * so we can't delete entries.  If node is immutable, we can't
2452 	 * change the links to it, so we can't delete the entry.  If
2453 	 * node is append-only...well, this is what UFS does.
2454 	 */
2455 	if ((dnode->tn_flags | node->tn_flags) & (IMMUTABLE | APPEND))
2456 		return EPERM;
2457 
2458 	return 0;
2459 }
2460 
2461 /*
2462  * Check whether removing node's entry in dnode is permitted given our
2463  * credentials.
2464  *
2465  * Everything must be locked and referenced.
2466  */
2467 int
2468 tmpfs_remove_check_permitted(struct ucred *cred,
2469     struct tmpfs_node *dnode, struct tmpfs_node *node)
2470 {
2471 	int error;
2472 
2473 	KASSERT(dnode != NULL);
2474 	KASSERT(dnode->tn_vnode != NULL);
2475 	KASSERT(node != NULL);
2476 	KASSERT(dnode != node);
2477 	KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE);
2478 	KASSERT(VOP_ISLOCKED(node->tn_vnode) == LK_EXCLUSIVE);
2479 
2480 	/*
2481 	 * Check whether we are permitted to write to the source
2482 	 * directory in order to delete an entry from it.
2483 	 */
2484 	error = VOP_ACCESS(dnode->tn_vnode, VWRITE, cred, curproc);
2485 	if (error)
2486 		return error;
2487 
2488 	error = tmpfs_check_sticky(cred, dnode, node);
2489 	if (error)
2490 		return error;
2491 
2492 	return 0;
2493 }
2494 
2495 /*
2496  * Check whether we may change an entry in a sticky directory.  If the
2497  * directory is sticky, the user must own either the directory or, if
2498  * it exists, the node, in order to change the entry.
2499  *
2500  * Everything must be locked and referenced.
2501  */
2502 int
2503 tmpfs_check_sticky(struct ucred *cred,
2504     struct tmpfs_node *dnode, struct tmpfs_node *node)
2505 {
2506 
2507 	KASSERT(dnode != NULL);
2508 	KASSERT(dnode->tn_vnode != NULL);
2509 	KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE);
2510 	KASSERT((node == NULL) || (node->tn_vnode != NULL));
2511 	KASSERT((node == NULL) ||
2512 	    (VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE));
2513 
2514 	if (node == NULL)
2515 		return 0;
2516 
2517 	if (dnode->tn_mode & S_ISTXT) {
2518 		if (cred->cr_uid != 0 &&
2519 		    cred->cr_uid != dnode->tn_uid &&
2520 		    cred->cr_uid != node->tn_uid)
2521 			return EPERM;
2522 	}
2523 
2524 	return 0;
2525 }
2526 
2527 void
2528 tmpfs_rename_cache_purge(struct vnode *fdvp, struct vnode *fvp,
2529     struct vnode *tdvp, struct vnode *tvp)
2530 {
2531 
2532 	KASSERT(fdvp != NULL);
2533 	KASSERT(fvp != NULL);
2534 	KASSERT(tdvp != NULL);
2535 	KASSERT(fdvp != fvp);
2536 	KASSERT(fdvp != tvp);
2537 	KASSERT(tdvp != fvp);
2538 	KASSERT(tdvp != tvp);
2539 	KASSERT(fvp != tvp);
2540 	KASSERT(fdvp->v_type == VDIR);
2541 	KASSERT(tdvp->v_type == VDIR);
2542 
2543 	/*
2544 	 * XXX What actually needs to be purged?
2545 	 */
2546 
2547 	cache_purge(fdvp);
2548 
2549 	if (fvp->v_type == VDIR)
2550 		cache_purge(fvp);
2551 
2552 	if (tdvp != fdvp)
2553 		cache_purge(tdvp);
2554 
2555 	if ((tvp != NULL) && (tvp->v_type == VDIR))
2556 		cache_purge(tvp);
2557 }
2558 
2559 void
2560 tmpfs_rename_abort(void *v)
2561 {
2562 	struct vop_rename_args *ap = v;
2563 	struct vnode *fdvp = ap->a_fdvp;
2564 	struct vnode *fvp = ap->a_fvp;
2565 	struct componentname *fcnp = ap->a_fcnp;
2566 	struct vnode *tdvp = ap->a_tdvp;
2567 	struct vnode *tvp = ap->a_tvp;
2568 	struct componentname *tcnp = ap->a_tcnp;
2569 
2570 	VOP_ABORTOP(tdvp, tcnp);
2571 	if (tdvp == tvp)
2572 		vrele(tdvp);
2573 	else
2574 		vput(tdvp);
2575 	if (tvp != NULL)
2576 		vput(tvp);
2577 	VOP_ABORTOP(fdvp, fcnp);
2578 	vrele(fdvp);
2579 	vrele(fvp);
2580 }
2581 
2582 void filt_tmpfsdetach(struct knote *kn);
2583 int filt_tmpfsread(struct knote *kn, long hint);
2584 int filt_tmpfswrite(struct knote *kn, long hint);
2585 int filt_tmpfsvnode(struct knote *kn, long hint);
2586 
2587 struct filterops tmpfsread_filtops =
2588 	{ 1, NULL, filt_tmpfsdetach, filt_tmpfsread };
2589 struct filterops tmpfswrite_filtops =
2590 	{ 1, NULL, filt_tmpfsdetach, filt_tmpfswrite };
2591 struct filterops tmpfsvnode_filtops =
2592 	{ 1, NULL, filt_tmpfsdetach, filt_tmpfsvnode };
2593 
2594 int
2595 tmpfs_kqfilter(void *v)
2596 {
2597 	struct vop_kqfilter_args *ap = v;
2598 	struct vnode *vp = ap->a_vp;
2599 	struct knote *kn = ap->a_kn;
2600 
2601 	switch (kn->kn_filter) {
2602 	case EVFILT_READ:
2603 		kn->kn_fop = &tmpfsread_filtops;
2604 		break;
2605 	case EVFILT_WRITE:
2606 		kn->kn_fop = &tmpfswrite_filtops;
2607 		break;
2608 	case EVFILT_VNODE:
2609 		kn->kn_fop = &tmpfsvnode_filtops;
2610 		break;
2611 	default:
2612 		return (EINVAL);
2613 	}
2614 
2615 	kn->kn_hook = (caddr_t)vp;
2616 
2617 	SLIST_INSERT_HEAD(&vp->v_selectinfo.si_note, kn, kn_selnext);
2618 
2619 	return (0);
2620 }
2621 
2622 void
2623 filt_tmpfsdetach(struct knote *kn)
2624 {
2625 	struct vnode *vp = (struct vnode *)kn->kn_hook;
2626 
2627 	SLIST_REMOVE(&vp->v_selectinfo.si_note, kn, knote, kn_selnext);
2628 }
2629 
2630 int
2631 filt_tmpfsread(struct knote *kn, long hint)
2632 {
2633 	struct vnode *vp = (struct vnode *)kn->kn_hook;
2634 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
2635 
2636 	/*
2637 	 * filesystem is gone, so set the EOF flag and schedule
2638 	 * the knote for deletion.
2639 	 */
2640 	if (hint == NOTE_REVOKE) {
2641 		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
2642 		return (1);
2643 	}
2644 
2645 	kn->kn_data = node->tn_size - kn->kn_fp->f_offset;
2646 	if (kn->kn_data == 0 && kn->kn_sfflags & NOTE_EOF) {
2647 		kn->kn_fflags |= NOTE_EOF;
2648 		return (1);
2649 	}
2650 
2651 	return (kn->kn_data != 0);
2652 }
2653 
2654 int
2655 filt_tmpfswrite(struct knote *kn, long hint)
2656 {
2657 	/*
2658 	 * filesystem is gone, so set the EOF flag and schedule
2659 	 * the knote for deletion.
2660 	 */
2661 	if (hint == NOTE_REVOKE) {
2662 		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
2663 		return (1);
2664 	}
2665 
2666 	kn->kn_data = 0;
2667 	return (1);
2668 }
2669 
2670 int
2671 filt_tmpfsvnode(struct knote *kn, long hint)
2672 {
2673 	if (kn->kn_sfflags & hint)
2674 		kn->kn_fflags |= hint;
2675 	if (hint == NOTE_REVOKE) {
2676 		kn->kn_flags |= EV_EOF;
2677 		return (1);
2678 	}
2679 	return (kn->kn_fflags != 0);
2680 }
2681