xref: /openbsd-src/sys/tmpfs/tmpfs_vnops.c (revision cb39b41371628601fbe4c618205356d538b9d08a)
1 /*	$OpenBSD: tmpfs_vnops.c,v 1.22 2015/04/17 04:43:21 guenther Exp $	*/
2 /*	$NetBSD: tmpfs_vnops.c,v 1.100 2012/11/05 17:27:39 dholland Exp $	*/
3 
4 /*
5  * Copyright (c) 2005, 2006, 2007, 2012 The NetBSD Foundation, Inc.
6  * Copyright (c) 2013 Pedro Martelletto
7  * All rights reserved.
8  *
9  * This code is derived from software contributed to The NetBSD Foundation
10  * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
11  * 2005 program, and by Taylor R Campbell.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  * POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 /*
36  * tmpfs vnode interface.
37  */
38 
39 #include <sys/param.h>
40 #include <sys/fcntl.h>
41 #include <sys/event.h>
42 #include <sys/namei.h>
43 #include <sys/stat.h>
44 #include <sys/uio.h>
45 #include <sys/unistd.h>
46 #include <sys/vnode.h>
47 #include <sys/lockf.h>
48 #include <sys/poll.h>
49 #include <sys/file.h>
50 
51 #include <miscfs/fifofs/fifo.h>
52 #include <tmpfs/tmpfs_vnops.h>
53 #include <tmpfs/tmpfs.h>
54 
55 int tmpfs_kqfilter(void *v);
56 
57 /*
58  * vnode operations vector used for files stored in a tmpfs file system.
59  */
60 struct vops tmpfs_vops = {
61 	.vop_lookup	= tmpfs_lookup,
62 	.vop_create	= tmpfs_create,
63 	.vop_mknod	= tmpfs_mknod,
64 	.vop_open	= tmpfs_open,
65 	.vop_close	= tmpfs_close,
66 	.vop_access	= tmpfs_access,
67 	.vop_getattr	= tmpfs_getattr,
68 	.vop_setattr	= tmpfs_setattr,
69 	.vop_read	= tmpfs_read,
70 	.vop_write	= tmpfs_write,
71 	.vop_ioctl	= tmpfs_ioctl,
72 	.vop_poll	= tmpfs_poll,
73 	.vop_kqfilter	= tmpfs_kqfilter,
74 	.vop_revoke	= vop_generic_revoke,
75 	.vop_fsync	= tmpfs_fsync,
76 	.vop_remove	= tmpfs_remove,
77 	.vop_link	= tmpfs_link,
78 	.vop_rename	= tmpfs_rename,
79 	.vop_mkdir	= tmpfs_mkdir,
80 	.vop_rmdir	= tmpfs_rmdir,
81 	.vop_symlink	= tmpfs_symlink,
82 	.vop_readdir	= tmpfs_readdir,
83 	.vop_readlink	= tmpfs_readlink,
84 	.vop_abortop	= vop_generic_abortop,
85 	.vop_inactive	= tmpfs_inactive,
86 	.vop_reclaim	= tmpfs_reclaim,
87 	.vop_lock	= tmpfs_lock,
88 	.vop_unlock	= tmpfs_unlock,
89 	.vop_bmap	= vop_generic_bmap,
90 	.vop_strategy	= tmpfs_strategy,
91 	.vop_print	= tmpfs_print,
92 	.vop_islocked	= tmpfs_islocked,
93 	.vop_pathconf	= tmpfs_pathconf,
94 	.vop_advlock	= tmpfs_advlock,
95 	.vop_bwrite	= tmpfs_bwrite,
96 };
97 
98 /*
99  * tmpfs_lookup: path name traversal routine.
100  *
101  * Arguments: dvp (directory being searched), vpp (result),
102  * cnp (component name - path).
103  *
104  * => Caller holds a reference and lock on dvp.
105  * => We return looked-up vnode (vpp) locked, with a reference held.
106  */
107 int
108 tmpfs_lookup(void *v)
109 {
110 	struct vop_lookup_args /* {
111 		struct vnode *a_dvp;
112 		struct vnode **a_vpp;
113 		struct componentname *a_cnp;
114 	} */ *ap = v;
115 	struct vnode *dvp = ap->a_dvp, **vpp = ap->a_vpp;
116 	struct componentname *cnp = ap->a_cnp;
117 	struct ucred *cred = cnp->cn_cred;
118 	const int lastcn = (cnp->cn_flags & ISLASTCN) != 0;
119 	const int lockparent = (cnp->cn_flags & LOCKPARENT) != 0;
120 	tmpfs_node_t *dnode, *tnode;
121 	tmpfs_dirent_t *de;
122 	int cachefound;
123 	int error;
124 
125 	KASSERT(VOP_ISLOCKED(dvp));
126 
127 	dnode = VP_TO_TMPFS_DIR(dvp);
128 	cnp->cn_flags &= ~PDIRUNLOCK;
129 	*vpp = NULL;
130 
131 	/* Check accessibility of directory. */
132 	error = VOP_ACCESS(dvp, VEXEC, cred, curproc);
133 	if (error) {
134 		goto out;
135 	}
136 
137 	/*
138 	 * If requesting the last path component on a read-only file system
139 	 * with a write operation, deny it.
140 	 */
141 	if (lastcn && (dvp->v_mount->mnt_flag & MNT_RDONLY) != 0 &&
142 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
143 		error = EROFS;
144 		goto out;
145 	}
146 
147 	/*
148 	 * Avoid doing a linear scan of the directory if the requested
149 	 * directory/name couple is already in the cache.
150 	 */
151 	cachefound = cache_lookup(dvp, vpp, cnp);
152 	if (cachefound == ENOENT /* && *vpp == NULLVP */)
153 		return ENOENT; /* Negative cache hit. */
154 	else if (cachefound != -1)
155 		return 0; /* Found in cache. */
156 
157 	if (cnp->cn_flags & ISDOTDOT) {
158 		tmpfs_node_t *pnode;
159 
160 		/*
161 		 * Lookup of ".." case.
162 		 */
163 		if (lastcn) {
164 			if (cnp->cn_nameiop == RENAME) {
165 				error = EINVAL;
166 				goto out;
167 			}
168 			if (cnp->cn_nameiop == DELETE) {
169 				/* Keep the name for tmpfs_rmdir(). */
170 				cnp->cn_flags |= SAVENAME;
171 			}
172 		}
173 		KASSERT(dnode->tn_type == VDIR);
174 		pnode = dnode->tn_spec.tn_dir.tn_parent;
175 		if (pnode == NULL) {
176 			error = ENOENT;
177 			goto out;
178 		}
179 
180 		/*
181 		 * Lock the parent tn_nlock before releasing the vnode lock,
182 		 * and thus prevents parent from disappearing.
183 		 */
184 		rw_enter_write(&pnode->tn_nlock);
185 		VOP_UNLOCK(dvp, 0, curproc);
186 
187 		/*
188 		 * Get a vnode of the '..' entry and re-acquire the lock.
189 		 * Release the tn_nlock.
190 		 */
191 		error = tmpfs_vnode_get(dvp->v_mount, pnode, vpp);
192 		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, curproc);
193 		goto out;
194 
195 	} else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
196 		/*
197 		 * Lookup of "." case.
198 		 */
199 		if (lastcn && cnp->cn_nameiop == RENAME) {
200 			error = EISDIR;
201 			goto out;
202 		}
203 		vref(dvp);
204 		*vpp = dvp;
205 		error = 0;
206 		goto done;
207 	}
208 
209 	/*
210 	 * Other lookup cases: perform directory scan.
211 	 */
212 	de = tmpfs_dir_lookup(dnode, cnp);
213 	if (de == NULL) {
214 		/*
215 		 * The entry was not found in the directory.  This is valid
216 		 * if we are creating or renaming an entry and are working
217 		 * on the last component of the path name.
218 		 */
219 		if (lastcn && (cnp->cn_nameiop == CREATE ||
220 		    cnp->cn_nameiop == RENAME)) {
221 			error = VOP_ACCESS(dvp, VWRITE, cred, curproc);
222 			if (error) {
223 				goto out;
224 			}
225 			/*
226 			 * We are creating an entry in the file system, so
227 			 * save its name for further use by tmpfs_create().
228 			 */
229 			cnp->cn_flags |= SAVENAME;
230 			error = EJUSTRETURN;
231 		} else {
232 			error = ENOENT;
233 		}
234 		goto done;
235 	}
236 
237 	tnode = de->td_node;
238 
239 	/*
240 	 * If it is not the last path component and found a non-directory
241 	 * or non-link entry (which may itself be pointing to a directory),
242 	 * raise an error.
243 	 */
244 	if (!lastcn && tnode->tn_type != VDIR && tnode->tn_type != VLNK) {
245 		error = ENOTDIR;
246 		goto out;
247 	}
248 
249 	/* Check the permissions. */
250 	if (lastcn && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
251 		error = VOP_ACCESS(dvp, VWRITE, cred, curproc);
252 		if (error)
253 			goto out;
254 
255 		/*
256 		 * If not root and directory is sticky, check for permission
257 		 * on directory or on file. This implements append-only
258 		 * directories.
259 		 */
260 		if ((dnode->tn_mode & S_ISTXT) != 0) {
261 			if (cred->cr_uid != 0 &&
262 			    cred->cr_uid != dnode->tn_uid &&
263 			    cred->cr_uid != tnode->tn_uid) {
264 				error = EPERM;
265 				goto out;
266 			}
267 		}
268 
269 		/*
270 		 * XXX pedro: We might need cn_nameptr later in tmpfs_remove()
271 		 * or tmpfs_rmdir() for a tmpfs_dir_lookup(). We should really
272 		 * get rid of SAVENAME at some point.
273 		 */
274 		if (cnp->cn_nameiop == DELETE)
275 			cnp->cn_flags |= SAVENAME;
276 	}
277 
278 	/* Get a vnode for the matching entry. */
279 	rw_enter_write(&tnode->tn_nlock);
280 	error = tmpfs_vnode_get(dvp->v_mount, tnode, vpp);
281 done:
282 	/*
283 	 * Cache the result, unless request was for creation (as it does
284 	 * not improve the performance).
285 	 */
286 	if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE) {
287 		cache_enter(dvp, *vpp, cnp);
288 	}
289 out:
290 	/*
291 	 * If (1) we succeded, (2) found a distinct vnode to return and (3) were
292 	 * either explicitely told to keep the parent locked or are in the
293 	 * middle of a lookup, unlock the parent vnode.
294 	 */
295 	if ((error == 0 || error == EJUSTRETURN) && /* (1) */
296 	    *vpp != dvp &&			    /* (2) */
297 	    (!lockparent || !lastcn)) {		    /* (3) */
298 		VOP_UNLOCK(dvp, 0, curproc);
299 		cnp->cn_flags |= PDIRUNLOCK;
300 	} else
301 		KASSERT(VOP_ISLOCKED(dvp));
302 
303 	KASSERT((*vpp && VOP_ISLOCKED(*vpp)) || error);
304 
305 	return error;
306 }
307 
308 int
309 tmpfs_create(void *v)
310 {
311 	struct vop_create_args /* {
312 		struct vnode		*a_dvp;
313 		struct vnode		**a_vpp;
314 		struct componentname	*a_cnp;
315 		struct vattr		*a_vap;
316 	} */ *ap = v;
317 	struct vnode *dvp = ap->a_dvp, **vpp = ap->a_vpp;
318 	struct componentname *cnp = ap->a_cnp;
319 	struct vattr *vap = ap->a_vap;
320 
321 	KASSERT(VOP_ISLOCKED(dvp));
322 	KASSERT(cnp->cn_flags & HASBUF);
323 	KASSERT(vap->va_type == VREG || vap->va_type == VSOCK);
324 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
325 }
326 
327 int
328 tmpfs_mknod(void *v)
329 {
330 	struct vop_mknod_args /* {
331 		struct vnode		*a_dvp;
332 		struct vnode		**a_vpp;
333 		struct componentname	*a_cnp;
334 		struct vattr		*a_vap;
335 	} */ *ap = v;
336 	struct vnode *dvp = ap->a_dvp, **vpp = ap->a_vpp;
337 	struct componentname *cnp = ap->a_cnp;
338 	struct vattr *vap = ap->a_vap;
339 	enum vtype vt = vap->va_type;
340 	int error;
341 
342 	if (vt != VBLK && vt != VCHR && vt != VFIFO) {
343 		vput(dvp);
344 		return EINVAL;
345 	}
346 
347 	/* tmpfs_alloc_file() will unlock 'dvp'. */
348 	error = tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
349 	if (error)
350 		return error;
351 
352 	vput(*vpp);
353 
354 	return 0;
355 }
356 
357 int
358 tmpfs_open(void *v)
359 {
360 	struct vop_open_args /* {
361 		struct vnode	*a_vp;
362 		int		a_mode;
363 		kauth_cred_t	a_cred;
364 	} */ *ap = v;
365 	struct vnode *vp = ap->a_vp;
366 	mode_t mode = ap->a_mode;
367 	tmpfs_node_t *node;
368 
369 	KASSERT(VOP_ISLOCKED(vp));
370 
371 	node = VP_TO_TMPFS_NODE(vp);
372 	if (node->tn_links < 1) {
373 		/*
374 		 * The file is still active, but all its names have been
375 		 * removed (e.g. by a "rmdir $(pwd)").  It cannot be opened
376 		 * any more, as it is about to be destroyed.
377 		 */
378 		return ENOENT;
379 	}
380 
381 	/* If the file is marked append-only, deny write requests. */
382 	if ((node->tn_flags & APPEND) != 0 &&
383 	    (mode & (FWRITE | O_APPEND)) == FWRITE) {
384 		return EPERM;
385 	}
386 	return 0;
387 }
388 
389 int
390 tmpfs_close(void *v)
391 {
392 	struct vop_close_args /* {
393 		struct vnode	*a_vp;
394 		int		a_fflag;
395 		kauth_cred_t	a_cred;
396 	} */ *ap = v;
397 	struct vnode *vp = ap->a_vp;
398 
399 	KASSERT(VOP_ISLOCKED(vp));
400 
401 	return 0;
402 }
403 
404 int
405 tmpfs_access(void *v)
406 {
407 	struct vop_access_args /* {
408 		struct vnode	*a_vp;
409 		int		a_mode;
410 		kauth_cred_t	a_cred;
411 	} */ *ap = v;
412 	struct vnode *vp = ap->a_vp;
413 	mode_t mode = ap->a_mode;
414 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
415 	const int writing = (mode & VWRITE) != 0;
416 
417 	KASSERT(VOP_ISLOCKED(vp));
418 
419 	/* Possible? */
420 	switch (vp->v_type) {
421 	case VDIR:
422 	case VLNK:
423 	case VREG:
424 		if (writing && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) {
425 			return EROFS;
426 		}
427 		break;
428 	case VBLK:
429 	case VCHR:
430 	case VSOCK:
431 	case VFIFO:
432 		break;
433 	default:
434 		return EINVAL;
435 	}
436 	if (writing && (node->tn_flags & IMMUTABLE) != 0) {
437 		return EPERM;
438 	}
439 
440 	return (vaccess(vp->v_type, node->tn_mode, node->tn_uid, node->tn_gid,
441 	    mode, ap->a_cred));
442 }
443 
444 int
445 tmpfs_getattr(void *v)
446 {
447 	struct vop_getattr_args /* {
448 		struct vnode	*a_vp;
449 		struct vattr	*a_vap;
450 		kauth_cred_t	a_cred;
451 	} */ *ap = v;
452 	struct vnode *vp = ap->a_vp;
453 	struct vattr *vap = ap->a_vap;
454 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
455 
456 	vattr_null(vap);
457 
458 	vap->va_type = vp->v_type;
459 	vap->va_mode = node->tn_mode;
460 	vap->va_nlink = node->tn_links;
461 	vap->va_uid = node->tn_uid;
462 	vap->va_gid = node->tn_gid;
463 	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
464 	vap->va_fileid = node->tn_id;
465 	vap->va_size = node->tn_size;
466 	vap->va_blocksize = PAGE_SIZE;
467 	vap->va_atime = node->tn_atime;
468 	vap->va_mtime = node->tn_mtime;
469 	vap->va_ctime = node->tn_ctime;
470 	/* vap->va_birthtime = node->tn_birthtime; */
471 	vap->va_gen = TMPFS_NODE_GEN(node);
472 	vap->va_flags = node->tn_flags;
473 	vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
474 	    node->tn_spec.tn_dev.tn_rdev : VNOVAL;
475 	vap->va_bytes = round_page(node->tn_size);
476 	vap->va_filerev = VNOVAL;
477 	vap->va_vaflags = 0;
478 	vap->va_spare = VNOVAL; /* XXX */
479 
480 	return 0;
481 }
482 
483 #define GOODTIME(tv)	((tv)->tv_nsec != VNOVAL)
484 /* XXX Should this operation be atomic?  I think it should, but code in
485  * XXX other places (e.g., ufs) doesn't seem to be... */
486 int
487 tmpfs_setattr(void *v)
488 {
489 	struct vop_setattr_args /* {
490 		struct vnode	*a_vp;
491 		struct vattr	*a_vap;
492 		kauth_cred_t	a_cred;
493 	} */ *ap = v;
494 	struct vnode *vp = ap->a_vp;
495 	struct vattr *vap = ap->a_vap;
496 	struct ucred *cred = ap->a_cred;
497 	struct proc *p = curproc;
498 	int error = 0;
499 
500 	KASSERT(VOP_ISLOCKED(vp));
501 
502 	/* Abort if any unsettable attribute is given. */
503 	if (vap->va_type != VNON || vap->va_nlink != VNOVAL ||
504 	    vap->va_fsid != VNOVAL || vap->va_fileid != VNOVAL ||
505 	    vap->va_blocksize != VNOVAL || GOODTIME(&vap->va_ctime) ||
506 	    vap->va_gen != VNOVAL || vap->va_rdev != VNOVAL ||
507 	    vap->va_bytes != VNOVAL) {
508 		return EINVAL;
509 	}
510 	if (error == 0 && (vap->va_flags != VNOVAL))
511 		error = tmpfs_chflags(vp, vap->va_flags, cred, p);
512 
513 	if (error == 0 && (vap->va_size != VNOVAL))
514 		error = tmpfs_chsize(vp, vap->va_size, cred, p);
515 
516 	if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL))
517 		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred, p);
518 
519 	if (error == 0 && (vap->va_mode != VNOVAL))
520 		error = tmpfs_chmod(vp, vap->va_mode, cred, p);
521 
522 	if (error == 0 && ((vap->va_vaflags & VA_UTIMES_CHANGE)
523 	    || GOODTIME(&vap->va_atime)
524 	    || GOODTIME(&vap->va_mtime)))
525 		error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime,
526 		    vap->va_vaflags, cred, p);
527 
528 	return error;
529 }
530 
531 int
532 tmpfs_read(void *v)
533 {
534 	struct vop_read_args /* {
535 		struct vnode *a_vp;
536 		struct uio *a_uio;
537 		int a_ioflag;
538 		struct ucred *a_cred;
539 	} */ *ap = v;
540 	struct vnode *vp = ap->a_vp;
541 	struct uio *uio = ap->a_uio;
542 	/* const int ioflag = ap->a_ioflag; */
543 	tmpfs_node_t *node;
544 	int error;
545 
546 	KASSERT(VOP_ISLOCKED(vp));
547 
548 	if (vp->v_type != VREG) {
549 		return EISDIR;
550 	}
551 	if (uio->uio_offset < 0) {
552 		return EINVAL;
553 	}
554 
555 	node = VP_TO_TMPFS_NODE(vp);
556 	tmpfs_update(node, TMPFS_NODE_ACCESSED);
557 	error = 0;
558 
559 	while (error == 0 && uio->uio_resid > 0) {
560 		vsize_t len;
561 
562 		if (node->tn_size <= uio->uio_offset) {
563 			break;
564 		}
565 		len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
566 		if (len == 0) {
567 			break;
568 		}
569 		error = tmpfs_uiomove(node, uio, len);
570 	}
571 
572 	return error;
573 }
574 
575 int
576 tmpfs_write(void *v)
577 {
578 	struct vop_write_args /* {
579 		struct vnode	*a_vp;
580 		struct uio	*a_uio;
581 		int		a_ioflag;
582 		kauth_cred_t	a_cred;
583 	} */ *ap = v;
584 	struct vnode *vp = ap->a_vp;
585 	struct uio *uio = ap->a_uio;
586 	const int ioflag = ap->a_ioflag;
587 	tmpfs_node_t *node;
588 	off_t oldsize;
589 	ssize_t overrun;
590 	int extended;
591 	int error;
592 
593 	KASSERT(VOP_ISLOCKED(vp));
594 
595 	node = VP_TO_TMPFS_NODE(vp);
596 	oldsize = node->tn_size;
597 
598 	if (vp->v_type != VREG)
599 		return (EINVAL);
600 
601 	if (uio->uio_resid == 0)
602 		return (0);
603 
604 	if (ioflag & IO_APPEND) {
605 		uio->uio_offset = node->tn_size;
606 	}
607 
608 	if (uio->uio_offset < 0 ||
609 	    (u_int64_t)uio->uio_offset + uio->uio_resid > LLONG_MAX)
610 		return (EFBIG);
611 
612 	/* do the filesize rlimit check */
613 	if ((error = vn_fsizechk(vp, uio, ioflag, &overrun)))
614 		return (error);
615 
616 	extended = uio->uio_offset + uio->uio_resid > node->tn_size;
617 	if (extended) {
618 		error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid);
619 		if (error)
620 			goto out;
621 	}
622 
623 	error = 0;
624 	while (error == 0 && uio->uio_resid > 0) {
625 		vsize_t len;
626 
627 		len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
628 		if (len == 0) {
629 			break;
630 		}
631 		error = tmpfs_uiomove(node, uio, len);
632 	}
633 	if (error) {
634 		(void)tmpfs_reg_resize(vp, oldsize);
635 	}
636 
637 	tmpfs_update(node, TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED |
638 	    (extended ? TMPFS_NODE_CHANGED : 0));
639 	if (extended)
640 		VN_KNOTE(vp, NOTE_WRITE | NOTE_EXTEND);
641 	else
642 		VN_KNOTE(vp, NOTE_WRITE);
643 out:
644 	if (error) {
645 		KASSERT(oldsize == node->tn_size);
646 	} else {
647 		KASSERT(uio->uio_resid == 0);
648 
649 		/* correct the result for writes clamped by vn_fsizechk() */
650 		uio->uio_resid += overrun;
651 
652 	}
653 	return error;
654 }
655 
656 int
657 tmpfs_fsync(void *v)
658 {
659 	struct vop_fsync_args /* {
660 		struct vnode *a_vp;
661 		struct ucred *a_cred;
662 		int a_flags;
663 		off_t a_offlo;
664 		off_t a_offhi;
665 		struct lwp *a_l;
666 	} */ *ap = v;
667 	struct vnode *vp = ap->a_vp;
668 
669 	/* Nothing to do.  Just update. */
670 	KASSERT(VOP_ISLOCKED(vp));
671 	return 0;
672 }
673 
674 /*
675  * tmpfs_remove: unlink a file.
676  *
677  * => Both directory (dvp) and file (vp) are locked.
678  * => We unlock and drop the reference on both.
679  */
680 int
681 tmpfs_remove(void *v)
682 {
683 	struct vop_remove_args /* {
684 		struct vnode *a_dvp;
685 		struct vnode *a_vp;
686 		struct componentname *a_cnp;
687 	} */ *ap = v;
688 	struct vnode *dvp = ap->a_dvp, *vp = ap->a_vp;
689 	struct componentname *cnp = ap->a_cnp;
690 	tmpfs_node_t *dnode, *node;
691 	tmpfs_dirent_t *de;
692 	int error;
693 
694 	KASSERT(VOP_ISLOCKED(dvp));
695 	KASSERT(VOP_ISLOCKED(vp));
696 	KASSERT(cnp->cn_flags & HASBUF);
697 
698 	if (vp->v_type == VDIR) {
699 		error = EPERM;
700 		goto out;
701 	}
702 
703 	dnode = VP_TO_TMPFS_NODE(dvp);
704 	node = VP_TO_TMPFS_NODE(vp);
705 
706 	/* Files marked as immutable or append-only cannot be deleted. */
707 	if (node->tn_flags & (IMMUTABLE | APPEND)) {
708 		error = EPERM;
709 		goto out;
710 	}
711 
712 	/*
713 	 * Likewise, files residing on directories marked as append-only cannot
714 	 * be deleted.
715 	 */
716 	if (dnode->tn_flags & APPEND) {
717 		error = EPERM;
718 		goto out;
719 	}
720 
721 	/* Lookup the directory entry (check the cached hint first). */
722 	de = tmpfs_dir_cached(node);
723 	if (de == NULL) {
724 		de = tmpfs_dir_lookup(dnode, cnp);
725 	}
726 
727 	KASSERT(de && de->td_node == node);
728 
729 	/*
730 	 * Remove the entry from the directory (drops the link count) and
731 	 * destroy it.
732 	 * Note: the inode referred by it will not be destroyed
733 	 * until the vnode is reclaimed/recycled.
734 	 */
735 	tmpfs_dir_detach(dnode, de);
736 	tmpfs_free_dirent(VFS_TO_TMPFS(vp->v_mount), de);
737 	if (node->tn_links > 0)  {
738 		/* We removed a hard link. */
739 		tmpfs_update(node, TMPFS_NODE_CHANGED);
740 	}
741 	error = 0;
742 out:
743 	pool_put(&namei_pool, cnp->cn_pnbuf);
744 	/* Drop the references and unlock the vnodes. */
745 	vput(vp);
746 	if (dvp == vp) {
747 		vrele(dvp);
748 	} else {
749 		vput(dvp);
750 	}
751 	return error;
752 }
753 
754 /*
755  * tmpfs_link: create a hard link.
756  */
757 int
758 tmpfs_link(void *v)
759 {
760 	struct vop_link_args /* {
761 		struct vnode *a_dvp;
762 		struct vnode *a_vp;
763 		struct componentname *a_cnp;
764 	} */ *ap = v;
765 	struct vnode *dvp = ap->a_dvp;
766 	struct vnode *vp = ap->a_vp;
767 	struct componentname *cnp = ap->a_cnp;
768 	tmpfs_node_t *dnode, *node;
769 	tmpfs_dirent_t *de;
770 	int error;
771 
772 	KASSERT(VOP_ISLOCKED(dvp));
773 
774 	if (vp->v_type == VDIR) {
775 		VOP_ABORTOP(dvp, cnp);
776 		vput(dvp);
777 		return EPERM;
778 	}
779 
780 	KASSERT(dvp != vp);
781 
782 	if (dvp->v_mount != vp->v_mount) {
783 		VOP_ABORTOP(dvp, cnp);
784 		vput(dvp);
785 		return EXDEV;
786 	}
787 
788 	dnode = VP_TO_TMPFS_DIR(dvp);
789 	node = VP_TO_TMPFS_NODE(vp);
790 
791 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curproc);
792 
793 	/* Check for maximum number of links limit. */
794 	if (node->tn_links == LINK_MAX) {
795 		error = EMLINK;
796 		goto out;
797 	}
798 	KASSERT(node->tn_links < LINK_MAX);
799 
800 	/* We cannot create links of files marked immutable or append-only. */
801 	if (node->tn_flags & (IMMUTABLE | APPEND)) {
802 		error = EPERM;
803 		goto out;
804 	}
805 
806 	if (TMPFS_DIRSEQ_FULL(dnode)) {
807 		error = ENOSPC;
808 		goto out;
809 	}
810 
811 	/* Allocate a new directory entry to represent the inode. */
812 	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount),
813 	    cnp->cn_nameptr, cnp->cn_namelen, &de);
814 	if (error) {
815 		goto out;
816 	}
817 
818 	/*
819 	 * Insert the entry into the directory.
820 	 * It will increase the inode link count.
821 	 */
822 	tmpfs_dir_attach(dnode, de, node);
823 
824 	/* Update the timestamps and trigger the event. */
825 	if (node->tn_vnode) {
826 		VN_KNOTE(node->tn_vnode, NOTE_LINK);
827 	}
828 	tmpfs_update(node, TMPFS_NODE_CHANGED);
829 	error = 0;
830 out:
831 	pool_put(&namei_pool, cnp->cn_pnbuf);
832 	VOP_UNLOCK(vp, 0, curproc);
833 	vput(dvp);
834 	return error;
835 }
836 
837 int
838 tmpfs_mkdir(void *v)
839 {
840 	struct vop_mkdir_args /* {
841 		struct vnode		*a_dvp;
842 		struct vnode		**a_vpp;
843 		struct componentname	*a_cnp;
844 		struct vattr		*a_vap;
845 	} */ *ap = v;
846 	struct vnode *dvp = ap->a_dvp;
847 	struct vnode **vpp = ap->a_vpp;
848 	struct componentname *cnp = ap->a_cnp;
849 	struct vattr *vap = ap->a_vap;
850 
851 	KASSERT(vap->va_type == VDIR);
852 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
853 }
854 
855 int
856 tmpfs_rmdir(void *v)
857 {
858 	struct vop_rmdir_args /* {
859 		struct vnode		*a_dvp;
860 		struct vnode		*a_vp;
861 		struct componentname	*a_cnp;
862 	} */ *ap = v;
863 	struct vnode *dvp = ap->a_dvp;
864 	struct vnode *vp = ap->a_vp;
865 	struct componentname *cnp = ap->a_cnp;
866 	tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
867 	tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
868 	tmpfs_node_t *node = VP_TO_TMPFS_DIR(vp);
869 	tmpfs_dirent_t *de;
870 	int error = 0;
871 
872 	KASSERT(VOP_ISLOCKED(dvp));
873 	KASSERT(VOP_ISLOCKED(vp));
874 	KASSERT(cnp->cn_flags & HASBUF);
875 
876 	if (cnp->cn_namelen == 2 && cnp->cn_nameptr[0] == '.' &&
877 	    cnp->cn_nameptr[1] == '.') {
878 		error = ENOTEMPTY;
879 		goto out;
880 	}
881 
882 	KASSERT(node->tn_spec.tn_dir.tn_parent == dnode);
883 
884 	/*
885 	 * Directories with more than two entries ('.' and '..') cannot be
886 	 * removed.
887 	 */
888 	if (node->tn_size > 0) {
889 		KASSERT(error == 0);
890 		TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
891 			error = ENOTEMPTY;
892 			break;
893 		}
894 		if (error)
895 			goto out;
896 	}
897 
898 	/* Lookup the directory entry (check the cached hint first). */
899 	de = tmpfs_dir_cached(node);
900 	if (de == NULL)
901 		de = tmpfs_dir_lookup(dnode, cnp);
902 
903 	KASSERT(de && de->td_node == node);
904 
905 	/* Check flags to see if we are allowed to remove the directory. */
906 	if (dnode->tn_flags & APPEND || node->tn_flags & (IMMUTABLE | APPEND)) {
907 		error = EPERM;
908 		goto out;
909 	}
910 
911 	/* Decrement the link count for the virtual '.' entry. */
912 	node->tn_links--;
913 	tmpfs_update(node, TMPFS_NODE_STATUSALL);
914 
915 	/* Detach the directory entry from the directory. */
916 	tmpfs_dir_detach(dnode, de);
917 
918 	/* Purge the cache for parent. */
919 	cache_purge(dvp);
920 
921 	/*
922 	 * Destroy the directory entry.
923 	 * Note: the inode referred by it will not be destroyed
924 	 * until the vnode is reclaimed.
925 	 */
926 	tmpfs_free_dirent(tmp, de);
927 	KASSERT(TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir) == NULL);
928 
929 	KASSERT(node->tn_links == 0);
930 out:
931 	pool_put(&namei_pool, cnp->cn_pnbuf);
932 	/* Release the nodes. */
933 	vput(dvp);
934 	vput(vp);
935 	return error;
936 }
937 
938 int
939 tmpfs_symlink(void *v)
940 {
941 	struct vop_symlink_args /* {
942 		struct vnode		*a_dvp;
943 		struct vnode		**a_vpp;
944 		struct componentname	*a_cnp;
945 		struct vattr		*a_vap;
946 		char			*a_target;
947 	} */ *ap = v;
948 	struct vnode *dvp = ap->a_dvp;
949 	struct vnode **vpp = ap->a_vpp;
950 	struct componentname *cnp = ap->a_cnp;
951 	struct vattr *vap = ap->a_vap;
952 	char *target = ap->a_target;
953 	int error;
954 
955 	KASSERT(vap->va_type == 0);
956 	vap->va_type = VLNK;
957 
958 	error = tmpfs_alloc_file(dvp, vpp, vap, cnp, target);
959 	if (error == 0)
960 		vput(*vpp);
961 
962 	return error;
963 }
964 
965 int
966 tmpfs_readdir(void *v)
967 {
968 	struct vop_readdir_args /* {
969 		struct vnode	*a_vp;
970 		struct uio	*a_uio;
971 		kauth_cred_t	a_cred;
972 		int		*a_eofflag;
973 	} */ *ap = v;
974 	struct vnode *vp = ap->a_vp;
975 	struct uio *uio = ap->a_uio;
976 	int *eofflag = ap->a_eofflag;
977 	tmpfs_node_t *node;
978 	int error;
979 
980 	KASSERT(VOP_ISLOCKED(vp));
981 
982 	/* This operation only makes sense on directory nodes. */
983 	if (vp->v_type != VDIR) {
984 		return ENOTDIR;
985 	}
986 	node = VP_TO_TMPFS_DIR(vp);
987 	/*
988 	 * Retrieve the directory entries, unless it is being destroyed.
989 	 */
990 	if (node->tn_links) {
991 		error = tmpfs_dir_getdents(node, uio);
992 	} else {
993 		error = 0;
994 	}
995 
996 	if (eofflag != NULL) {
997 		*eofflag = !error && uio->uio_offset == TMPFS_DIRSEQ_EOF;
998 	}
999 	return error;
1000 }
1001 
1002 int
1003 tmpfs_readlink(void *v)
1004 {
1005 	struct vop_readlink_args /* {
1006 		struct vnode	*a_vp;
1007 		struct uio	*a_uio;
1008 		kauth_cred_t	a_cred;
1009 	} */ *ap = v;
1010 	struct vnode *vp = ap->a_vp;
1011 	struct uio *uio = ap->a_uio;
1012 	tmpfs_node_t *node;
1013 	int error;
1014 
1015 	KASSERT(VOP_ISLOCKED(vp));
1016 	KASSERT(uio->uio_offset == 0);
1017 	KASSERT(vp->v_type == VLNK);
1018 
1019 	node = VP_TO_TMPFS_NODE(vp);
1020 	error = uiomovei(node->tn_spec.tn_lnk.tn_link,
1021 	    MIN(node->tn_size, uio->uio_resid), uio);
1022 	tmpfs_update(node, TMPFS_NODE_ACCESSED);
1023 
1024 	return error;
1025 }
1026 
1027 int
1028 tmpfs_inactive(void *v)
1029 {
1030 	struct vop_inactive_args /* {
1031 		struct vnode *a_vp;
1032 		int *a_recycle;
1033 	} */ *ap = v;
1034 	struct vnode *vp = ap->a_vp;
1035 	tmpfs_node_t *node;
1036 
1037 	KASSERT(VOP_ISLOCKED(vp));
1038 
1039 	node = VP_TO_TMPFS_NODE(vp);
1040 
1041 	if (vp->v_type == VREG && tmpfs_uio_cached(node))
1042 		tmpfs_uio_uncache(node);
1043 
1044 	VOP_UNLOCK(vp, 0, curproc);
1045 
1046 	/*
1047 	 * If we are done with the node, reclaim it so that it can be reused
1048 	 * immediately.
1049 	 */
1050 	if (node->tn_links == 0)
1051 		vrecycle(vp, curproc);
1052 
1053 	return 0;
1054 }
1055 
1056 int
1057 tmpfs_reclaim(void *v)
1058 {
1059 	struct vop_reclaim_args /* {
1060 		struct vnode *a_vp;
1061 	} */ *ap = v;
1062 	struct vnode *vp = ap->a_vp;
1063 	tmpfs_mount_t *tmp = VFS_TO_TMPFS(vp->v_mount);
1064 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1065 	int racing;
1066 
1067 	/* Disassociate inode from vnode. */
1068 	rw_enter_write(&node->tn_nlock);
1069 	node->tn_vnode = NULL;
1070 	vp->v_data = NULL;
1071 	/* Check if tmpfs_vnode_get() is racing with us. */
1072 	racing = TMPFS_NODE_RECLAIMING(node);
1073 	rw_exit_write(&node->tn_nlock);
1074 
1075 	/*
1076 	 * If inode is not referenced, i.e. no links, then destroy it.
1077 	 * Note: if racing - inode is about to get a new vnode, leave it.
1078 	 */
1079 	if (node->tn_links == 0 && !racing) {
1080 		tmpfs_free_node(tmp, node);
1081 	}
1082 	return 0;
1083 }
1084 
1085 int
1086 tmpfs_pathconf(void *v)
1087 {
1088 	struct vop_pathconf_args /* {
1089 		struct vnode	*a_vp;
1090 		int		a_name;
1091 		register_t	*a_retval;
1092 	} */ *ap = v;
1093 	const int name = ap->a_name;
1094 	register_t *retval = ap->a_retval;
1095 	int error = 0;
1096 
1097 	switch (name) {
1098 	case _PC_LINK_MAX:
1099 		*retval = LINK_MAX;
1100 		break;
1101 	case _PC_NAME_MAX:
1102 		*retval = TMPFS_MAXNAMLEN;
1103 		break;
1104 	case _PC_CHOWN_RESTRICTED:
1105 		*retval = 1;
1106 		break;
1107 	case _PC_NO_TRUNC:
1108 		*retval = 1;
1109 		break;
1110 	case _PC_FILESIZEBITS:
1111 		*retval = 64;
1112 		break;
1113 	case _PC_TIMESTAMP_RESOLUTION:
1114 		*retval = 1;
1115 		break;
1116 	default:
1117 		error = EINVAL;
1118 	}
1119 	return error;
1120 }
1121 
1122 int
1123 tmpfs_advlock(void *v)
1124 {
1125 	struct vop_advlock_args /* {
1126 		struct vnode	*a_vp;
1127 		void *		a_id;
1128 		int		a_op;
1129 		struct flock	*a_fl;
1130 		int		a_flags;
1131 	} */ *ap = v;
1132 	struct vnode *vp = ap->a_vp;
1133 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1134 
1135 	return lf_advlock(&node->tn_lockf, node->tn_size, ap->a_id, ap->a_op,
1136 	    ap->a_fl, ap->a_flags);
1137 }
1138 
1139 int
1140 tmpfs_print(void *v)
1141 {
1142 	struct vop_print_args /* {
1143 		struct vnode	*a_vp;
1144 	} */ *ap = v;
1145 	struct vnode *vp = ap->a_vp;
1146 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1147 
1148 	printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%x, links %d\n"
1149 	    "\tmode 0%o, owner %d, group %d, size %lld",
1150 	    node, node->tn_flags, node->tn_links, node->tn_mode, node->tn_uid,
1151 	    node->tn_gid, node->tn_size);
1152 #ifdef FIFO
1153 	if (vp->v_type == VFIFO)
1154 		fifo_printinfo(vp);
1155 #endif
1156 	printf("\n");
1157 	return 0;
1158 }
1159 
/*
 * tmpfs_bwrite: a null operation; the argument is ignored and success
 * is always reported.
 */
int
tmpfs_bwrite(void *v)
{
	(void)v;

	return 0;
}
1166 
1167 int
1168 tmpfs_poll(void *v)
1169 {
1170 	struct vop_poll_args *ap = v;
1171 	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
1172 }
1173 
/*
 * tmpfs_strategy: not supported; the argument is ignored and
 * EOPNOTSUPP is always returned.
 */
int
tmpfs_strategy(void *v)
{
	(void)v;

	return EOPNOTSUPP;
}
1179 
/*
 * tmpfs_ioctl: no ioctls are handled; the argument is ignored and
 * ENOTTY is always returned.
 */
int
tmpfs_ioctl(void *v)
{
	(void)v;

	return ENOTTY;
}
1185 
1186 int
1187 tmpfs_lock(void *v)
1188 {
1189 	struct vop_lock_args *ap = v;
1190 	tmpfs_node_t *tnp = VP_TO_TMPFS_NODE(ap->a_vp);
1191 
1192 	return lockmgr(&tnp->tn_vlock, ap->a_flags, NULL);
1193 }
1194 
1195 int
1196 tmpfs_unlock(void *v)
1197 {
1198 	struct vop_unlock_args *ap = v;
1199 	tmpfs_node_t *tnp = VP_TO_TMPFS_NODE(ap->a_vp);
1200 
1201 	return lockmgr(&tnp->tn_vlock, ap->a_flags | LK_RELEASE, NULL);
1202 }
1203 
1204 int
1205 tmpfs_islocked(void *v)
1206 {
1207 	struct vop_islocked_args *ap = v;
1208 	tmpfs_node_t *tnp = VP_TO_TMPFS_NODE(ap->a_vp);
1209 
1210 	return lockstatus(&tnp->tn_vlock);
1211 }
1212 
1213 /*
1214  * tmpfs_rename: rename routine, the hairiest system call, with the
1215  * insane API.
1216  *
1217  * Arguments: fdvp (from-parent vnode), fvp (from-leaf), tdvp (to-parent)
1218  * and tvp (to-leaf), if exists (NULL if not).
1219  *
1220  * => Caller holds a reference on fdvp and fvp, they are unlocked.
1221  *    Note: fdvp and fvp can refer to the same object (i.e. when it is root).
1222  *
1223  * => Both tdvp and tvp are referenced and locked.  It is our responsibility
1224  *    to release the references and unlock them (or destroy).
1225  */
1226 
1227 /*
1228  * First, some forward declarations of subroutines.
1229  */
1230 
/* The rename itself, behind the sane interface. */
int	tmpfs_sane_rename(struct vnode *, struct componentname *,
	    struct vnode *, struct componentname *, struct ucred *, int);

/* Locking and lookup of the directories and nodes involved. */
int	tmpfs_rename_enter(struct mount *, struct tmpfs_mount *,
	    struct ucred *,
	    struct vnode *, struct tmpfs_node *, struct componentname *,
	    struct tmpfs_dirent **, struct vnode **,
	    struct vnode *, struct tmpfs_node *, struct componentname *,
	    struct tmpfs_dirent **, struct vnode **);
int	tmpfs_rename_enter_common(struct mount *, struct tmpfs_mount *,
	    struct ucred *,
	    struct vnode *, struct tmpfs_node *,
	    struct componentname *, struct tmpfs_dirent **, struct vnode **,
	    struct componentname *, struct tmpfs_dirent **, struct vnode **);
int	tmpfs_rename_enter_separate(struct mount *, struct tmpfs_mount *,
	    struct ucred *,
	    struct vnode *, struct tmpfs_node *, struct componentname *,
	    struct tmpfs_dirent **, struct vnode **,
	    struct vnode *, struct tmpfs_node *, struct componentname *,
	    struct tmpfs_dirent **, struct vnode **);
void	tmpfs_rename_exit(struct tmpfs_mount *,
	    struct vnode *, struct vnode *, struct vnode *, struct vnode *);
int	tmpfs_rename_lock_directory(struct vnode *, struct tmpfs_node *);
int	tmpfs_rename_genealogy(struct tmpfs_node *, struct tmpfs_node *,
	    struct tmpfs_node **);
int	tmpfs_rename_lock(struct mount *, struct ucred *, int,
	    struct vnode *, struct tmpfs_node *, struct componentname *, int,
	    struct tmpfs_dirent **, struct vnode **,
	    struct vnode *, struct tmpfs_node *, struct componentname *, int,
	    struct tmpfs_dirent **, struct vnode **);

/* Directory entry manipulation. */
void	tmpfs_rename_attachdetach(struct tmpfs_mount *,
	    struct vnode *, struct tmpfs_dirent *, struct vnode *,
	    struct vnode *, struct tmpfs_dirent *, struct vnode *);
int	tmpfs_do_remove(struct tmpfs_mount *, struct vnode *,
	    struct tmpfs_node *, struct tmpfs_dirent *, struct vnode *,
	    struct ucred *);

/* Feasibility and permission checks. */
int	tmpfs_rename_check_possible(struct tmpfs_node *,
	    struct tmpfs_node *, struct tmpfs_node *, struct tmpfs_node *);
int	tmpfs_rename_check_permitted(struct ucred *,
	    struct tmpfs_node *, struct tmpfs_node *,
	    struct tmpfs_node *, struct tmpfs_node *);
int	tmpfs_remove_check_possible(struct tmpfs_node *,
	    struct tmpfs_node *);
int	tmpfs_remove_check_permitted(struct ucred *,
	    struct tmpfs_node *, struct tmpfs_node *);
int	tmpfs_check_sticky(struct ucred *,
	    struct tmpfs_node *, struct tmpfs_node *);

/* Name cache purge after a rename, and the early-error abort path. */
void	tmpfs_rename_cache_purge(struct vnode *, struct vnode *,
	    struct vnode *, struct vnode *);
void	tmpfs_rename_abort(void *);
1279 
1280 int
1281 tmpfs_rename(void *v)
1282 {
1283 	struct vop_rename_args  /* {
1284 		struct vnode		*a_fdvp;
1285 		struct vnode		*a_fvp;
1286 		struct componentname	*a_fcnp;
1287 		struct vnode		*a_tdvp;
1288 		struct vnode		*a_tvp;
1289 		struct componentname	*a_tcnp;
1290 	} */ *ap = v;
1291 	struct vnode *fdvp = ap->a_fdvp;
1292 	struct vnode *fvp = ap->a_fvp;
1293 	struct componentname *fcnp = ap->a_fcnp;
1294 	struct vnode *tdvp = ap->a_tdvp;
1295 	struct vnode *tvp = ap->a_tvp;
1296 	struct componentname *tcnp = ap->a_tcnp;
1297 	struct ucred *cred;
1298 	int error;
1299 
1300 	KASSERT(fdvp != NULL);
1301 	KASSERT(fvp != NULL);
1302 	KASSERT(fcnp != NULL);
1303 	KASSERT(fcnp->cn_nameptr != NULL);
1304 	KASSERT(tdvp != NULL);
1305 	KASSERT(tcnp != NULL);
1306 	KASSERT(fcnp->cn_nameptr != NULL);
1307 	/* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
1308 	/* KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
1309 	KASSERT(fdvp->v_type == VDIR);
1310 	KASSERT(tdvp->v_type == VDIR);
1311 	KASSERT(fcnp->cn_flags & HASBUF);
1312 	KASSERT(tcnp->cn_flags & HASBUF);
1313 
1314 	cred = fcnp->cn_cred;
1315 	KASSERT(tcnp->cn_cred == cred);
1316 
1317 	/*
1318 	 * Check for cross-device rename.
1319 	 */
1320 	if (fvp->v_mount != tdvp->v_mount ||
1321 	    (tvp != NULL && (fvp->v_mount != tvp->v_mount))) {
1322 	    	tmpfs_rename_abort(v);
1323 		return EXDEV;
1324 	}
1325 
1326 	/*
1327 	 * Can't check the locks on these until we know they're on
1328 	 * the same FS, as not all FS do locking the same way.
1329 	 */
1330 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
1331 	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
1332 
1333 	/*
1334 	 * Reject renaming '.' and '..'.
1335 	 */
1336 	if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
1337 	    (fcnp->cn_namelen == 2 && fcnp->cn_nameptr[0] == '.' &&
1338 	     fcnp->cn_nameptr[1] == '.')) {
1339 	     	tmpfs_rename_abort(v);
1340 	     	return EINVAL;
1341 	}
1342 
1343 	/*
1344 	 * Sanitize our world from the VFS insanity.  Unlock the target
1345 	 * directory and node, which are locked.  Release the children,
1346 	 * which are referenced.  Check for rename("x", "y/."), which
1347 	 * it is our responsibility to reject, not the caller's.  (But
1348 	 * the caller does reject rename("x/.", "y").  Go figure.)
1349 	 */
1350 
1351 	VOP_UNLOCK(tdvp, 0, curproc);
1352 	if ((tvp != NULL) && (tvp != tdvp))
1353 		VOP_UNLOCK(tvp, 0, curproc);
1354 
1355 	vrele(fvp);
1356 	if (tvp != NULL)
1357 		vrele(tvp);
1358 
1359 	if (tvp == tdvp) {
1360 		error = EINVAL;
1361 		goto out;
1362 	}
1363 
1364 	error = tmpfs_sane_rename(fdvp, fcnp, tdvp, tcnp, cred, 0);
1365 
1366 out:	/*
1367 	 * All done, whether with success or failure.  Release the
1368 	 * directory nodes now, as the caller expects from the VFS
1369 	 * protocol.
1370 	 */
1371 	vrele(fdvp);
1372 	vrele(tdvp);
1373 
1374 	return error;
1375 }
1376 
1377 /*
1378  * tmpfs_sane_rename: rename routine, the hairiest system call, with
1379  * the sane API.
1380  *
1381  * Arguments:
1382  *
1383  * . fdvp (from directory vnode),
1384  * . fcnp (from component name),
1385  * . tdvp (to directory vnode), and
1386  * . tcnp (to component name).
1387  *
1388  * fdvp and tdvp must be referenced and unlocked.
1389  */
1390 int
1391 tmpfs_sane_rename(struct vnode *fdvp, struct componentname *fcnp,
1392     struct vnode *tdvp, struct componentname *tcnp, struct ucred *cred,
1393     int posixly_correct)
1394 {
1395 	struct mount *mount;
1396 	struct tmpfs_mount *tmpfs;
1397 	struct tmpfs_node *fdnode, *tdnode;
1398 	struct tmpfs_dirent *fde, *tde;
1399 	struct vnode *fvp, *tvp;
1400 	char *newname;
1401 	int error;
1402 
1403 	KASSERT(fdvp != NULL);
1404 	KASSERT(fcnp != NULL);
1405 	KASSERT(tdvp != NULL);
1406 	KASSERT(tcnp != NULL);
1407 	/* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
1408 	/* KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
1409 	KASSERT(fdvp->v_type == VDIR);
1410 	KASSERT(tdvp->v_type == VDIR);
1411 	KASSERT(fdvp->v_mount == tdvp->v_mount);
1412 	KASSERT((fcnp->cn_flags & ISDOTDOT) == 0);
1413 	KASSERT((tcnp->cn_flags & ISDOTDOT) == 0);
1414 	KASSERT((fcnp->cn_namelen != 1) || (fcnp->cn_nameptr[0] != '.'));
1415 	KASSERT((tcnp->cn_namelen != 1) || (tcnp->cn_nameptr[0] != '.'));
1416 	KASSERT((fcnp->cn_namelen != 2) || (fcnp->cn_nameptr[0] != '.') ||
1417 	    (fcnp->cn_nameptr[1] != '.'));
1418 	KASSERT((tcnp->cn_namelen != 2) || (tcnp->cn_nameptr[0] != '.') ||
1419 	    (tcnp->cn_nameptr[1] != '.'));
1420 
1421 	/*
1422 	 * Pull out the tmpfs data structures.
1423 	 */
1424 	fdnode = VP_TO_TMPFS_NODE(fdvp);
1425 	tdnode = VP_TO_TMPFS_NODE(tdvp);
1426 	KASSERT(fdnode != NULL);
1427 	KASSERT(tdnode != NULL);
1428 	KASSERT(fdnode->tn_vnode == fdvp);
1429 	KASSERT(tdnode->tn_vnode == tdvp);
1430 	KASSERT(fdnode->tn_type == VDIR);
1431 	KASSERT(tdnode->tn_type == VDIR);
1432 
1433 	mount = fdvp->v_mount;
1434 	KASSERT(mount != NULL);
1435 	KASSERT(mount == tdvp->v_mount);
1436 	/* XXX How can we be sure this stays true?  (Not that you're
1437 	 * likely to mount a tmpfs read-only...)  */
1438 	KASSERT((mount->mnt_flag & MNT_RDONLY) == 0);
1439 	tmpfs = VFS_TO_TMPFS(mount);
1440 	KASSERT(tmpfs != NULL);
1441 
1442 	/*
1443 	 * Decide whether we need a new name, and allocate memory for
1444 	 * it if so.  Do this before locking anything or taking
1445 	 * destructive actions so that we can back out safely and sleep
1446 	 * safely.  XXX Is sleeping an issue here?  Can this just be
1447 	 * moved into tmpfs_rename_attachdetach?
1448 	 */
1449 	if (tmpfs_strname_neqlen(fcnp, tcnp)) {
1450 		newname = tmpfs_strname_alloc(tmpfs, tcnp->cn_namelen);
1451 		if (newname == NULL) {
1452 			error = ENOSPC;
1453 			goto out_unlocked;
1454 		}
1455 	} else {
1456 		newname = NULL;
1457 	}
1458 
1459 	/*
1460 	 * Lock and look up everything.  GCC is not very clever.
1461 	 */
1462 	fde = tde = NULL;
1463 	fvp = tvp = NULL;
1464 	error = tmpfs_rename_enter(mount, tmpfs, cred,
1465 	    fdvp, fdnode, fcnp, &fde, &fvp,
1466 	    tdvp, tdnode, tcnp, &tde, &tvp);
1467 	if (error)
1468 		goto out_unlocked;
1469 
1470 	/*
1471 	 * Check that everything is locked and looks right.
1472 	 */
1473 	KASSERT(fde != NULL);
1474 	KASSERT(fvp != NULL);
1475 	KASSERT(fde->td_node != NULL);
1476 	KASSERT(fde->td_node->tn_vnode == fvp);
1477 	KASSERT(fde->td_node->tn_type == fvp->v_type);
1478 	KASSERT((tde == NULL) == (tvp == NULL));
1479 	KASSERT((tde == NULL) || (tde->td_node != NULL));
1480 	KASSERT((tde == NULL) || (tde->td_node->tn_vnode == tvp));
1481 	KASSERT((tde == NULL) || (tde->td_node->tn_type == tvp->v_type));
1482 	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
1483 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
1484 	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
1485 	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
1486 
1487 	/*
1488 	 * If the source and destination are the same object, we need
1489 	 * only at most delete the source entry.
1490 	 */
1491 	if (fvp == tvp) {
1492 		KASSERT(tvp != NULL);
1493 		if (fde->td_node->tn_type == VDIR) {
1494 			/* XXX How can this possibly happen?  */
1495 			error = EINVAL;
1496 			goto out_locked;
1497 		}
1498 		if (!posixly_correct && (fde != tde)) {
1499 			/* XXX Doesn't work because of locking.
1500 			 * error = VOP_REMOVE(fdvp, fvp);
1501 			 */
1502 			error = tmpfs_do_remove(tmpfs, fdvp, fdnode, fde, fvp,
1503 			    cred);
1504 			if (error)
1505 				goto out_locked;
1506 		}
1507 		goto success;
1508 	}
1509 	KASSERT(fde != tde);
1510 	KASSERT(fvp != tvp);
1511 
1512 	/*
1513 	 * If the target exists, refuse to rename a directory over a
1514 	 * non-directory or vice versa, or to clobber a non-empty
1515 	 * directory.
1516 	 */
1517 	if (tvp != NULL) {
1518 		KASSERT(tde != NULL);
1519 		KASSERT(tde->td_node != NULL);
1520 		if (fvp->v_type == VDIR && tvp->v_type == VDIR)
1521 			error = ((tde->td_node->tn_size > 0)? ENOTEMPTY : 0);
1522 		else if (fvp->v_type == VDIR && tvp->v_type != VDIR)
1523 			error = ENOTDIR;
1524 		else if (fvp->v_type != VDIR && tvp->v_type == VDIR)
1525 			error = EISDIR;
1526 		else
1527 			error = 0;
1528 		if (error)
1529 			goto out_locked;
1530 		KASSERT((fvp->v_type == VDIR) == (tvp->v_type == VDIR));
1531 	}
1532 
1533 	/*
1534 	 * Authorize the rename.
1535 	 */
1536 	error = tmpfs_rename_check_possible(fdnode, fde->td_node,
1537 	    tdnode, (tde? tde->td_node : NULL));
1538 	if (error)
1539 		goto out_locked;
1540 	error = tmpfs_rename_check_permitted(cred, fdnode, fde->td_node,
1541 	    tdnode, (tde? tde->td_node : NULL));
1542 	if (error)
1543 		goto out_locked;
1544 
1545 	/*
1546 	 * Everything is hunky-dory.  Shuffle the directory entries.
1547 	 */
1548 	tmpfs_rename_attachdetach(tmpfs, fdvp, fde, fvp, tdvp, tde, tvp);
1549 
1550 	/*
1551 	 * Update the directory entry's name necessary, and flag
1552 	 * metadata updates.  A memory allocation failure here is not
1553 	 * OK because we've already committed some changes that we
1554 	 * can't back out at this point, and we have things locked so
1555 	 * we can't sleep, hence the early allocation above.
1556 	 */
1557 	if (newname != NULL) {
1558 		KASSERT(tcnp->cn_namelen <= TMPFS_MAXNAMLEN);
1559 
1560 		tmpfs_strname_free(tmpfs, fde->td_name, fde->td_namelen);
1561 		fde->td_namelen = (uint16_t)tcnp->cn_namelen;
1562 		(void)memcpy(newname, tcnp->cn_nameptr, tcnp->cn_namelen);
1563 		/* Commit newname and don't free it on the way out.  */
1564 		fde->td_name = newname;
1565 		newname = NULL;
1566 
1567 		tmpfs_update(fde->td_node, TMPFS_NODE_CHANGED);
1568 		tmpfs_update(tdnode, TMPFS_NODE_MODIFIED);
1569 	}
1570 
1571 success:
1572 	VN_KNOTE(fvp, NOTE_RENAME);
1573 	tmpfs_rename_cache_purge(fdvp, fvp, tdvp, tvp);
1574 	error = 0;
1575 
1576 out_locked:
1577 	tmpfs_rename_exit(tmpfs, fdvp, fvp, tdvp, tvp);
1578 
1579 out_unlocked:
1580 	/* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
1581 	/* KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
1582 	/* KASSERT((fvp == NULL) || (VOP_ISLOCKED(fvp) != LK_EXCLUSIVE)); */
1583 	/* KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */
1584 
1585 	if (newname != NULL)
1586 		tmpfs_strname_free(tmpfs, newname, tcnp->cn_namelen);
1587 
1588 	return error;
1589 }
1590 
1591 /*
1592  * Look up fcnp in fdnode/fdvp and store its directory entry in fde_ret
1593  * and the associated vnode in fvp_ret; fail if not found.  Look up
1594  * tcnp in tdnode/tdvp and store its directory entry in tde_ret and the
1595  * associated vnode in tvp_ret; store null instead if not found.  Fail
1596  * if anything has been mounted on any of the nodes involved.
1597  *
1598  * fdvp and tdvp must be referenced.
1599  *
1600  * On entry, nothing is locked.
1601  *
1602  * On success, everything is locked, and *fvp_ret, and *tvp_ret if
1603  * nonnull, are referenced.  The only pairs of vnodes that may be
1604  * identical are {fdvp, tdvp} and {fvp, tvp}.
1605  *
1606  * On failure, everything remains as was.
1607  *
1608  * Locking everything including the source and target nodes is
1609  * necessary to make sure that, e.g., link count updates are OK.  The
1610  * locking order is, in general, ancestor-first, matching the order you
1611  * need to use to look up a descendant anyway.
1612  */
1613 int
1614 tmpfs_rename_enter(struct mount *mount, struct tmpfs_mount *tmpfs,
1615     struct ucred *cred,
1616     struct vnode *fdvp, struct tmpfs_node *fdnode, struct componentname *fcnp,
1617     struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
1618     struct vnode *tdvp, struct tmpfs_node *tdnode, struct componentname *tcnp,
1619     struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret)
1620 {
1621 	int error;
1622 
1623 	KASSERT(mount != NULL);
1624 	KASSERT(tmpfs != NULL);
1625 	KASSERT(fdvp != NULL);
1626 	KASSERT(fdnode != NULL);
1627 	KASSERT(fcnp != NULL);
1628 	KASSERT(fde_ret != NULL);
1629 	KASSERT(fvp_ret != NULL);
1630 	KASSERT(tdvp != NULL);
1631 	KASSERT(tdnode != NULL);
1632 	KASSERT(tcnp != NULL);
1633 	KASSERT(tde_ret != NULL);
1634 	KASSERT(tvp_ret != NULL);
1635 	KASSERT(fdnode->tn_vnode == fdvp);
1636 	KASSERT(tdnode->tn_vnode == tdvp);
1637 	KASSERT(fdnode->tn_type == VDIR);
1638 	KASSERT(tdnode->tn_type == VDIR);
1639 
1640 	if (fdvp == tdvp) {
1641 		KASSERT(fdnode == tdnode);
1642 		error = tmpfs_rename_enter_common(mount, tmpfs, cred, fdvp,
1643 		    fdnode, fcnp, fde_ret, fvp_ret, tcnp, tde_ret, tvp_ret);
1644 	} else {
1645 		KASSERT(fdnode != tdnode);
1646 		error = tmpfs_rename_enter_separate(mount, tmpfs, cred,
1647 		    fdvp, fdnode, fcnp, fde_ret, fvp_ret,
1648 		    tdvp, tdnode, tcnp, tde_ret, tvp_ret);
1649 	}
1650 
1651 	if (error)
1652 		return error;
1653 
1654 	KASSERT(*fde_ret != NULL);
1655 	KASSERT(*fvp_ret != NULL);
1656 	KASSERT((*tde_ret == NULL) == (*tvp_ret == NULL));
1657 	KASSERT((*tde_ret == NULL) || ((*tde_ret)->td_node != NULL));
1658 	KASSERT((*tde_ret == NULL) ||
1659 	    ((*tde_ret)->td_node->tn_vnode == *tvp_ret));
1660 	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
1661 	KASSERT(VOP_ISLOCKED(*fvp_ret) == LK_EXCLUSIVE);
1662 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
1663 	KASSERT((*tvp_ret == NULL) ||
1664 	    (VOP_ISLOCKED(*tvp_ret) == LK_EXCLUSIVE));
1665 	KASSERT(*fvp_ret != fdvp);
1666 	KASSERT(*fvp_ret != tdvp);
1667 	KASSERT(*tvp_ret != fdvp);
1668 	KASSERT(*tvp_ret != tdvp);
1669 	return 0;
1670 }
1671 
1672 /*
1673  * Lock and look up with a common source/target directory.
1674  */
1675 int
1676 tmpfs_rename_enter_common(struct mount *mount, struct tmpfs_mount *tmpfs,
1677     struct ucred *cred,
1678     struct vnode *dvp, struct tmpfs_node *dnode,
1679     struct componentname *fcnp,
1680     struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
1681     struct componentname *tcnp,
1682     struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret)
1683 {
1684 	struct tmpfs_dirent *fde, *tde;
1685 	struct vnode *fvp, *tvp;
1686 	int error;
1687 
1688 	error = tmpfs_rename_lock_directory(dvp, dnode);
1689 	if (error)
1690 		goto fail0;
1691 
1692 	/* Did we lose a race with mount?  */
1693 	if (dvp->v_mountedhere != NULL) {
1694 		error = EBUSY;
1695 		goto fail1;
1696 	}
1697 
1698 	/* Make sure the caller may read the directory.  */
1699 	error = VOP_ACCESS(dvp, VEXEC, cred, curproc);
1700 	if (error)
1701 		goto fail1;
1702 
1703 	/*
1704 	 * The order in which we lock the source and target nodes is
1705 	 * irrelevant because there can only be one rename on this
1706 	 * directory in flight at a time, and we have it locked.
1707 	 */
1708 
1709 	fde = tmpfs_dir_lookup(dnode, fcnp);
1710 	if (fde == NULL) {
1711 		error = ENOENT;
1712 		goto fail1;
1713 	}
1714 
1715 	KASSERT(fde->td_node != NULL);
1716 	/* We ruled out `.' earlier.  */
1717 	KASSERT(fde->td_node != dnode);
1718 	/* We ruled out `..' earlier.  */
1719 	KASSERT(fde->td_node != dnode->tn_spec.tn_dir.tn_parent);
1720 	rw_enter_write(&fde->td_node->tn_nlock);
1721 	error = tmpfs_vnode_get(mount, fde->td_node, &fvp);
1722 	if (error)
1723 		goto fail1;
1724 	KASSERT(fvp != NULL);
1725 	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
1726 	KASSERT(fvp != dvp);
1727 	KASSERT(fvp->v_mount == mount);
1728 
1729 	/* Refuse to rename a mount point.  */
1730 	if ((fvp->v_type == VDIR) && (fvp->v_mountedhere != NULL)) {
1731 		error = EBUSY;
1732 		goto fail2;
1733 	}
1734 
1735 	tde = tmpfs_dir_lookup(dnode, tcnp);
1736 	if (tde == NULL) {
1737 		tvp = NULL;
1738 	} else {
1739 		KASSERT(tde->td_node != NULL);
1740 		/* We ruled out `.' earlier.  */
1741 		KASSERT(tde->td_node != dnode);
1742 		/* We ruled out `..' earlier.  */
1743 		KASSERT(tde->td_node != dnode->tn_spec.tn_dir.tn_parent);
1744 		if (tde->td_node != fde->td_node) {
1745 			rw_enter_write(&tde->td_node->tn_nlock);
1746 			error = tmpfs_vnode_get(mount, tde->td_node, &tvp);
1747 			if (error)
1748 				goto fail2;
1749 			KASSERT(tvp->v_mount == mount);
1750 			/* Refuse to rename over a mount point.  */
1751 			if ((tvp->v_type == VDIR) &&
1752 			    (tvp->v_mountedhere != NULL)) {
1753 				error = EBUSY;
1754 				goto fail3;
1755 			}
1756 		} else {
1757 			tvp = fvp;
1758 			vref(tvp);
1759 		}
1760 		KASSERT(tvp != NULL);
1761 		KASSERT(VOP_ISLOCKED(tvp) == LK_EXCLUSIVE);
1762 	}
1763 	KASSERT(tvp != dvp);
1764 
1765 	*fde_ret = fde;
1766 	*fvp_ret = fvp;
1767 	*tde_ret = tde;
1768 	*tvp_ret = tvp;
1769 	return 0;
1770 
1771 fail3:	if (tvp != NULL) {
1772 		if (tvp != fvp)
1773 			vput(tvp);
1774 		else
1775 			vrele(tvp);
1776 	}
1777 
1778 fail2:	vput(fvp);
1779 fail1:	VOP_UNLOCK(dvp, 0, curproc);
1780 fail0:	return error;
1781 }
1782 
1783 /*
1784  * Lock and look up with separate source and target directories.
1785  */
1786 int
1787 tmpfs_rename_enter_separate(struct mount *mount, struct tmpfs_mount *tmpfs,
1788     struct ucred *cred,
1789     struct vnode *fdvp, struct tmpfs_node *fdnode, struct componentname *fcnp,
1790     struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
1791     struct vnode *tdvp, struct tmpfs_node *tdnode, struct componentname *tcnp,
1792     struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret)
1793 {
1794 	struct tmpfs_node *intermediate_node;
1795 	struct tmpfs_dirent *fde, *tde;
1796 	struct vnode *fvp, *tvp;
1797 	int error;
1798 
1799 	KASSERT(fdvp != tdvp);
1800 	KASSERT(fdnode != tdnode);
1801 
1802 #if 0				/* XXX */
1803 	mutex_enter(&tmpfs->tm_rename_lock);
1804 #endif
1805 
1806 	error = tmpfs_rename_genealogy(fdnode, tdnode, &intermediate_node);
1807 	if (error)
1808 		goto fail;
1809 
1810 	/*
1811 	 * intermediate_node == NULL means fdnode is not an ancestor of
1812 	 * tdnode.
1813 	 */
1814 	if (intermediate_node == NULL)
1815 		error = tmpfs_rename_lock(mount, cred, ENOTEMPTY,
1816 		    tdvp, tdnode, tcnp, 1, &tde, &tvp,
1817 		    fdvp, fdnode, fcnp, 0, &fde, &fvp);
1818 	else
1819 		error = tmpfs_rename_lock(mount, cred, EINVAL,
1820 		    fdvp, fdnode, fcnp, 0, &fde, &fvp,
1821 		    tdvp, tdnode, tcnp, 1, &tde, &tvp);
1822 	if (error)
1823 		goto fail;
1824 
1825 	KASSERT(fde != NULL);
1826 	KASSERT(fde->td_node != NULL);
1827 
1828 	/*
1829 	 * Reject rename("foo/bar", "foo/bar/baz/quux/zot").
1830 	 */
1831 	if (fde->td_node == intermediate_node) {
1832 		tmpfs_rename_exit(tmpfs, fdvp, fvp, tdvp, tvp);
1833 		return EINVAL;
1834 	}
1835 
1836 	*fde_ret = fde;
1837 	*fvp_ret = fvp;
1838 	*tde_ret = tde;
1839 	*tvp_ret = tvp;
1840 	return 0;
1841 
1842 fail:
1843 #if 0				/* XXX */
1844 	mutex_exit(&tmpfs->tm_rename_lock);
1845 #endif
1846 	return error;
1847 }
1848 
1849 /*
1850  * Unlock everything we locked for rename.
1851  *
1852  * fdvp and tdvp must be referenced.
1853  *
1854  * On entry, everything is locked, and fvp and tvp referenced.
1855  *
1856  * On exit, everything is unlocked, and fvp and tvp are released.
1857  */
1858 void
1859 tmpfs_rename_exit(struct tmpfs_mount *tmpfs,
1860     struct vnode *fdvp, struct vnode *fvp,
1861     struct vnode *tdvp, struct vnode *tvp)
1862 {
1863 
1864 	KASSERT(tmpfs != NULL);
1865 	KASSERT(fdvp != NULL);
1866 	KASSERT(fvp != NULL);
1867 	KASSERT(fdvp != fvp);
1868 	KASSERT(fdvp != tvp);
1869 	KASSERT(tdvp != tvp);
1870 	KASSERT(tdvp != fvp);
1871 	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
1872 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
1873 	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
1874 	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
1875 
1876 	if (tvp != NULL) {
1877 		if (tvp != fvp)
1878 			vput(tvp);
1879 		else
1880 			vrele(tvp);
1881 	}
1882 	VOP_UNLOCK(tdvp, 0, curproc);
1883 	vput(fvp);
1884 	if (fdvp != tdvp)
1885 		VOP_UNLOCK(fdvp, 0, curproc);
1886 
1887 #if 0				/* XXX */
1888 	if (fdvp != tdvp)
1889 		mutex_exit(&tmpfs->tm_rename_lock);
1890 #endif
1891 }
1892 
1893 /*
1894  * Lock a directory, but fail if it has been rmdir'd.
1895  *
1896  * vp must be referenced.
1897  */
1898 int
1899 tmpfs_rename_lock_directory(struct vnode *vp, struct tmpfs_node *node)
1900 {
1901 
1902 	KASSERT(vp != NULL);
1903 	KASSERT(node != NULL);
1904 	KASSERT(node->tn_vnode == vp);
1905 	KASSERT(node->tn_type == VDIR);
1906 
1907 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curproc);
1908 	if (node->tn_spec.tn_dir.tn_parent == NULL) {
1909 		VOP_UNLOCK(vp, 0, curproc);
1910 		return ENOENT;
1911 	}
1912 
1913 	return 0;
1914 }
1915 
1916 /*
1917  * Analyze the genealogy of the source and target nodes.
1918  *
1919  * On success, stores in *intermediate_node_ret either the child of
1920  * fdnode of which tdnode is a descendant, or null if tdnode is not a
1921  * descendant of fdnode at all.
1922  *
1923  * fdnode and tdnode must be unlocked and referenced.  The file
1924  * system's rename lock must also be held, to exclude concurrent
1925  * changes to the file system's genealogy other than rmdir.
1926  *
1927  * XXX This causes an extra lock/unlock of tdnode in the case when
1928  * we're just about to lock it again before locking anything else.
1929  * However, changing that requires reorganizing the code to make it
1930  * even more horrifically obscure.
1931  */
1932 int
1933 tmpfs_rename_genealogy(struct tmpfs_node *fdnode, struct tmpfs_node *tdnode,
1934     struct tmpfs_node **intermediate_node_ret)
1935 {
1936 	struct tmpfs_node *node = tdnode, *parent;
1937 	int error;
1938 
1939 	KASSERT(fdnode != NULL);
1940 	KASSERT(tdnode != NULL);
1941 	KASSERT(fdnode != tdnode);
1942 	KASSERT(intermediate_node_ret != NULL);
1943 
1944 	KASSERT(fdnode->tn_vnode != NULL);
1945 	KASSERT(tdnode->tn_vnode != NULL);
1946 	KASSERT(fdnode->tn_type == VDIR);
1947 	KASSERT(tdnode->tn_type == VDIR);
1948 
1949 	/*
1950 	 * We need to provisionally lock tdnode->tn_vnode to keep rmdir
1951 	 * from deleting it -- or any ancestor -- at an inopportune
1952 	 * moment.
1953 	 */
1954 	error = tmpfs_rename_lock_directory(tdnode->tn_vnode, tdnode);
1955 	if (error)
1956 		return error;
1957 
1958 	for (;;) {
1959 		parent = node->tn_spec.tn_dir.tn_parent;
1960 		KASSERT(parent != NULL);
1961 		KASSERT(parent->tn_type == VDIR);
1962 
1963 		/* Did we hit the root without finding fdnode?  */
1964 		if (parent == node) {
1965 			*intermediate_node_ret = NULL;
1966 			break;
1967 		}
1968 
1969 		/* Did we find that fdnode is an ancestor?  */
1970 		if (parent == fdnode) {
1971 			*intermediate_node_ret = node;
1972 			break;
1973 		}
1974 
1975 		/* Neither -- keep ascending the family tree.  */
1976 		node = parent;
1977 	}
1978 
1979 	VOP_UNLOCK(tdnode->tn_vnode, 0, curproc);
1980 	return 0;
1981 }
1982 
1983 /*
1984  * Lock directories a and b, which must be distinct, and look up and
1985  * lock nodes a and b.  Do a first and then b.  Directory b may not be
1986  * an ancestor of directory a, although directory a may be an ancestor
1987  * of directory b.  Fail with overlap_error if node a is directory b.
1988  * Neither componentname may be `.' or `..'.
1989  *
1990  * a_dvp and b_dvp must be referenced.
1991  *
1992  * On entry, a_dvp and b_dvp are unlocked.
1993  *
1994  * On success,
1995  * . a_dvp and b_dvp are locked,
1996  * . *a_dirent_ret is filled with a directory entry whose node is
1997  *     locked and referenced,
1998  * . *b_vp_ret is filled with the corresponding vnode,
1999  * . *b_dirent_ret is filled either with null or with a directory entry
2000  *     whose node is locked and referenced,
2001  * . *b_vp is filled either with null or with the corresponding vnode,
2002  *     and
2003  * . the only pair of vnodes that may be identical is a_vp and b_vp.
2004  *
2005  * On failure, a_dvp and b_dvp are left unlocked, and *a_dirent_ret,
2006  * *a_vp, *b_dirent_ret, and *b_vp are left alone.
2007  */
2008 int
2009 tmpfs_rename_lock(struct mount *mount, struct ucred *cred, int overlap_error,
2010     struct vnode *a_dvp, struct tmpfs_node *a_dnode,
2011     struct componentname *a_cnp, int a_missing_ok,
2012     struct tmpfs_dirent **a_dirent_ret, struct vnode **a_vp_ret,
2013     struct vnode *b_dvp, struct tmpfs_node *b_dnode,
2014     struct componentname *b_cnp, int b_missing_ok,
2015     struct tmpfs_dirent **b_dirent_ret, struct vnode **b_vp_ret)
2016 {
2017 	struct tmpfs_dirent *a_dirent, *b_dirent;
2018 	struct vnode *a_vp, *b_vp;
2019 	int error;
2020 
2021 	KASSERT(a_dvp != NULL);
2022 	KASSERT(a_dnode != NULL);
2023 	KASSERT(a_cnp != NULL);
2024 	KASSERT(a_dirent_ret != NULL);
2025 	KASSERT(a_vp_ret != NULL);
2026 	KASSERT(b_dvp != NULL);
2027 	KASSERT(b_dnode != NULL);
2028 	KASSERT(b_cnp != NULL);
2029 	KASSERT(b_dirent_ret != NULL);
2030 	KASSERT(b_vp_ret != NULL);
2031 	KASSERT(a_dvp != b_dvp);
2032 	KASSERT(a_dnode != b_dnode);
2033 	KASSERT(a_dnode->tn_vnode == a_dvp);
2034 	KASSERT(b_dnode->tn_vnode == b_dvp);
2035 	KASSERT(a_dnode->tn_type == VDIR);
2036 	KASSERT(b_dnode->tn_type == VDIR);
2037 	KASSERT(a_missing_ok != b_missing_ok);
2038 
2039 	error = tmpfs_rename_lock_directory(a_dvp, a_dnode);
2040 	if (error)
2041 		goto fail0;
2042 
2043 	/* Did we lose a race with mount?  */
2044 	if (a_dvp->v_mountedhere != NULL) {
2045 		error = EBUSY;
2046 		goto fail1;
2047 	}
2048 
2049 	/* Make sure the caller may read the directory.  */
2050 	error = VOP_ACCESS(a_dvp, VEXEC, cred, curproc);
2051 	if (error)
2052 		goto fail1;
2053 
2054 	a_dirent = tmpfs_dir_lookup(a_dnode, a_cnp);
2055 	if (a_dirent != NULL) {
2056 		KASSERT(a_dirent->td_node != NULL);
2057 		/* We ruled out `.' earlier.  */
2058 		KASSERT(a_dirent->td_node != a_dnode);
2059 		/* We ruled out `..' earlier.  */
2060 		KASSERT(a_dirent->td_node !=
2061 		    a_dnode->tn_spec.tn_dir.tn_parent);
2062 		if (a_dirent->td_node == b_dnode) {
2063 			error = overlap_error;
2064 			goto fail1;
2065 		}
2066 		rw_enter_write(&a_dirent->td_node->tn_nlock);
2067 		error = tmpfs_vnode_get(mount, a_dirent->td_node, &a_vp);
2068 		if (error)
2069 			goto fail1;
2070 		KASSERT(a_vp->v_mount == mount);
2071 		/* Refuse to rename (over) a mount point.  */
2072 		if ((a_vp->v_type == VDIR) && (a_vp->v_mountedhere != NULL)) {
2073 			error = EBUSY;
2074 			goto fail2;
2075 		}
2076 	} else if (!a_missing_ok) {
2077 		error = ENOENT;
2078 		goto fail1;
2079 	} else {
2080 		a_vp = NULL;
2081 	}
2082 	KASSERT(a_vp != a_dvp);
2083 	KASSERT(a_vp != b_dvp);
2084 
2085 	error = tmpfs_rename_lock_directory(b_dvp, b_dnode);
2086 	if (error)
2087 		goto fail2;
2088 
2089 	/* Did we lose a race with mount?  */
2090 	if (b_dvp->v_mountedhere != NULL) {
2091 		error = EBUSY;
2092 		goto fail3;
2093 	}
2094 
2095 	/* Make sure the caller may read the directory.  */
2096 	error = VOP_ACCESS(b_dvp, VEXEC, cred, curproc);
2097 	if (error)
2098 		goto fail3;
2099 
2100 	b_dirent = tmpfs_dir_lookup(b_dnode, b_cnp);
2101 	if (b_dirent != NULL) {
2102 		KASSERT(b_dirent->td_node != NULL);
2103 		/* We ruled out `.' earlier.  */
2104 		KASSERT(b_dirent->td_node != b_dnode);
2105 		/* We ruled out `..' earlier.  */
2106 		KASSERT(b_dirent->td_node !=
2107 		    b_dnode->tn_spec.tn_dir.tn_parent);
2108 		/* b is not an ancestor of a.  */
2109 		KASSERT(b_dirent->td_node != a_dnode);
2110 		/* But the source and target nodes might be the same.  */
2111 		if ((a_dirent == NULL) ||
2112 		    (a_dirent->td_node != b_dirent->td_node)) {
2113 			rw_enter_write(&b_dirent->td_node->tn_nlock);
2114 			error = tmpfs_vnode_get(mount, b_dirent->td_node,
2115 			    &b_vp);
2116 			if (error)
2117 				goto fail3;
2118 			KASSERT(b_vp->v_mount == mount);
2119 			KASSERT(a_vp != b_vp);
2120 			/* Refuse to rename (over) a mount point.  */
2121 			if ((b_vp->v_type == VDIR) &&
2122 			    (b_vp->v_mountedhere != NULL)) {
2123 				error = EBUSY;
2124 				goto fail4;
2125 			}
2126 		} else {
2127 			b_vp = a_vp;
2128 			vref(b_vp);
2129 		}
2130 	} else if (!b_missing_ok) {
2131 		error = ENOENT;
2132 		goto fail3;
2133 	} else {
2134 		b_vp = NULL;
2135 	}
2136 	KASSERT(b_vp != a_dvp);
2137 	KASSERT(b_vp != b_dvp);
2138 
2139 	KASSERT(VOP_ISLOCKED(a_dvp) == LK_EXCLUSIVE);
2140 	KASSERT(VOP_ISLOCKED(b_dvp) == LK_EXCLUSIVE);
2141 	KASSERT(a_missing_ok || (a_dirent != NULL));
2142 	KASSERT(a_missing_ok || (a_dirent->td_node != NULL));
2143 	KASSERT(b_missing_ok || (b_dirent != NULL));
2144 	KASSERT(b_missing_ok || (b_dirent->td_node != NULL));
2145 	KASSERT((a_dirent == NULL) || (a_dirent->td_node != NULL));
2146 	KASSERT((a_dirent == NULL) || (a_dirent->td_node->tn_vnode == a_vp));
2147 	KASSERT((b_dirent == NULL) || (b_dirent->td_node != NULL));
2148 	KASSERT((b_dirent == NULL) || (b_dirent->td_node->tn_vnode == b_vp));
2149 	KASSERT((a_vp == NULL) || (VOP_ISLOCKED(a_vp) == LK_EXCLUSIVE));
2150 	KASSERT((b_vp == NULL) || (VOP_ISLOCKED(b_vp) == LK_EXCLUSIVE));
2151 
2152 	*a_dirent_ret = a_dirent;
2153 	*b_dirent_ret = b_dirent;
2154 	*a_vp_ret = a_vp;
2155 	*b_vp_ret = b_vp;
2156 	return 0;
2157 
2158 fail4:	if (b_vp != NULL) {
2159 		KASSERT(VOP_ISLOCKED(b_vp) == LK_EXCLUSIVE);
2160 		if (b_vp != a_vp)
2161 			vput(b_vp);
2162 		else
2163 			vrele(a_vp);
2164 	}
2165 
2166 fail3:	KASSERT(VOP_ISLOCKED(b_dvp) == LK_EXCLUSIVE);
2167 	VOP_UNLOCK(b_dvp, 0, curproc);
2168 
2169 fail2:	if (a_vp != NULL) {
2170 		KASSERT(VOP_ISLOCKED(a_vp) == LK_EXCLUSIVE);
2171 		vput(a_vp);
2172 	}
2173 
2174 fail1:	KASSERT(VOP_ISLOCKED(a_dvp) == LK_EXCLUSIVE);
2175 	VOP_UNLOCK(a_dvp, 0, curproc);
2176 
2177 fail0:	/* KASSERT(VOP_ISLOCKED(a_dvp) != LK_EXCLUSIVE); */
2178 	/* KASSERT(VOP_ISLOCKED(b_dvp) != LK_EXCLUSIVE); */
2179 	/* KASSERT((a_vp == NULL) || (VOP_ISLOCKED(a_vp) != LK_EXCLUSIVE)); */
2180 	/* KASSERT((b_vp == NULL) || (VOP_ISLOCKED(b_vp) != LK_EXCLUSIVE)); */
2181 	return error;
2182 }
2183 
2184 /*
2185  * Shuffle the directory entries to move fvp from the directory fdvp
2186  * into the directory tdvp.  fde is fvp's directory entry in fdvp.  If
2187  * we are overwriting a target node, it is tvp, and tde is its
2188  * directory entry in tdvp.
2189  *
2190  * fdvp, fvp, tdvp, and tvp must all be locked and referenced.
2191  */
2192 void
2193 tmpfs_rename_attachdetach(struct tmpfs_mount *tmpfs,
2194     struct vnode *fdvp, struct tmpfs_dirent *fde, struct vnode *fvp,
2195     struct vnode *tdvp, struct tmpfs_dirent *tde, struct vnode *tvp)
2196 {
2197 
2198 	KASSERT(tmpfs != NULL);
2199 	KASSERT(fdvp != NULL);
2200 	KASSERT(fde != NULL);
2201 	KASSERT(fvp != NULL);
2202 	KASSERT(tdvp != NULL);
2203 	KASSERT(fde->td_node != NULL);
2204 	KASSERT(fde->td_node->tn_vnode == fvp);
2205 	KASSERT((tde == NULL) == (tvp == NULL));
2206 	KASSERT((tde == NULL) || (tde->td_node != NULL));
2207 	KASSERT((tde == NULL) || (tde->td_node->tn_vnode == tvp));
2208 	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
2209 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
2210 	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
2211 	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
2212 
2213 	/*
2214 	 * If we are moving from one directory to another, detach the
2215 	 * source entry and reattach it to the target directory.
2216 	 */
2217 	if (fdvp != tdvp) {
2218 		/* tmpfs_dir_detach clobbers fde->td_node, so save it.  */
2219 		struct tmpfs_node *fnode = fde->td_node;
2220 		tmpfs_node_t *fdnode = VP_TO_TMPFS_DIR(fdvp);
2221 		tmpfs_node_t *tdnode = VP_TO_TMPFS_DIR(tdvp);
2222 		tmpfs_dir_detach(fdnode, fde);
2223 		tmpfs_dir_attach(tdnode, fde, fnode);
2224 	} else if (tvp == NULL) {
2225 		/*
2226 		 * We are changing the directory.  tmpfs_dir_attach and
2227 		 * tmpfs_dir_detach note the events for us, but for
2228 		 * this case we don't call them, so we must note the
2229 		 * event explicitly.
2230 		 */
2231 		VN_KNOTE(fdvp, NOTE_WRITE);
2232 	}
2233 
2234 	/*
2235 	 * If we are replacing an existing target entry, delete it.
2236 	 */
2237 	if (tde != NULL) {
2238 		tmpfs_node_t *tdnode = VP_TO_TMPFS_DIR(tdvp);
2239 		KASSERT(tvp != NULL);
2240 		KASSERT(tde->td_node != NULL);
2241 		KASSERT((fvp->v_type == VDIR) == (tvp->v_type == VDIR));
2242 		if (tde->td_node->tn_type == VDIR) {
2243 			KASSERT(tde->td_node->tn_size == 0);
2244 			KASSERT(tde->td_node->tn_links == 2);
2245 			/* Decrement the extra link count for `.' so
2246 			 * the vnode will be recycled when released.  */
2247 			tde->td_node->tn_links--;
2248 		}
2249 		tmpfs_dir_detach(tdnode, tde);
2250 		tmpfs_free_dirent(tmpfs, tde);
2251 	}
2252 }
2253 
2254 /*
2255  * Remove the entry de for the non-directory vp from the directory dvp.
2256  *
2257  * Everything must be locked and referenced.
2258  */
2259 int
2260 tmpfs_do_remove(struct tmpfs_mount *tmpfs, struct vnode *dvp,
2261     struct tmpfs_node *dnode, struct tmpfs_dirent *de, struct vnode *vp,
2262     struct ucred *cred)
2263 {
2264 	int error;
2265 
2266 	KASSERT(tmpfs != NULL);
2267 	KASSERT(dvp != NULL);
2268 	KASSERT(dnode != NULL);
2269 	KASSERT(de != NULL);
2270 	KASSERT(vp != NULL);
2271 	KASSERT(dnode->tn_vnode == dvp);
2272 	KASSERT(de->td_node != NULL);
2273 	KASSERT(de->td_node->tn_vnode == vp);
2274 	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
2275 	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
2276 
2277 	error = tmpfs_remove_check_possible(dnode, de->td_node);
2278 	if (error)
2279 		return error;
2280 
2281 	error = tmpfs_remove_check_permitted(cred, dnode, de->td_node);
2282 	if (error)
2283 		return error;
2284 
2285 	/*
2286 	 * If not root and directory is sticky, check for permission on
2287 	 * directory or on file. This implements append-only directories.
2288 	 */
2289 	if ((dnode->tn_mode & S_ISTXT) != 0)
2290 		if (cred->cr_uid != 0 && cred->cr_uid != dnode->tn_uid &&
2291 		    cred->cr_uid != de->td_node->tn_uid)
2292 			return EPERM;
2293 
2294 	tmpfs_dir_detach(dnode, de);
2295 	tmpfs_free_dirent(tmpfs, de);
2296 
2297 	return 0;
2298 }
2299 
2300 /*
2301  * Check whether a rename is possible independent of credentials.
2302  *
2303  * Everything must be locked and referenced.
2304  */
2305 int
2306 tmpfs_rename_check_possible(
2307     struct tmpfs_node *fdnode, struct tmpfs_node *fnode,
2308     struct tmpfs_node *tdnode, struct tmpfs_node *tnode)
2309 {
2310 
2311 	KASSERT(fdnode != NULL);
2312 	KASSERT(fnode != NULL);
2313 	KASSERT(tdnode != NULL);
2314 	KASSERT(fdnode != fnode);
2315 	KASSERT(tdnode != tnode);
2316 	KASSERT(fnode != tnode);
2317 	KASSERT(fdnode->tn_vnode != NULL);
2318 	KASSERT(fnode->tn_vnode != NULL);
2319 	KASSERT(tdnode->tn_vnode != NULL);
2320 	KASSERT((tnode == NULL) || (tnode->tn_vnode != NULL));
2321 	KASSERT(VOP_ISLOCKED(fdnode->tn_vnode) == LK_EXCLUSIVE);
2322 	KASSERT(VOP_ISLOCKED(fnode->tn_vnode) == LK_EXCLUSIVE);
2323 	KASSERT(VOP_ISLOCKED(tdnode->tn_vnode) == LK_EXCLUSIVE);
2324 	KASSERT((tnode == NULL) ||
2325 	    (VOP_ISLOCKED(tnode->tn_vnode) == LK_EXCLUSIVE));
2326 
2327 	/*
2328 	 * If fdnode is immutable, we can't write to it.  If fdnode is
2329 	 * append-only, the only change we can make is to add entries
2330 	 * to it.  If fnode is immutable, we can't change the links to
2331 	 * it.  If fnode is append-only...well, this is what UFS does.
2332 	 */
2333 	if ((fdnode->tn_flags | fnode->tn_flags) & (IMMUTABLE | APPEND))
2334 		return EPERM;
2335 
2336 	/*
2337 	 * If tdnode is immutable, we can't write to it.  If tdnode is
2338 	 * append-only, we can add entries, but we can't change
2339 	 * existing entries.
2340 	 */
2341 	if (tdnode->tn_flags & (IMMUTABLE | (tnode? APPEND : 0)))
2342 		return EPERM;
2343 
2344 	/*
2345 	 * If tnode is immutable, we can't replace links to it.  If
2346 	 * tnode is append-only...well, this is what UFS does.
2347 	 */
2348 	if (tnode != NULL) {
2349 		KASSERT(tnode != NULL);
2350 		if ((tnode->tn_flags & (IMMUTABLE | APPEND)) != 0)
2351 			return EPERM;
2352 	}
2353 
2354 	return 0;
2355 }
2356 
2357 /*
2358  * Check whether a rename is permitted given our credentials.
2359  *
2360  * Everything must be locked and referenced.
2361  */
2362 int
2363 tmpfs_rename_check_permitted(struct ucred *cred,
2364     struct tmpfs_node *fdnode, struct tmpfs_node *fnode,
2365     struct tmpfs_node *tdnode, struct tmpfs_node *tnode)
2366 {
2367 	int error;
2368 
2369 	KASSERT(fdnode != NULL);
2370 	KASSERT(fnode != NULL);
2371 	KASSERT(tdnode != NULL);
2372 	KASSERT(fdnode != fnode);
2373 	KASSERT(tdnode != tnode);
2374 	KASSERT(fnode != tnode);
2375 	KASSERT(fdnode->tn_vnode != NULL);
2376 	KASSERT(fnode->tn_vnode != NULL);
2377 	KASSERT(tdnode->tn_vnode != NULL);
2378 	KASSERT((tnode == NULL) || (tnode->tn_vnode != NULL));
2379 	KASSERT(VOP_ISLOCKED(fdnode->tn_vnode) == LK_EXCLUSIVE);
2380 	KASSERT(VOP_ISLOCKED(fnode->tn_vnode) == LK_EXCLUSIVE);
2381 	KASSERT(VOP_ISLOCKED(tdnode->tn_vnode) == LK_EXCLUSIVE);
2382 	KASSERT((tnode == NULL) ||
2383 	    (VOP_ISLOCKED(tnode->tn_vnode) == LK_EXCLUSIVE));
2384 
2385 	/*
2386 	 * We need to remove or change an entry in the source directory.
2387 	 */
2388 	error = VOP_ACCESS(fdnode->tn_vnode, VWRITE, cred, curproc);
2389 	if (error)
2390 		return error;
2391 
2392 	/*
2393 	 * If we are changing directories, then we need to write to the
2394 	 * target directory to add or change an entry.  Also, if fnode
2395 	 * is a directory, we need to write to it to change its `..'
2396 	 * entry.
2397 	 */
2398 	if (fdnode != tdnode) {
2399 		error = VOP_ACCESS(tdnode->tn_vnode, VWRITE, cred, curproc);
2400 		if (error)
2401 			return error;
2402 		if (fnode->tn_type == VDIR) {
2403 			error = VOP_ACCESS(fnode->tn_vnode, VWRITE, cred,
2404 			    curproc);
2405 			if (error)
2406 				return error;
2407 		}
2408 	}
2409 
2410 	error = tmpfs_check_sticky(cred, fdnode, fnode);
2411 	if (error)
2412 		return error;
2413 
2414 	if (TMPFS_DIRSEQ_FULL(tdnode))
2415 		return (ENOSPC);
2416 
2417 	error = tmpfs_check_sticky(cred, tdnode, tnode);
2418 	if (error)
2419 		return error;
2420 
2421 	return 0;
2422 }
2423 
2424 /*
2425  * Check whether removing node's entry in dnode is possible independent
2426  * of credentials.
2427  *
2428  * Everything must be locked and referenced.
2429  */
2430 int
2431 tmpfs_remove_check_possible(struct tmpfs_node *dnode, struct tmpfs_node *node)
2432 {
2433 
2434 	KASSERT(dnode != NULL);
2435 	KASSERT(dnode->tn_vnode != NULL);
2436 	KASSERT(node != NULL);
2437 	KASSERT(dnode != node);
2438 	KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE);
2439 	KASSERT(VOP_ISLOCKED(node->tn_vnode) == LK_EXCLUSIVE);
2440 
2441 	/*
2442 	 * We want to delete the entry.  If dnode is immutable, we
2443 	 * can't write to it to delete the entry.  If dnode is
2444 	 * append-only, the only change we can make is to add entries,
2445 	 * so we can't delete entries.  If node is immutable, we can't
2446 	 * change the links to it, so we can't delete the entry.  If
2447 	 * node is append-only...well, this is what UFS does.
2448 	 */
2449 	if ((dnode->tn_flags | node->tn_flags) & (IMMUTABLE | APPEND))
2450 		return EPERM;
2451 
2452 	return 0;
2453 }
2454 
2455 /*
2456  * Check whether removing node's entry in dnode is permitted given our
2457  * credentials.
2458  *
2459  * Everything must be locked and referenced.
2460  */
2461 int
2462 tmpfs_remove_check_permitted(struct ucred *cred,
2463     struct tmpfs_node *dnode, struct tmpfs_node *node)
2464 {
2465 	int error;
2466 
2467 	KASSERT(dnode != NULL);
2468 	KASSERT(dnode->tn_vnode != NULL);
2469 	KASSERT(node != NULL);
2470 	KASSERT(dnode != node);
2471 	KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE);
2472 	KASSERT(VOP_ISLOCKED(node->tn_vnode) == LK_EXCLUSIVE);
2473 
2474 	/*
2475 	 * Check whether we are permitted to write to the source
2476 	 * directory in order to delete an entry from it.
2477 	 */
2478 	error = VOP_ACCESS(dnode->tn_vnode, VWRITE, cred, curproc);
2479 	if (error)
2480 		return error;
2481 
2482 	error = tmpfs_check_sticky(cred, dnode, node);
2483 	if (error)
2484 		return error;
2485 
2486 	return 0;
2487 }
2488 
2489 /*
2490  * Check whether we may change an entry in a sticky directory.  If the
2491  * directory is sticky, the user must own either the directory or, if
2492  * it exists, the node, in order to change the entry.
2493  *
2494  * Everything must be locked and referenced.
2495  */
2496 int
2497 tmpfs_check_sticky(struct ucred *cred,
2498     struct tmpfs_node *dnode, struct tmpfs_node *node)
2499 {
2500 
2501 	KASSERT(dnode != NULL);
2502 	KASSERT(dnode->tn_vnode != NULL);
2503 	KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE);
2504 	KASSERT((node == NULL) || (node->tn_vnode != NULL));
2505 	KASSERT((node == NULL) ||
2506 	    (VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE));
2507 
2508 	if (node == NULL)
2509 		return 0;
2510 
2511 	if (dnode->tn_mode & S_ISTXT) {
2512 		if (cred->cr_uid != 0 &&
2513 		    cred->cr_uid != dnode->tn_uid &&
2514 		    cred->cr_uid != node->tn_uid)
2515 			return EPERM;
2516 	}
2517 
2518 	return 0;
2519 }
2520 
2521 void
2522 tmpfs_rename_cache_purge(struct vnode *fdvp, struct vnode *fvp,
2523     struct vnode *tdvp, struct vnode *tvp)
2524 {
2525 
2526 	KASSERT(fdvp != NULL);
2527 	KASSERT(fvp != NULL);
2528 	KASSERT(tdvp != NULL);
2529 	KASSERT(fdvp != fvp);
2530 	KASSERT(fdvp != tvp);
2531 	KASSERT(tdvp != fvp);
2532 	KASSERT(tdvp != tvp);
2533 	KASSERT(fvp != tvp);
2534 	KASSERT(fdvp->v_type == VDIR);
2535 	KASSERT(tdvp->v_type == VDIR);
2536 
2537 	/*
2538 	 * XXX What actually needs to be purged?
2539 	 */
2540 
2541 	cache_purge(fdvp);
2542 
2543 	if (fvp->v_type == VDIR)
2544 		cache_purge(fvp);
2545 
2546 	if (tdvp != fdvp)
2547 		cache_purge(tdvp);
2548 
2549 	if ((tvp != NULL) && (tvp->v_type == VDIR))
2550 		cache_purge(tvp);
2551 }
2552 
2553 void
2554 tmpfs_rename_abort(void *v)
2555 {
2556 	struct vop_rename_args *ap = v;
2557 	struct vnode *fdvp = ap->a_fdvp;
2558 	struct vnode *fvp = ap->a_fvp;
2559 	struct componentname *fcnp = ap->a_fcnp;
2560 	struct vnode *tdvp = ap->a_tdvp;
2561 	struct vnode *tvp = ap->a_tvp;
2562 	struct componentname *tcnp = ap->a_tcnp;
2563 
2564 	VOP_ABORTOP(tdvp, tcnp);
2565 	if (tdvp == tvp)
2566 		vrele(tdvp);
2567 	else
2568 		vput(tdvp);
2569 	if (tvp != NULL)
2570 		vput(tvp);
2571 	VOP_ABORTOP(fdvp, fcnp);
2572 	vrele(fdvp);
2573 	vrele(fvp);
2574 }
2575 
2576 void filt_tmpfsdetach(struct knote *kn);
2577 int filt_tmpfsread(struct knote *kn, long hint);
2578 int filt_tmpfswrite(struct knote *kn, long hint);
2579 int filt_tmpfsvnode(struct knote *kn, long hint);
2580 
2581 struct filterops tmpfsread_filtops =
2582 	{ 1, NULL, filt_tmpfsdetach, filt_tmpfsread };
2583 struct filterops tmpfswrite_filtops =
2584 	{ 1, NULL, filt_tmpfsdetach, filt_tmpfswrite };
2585 struct filterops tmpfsvnode_filtops =
2586 	{ 1, NULL, filt_tmpfsdetach, filt_tmpfsvnode };
2587 
2588 int
2589 tmpfs_kqfilter(void *v)
2590 {
2591 	struct vop_kqfilter_args *ap = v;
2592 	struct vnode *vp = ap->a_vp;
2593 	struct knote *kn = ap->a_kn;
2594 
2595 	switch (kn->kn_filter) {
2596 	case EVFILT_READ:
2597 		kn->kn_fop = &tmpfsread_filtops;
2598 		break;
2599 	case EVFILT_WRITE:
2600 		kn->kn_fop = &tmpfswrite_filtops;
2601 		break;
2602 	case EVFILT_VNODE:
2603 		kn->kn_fop = &tmpfsvnode_filtops;
2604 		break;
2605 	default:
2606 		return (EINVAL);
2607 	}
2608 
2609 	kn->kn_hook = (caddr_t)vp;
2610 
2611 	SLIST_INSERT_HEAD(&vp->v_selectinfo.si_note, kn, kn_selnext);
2612 
2613 	return (0);
2614 }
2615 
2616 void
2617 filt_tmpfsdetach(struct knote *kn)
2618 {
2619 	struct vnode *vp = (struct vnode *)kn->kn_hook;
2620 
2621 	SLIST_REMOVE(&vp->v_selectinfo.si_note, kn, knote, kn_selnext);
2622 }
2623 
2624 int
2625 filt_tmpfsread(struct knote *kn, long hint)
2626 {
2627 	struct vnode *vp = (struct vnode *)kn->kn_hook;
2628 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
2629 
2630 	/*
2631 	 * filesystem is gone, so set the EOF flag and schedule
2632 	 * the knote for deletion.
2633 	 */
2634 	if (hint == NOTE_REVOKE) {
2635 		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
2636 		return (1);
2637 	}
2638 
2639         kn->kn_data = node->tn_size - kn->kn_fp->f_offset;
2640 	if (kn->kn_data == 0 && kn->kn_sfflags & NOTE_EOF) {
2641 		kn->kn_fflags |= NOTE_EOF;
2642 		return (1);
2643 	}
2644 
2645         return (kn->kn_data != 0);
2646 }
2647 
2648 int
2649 filt_tmpfswrite(struct knote *kn, long hint)
2650 {
2651 	/*
2652 	 * filesystem is gone, so set the EOF flag and schedule
2653 	 * the knote for deletion.
2654 	 */
2655 	if (hint == NOTE_REVOKE) {
2656 		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
2657 		return (1);
2658 	}
2659 
2660         kn->kn_data = 0;
2661         return (1);
2662 }
2663 
2664 int
2665 filt_tmpfsvnode(struct knote *kn, long hint)
2666 {
2667 	if (kn->kn_sfflags & hint)
2668 		kn->kn_fflags |= hint;
2669 	if (hint == NOTE_REVOKE) {
2670 		kn->kn_flags |= EV_EOF;
2671 		return (1);
2672 	}
2673 	return (kn->kn_fflags != 0);
2674 }
2675