xref: /dflybsd-src/sys/vfs/tmpfs/tmpfs_vnops.c (revision d4e390fc9a3878b804ce122fbda94892c11fa301)
1 /*-
2  * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to The NetBSD Foundation
6  * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
7  * 2005 program.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  *
30  * $NetBSD: tmpfs_vnops.c,v 1.39 2007/07/23 15:41:01 jmmv Exp $
31  */
32 
33 /*
34  * tmpfs vnode interface.
35  */
36 
37 #include <sys/kernel.h>
38 #include <sys/kern_syscall.h>
39 #include <sys/param.h>
40 #include <sys/fcntl.h>
41 #include <sys/lockf.h>
42 #include <sys/priv.h>
43 #include <sys/proc.h>
44 #include <sys/resourcevar.h>
45 #include <sys/sched.h>
46 #include <sys/stat.h>
47 #include <sys/systm.h>
48 #include <sys/sysctl.h>
49 #include <sys/unistd.h>
50 #include <sys/vfsops.h>
51 #include <sys/vnode.h>
52 #include <sys/mountctl.h>
53 
54 #include <vm/vm.h>
55 #include <vm/vm_extern.h>
56 #include <vm/vm_object.h>
57 #include <vm/vm_page.h>
58 #include <vm/vm_pageout.h>
59 #include <vm/vm_pager.h>
60 #include <vm/swap_pager.h>
61 
62 #include <sys/buf2.h>
63 #include <vm/vm_page2.h>
64 
65 #include <vfs/fifofs/fifo.h>
66 #include <vfs/tmpfs/tmpfs_vnops.h>
67 #include "tmpfs.h"
68 
69 static void tmpfs_strategy_done(struct bio *bio);
70 static void tmpfs_move_pages(vm_object_t src, vm_object_t dst);
71 
72 static int tmpfs_cluster_enable = 1;
73 SYSCTL_NODE(_vfs, OID_AUTO, tmpfs, CTLFLAG_RW, 0, "TMPFS filesystem");
74 SYSCTL_INT(_vfs_tmpfs, OID_AUTO, cluster_enable, CTLFLAG_RW,
75 		&tmpfs_cluster_enable, 0, "");
76 
77 static __inline
78 void
79 tmpfs_knote(struct vnode *vp, int flags)
80 {
81 	if (flags)
82 		KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags);
83 }
84 
85 
86 /* --------------------------------------------------------------------- */
87 
88 static int
89 tmpfs_nresolve(struct vop_nresolve_args *ap)
90 {
91 	struct vnode *dvp = ap->a_dvp;
92 	struct vnode *vp = NULL;
93 	struct namecache *ncp = ap->a_nch->ncp;
94 	struct tmpfs_node *tnode;
95 	struct tmpfs_dirent *de;
96 	struct tmpfs_node *dnode;
97 	int error;
98 
99 	dnode = VP_TO_TMPFS_DIR(dvp);
100 
101 	TMPFS_NODE_LOCK_SH(dnode);
102 	de = tmpfs_dir_lookup(dnode, NULL, ncp);
103 	if (de == NULL) {
104 		error = ENOENT;
105 	} else {
106 		/*
107 		 * Allocate a vnode for the node we found.
108 		 */
109 		tnode = de->td_node;
110 		error = tmpfs_alloc_vp(dvp->v_mount, tnode,
111 				       LK_EXCLUSIVE | LK_RETRY, &vp);
112 		if (error)
113 			goto out;
114 		KKASSERT(vp);
115 	}
116 
117 out:
118 	TMPFS_NODE_UNLOCK(dnode);
119 
120 	if ((dnode->tn_status & TMPFS_NODE_ACCESSED) == 0) {
121 		TMPFS_NODE_LOCK(dnode);
122 		dnode->tn_status |= TMPFS_NODE_ACCESSED;
123 		TMPFS_NODE_UNLOCK(dnode);
124 	}
125 
126 	/*
127 	 * Store the result of this lookup in the cache.  Avoid this if the
128 	 * request was for creation, as it does not improve timings on
129 	 * emprical tests.
130 	 */
131 	if (vp) {
132 		vn_unlock(vp);
133 		cache_setvp(ap->a_nch, vp);
134 		vrele(vp);
135 	} else if (error == ENOENT) {
136 		cache_setvp(ap->a_nch, NULL);
137 	}
138 	return (error);
139 }
140 
141 static int
142 tmpfs_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
143 {
144 	struct vnode *dvp = ap->a_dvp;
145 	struct vnode **vpp = ap->a_vpp;
146 	struct tmpfs_node *dnode = VP_TO_TMPFS_NODE(dvp);
147 	struct ucred *cred = ap->a_cred;
148 	int error;
149 
150 	*vpp = NULL;
151 
152 	/* Check accessibility of requested node as a first step. */
153 	error = VOP_ACCESS(dvp, VEXEC, cred);
154 	if (error != 0)
155 		return error;
156 
157 	if (dnode->tn_dir.tn_parent != NULL) {
158 		/* Allocate a new vnode on the matching entry. */
159 		error = tmpfs_alloc_vp(dvp->v_mount, dnode->tn_dir.tn_parent,
160 				       LK_EXCLUSIVE | LK_RETRY, vpp);
161 
162 		if (*vpp)
163 			vn_unlock(*vpp);
164 	}
165 	return (*vpp == NULL) ? ENOENT : 0;
166 }
167 
168 /* --------------------------------------------------------------------- */
169 
170 static int
171 tmpfs_ncreate(struct vop_ncreate_args *ap)
172 {
173 	struct vnode *dvp = ap->a_dvp;
174 	struct vnode **vpp = ap->a_vpp;
175 	struct namecache *ncp = ap->a_nch->ncp;
176 	struct vattr *vap = ap->a_vap;
177 	struct ucred *cred = ap->a_cred;
178 	int error;
179 
180 	KKASSERT(vap->va_type == VREG || vap->va_type == VSOCK);
181 
182 	error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL);
183 	if (error == 0) {
184 		cache_setunresolved(ap->a_nch);
185 		cache_setvp(ap->a_nch, *vpp);
186 		tmpfs_knote(dvp, NOTE_WRITE);
187 	}
188 	return (error);
189 }
190 /* --------------------------------------------------------------------- */
191 
192 static int
193 tmpfs_nmknod(struct vop_nmknod_args *ap)
194 {
195 	struct vnode *dvp = ap->a_dvp;
196 	struct vnode **vpp = ap->a_vpp;
197 	struct namecache *ncp = ap->a_nch->ncp;
198 	struct vattr *vap = ap->a_vap;
199 	struct ucred *cred = ap->a_cred;
200 	int error;
201 
202 	if (vap->va_type != VBLK && vap->va_type != VCHR &&
203 	    vap->va_type != VFIFO) {
204 		return (EINVAL);
205 	}
206 
207 	error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL);
208 	if (error == 0) {
209 		cache_setunresolved(ap->a_nch);
210 		cache_setvp(ap->a_nch, *vpp);
211 		tmpfs_knote(dvp, NOTE_WRITE);
212 	}
213 	return error;
214 }
215 
216 /* --------------------------------------------------------------------- */
217 
218 static int
219 tmpfs_open(struct vop_open_args *ap)
220 {
221 	struct vnode *vp = ap->a_vp;
222 	int mode = ap->a_mode;
223 	struct tmpfs_node *node;
224 	int error;
225 
226 	node = VP_TO_TMPFS_NODE(vp);
227 
228 #if 0
229 	/* The file is still active but all its names have been removed
230 	 * (e.g. by a "rmdir $(pwd)").  It cannot be opened any more as
231 	 * it is about to die. */
232 	if (node->tn_links < 1)
233 		return (ENOENT);
234 #endif
235 
236 	/* If the file is marked append-only, deny write requests. */
237 	if ((node->tn_flags & APPEND) &&
238 	    (mode & (FWRITE | O_APPEND)) == FWRITE) {
239 		error = EPERM;
240 	} else {
241 		if (node->tn_reg.tn_pages_in_aobj) {
242 			TMPFS_NODE_LOCK(node);
243 			if (node->tn_reg.tn_pages_in_aobj) {
244 				tmpfs_move_pages(node->tn_reg.tn_aobj,
245 						 vp->v_object);
246 				node->tn_reg.tn_pages_in_aobj = 0;
247 			}
248 			TMPFS_NODE_UNLOCK(node);
249 		}
250 		error = vop_stdopen(ap);
251 	}
252 
253 	return (error);
254 }
255 
256 /* --------------------------------------------------------------------- */
257 
258 static int
259 tmpfs_close(struct vop_close_args *ap)
260 {
261 	struct vnode *vp = ap->a_vp;
262 	struct tmpfs_node *node;
263 	int error;
264 
265 	node = VP_TO_TMPFS_NODE(vp);
266 
267 	if (node->tn_links > 0) {
268 		/*
269 		 * Update node times.  No need to do it if the node has
270 		 * been deleted, because it will vanish after we return.
271 		 */
272 		tmpfs_update(vp);
273 	}
274 
275 	error = vop_stdclose(ap);
276 
277 	return (error);
278 }
279 
280 /* --------------------------------------------------------------------- */
281 
282 int
283 tmpfs_access(struct vop_access_args *ap)
284 {
285 	struct vnode *vp = ap->a_vp;
286 	int error;
287 	struct tmpfs_node *node;
288 
289 	node = VP_TO_TMPFS_NODE(vp);
290 
291 	switch (vp->v_type) {
292 	case VDIR:
293 		/* FALLTHROUGH */
294 	case VLNK:
295 		/* FALLTHROUGH */
296 	case VREG:
297 		if ((ap->a_mode & VWRITE) &&
298 	            (vp->v_mount->mnt_flag & MNT_RDONLY)) {
299 			error = EROFS;
300 			goto out;
301 		}
302 		break;
303 
304 	case VBLK:
305 		/* FALLTHROUGH */
306 	case VCHR:
307 		/* FALLTHROUGH */
308 	case VSOCK:
309 		/* FALLTHROUGH */
310 	case VFIFO:
311 		break;
312 
313 	default:
314 		error = EINVAL;
315 		goto out;
316 	}
317 
318 	if ((ap->a_mode & VWRITE) && (node->tn_flags & IMMUTABLE)) {
319 		error = EPERM;
320 		goto out;
321 	}
322 
323 	error = vop_helper_access(ap, node->tn_uid, node->tn_gid,
324 			          node->tn_mode, 0);
325 out:
326 	return error;
327 }
328 
329 /* --------------------------------------------------------------------- */
330 
331 int
332 tmpfs_getattr(struct vop_getattr_args *ap)
333 {
334 	struct vnode *vp = ap->a_vp;
335 	struct vattr *vap = ap->a_vap;
336 	struct tmpfs_node *node;
337 
338 	node = VP_TO_TMPFS_NODE(vp);
339 
340 	tmpfs_update(vp);
341 
342 	TMPFS_NODE_LOCK_SH(node);
343 	vap->va_type = vp->v_type;
344 	vap->va_mode = node->tn_mode;
345 	vap->va_nlink = node->tn_links;
346 	vap->va_uid = node->tn_uid;
347 	vap->va_gid = node->tn_gid;
348 	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
349 	vap->va_fileid = node->tn_id;
350 	vap->va_size = node->tn_size;
351 	vap->va_blocksize = PAGE_SIZE;
352 	vap->va_atime.tv_sec = node->tn_atime;
353 	vap->va_atime.tv_nsec = node->tn_atimensec;
354 	vap->va_mtime.tv_sec = node->tn_mtime;
355 	vap->va_mtime.tv_nsec = node->tn_mtimensec;
356 	vap->va_ctime.tv_sec = node->tn_ctime;
357 	vap->va_ctime.tv_nsec = node->tn_ctimensec;
358 	vap->va_gen = node->tn_gen;
359 	vap->va_flags = node->tn_flags;
360 	if (vp->v_type == VBLK || vp->v_type == VCHR) {
361 		vap->va_rmajor = umajor(node->tn_rdev);
362 		vap->va_rminor = uminor(node->tn_rdev);
363 	}
364 	vap->va_bytes = round_page(node->tn_size);
365 	vap->va_filerev = 0;
366 	TMPFS_NODE_UNLOCK(node);
367 
368 	return 0;
369 }
370 
371 /* --------------------------------------------------------------------- */
372 
373 int
374 tmpfs_setattr(struct vop_setattr_args *ap)
375 {
376 	struct vnode *vp = ap->a_vp;
377 	struct vattr *vap = ap->a_vap;
378 	struct ucred *cred = ap->a_cred;
379 	struct tmpfs_node *node = VP_TO_TMPFS_NODE(vp);
380 	int error = 0;
381 	int kflags = 0;
382 
383 	TMPFS_NODE_LOCK(node);
384 	if (error == 0 && (vap->va_flags != VNOVAL)) {
385 		error = tmpfs_chflags(vp, vap->va_flags, cred);
386 		kflags |= NOTE_ATTRIB;
387 	}
388 
389 	if (error == 0 && (vap->va_size != VNOVAL)) {
390 		/* restore any saved pages before proceeding */
391 		if (node->tn_reg.tn_pages_in_aobj) {
392 			tmpfs_move_pages(node->tn_reg.tn_aobj, vp->v_object);
393 			node->tn_reg.tn_pages_in_aobj = 0;
394 		}
395 		if (vap->va_size > node->tn_size)
396 			kflags |= NOTE_WRITE | NOTE_EXTEND;
397 		else
398 			kflags |= NOTE_WRITE;
399 		error = tmpfs_chsize(vp, vap->va_size, cred);
400 	}
401 
402 	if (error == 0 && (vap->va_uid != (uid_t)VNOVAL ||
403 			   vap->va_gid != (gid_t)VNOVAL)) {
404 		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred);
405 		kflags |= NOTE_ATTRIB;
406 	}
407 
408 	if (error == 0 && (vap->va_mode != (mode_t)VNOVAL)) {
409 		error = tmpfs_chmod(vp, vap->va_mode, cred);
410 		kflags |= NOTE_ATTRIB;
411 	}
412 
413 	if (error == 0 && ((vap->va_atime.tv_sec != VNOVAL &&
414 	    vap->va_atime.tv_nsec != VNOVAL) ||
415 	    (vap->va_mtime.tv_sec != VNOVAL &&
416 	    vap->va_mtime.tv_nsec != VNOVAL) )) {
417 		error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime,
418 				      vap->va_vaflags, cred);
419 		kflags |= NOTE_ATTRIB;
420 	}
421 
422 	/*
423 	 * Update the node times.  We give preference to the error codes
424 	 * generated by this function rather than the ones that may arise
425 	 * from tmpfs_update.
426 	 */
427 	tmpfs_update(vp);
428 	TMPFS_NODE_UNLOCK(node);
429 	tmpfs_knote(vp, kflags);
430 
431 	return (error);
432 }
433 
434 /* --------------------------------------------------------------------- */
435 
436 /*
437  * fsync is usually a NOP, but we must take action when unmounting or
438  * when recycling.
439  */
440 static int
441 tmpfs_fsync(struct vop_fsync_args *ap)
442 {
443 	struct tmpfs_node *node;
444 	struct vnode *vp = ap->a_vp;
445 
446 	node = VP_TO_TMPFS_NODE(vp);
447 
448 	tmpfs_update(vp);
449 	if (vp->v_type == VREG) {
450 		if (vp->v_flag & VRECLAIMED) {
451 			if (node->tn_links == 0)
452 				tmpfs_truncate(vp, 0);
453 			else
454 				vfsync(ap->a_vp, ap->a_waitfor, 1, NULL, NULL);
455 		}
456 	}
457 	return 0;
458 }
459 
460 /* --------------------------------------------------------------------- */
461 
462 static int
463 tmpfs_read(struct vop_read_args *ap)
464 {
465 	struct buf *bp;
466 	struct vnode *vp = ap->a_vp;
467 	struct uio *uio = ap->a_uio;
468 	struct tmpfs_node *node;
469 	off_t base_offset;
470 	size_t offset;
471 	size_t len;
472 	size_t resid;
473 	int error;
474 	int seqcount;
475 
476 	/*
477 	 * Check the basics
478 	 */
479 	if (uio->uio_offset < 0)
480 		return (EINVAL);
481 	if (vp->v_type != VREG)
482 		return (EINVAL);
483 
484 	/*
485 	 * Extract node, try to shortcut the operation through
486 	 * the VM page cache, allowing us to avoid buffer cache
487 	 * overheads.
488 	 */
489 	node = VP_TO_TMPFS_NODE(vp);
490         resid = uio->uio_resid;
491 	seqcount = ap->a_ioflag >> 16;
492         error = vop_helper_read_shortcut(ap);
493         if (error)
494                 return error;
495         if (uio->uio_resid == 0) {
496 		if (resid)
497 			goto finished;
498 		return error;
499 	}
500 
501 	/*
502 	 * restore any saved pages before proceeding
503 	 */
504 	if (node->tn_reg.tn_pages_in_aobj) {
505 		TMPFS_NODE_LOCK(node);
506 		if (node->tn_reg.tn_pages_in_aobj) {
507 			tmpfs_move_pages(node->tn_reg.tn_aobj, vp->v_object);
508 			node->tn_reg.tn_pages_in_aobj = 0;
509 		}
510 		TMPFS_NODE_UNLOCK(node);
511 	}
512 
513 	/*
514 	 * Fall-through to our normal read code.
515 	 */
516 	while (uio->uio_resid > 0 && uio->uio_offset < node->tn_size) {
517 		/*
518 		 * Use buffer cache I/O (via tmpfs_strategy)
519 		 */
520 		offset = (size_t)uio->uio_offset & TMPFS_BLKMASK64;
521 		base_offset = (off_t)uio->uio_offset - offset;
522 		bp = getcacheblk(vp, base_offset, TMPFS_BLKSIZE, GETBLK_KVABIO);
523 		if (bp == NULL) {
524 			if (tmpfs_cluster_enable) {
525 				error = cluster_readx(vp, node->tn_size,
526 						     base_offset,
527 						     TMPFS_BLKSIZE,
528 						     B_NOTMETA | B_KVABIO,
529 						     uio->uio_resid,
530 						     seqcount * MAXBSIZE,
531 						     &bp);
532 			} else {
533 				error = bread_kvabio(vp, base_offset,
534 						     TMPFS_BLKSIZE, &bp);
535 			}
536 			if (error) {
537 				brelse(bp);
538 				kprintf("tmpfs_read bread error %d\n", error);
539 				break;
540 			}
541 
542 			/*
543 			 * tmpfs pretty much fiddles directly with the VM
544 			 * system, don't let it exhaust it or we won't play
545 			 * nice with other processes.
546 			 *
547 			 * Only do this if the VOP is coming from a normal
548 			 * read/write.  The VM system handles the case for
549 			 * UIO_NOCOPY.
550 			 */
551 			if (uio->uio_segflg != UIO_NOCOPY)
552 				vm_wait_nominal();
553 		}
554 		bp->b_flags |= B_CLUSTEROK;
555 		bkvasync(bp);
556 
557 		/*
558 		 * Figure out how many bytes we can actually copy this loop.
559 		 */
560 		len = TMPFS_BLKSIZE - offset;
561 		if (len > uio->uio_resid)
562 			len = uio->uio_resid;
563 		if (len > node->tn_size - uio->uio_offset)
564 			len = (size_t)(node->tn_size - uio->uio_offset);
565 
566 		error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio);
567 		bqrelse(bp);
568 		if (error) {
569 			kprintf("tmpfs_read uiomove error %d\n", error);
570 			break;
571 		}
572 	}
573 
574 finished:
575 	if ((node->tn_status & TMPFS_NODE_ACCESSED) == 0) {
576 		TMPFS_NODE_LOCK(node);
577 		node->tn_status |= TMPFS_NODE_ACCESSED;
578 		TMPFS_NODE_UNLOCK(node);
579 	}
580 	return (error);
581 }
582 
583 static int
584 tmpfs_write(struct vop_write_args *ap)
585 {
586 	struct buf *bp;
587 	struct vnode *vp = ap->a_vp;
588 	struct uio *uio = ap->a_uio;
589 	struct thread *td = uio->uio_td;
590 	struct tmpfs_node *node;
591 	boolean_t extended;
592 	off_t oldsize;
593 	int error;
594 	off_t base_offset;
595 	size_t offset;
596 	size_t len;
597 	struct rlimit limit;
598 	int trivial = 0;
599 	int kflags = 0;
600 	int seqcount;
601 
602 	error = 0;
603 	if (uio->uio_resid == 0) {
604 		return error;
605 	}
606 
607 	node = VP_TO_TMPFS_NODE(vp);
608 
609 	if (vp->v_type != VREG)
610 		return (EINVAL);
611 	seqcount = ap->a_ioflag >> 16;
612 
613 	TMPFS_NODE_LOCK(node);
614 
615 	/*
616 	 * restore any saved pages before proceeding
617 	 */
618 	if (node->tn_reg.tn_pages_in_aobj) {
619 		tmpfs_move_pages(node->tn_reg.tn_aobj, vp->v_object);
620 		node->tn_reg.tn_pages_in_aobj = 0;
621 	}
622 
623 	oldsize = node->tn_size;
624 	if (ap->a_ioflag & IO_APPEND)
625 		uio->uio_offset = node->tn_size;
626 
627 	/*
628 	 * Check for illegal write offsets.
629 	 */
630 	if (uio->uio_offset + uio->uio_resid >
631 	  VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize) {
632 		error = EFBIG;
633 		goto done;
634 	}
635 
636 	/*
637 	 * NOTE: Ignore if UIO does not come from a user thread (e.g. VN).
638 	 */
639 	if (vp->v_type == VREG && td != NULL && td->td_lwp != NULL) {
640 		error = kern_getrlimit(RLIMIT_FSIZE, &limit);
641 		if (error)
642 			goto done;
643 		if (uio->uio_offset + uio->uio_resid > limit.rlim_cur) {
644 			ksignal(td->td_proc, SIGXFSZ);
645 			error = EFBIG;
646 			goto done;
647 		}
648 	}
649 
650 	/*
651 	 * Extend the file's size if necessary
652 	 */
653 	extended = ((uio->uio_offset + uio->uio_resid) > node->tn_size);
654 
655 	while (uio->uio_resid > 0) {
656 		/*
657 		 * Don't completely blow out running buffer I/O
658 		 * when being hit from the pageout daemon.
659 		 */
660 		if (uio->uio_segflg == UIO_NOCOPY &&
661 		    (ap->a_ioflag & IO_RECURSE) == 0) {
662 			bwillwrite(TMPFS_BLKSIZE);
663 		}
664 
665 		/*
666 		 * Use buffer cache I/O (via tmpfs_strategy)
667 		 */
668 		offset = (size_t)uio->uio_offset & TMPFS_BLKMASK64;
669 		base_offset = (off_t)uio->uio_offset - offset;
670 		len = TMPFS_BLKSIZE - offset;
671 		if (len > uio->uio_resid)
672 			len = uio->uio_resid;
673 
674 		if ((uio->uio_offset + len) > node->tn_size) {
675 			trivial = (uio->uio_offset <= node->tn_size);
676 			error = tmpfs_reg_resize(vp, uio->uio_offset + len,
677 						 trivial);
678 			if (error)
679 				break;
680 		}
681 
682 		/*
683 		 * Read to fill in any gaps.  Theoretically we could
684 		 * optimize this if the write covers the entire buffer
685 		 * and is not a UIO_NOCOPY write, however this can lead
686 		 * to a security violation exposing random kernel memory
687 		 * (whatever junk was in the backing VM pages before).
688 		 *
689 		 * So just use bread() to do the right thing.
690 		 */
691 		error = bread_kvabio(vp, base_offset, TMPFS_BLKSIZE, &bp);
692 		bkvasync(bp);
693 		error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio);
694 		if (error) {
695 			kprintf("tmpfs_write uiomove error %d\n", error);
696 			brelse(bp);
697 			break;
698 		}
699 
700 		if (uio->uio_offset > node->tn_size) {
701 			node->tn_size = uio->uio_offset;
702 			kflags |= NOTE_EXTEND;
703 		}
704 		kflags |= NOTE_WRITE;
705 
706 		/*
707 		 * Always try to flush the page in the UIO_NOCOPY case.  This
708 		 * can come from the pageout daemon or during vnode eviction.
709 		 * It is not necessarily going to be marked IO_ASYNC/IO_SYNC.
710 		 *
711 		 * For the normal case we buwrite(), dirtying the underlying
712 		 * VM pages instead of dirtying the buffer and releasing the
713 		 * buffer as a clean buffer.  This allows tmpfs to use
714 		 * essentially all available memory to cache file data.
715 		 * If we used bdwrite() the buffer cache would wind up
716 		 * flushing the data to swap too quickly.
717 		 *
718 		 * But because tmpfs can seriously load the VM system we
719 		 * fall-back to using bdwrite() when free memory starts
720 		 * to get low.  This shifts the load away from the VM system
721 		 * and makes tmpfs act more like a normal filesystem with
722 		 * regards to disk activity.
723 		 *
724 		 * tmpfs pretty much fiddles directly with the VM
725 		 * system, don't let it exhaust it or we won't play
726 		 * nice with other processes.  Only do this if the
727 		 * VOP is coming from a normal read/write.  The VM system
728 		 * handles the case for UIO_NOCOPY.
729 		 */
730 		bp->b_flags |= B_CLUSTEROK;
731 		if (uio->uio_segflg == UIO_NOCOPY) {
732 			/*
733 			 * Flush from the pageout daemon, deal with
734 			 * potentially very heavy tmpfs write activity
735 			 * causing long stalls in the pageout daemon
736 			 * before pages get to free/cache.
737 			 *
738 			 * (a) Under severe pressure setting B_DIRECT will
739 			 *     cause a buffer release to try to free the
740 			 *     underlying pages.
741 			 *
742 			 * (b) Under modest memory pressure the B_RELBUF
743 			 *     alone is sufficient to get the pages moved
744 			 *     to the cache.  We could also force this by
745 			 *     setting B_NOTMETA but that might have other
746 			 *     unintended side-effects (e.g. setting
747 			 *     PG_NOTMETA on the VM page).
748 			 *
749 			 * Hopefully this will unblock the VM system more
750 			 * quickly under extreme tmpfs write load.
751 			 */
752 			if (vm_page_count_min(vm_page_free_hysteresis))
753 				bp->b_flags |= B_DIRECT;
754 			bp->b_flags |= B_AGE | B_RELBUF;
755 			bp->b_act_count = 0;	/* buffer->deactivate pgs */
756 			cluster_awrite(bp);
757 		} else if (vm_page_count_target()) {
758 			/*
759 			 * Normal (userland) write but we are low on memory,
760 			 * run the buffer the buffer cache.
761 			 */
762 			bp->b_act_count = 0;	/* buffer->deactivate pgs */
763 			bdwrite(bp);
764 		} else {
765 			/*
766 			 * Otherwise run the buffer directly through to the
767 			 * backing VM store.
768 			 */
769 			buwrite(bp);
770 			/*vm_wait_nominal();*/
771 		}
772 
773 		if (bp->b_error) {
774 			kprintf("tmpfs_write bwrite error %d\n", bp->b_error);
775 			break;
776 		}
777 	}
778 
779 	if (error) {
780 		if (extended) {
781 			(void)tmpfs_reg_resize(vp, oldsize, trivial);
782 			kflags &= ~NOTE_EXTEND;
783 		}
784 		goto done;
785 	}
786 
787 	/*
788 	 * Currently we don't set the mtime on files modified via mmap()
789 	 * because we can't tell the difference between those modifications
790 	 * and an attempt by the pageout daemon to flush tmpfs pages to
791 	 * swap.
792 	 *
793 	 * This is because in order to defer flushes as long as possible
794 	 * buwrite() works by marking the underlying VM pages dirty in
795 	 * order to be able to dispose of the buffer cache buffer without
796 	 * flushing it.
797 	 */
798 	if (uio->uio_segflg == UIO_NOCOPY) {
799 		if (vp->v_flag & VLASTWRITETS) {
800 			node->tn_mtime = vp->v_lastwrite_ts.tv_sec;
801 			node->tn_mtimensec = vp->v_lastwrite_ts.tv_nsec;
802 		}
803 	} else {
804 		node->tn_status |= TMPFS_NODE_MODIFIED;
805 		vclrflags(vp, VLASTWRITETS);
806 	}
807 
808 	if (extended)
809 		node->tn_status |= TMPFS_NODE_CHANGED;
810 
811 	if (node->tn_mode & (S_ISUID | S_ISGID)) {
812 		if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID, 0))
813 			node->tn_mode &= ~(S_ISUID | S_ISGID);
814 	}
815 done:
816 	TMPFS_NODE_UNLOCK(node);
817 	if (kflags)
818 		tmpfs_knote(vp, kflags);
819 
820 	return(error);
821 }
822 
823 static int
824 tmpfs_advlock(struct vop_advlock_args *ap)
825 {
826 	struct tmpfs_node *node;
827 	struct vnode *vp = ap->a_vp;
828 	int error;
829 
830 	node = VP_TO_TMPFS_NODE(vp);
831 	error = (lf_advlock(ap, &node->tn_advlock, node->tn_size));
832 
833 	return (error);
834 }
835 
836 /*
837  * The strategy function is typically only called when memory pressure
838  * forces the system to attempt to pageout pages.  It can also be called
839  * by [n]vtruncbuf() when a truncation cuts a page in half.  Normal write
840  * operations
841  *
842  * We set VKVABIO for VREG files so bp->b_data may not be synchronized to
843  * our cpu.  swap_pager_strategy() is all we really use, and it directly
844  * supports this.
845  */
846 static int
847 tmpfs_strategy(struct vop_strategy_args *ap)
848 {
849 	struct bio *bio = ap->a_bio;
850 	struct bio *nbio;
851 	struct buf *bp = bio->bio_buf;
852 	struct vnode *vp = ap->a_vp;
853 	struct tmpfs_node *node;
854 	vm_object_t uobj;
855 	vm_page_t m;
856 	int i;
857 
858 	if (vp->v_type != VREG) {
859 		bp->b_resid = bp->b_bcount;
860 		bp->b_flags |= B_ERROR | B_INVAL;
861 		bp->b_error = EINVAL;
862 		biodone(bio);
863 		return(0);
864 	}
865 
866 	node = VP_TO_TMPFS_NODE(vp);
867 
868 	uobj = node->tn_reg.tn_aobj;
869 
870 	/*
871 	 * Don't bother flushing to swap if there is no swap, just
872 	 * ensure that the pages are marked as needing a commit (still).
873 	 */
874 	if (bp->b_cmd == BUF_CMD_WRITE && vm_swap_size == 0) {
875 		for (i = 0; i < bp->b_xio.xio_npages; ++i) {
876 			m = bp->b_xio.xio_pages[i];
877 			vm_page_need_commit(m);
878 		}
879 		bp->b_resid = 0;
880 		bp->b_error = 0;
881 		biodone(bio);
882 	} else {
883 		nbio = push_bio(bio);
884 		nbio->bio_done = tmpfs_strategy_done;
885 		nbio->bio_offset = bio->bio_offset;
886 		swap_pager_strategy(uobj, nbio);
887 	}
888 	return 0;
889 }
890 
891 /*
892  * If we were unable to commit the pages to swap make sure they are marked
893  * as needing a commit (again).  If we were, clear the flag to allow the
894  * pages to be freed.
895  *
896  * Do not error-out the buffer.  In particular, vinvalbuf() needs to
897  * always work.
898  */
899 static void
900 tmpfs_strategy_done(struct bio *bio)
901 {
902 	struct buf *bp;
903 	vm_page_t m;
904 	int i;
905 
906 	bp = bio->bio_buf;
907 
908 	if (bp->b_flags & B_ERROR) {
909 		bp->b_flags &= ~B_ERROR;
910 		bp->b_error = 0;
911 		bp->b_resid = 0;
912 		for (i = 0; i < bp->b_xio.xio_npages; ++i) {
913 			m = bp->b_xio.xio_pages[i];
914 			vm_page_need_commit(m);
915 		}
916 	} else {
917 		for (i = 0; i < bp->b_xio.xio_npages; ++i) {
918 			m = bp->b_xio.xio_pages[i];
919 			vm_page_clear_commit(m);
920 		}
921 	}
922 	bio = pop_bio(bio);
923 	biodone(bio);
924 }
925 
926 static int
927 tmpfs_bmap(struct vop_bmap_args *ap)
928 {
929 	if (ap->a_doffsetp != NULL)
930 		*ap->a_doffsetp = ap->a_loffset;
931 	if (ap->a_runp != NULL)
932 		*ap->a_runp = 0;
933 	if (ap->a_runb != NULL)
934 		*ap->a_runb = 0;
935 
936 	return 0;
937 }
938 
939 /* --------------------------------------------------------------------- */
940 
941 static int
942 tmpfs_nremove(struct vop_nremove_args *ap)
943 {
944 	struct vnode *dvp = ap->a_dvp;
945 	struct namecache *ncp = ap->a_nch->ncp;
946 	struct vnode *vp;
947 	int error;
948 	struct tmpfs_dirent *de;
949 	struct tmpfs_mount *tmp;
950 	struct tmpfs_node *dnode;
951 	struct tmpfs_node *node;
952 
953 	/*
954 	 * We have to acquire the vp from ap->a_nch because we will likely
955 	 * unresolve the namecache entry, and a vrele/vput is needed to
956 	 * trigger the tmpfs_inactive/tmpfs_reclaim sequence.
957 	 *
958 	 * We have to use vget to clear any inactive state on the vnode,
959 	 * otherwise the vnode may remain inactive and thus tmpfs_inactive
960 	 * will not get called when we release it.
961 	 */
962 	error = cache_vget(ap->a_nch, ap->a_cred, LK_SHARED, &vp);
963 	KKASSERT(vp->v_mount == dvp->v_mount);
964 	KKASSERT(error == 0);
965 	vn_unlock(vp);
966 
967 	if (vp->v_type == VDIR) {
968 		error = EISDIR;
969 		goto out2;
970 	}
971 
972 	dnode = VP_TO_TMPFS_DIR(dvp);
973 	node = VP_TO_TMPFS_NODE(vp);
974 	tmp = VFS_TO_TMPFS(vp->v_mount);
975 
976 	TMPFS_NODE_LOCK(dnode);
977 	de = tmpfs_dir_lookup(dnode, node, ncp);
978 	if (de == NULL) {
979 		error = ENOENT;
980 		goto out;
981 	}
982 
983 	/* Files marked as immutable or append-only cannot be deleted. */
984 	if ((node->tn_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
985 	    (dnode->tn_flags & APPEND)) {
986 		error = EPERM;
987 		goto out;
988 	}
989 
990 	/* Remove the entry from the directory; as it is a file, we do not
991 	 * have to change the number of hard links of the directory. */
992 	tmpfs_dir_detach(dnode, de);
993 
994 	/* Free the directory entry we just deleted.  Note that the node
995 	 * referred by it will not be removed until the vnode is really
996 	 * reclaimed. */
997 	tmpfs_free_dirent(tmp, de);
998 
999 	if (node->tn_links > 0) {
1000 	        TMPFS_NODE_LOCK(node);
1001 		node->tn_status |= TMPFS_NODE_CHANGED;
1002 	        TMPFS_NODE_UNLOCK(node);
1003 	}
1004 
1005 	cache_unlink(ap->a_nch);
1006 	tmpfs_knote(vp, NOTE_DELETE);
1007 	error = 0;
1008 
1009 out:
1010 	TMPFS_NODE_UNLOCK(dnode);
1011 	if (error == 0)
1012 		tmpfs_knote(dvp, NOTE_WRITE);
1013 out2:
1014 	vrele(vp);
1015 
1016 	return error;
1017 }
1018 
1019 /* --------------------------------------------------------------------- */
1020 
1021 static int
1022 tmpfs_nlink(struct vop_nlink_args *ap)
1023 {
1024 	struct vnode *dvp = ap->a_dvp;
1025 	struct vnode *vp = ap->a_vp;
1026 	struct namecache *ncp = ap->a_nch->ncp;
1027 	struct tmpfs_dirent *de;
1028 	struct tmpfs_node *node;
1029 	struct tmpfs_node *dnode;
1030 	int error;
1031 
1032 	KKASSERT(dvp != vp); /* XXX When can this be false? */
1033 
1034 	node = VP_TO_TMPFS_NODE(vp);
1035 	dnode = VP_TO_TMPFS_NODE(dvp);
1036 	TMPFS_NODE_LOCK(dnode);
1037 
1038 	/* XXX: Why aren't the following two tests done by the caller? */
1039 
1040 	/* Hard links of directories are forbidden. */
1041 	if (vp->v_type == VDIR) {
1042 		error = EPERM;
1043 		goto out;
1044 	}
1045 
1046 	/* Cannot create cross-device links. */
1047 	if (dvp->v_mount != vp->v_mount) {
1048 		error = EXDEV;
1049 		goto out;
1050 	}
1051 
1052 	/* Ensure that we do not overflow the maximum number of links imposed
1053 	 * by the system. */
1054 	KKASSERT(node->tn_links <= LINK_MAX);
1055 	if (node->tn_links >= LINK_MAX) {
1056 		error = EMLINK;
1057 		goto out;
1058 	}
1059 
1060 	/* We cannot create links of files marked immutable or append-only. */
1061 	if (node->tn_flags & (IMMUTABLE | APPEND)) {
1062 		error = EPERM;
1063 		goto out;
1064 	}
1065 
1066 	/* Allocate a new directory entry to represent the node. */
1067 	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), node,
1068 				   ncp->nc_name, ncp->nc_nlen, &de);
1069 	if (error != 0)
1070 		goto out;
1071 
1072 	/* Insert the new directory entry into the appropriate directory. */
1073 	tmpfs_dir_attach(dnode, de);
1074 
1075 	/* vp link count has changed, so update node times. */
1076 
1077 	TMPFS_NODE_LOCK(node);
1078 	node->tn_status |= TMPFS_NODE_CHANGED;
1079 	TMPFS_NODE_UNLOCK(node);
1080 	tmpfs_update(vp);
1081 
1082 	tmpfs_knote(vp, NOTE_LINK);
1083 	cache_setunresolved(ap->a_nch);
1084 	cache_setvp(ap->a_nch, vp);
1085 	error = 0;
1086 
1087 out:
1088 	TMPFS_NODE_UNLOCK(dnode);
1089 	if (error == 0)
1090 		tmpfs_knote(dvp, NOTE_WRITE);
1091 	return error;
1092 }
1093 
1094 /* --------------------------------------------------------------------- */
1095 
1096 static int
1097 tmpfs_nrename(struct vop_nrename_args *ap)
1098 {
1099 	struct vnode *fdvp = ap->a_fdvp;
1100 	struct namecache *fncp = ap->a_fnch->ncp;
1101 	struct vnode *fvp = fncp->nc_vp;
1102 	struct vnode *tdvp = ap->a_tdvp;
1103 	struct namecache *tncp = ap->a_tnch->ncp;
1104 	struct vnode *tvp;
1105 	struct tmpfs_dirent *de, *tde;
1106 	struct tmpfs_mount *tmp;
1107 	struct tmpfs_node *fdnode;
1108 	struct tmpfs_node *fnode;
1109 	struct tmpfs_node *tnode;
1110 	struct tmpfs_node *tdnode;
1111 	char *newname;
1112 	char *oldname;
1113 	int error;
1114 
1115 	KKASSERT(fdvp->v_mount == fvp->v_mount);
1116 
1117 	/*
1118 	 * Because tvp can get overwritten we have to vget it instead of
1119 	 * just vref or use it, otherwise it's VINACTIVE flag may not get
1120 	 * cleared and the node won't get destroyed.
1121 	 */
1122 	error = cache_vget(ap->a_tnch, ap->a_cred, LK_SHARED, &tvp);
1123 	if (error == 0) {
1124 		tnode = VP_TO_TMPFS_NODE(tvp);
1125 		vn_unlock(tvp);
1126 	} else {
1127 		tnode = NULL;
1128 	}
1129 
1130 	/* Disallow cross-device renames.
1131 	 * XXX Why isn't this done by the caller? */
1132 	if (fvp->v_mount != tdvp->v_mount ||
1133 	    (tvp != NULL && fvp->v_mount != tvp->v_mount)) {
1134 		error = EXDEV;
1135 		goto out;
1136 	}
1137 
1138 	tmp = VFS_TO_TMPFS(tdvp->v_mount);
1139 	tdnode = VP_TO_TMPFS_DIR(tdvp);
1140 
1141 	/* If source and target are the same file, there is nothing to do. */
1142 	if (fvp == tvp) {
1143 		error = 0;
1144 		goto out;
1145 	}
1146 
1147 	fdnode = VP_TO_TMPFS_DIR(fdvp);
1148 	fnode = VP_TO_TMPFS_NODE(fvp);
1149 	TMPFS_NODE_LOCK(fdnode);
1150 	de = tmpfs_dir_lookup(fdnode, fnode, fncp);
1151 	TMPFS_NODE_UNLOCK(fdnode);	/* XXX depend on namecache lock */
1152 
1153 	/* Avoid manipulating '.' and '..' entries. */
1154 	if (de == NULL) {
1155 		error = ENOENT;
1156 		goto out_locked;
1157 	}
1158 	KKASSERT(de->td_node == fnode);
1159 
1160 	/*
1161 	 * If replacing an entry in the target directory and that entry
1162 	 * is a directory, it must be empty.
1163 	 *
1164 	 * Kern_rename gurantees the destination to be a directory
1165 	 * if the source is one (it does?).
1166 	 */
1167 	if (tvp != NULL) {
1168 		KKASSERT(tnode != NULL);
1169 
1170 		if ((tnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
1171 		    (tdnode->tn_flags & (APPEND | IMMUTABLE))) {
1172 			error = EPERM;
1173 			goto out_locked;
1174 		}
1175 
1176 		if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) {
1177 			if (tnode->tn_size > 0) {
1178 				error = ENOTEMPTY;
1179 				goto out_locked;
1180 			}
1181 		} else if (fnode->tn_type == VDIR && tnode->tn_type != VDIR) {
1182 			error = ENOTDIR;
1183 			goto out_locked;
1184 		} else if (fnode->tn_type != VDIR && tnode->tn_type == VDIR) {
1185 			error = EISDIR;
1186 			goto out_locked;
1187 		} else {
1188 			KKASSERT(fnode->tn_type != VDIR &&
1189 				tnode->tn_type != VDIR);
1190 		}
1191 	}
1192 
1193 	if ((fnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
1194 	    (fdnode->tn_flags & (APPEND | IMMUTABLE))) {
1195 		error = EPERM;
1196 		goto out_locked;
1197 	}
1198 
1199 	/*
1200 	 * Ensure that we have enough memory to hold the new name, if it
1201 	 * has to be changed.
1202 	 */
1203 	if (fncp->nc_nlen != tncp->nc_nlen ||
1204 	    bcmp(fncp->nc_name, tncp->nc_name, fncp->nc_nlen) != 0) {
1205 		newname = kmalloc(tncp->nc_nlen + 1, tmp->tm_name_zone,
1206 				  M_WAITOK | M_NULLOK);
1207 		if (newname == NULL) {
1208 			error = ENOSPC;
1209 			goto out_locked;
1210 		}
1211 		bcopy(tncp->nc_name, newname, tncp->nc_nlen);
1212 		newname[tncp->nc_nlen] = '\0';
1213 	} else {
1214 		newname = NULL;
1215 	}
1216 
1217 	/*
1218 	 * Unlink entry from source directory.  Note that the kernel has
1219 	 * already checked for illegal recursion cases (renaming a directory
1220 	 * into a subdirectory of itself).
1221 	 */
1222 	if (fdnode != tdnode) {
1223 		tmpfs_dir_detach(fdnode, de);
1224 	} else {
1225 		/* XXX depend on namecache lock */
1226 		TMPFS_NODE_LOCK(fdnode);
1227 		KKASSERT(de == tmpfs_dir_lookup(fdnode, fnode, fncp));
1228 		RB_REMOVE(tmpfs_dirtree, &fdnode->tn_dir.tn_dirtree, de);
1229 		RB_REMOVE(tmpfs_dirtree_cookie,
1230 			  &fdnode->tn_dir.tn_cookietree, de);
1231 		TMPFS_NODE_UNLOCK(fdnode);
1232 	}
1233 
1234 	/*
1235 	 * Handle any name change.  Swap with newname, we will
1236 	 * deallocate it at the end.
1237 	 */
1238 	if (newname != NULL) {
1239 #if 0
1240 		TMPFS_NODE_LOCK(fnode);
1241 		fnode->tn_status |= TMPFS_NODE_CHANGED;
1242 		TMPFS_NODE_UNLOCK(fnode);
1243 #endif
1244 		oldname = de->td_name;
1245 		de->td_name = newname;
1246 		de->td_namelen = (uint16_t)tncp->nc_nlen;
1247 		newname = oldname;
1248 	}
1249 
1250 	/*
1251 	 * If we are overwriting an entry, we have to remove the old one
1252 	 * from the target directory.
1253 	 */
1254 	if (tvp != NULL) {
1255 		/* Remove the old entry from the target directory. */
1256 		TMPFS_NODE_LOCK(tdnode);
1257 		tde = tmpfs_dir_lookup(tdnode, tnode, tncp);
1258 		tmpfs_dir_detach(tdnode, tde);
1259 		TMPFS_NODE_UNLOCK(tdnode);
1260 		tmpfs_knote(tdnode->tn_vnode, NOTE_DELETE);
1261 
1262 		/*
1263 		 * Free the directory entry we just deleted.  Note that the
1264 		 * node referred by it will not be removed until the vnode is
1265 		 * really reclaimed.
1266 		 */
1267 		tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), tde);
1268 		/*cache_inval_vp(tvp, CINV_DESTROY);*/
1269 	}
1270 
1271 	/*
1272 	 * Link entry to target directory.  If the entry
1273 	 * represents a directory move the parent linkage
1274 	 * as well.
1275 	 */
1276 	if (fdnode != tdnode) {
1277 		if (de->td_node->tn_type == VDIR) {
1278 			TMPFS_VALIDATE_DIR(fnode);
1279 		}
1280 		tmpfs_dir_attach(tdnode, de);
1281 	} else {
1282 		TMPFS_NODE_LOCK(tdnode);
1283 		tdnode->tn_status |= TMPFS_NODE_MODIFIED;
1284 		RB_INSERT(tmpfs_dirtree, &tdnode->tn_dir.tn_dirtree, de);
1285 		RB_INSERT(tmpfs_dirtree_cookie,
1286 			  &tdnode->tn_dir.tn_cookietree, de);
1287 		TMPFS_NODE_UNLOCK(tdnode);
1288 	}
1289 
1290 	/*
1291 	 * Finish up
1292 	 */
1293 	if (newname) {
1294 		kfree(newname, tmp->tm_name_zone);
1295 		newname = NULL;
1296 	}
1297 	cache_rename(ap->a_fnch, ap->a_tnch);
1298 	tmpfs_knote(ap->a_fdvp, NOTE_WRITE);
1299 	tmpfs_knote(ap->a_tdvp, NOTE_WRITE);
1300 	if (fnode->tn_vnode)
1301 		tmpfs_knote(fnode->tn_vnode, NOTE_RENAME);
1302 	error = 0;
1303 
1304 out_locked:
1305 	;
1306 out:
1307 	if (tvp)
1308 		vrele(tvp);
1309 	return error;
1310 }
1311 
1312 /* --------------------------------------------------------------------- */
1313 
1314 static int
1315 tmpfs_nmkdir(struct vop_nmkdir_args *ap)
1316 {
1317 	struct vnode *dvp = ap->a_dvp;
1318 	struct vnode **vpp = ap->a_vpp;
1319 	struct namecache *ncp = ap->a_nch->ncp;
1320 	struct vattr *vap = ap->a_vap;
1321 	struct ucred *cred = ap->a_cred;
1322 	int error;
1323 
1324 	KKASSERT(vap->va_type == VDIR);
1325 
1326 	error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL);
1327 	if (error == 0) {
1328 		cache_setunresolved(ap->a_nch);
1329 		cache_setvp(ap->a_nch, *vpp);
1330 		tmpfs_knote(dvp, NOTE_WRITE | NOTE_LINK);
1331 	}
1332 	return error;
1333 }
1334 
1335 /* --------------------------------------------------------------------- */
1336 
1337 static int
1338 tmpfs_nrmdir(struct vop_nrmdir_args *ap)
1339 {
1340 	struct vnode *dvp = ap->a_dvp;
1341 	struct namecache *ncp = ap->a_nch->ncp;
1342 	struct vnode *vp;
1343 	struct tmpfs_dirent *de;
1344 	struct tmpfs_mount *tmp;
1345 	struct tmpfs_node *dnode;
1346 	struct tmpfs_node *node;
1347 	int error;
1348 
1349 	/*
1350 	 * We have to acquire the vp from ap->a_nch because we will likely
1351 	 * unresolve the namecache entry, and a vrele/vput is needed to
1352 	 * trigger the tmpfs_inactive/tmpfs_reclaim sequence.
1353 	 *
1354 	 * We have to use vget to clear any inactive state on the vnode,
1355 	 * otherwise the vnode may remain inactive and thus tmpfs_inactive
1356 	 * will not get called when we release it.
1357 	 */
1358 	error = cache_vget(ap->a_nch, ap->a_cred, LK_SHARED, &vp);
1359 	KKASSERT(error == 0);
1360 	vn_unlock(vp);
1361 
1362 	/*
1363 	 * Prevalidate so we don't hit an assertion later
1364 	 */
1365 	if (vp->v_type != VDIR) {
1366 		error = ENOTDIR;
1367 		goto out;
1368 	}
1369 
1370 	tmp = VFS_TO_TMPFS(dvp->v_mount);
1371 	dnode = VP_TO_TMPFS_DIR(dvp);
1372 	node = VP_TO_TMPFS_DIR(vp);
1373 
1374 	/*
1375 	 * Directories with more than two entries ('.' and '..') cannot
1376 	 * be removed.
1377 	 */
1378 	if (node->tn_size > 0) {
1379 		error = ENOTEMPTY;
1380 		goto out;
1381 	}
1382 
1383 	if ((dnode->tn_flags & APPEND)
1384 	    || (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
1385 		error = EPERM;
1386 		goto out;
1387 	}
1388 
1389 	/*
1390 	 * This invariant holds only if we are not trying to
1391 	 * remove "..".  We checked for that above so this is safe now.
1392 	 */
1393 	KKASSERT(node->tn_dir.tn_parent == dnode);
1394 
1395 	/*
1396 	 * Get the directory entry associated with node (vp).  This
1397 	 * was filled by tmpfs_lookup while looking up the entry.
1398 	 */
1399 	TMPFS_NODE_LOCK(dnode);
1400 	de = tmpfs_dir_lookup(dnode, node, ncp);
1401 	KKASSERT(TMPFS_DIRENT_MATCHES(de, ncp->nc_name, ncp->nc_nlen));
1402 
1403 	/* Check flags to see if we are allowed to remove the directory. */
1404 	if ((dnode->tn_flags & APPEND) ||
1405 	    node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) {
1406 		error = EPERM;
1407 		TMPFS_NODE_UNLOCK(dnode);
1408 		goto out;
1409 	}
1410 
1411 	/* Detach the directory entry from the directory (dnode). */
1412 	tmpfs_dir_detach(dnode, de);
1413 	TMPFS_NODE_UNLOCK(dnode);
1414 
1415 	/* No vnode should be allocated for this entry from this point */
1416 	TMPFS_NODE_LOCK(dnode);
1417 	TMPFS_ASSERT_ELOCKED(dnode);
1418 	TMPFS_NODE_LOCK(node);
1419 	TMPFS_ASSERT_ELOCKED(node);
1420 
1421 	/*
1422 	 * Must set parent linkage to NULL (tested by ncreate to disallow
1423 	 * the creation of new files/dirs in a deleted directory)
1424 	 */
1425 	node->tn_status |= TMPFS_NODE_CHANGED;
1426 
1427 	dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED |
1428 			    TMPFS_NODE_MODIFIED;
1429 
1430 	TMPFS_NODE_UNLOCK(node);
1431 	TMPFS_NODE_UNLOCK(dnode);
1432 
1433 	/* Free the directory entry we just deleted.  Note that the node
1434 	 * referred by it will not be removed until the vnode is really
1435 	 * reclaimed. */
1436 	tmpfs_free_dirent(tmp, de);
1437 
1438 	/* Release the deleted vnode (will destroy the node, notify
1439 	 * interested parties and clean it from the cache). */
1440 
1441 	TMPFS_NODE_LOCK(dnode);
1442 	dnode->tn_status |= TMPFS_NODE_CHANGED;
1443 	TMPFS_NODE_UNLOCK(dnode);
1444 	tmpfs_update(dvp);
1445 
1446 	cache_unlink(ap->a_nch);
1447 	tmpfs_knote(dvp, NOTE_WRITE | NOTE_LINK);
1448 	error = 0;
1449 
1450 out:
1451 	vrele(vp);
1452 
1453 	return error;
1454 }
1455 
1456 /* --------------------------------------------------------------------- */
1457 
1458 static int
1459 tmpfs_nsymlink(struct vop_nsymlink_args *ap)
1460 {
1461 	struct vnode *dvp = ap->a_dvp;
1462 	struct vnode **vpp = ap->a_vpp;
1463 	struct namecache *ncp = ap->a_nch->ncp;
1464 	struct vattr *vap = ap->a_vap;
1465 	struct ucred *cred = ap->a_cred;
1466 	char *target = ap->a_target;
1467 	int error;
1468 
1469 	vap->va_type = VLNK;
1470 	error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, target);
1471 	if (error == 0) {
1472 		tmpfs_knote(*vpp, NOTE_WRITE);
1473 		cache_setunresolved(ap->a_nch);
1474 		cache_setvp(ap->a_nch, *vpp);
1475 	}
1476 	return error;
1477 }
1478 
1479 /* --------------------------------------------------------------------- */
1480 
1481 static int
1482 tmpfs_readdir(struct vop_readdir_args *ap)
1483 {
1484 	struct vnode *vp = ap->a_vp;
1485 	struct uio *uio = ap->a_uio;
1486 	int *eofflag = ap->a_eofflag;
1487 	off_t **cookies = ap->a_cookies;
1488 	int *ncookies = ap->a_ncookies;
1489 	struct tmpfs_mount *tmp;
1490 	int error;
1491 	off_t startoff;
1492 	off_t cnt = 0;
1493 	struct tmpfs_node *node;
1494 
1495 	/* This operation only makes sense on directory nodes. */
1496 	if (vp->v_type != VDIR) {
1497 		return ENOTDIR;
1498 	}
1499 
1500 	tmp = VFS_TO_TMPFS(vp->v_mount);
1501 	node = VP_TO_TMPFS_DIR(vp);
1502 	startoff = uio->uio_offset;
1503 
1504 	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOT) {
1505 		error = tmpfs_dir_getdotdent(node, uio);
1506 		if (error != 0) {
1507 			TMPFS_NODE_LOCK_SH(node);
1508 			goto outok;
1509 		}
1510 		cnt++;
1511 	}
1512 
1513 	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT) {
1514 		/* may lock parent, cannot hold node lock */
1515 		error = tmpfs_dir_getdotdotdent(tmp, node, uio);
1516 		if (error != 0) {
1517 			TMPFS_NODE_LOCK_SH(node);
1518 			goto outok;
1519 		}
1520 		cnt++;
1521 	}
1522 
1523 	TMPFS_NODE_LOCK_SH(node);
1524 	error = tmpfs_dir_getdents(node, uio, &cnt);
1525 
1526 outok:
1527 	KKASSERT(error >= -1);
1528 
1529 	if (error == -1)
1530 		error = 0;
1531 
1532 	if (eofflag != NULL)
1533 		*eofflag =
1534 		    (error == 0 && uio->uio_offset == TMPFS_DIRCOOKIE_EOF);
1535 
1536 	/* Update NFS-related variables. */
1537 	if (error == 0 && cookies != NULL && ncookies != NULL) {
1538 		off_t i;
1539 		off_t off = startoff;
1540 		struct tmpfs_dirent *de = NULL;
1541 
1542 		*ncookies = cnt;
1543 		*cookies = kmalloc(cnt * sizeof(off_t), M_TEMP, M_WAITOK);
1544 
1545 		for (i = 0; i < cnt; i++) {
1546 			KKASSERT(off != TMPFS_DIRCOOKIE_EOF);
1547 			if (off == TMPFS_DIRCOOKIE_DOT) {
1548 				off = TMPFS_DIRCOOKIE_DOTDOT;
1549 			} else {
1550 				if (off == TMPFS_DIRCOOKIE_DOTDOT) {
1551 					de = RB_MIN(tmpfs_dirtree_cookie,
1552 						&node->tn_dir.tn_cookietree);
1553 				} else if (de != NULL) {
1554 					de = RB_NEXT(tmpfs_dirtree_cookie,
1555 					       &node->tn_dir.tn_cookietree, de);
1556 				} else {
1557 					de = tmpfs_dir_lookupbycookie(node,
1558 								      off);
1559 					KKASSERT(de != NULL);
1560 					de = RB_NEXT(tmpfs_dirtree_cookie,
1561 					       &node->tn_dir.tn_cookietree, de);
1562 				}
1563 				if (de == NULL)
1564 					off = TMPFS_DIRCOOKIE_EOF;
1565 				else
1566 					off = tmpfs_dircookie(de);
1567 			}
1568 			(*cookies)[i] = off;
1569 		}
1570 		KKASSERT(uio->uio_offset == off);
1571 	}
1572 	TMPFS_NODE_UNLOCK(node);
1573 
1574 	if ((node->tn_status & TMPFS_NODE_ACCESSED) == 0) {
1575 		TMPFS_NODE_LOCK(node);
1576 		node->tn_status |= TMPFS_NODE_ACCESSED;
1577 		TMPFS_NODE_UNLOCK(node);
1578 	}
1579 	return error;
1580 }
1581 
1582 /* --------------------------------------------------------------------- */
1583 
1584 static int
1585 tmpfs_readlink(struct vop_readlink_args *ap)
1586 {
1587 	struct vnode *vp = ap->a_vp;
1588 	struct uio *uio = ap->a_uio;
1589 	int error;
1590 	struct tmpfs_node *node;
1591 
1592 	KKASSERT(uio->uio_offset == 0);
1593 	KKASSERT(vp->v_type == VLNK);
1594 
1595 	node = VP_TO_TMPFS_NODE(vp);
1596 	TMPFS_NODE_LOCK_SH(node);
1597 	error = uiomove(node->tn_link,
1598 			MIN(node->tn_size, uio->uio_resid), uio);
1599 	TMPFS_NODE_UNLOCK(node);
1600 	if ((node->tn_status & TMPFS_NODE_ACCESSED) == 0) {
1601 		TMPFS_NODE_LOCK(node);
1602 		node->tn_status |= TMPFS_NODE_ACCESSED;
1603 		TMPFS_NODE_UNLOCK(node);
1604 	}
1605 	return error;
1606 }
1607 
1608 /* --------------------------------------------------------------------- */
1609 
1610 static int
1611 tmpfs_inactive(struct vop_inactive_args *ap)
1612 {
1613 	struct vnode *vp = ap->a_vp;
1614 	struct tmpfs_node *node;
1615 	struct mount *mp;
1616 
1617 	mp = vp->v_mount;
1618 	lwkt_gettoken(&mp->mnt_token);
1619 	node = VP_TO_TMPFS_NODE(vp);
1620 
1621 	/*
1622 	 * Degenerate case
1623 	 */
1624 	if (node == NULL) {
1625 		vrecycle(vp);
1626 		lwkt_reltoken(&mp->mnt_token);
1627 		return(0);
1628 	}
1629 
1630 	/*
1631 	 * Get rid of unreferenced deleted vnodes sooner rather than
1632 	 * later so the data memory can be recovered immediately.
1633 	 *
1634 	 * We must truncate the vnode to prevent the normal reclamation
1635 	 * path from flushing the data for the removed file to disk.
1636 	 */
1637 	TMPFS_NODE_LOCK(node);
1638 	if ((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0 &&
1639 	    node->tn_links == 0)
1640 	{
1641 		node->tn_vpstate = TMPFS_VNODE_DOOMED;
1642 		TMPFS_NODE_UNLOCK(node);
1643 		if (node->tn_type == VREG)
1644 			tmpfs_truncate(vp, 0);
1645 		vrecycle(vp);
1646 	} else {
1647 		/*
1648 		 * We must retain any VM pages belonging to the vnode's
1649 		 * object as the vnode will destroy the object during a
1650 		 * later reclaim.  We call vinvalbuf(V_SAVE) to clean
1651 		 * out the buffer cache.
1652 		 *
1653 		 * On DragonFlyBSD, vnodes are not immediately deactivated
1654 		 * on the 1->0 refs, so this is a relatively optimal
1655 		 * operation.  We have to do this in tmpfs_inactive()
1656 		 * because the pages will have already been thrown away
1657 		 * at the time tmpfs_reclaim() is called.
1658 		 */
1659 		if (node->tn_type == VREG &&
1660 		    node->tn_reg.tn_pages_in_aobj == 0) {
1661 			vinvalbuf(vp, V_SAVE, 0, 0);
1662 			KKASSERT(RB_EMPTY(&vp->v_rbdirty_tree));
1663 			KKASSERT(RB_EMPTY(&vp->v_rbclean_tree));
1664 			tmpfs_move_pages(vp->v_object, node->tn_reg.tn_aobj);
1665 			node->tn_reg.tn_pages_in_aobj = 1;
1666 		}
1667 
1668 		TMPFS_NODE_UNLOCK(node);
1669 	}
1670 	lwkt_reltoken(&mp->mnt_token);
1671 
1672 	return 0;
1673 }
1674 
1675 /* --------------------------------------------------------------------- */
1676 
1677 int
1678 tmpfs_reclaim(struct vop_reclaim_args *ap)
1679 {
1680 	struct vnode *vp = ap->a_vp;
1681 	struct tmpfs_mount *tmp;
1682 	struct tmpfs_node *node;
1683 	struct mount *mp;
1684 
1685 	mp = vp->v_mount;
1686 	lwkt_gettoken(&mp->mnt_token);
1687 
1688 	node = VP_TO_TMPFS_NODE(vp);
1689 	tmp = VFS_TO_TMPFS(vp->v_mount);
1690 	KKASSERT(mp == tmp->tm_mount);
1691 
1692 	tmpfs_free_vp(vp);
1693 
1694 	/*
1695 	 * If the node referenced by this vnode was deleted by the
1696 	 * user, we must free its associated data structures now that
1697 	 * the vnode is being reclaimed.
1698 	 *
1699 	 * Directories have an extra link ref.
1700 	 */
1701 	TMPFS_NODE_LOCK(node);
1702 	if ((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0 &&
1703 	    node->tn_links == 0) {
1704 		node->tn_vpstate = TMPFS_VNODE_DOOMED;
1705 		tmpfs_free_node(tmp, node);
1706 		/* eats the lock */
1707 	} else {
1708 		TMPFS_NODE_UNLOCK(node);
1709 	}
1710 	lwkt_reltoken(&mp->mnt_token);
1711 
1712 	KKASSERT(vp->v_data == NULL);
1713 	return 0;
1714 }
1715 
1716 /* --------------------------------------------------------------------- */
1717 
1718 static int
1719 tmpfs_mountctl(struct vop_mountctl_args *ap)
1720 {
1721 	struct tmpfs_mount *tmp;
1722 	struct mount *mp;
1723 	int rc;
1724 
1725 	mp = ap->a_head.a_ops->head.vv_mount;
1726 	lwkt_gettoken(&mp->mnt_token);
1727 
1728 	switch (ap->a_op) {
1729 	case (MOUNTCTL_SET_EXPORT):
1730 		tmp = (struct tmpfs_mount *) mp->mnt_data;
1731 
1732 		if (ap->a_ctllen != sizeof(struct export_args))
1733 			rc = (EINVAL);
1734 		else
1735 			rc = vfs_export(mp, &tmp->tm_export,
1736 					(const struct export_args *) ap->a_ctl);
1737 		break;
1738 	default:
1739 		rc = vop_stdmountctl(ap);
1740 		break;
1741 	}
1742 
1743 	lwkt_reltoken(&mp->mnt_token);
1744 	return (rc);
1745 }
1746 
1747 /* --------------------------------------------------------------------- */
1748 
1749 static int
1750 tmpfs_print(struct vop_print_args *ap)
1751 {
1752 	struct vnode *vp = ap->a_vp;
1753 
1754 	struct tmpfs_node *node;
1755 
1756 	node = VP_TO_TMPFS_NODE(vp);
1757 
1758 	kprintf("tag VT_TMPFS, tmpfs_node %p, flags 0x%x, links %d\n",
1759 	    node, node->tn_flags, node->tn_links);
1760 	kprintf("\tmode 0%o, owner %d, group %d, size %ju, status 0x%x\n",
1761 	    node->tn_mode, node->tn_uid, node->tn_gid,
1762 	    (uintmax_t)node->tn_size, node->tn_status);
1763 
1764 	if (vp->v_type == VFIFO)
1765 		fifo_printinfo(vp);
1766 
1767 	kprintf("\n");
1768 
1769 	return 0;
1770 }
1771 
1772 /* --------------------------------------------------------------------- */
1773 
1774 static int
1775 tmpfs_pathconf(struct vop_pathconf_args *ap)
1776 {
1777 	struct vnode *vp = ap->a_vp;
1778 	int name = ap->a_name;
1779 	register_t *retval = ap->a_retval;
1780 	struct tmpfs_mount *tmp;
1781 	int error;
1782 
1783 	error = 0;
1784 
1785 	switch (name) {
1786 	case _PC_CHOWN_RESTRICTED:
1787 		*retval = 1;
1788 		break;
1789 
1790 	case _PC_FILESIZEBITS:
1791 		tmp = VFS_TO_TMPFS(vp->v_mount);
1792 		*retval = max(32, flsll(tmp->tm_pages_max * PAGE_SIZE) + 1);
1793 		break;
1794 
1795 	case _PC_LINK_MAX:
1796 		*retval = LINK_MAX;
1797 		break;
1798 
1799 	case _PC_NAME_MAX:
1800 		*retval = NAME_MAX;
1801 		break;
1802 
1803 	case _PC_NO_TRUNC:
1804 		*retval = 1;
1805 		break;
1806 
1807 	case _PC_PATH_MAX:
1808 		*retval = PATH_MAX;
1809 		break;
1810 
1811 	case _PC_PIPE_BUF:
1812 		*retval = PIPE_BUF;
1813 		break;
1814 
1815 	case _PC_SYNC_IO:
1816 		*retval = 1;
1817 		break;
1818 
1819 	case _PC_2_SYMLINKS:
1820 		*retval = 1;
1821 		break;
1822 
1823 	default:
1824 		error = EINVAL;
1825 	}
1826 
1827 	return error;
1828 }
1829 
1830 /************************************************************************
1831  *                          KQFILTER OPS                                *
1832  ************************************************************************/
1833 
1834 static void filt_tmpfsdetach(struct knote *kn);
1835 static int filt_tmpfsread(struct knote *kn, long hint);
1836 static int filt_tmpfswrite(struct knote *kn, long hint);
1837 static int filt_tmpfsvnode(struct knote *kn, long hint);
1838 
1839 static struct filterops tmpfsread_filtops =
1840 	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
1841 	  NULL, filt_tmpfsdetach, filt_tmpfsread };
1842 static struct filterops tmpfswrite_filtops =
1843 	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
1844 	  NULL, filt_tmpfsdetach, filt_tmpfswrite };
1845 static struct filterops tmpfsvnode_filtops =
1846 	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
1847 	  NULL, filt_tmpfsdetach, filt_tmpfsvnode };
1848 
1849 static int
1850 tmpfs_kqfilter (struct vop_kqfilter_args *ap)
1851 {
1852 	struct vnode *vp = ap->a_vp;
1853 	struct knote *kn = ap->a_kn;
1854 
1855 	switch (kn->kn_filter) {
1856 	case EVFILT_READ:
1857 		kn->kn_fop = &tmpfsread_filtops;
1858 		break;
1859 	case EVFILT_WRITE:
1860 		kn->kn_fop = &tmpfswrite_filtops;
1861 		break;
1862 	case EVFILT_VNODE:
1863 		kn->kn_fop = &tmpfsvnode_filtops;
1864 		break;
1865 	default:
1866 		return (EOPNOTSUPP);
1867 	}
1868 
1869 	kn->kn_hook = (caddr_t)vp;
1870 
1871 	knote_insert(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
1872 
1873 	return(0);
1874 }
1875 
1876 static void
1877 filt_tmpfsdetach(struct knote *kn)
1878 {
1879 	struct vnode *vp = (void *)kn->kn_hook;
1880 
1881 	knote_remove(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
1882 }
1883 
1884 static int
1885 filt_tmpfsread(struct knote *kn, long hint)
1886 {
1887 	struct vnode *vp = (void *)kn->kn_hook;
1888 	struct tmpfs_node *node = VP_TO_TMPFS_NODE(vp);
1889 	off_t off;
1890 
1891 	if (hint == NOTE_REVOKE) {
1892 		kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
1893 		return(1);
1894 	}
1895 
1896 	/*
1897 	 * Interlock against MP races when performing this function.
1898 	 */
1899 	TMPFS_NODE_LOCK_SH(node);
1900 	off = node->tn_size - kn->kn_fp->f_offset;
1901 	kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
1902 	if (kn->kn_sfflags & NOTE_OLDAPI) {
1903 		TMPFS_NODE_UNLOCK(node);
1904 		return(1);
1905 	}
1906 	if (kn->kn_data == 0) {
1907 		kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
1908 	}
1909 	TMPFS_NODE_UNLOCK(node);
1910 	return (kn->kn_data != 0);
1911 }
1912 
1913 static int
1914 filt_tmpfswrite(struct knote *kn, long hint)
1915 {
1916 	if (hint == NOTE_REVOKE)
1917 		kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
1918 	kn->kn_data = 0;
1919 	return (1);
1920 }
1921 
1922 static int
1923 filt_tmpfsvnode(struct knote *kn, long hint)
1924 {
1925 	if (kn->kn_sfflags & hint)
1926 		kn->kn_fflags |= hint;
1927 	if (hint == NOTE_REVOKE) {
1928 		kn->kn_flags |= (EV_EOF | EV_NODATA);
1929 		return (1);
1930 	}
1931 	return (kn->kn_fflags != 0);
1932 }
1933 
1934 /*
1935  * Helper to move VM pages between objects
1936  *
1937  * NOTE: The vm_page_rename() dirties the page, so we can clear the
1938  *	 PG_NEED_COMMIT flag.  If the pages are being moved into tn_aobj,
1939  *	 the pageout daemon will be able to page them out.
1940  */
1941 static int
1942 tmpfs_move_pages_callback(vm_page_t p, void *data)
1943 {
1944 	struct rb_vm_page_scan_info *info = data;
1945 	vm_pindex_t pindex;
1946 
1947 	pindex = p->pindex;
1948 	if (vm_page_busy_try(p, TRUE)) {
1949 		vm_page_sleep_busy(p, TRUE, "tpgmov");
1950 		info->error = -1;
1951 		return -1;
1952 	}
1953 	if (p->object != info->object || p->pindex != pindex) {
1954 		vm_page_wakeup(p);
1955 		info->error = -1;
1956 		return -1;
1957 	}
1958 	vm_page_rename(p, info->backing_object, pindex);
1959 	vm_page_clear_commit(p);
1960 	vm_page_wakeup(p);
1961 	/* page automaticaly made dirty */
1962 
1963 	return 0;
1964 }
1965 
1966 static
1967 void
1968 tmpfs_move_pages(vm_object_t src, vm_object_t dst)
1969 {
1970 	struct rb_vm_page_scan_info info;
1971 
1972 	vm_object_hold(src);
1973 	vm_object_hold(dst);
1974 	info.object = src;
1975 	info.backing_object = dst;
1976 	do {
1977 		info.error = 1;
1978 		vm_page_rb_tree_RB_SCAN(&src->rb_memq, NULL,
1979 					tmpfs_move_pages_callback, &info);
1980 	} while (info.error < 0);
1981 	vm_object_drop(dst);
1982 	vm_object_drop(src);
1983 }
1984 
1985 /* --------------------------------------------------------------------- */
1986 
1987 /*
1988  * vnode operations vector used for files stored in a tmpfs file system.
1989  */
1990 struct vop_ops tmpfs_vnode_vops = {
1991 	.vop_default =			vop_defaultop,
1992 	.vop_getpages = 		vop_stdgetpages,
1993 	.vop_putpages = 		vop_stdputpages,
1994 	.vop_ncreate =			tmpfs_ncreate,
1995 	.vop_nresolve =			tmpfs_nresolve,
1996 	.vop_nlookupdotdot =		tmpfs_nlookupdotdot,
1997 	.vop_nmknod =			tmpfs_nmknod,
1998 	.vop_open =			tmpfs_open,
1999 	.vop_close =			tmpfs_close,
2000 	.vop_access =			tmpfs_access,
2001 	.vop_getattr =			tmpfs_getattr,
2002 	.vop_setattr =			tmpfs_setattr,
2003 	.vop_read =			tmpfs_read,
2004 	.vop_write =			tmpfs_write,
2005 	.vop_fsync =			tmpfs_fsync,
2006 	.vop_mountctl =			tmpfs_mountctl,
2007 	.vop_nremove =			tmpfs_nremove,
2008 	.vop_nlink =			tmpfs_nlink,
2009 	.vop_nrename =			tmpfs_nrename,
2010 	.vop_nmkdir =			tmpfs_nmkdir,
2011 	.vop_nrmdir =			tmpfs_nrmdir,
2012 	.vop_nsymlink =			tmpfs_nsymlink,
2013 	.vop_readdir =			tmpfs_readdir,
2014 	.vop_readlink =			tmpfs_readlink,
2015 	.vop_inactive =			tmpfs_inactive,
2016 	.vop_reclaim =			tmpfs_reclaim,
2017 	.vop_print =			tmpfs_print,
2018 	.vop_pathconf =			tmpfs_pathconf,
2019 	.vop_bmap =			tmpfs_bmap,
2020 	.vop_strategy =			tmpfs_strategy,
2021 	.vop_advlock =			tmpfs_advlock,
2022 	.vop_kqfilter =			tmpfs_kqfilter
2023 };
2024