xref: /dflybsd-src/sys/vfs/tmpfs/tmpfs_vnops.c (revision 681e352e6cf7f30481ae49f284ab7dab4168ba5a)
1 /*-
2  * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to The NetBSD Foundation
6  * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
7  * 2005 program.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  *
30  * $NetBSD: tmpfs_vnops.c,v 1.39 2007/07/23 15:41:01 jmmv Exp $
31  */
32 
33 /*
34  * tmpfs vnode interface.
35  */
36 
37 #include <sys/kernel.h>
38 #include <sys/kern_syscall.h>
39 #include <sys/param.h>
40 #include <sys/fcntl.h>
41 #include <sys/lockf.h>
42 #include <sys/priv.h>
43 #include <sys/proc.h>
44 #include <sys/resourcevar.h>
45 #include <sys/sched.h>
46 #include <sys/stat.h>
47 #include <sys/systm.h>
48 #include <sys/unistd.h>
49 #include <sys/vfsops.h>
50 #include <sys/vnode.h>
51 #include <sys/mountctl.h>
52 
53 #include <vm/vm.h>
54 #include <vm/vm_object.h>
55 #include <vm/vm_page.h>
56 #include <vm/vm_pager.h>
57 #include <vm/swap_pager.h>
58 
59 #include <sys/buf2.h>
60 
61 #include <vfs/fifofs/fifo.h>
62 #include <vfs/tmpfs/tmpfs_vnops.h>
63 #if 0
64 #include <vfs/tmpfs/tmpfs.h>
65 #endif
66 #include "tmpfs.h"
67 
68 static void tmpfs_strategy_done(struct bio *bio);
69 
70 static __inline
71 void
72 tmpfs_knote(struct vnode *vp, int flags)
73 {
74 	if (flags)
75 		KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags);
76 }
77 
78 
79 /* --------------------------------------------------------------------- */
80 
/*
 * Resolve a namecache entry in directory dvp.  On success the resolved
 * vnode is recorded in the namecache; a definite miss is recorded as a
 * negative entry.  Runs under the per-mount lwkt token.
 */
static int
tmpfs_nresolve(struct vop_nresolve_args *v)
{
	struct vnode *dvp = v->a_dvp;
	struct vnode *vp = NULL;
	struct namecache *ncp = v->a_nch->ncp;
	struct tmpfs_node *tnode;
	struct mount *mp;

	int error;
	struct tmpfs_dirent *de;
	struct tmpfs_node *dnode;

	mp = dvp->v_mount;
	lwkt_gettoken(&mp->mnt_token);

	dnode = VP_TO_TMPFS_DIR(dvp);

	/* Search the directory for the requested name. */
	de = tmpfs_dir_lookup(dnode, NULL, ncp);
	if (de == NULL) {
		error = ENOENT;
	} else {
		/*
		 * Allocate a vnode for the node we found.
		 */
		tnode = de->td_node;
		error = tmpfs_alloc_vp(dvp->v_mount, tnode,
				       LK_EXCLUSIVE | LK_RETRY, &vp);
		if (error)
			goto out;
		KKASSERT(vp);
	}

out:
	/*
	 * Store the result of this lookup in the cache: a positive entry
	 * when we obtained a vnode, a negative entry on a definite
	 * ENOENT miss.  Any other error leaves the cache untouched.
	 * The vnode is unlocked before cache_setvp() and released
	 * afterwards; the namecache holds its own reference.
	 */
	if (vp) {
		vn_unlock(vp);
		cache_setvp(v->a_nch, vp);
		vrele(vp);
	} else if (error == ENOENT) {
		cache_setvp(v->a_nch, NULL);
	}

	lwkt_reltoken(&mp->mnt_token);
	return (error);
}
131 
/*
 * Resolve "..": return (unlocked, in *vpp) a vnode for the parent of
 * directory dvp.  VEXEC permission on dvp is required.
 */
static int
tmpfs_nlookupdotdot(struct vop_nlookupdotdot_args *v)
{
	struct vnode *dvp = v->a_dvp;
	struct vnode **vpp = v->a_vpp;
	struct tmpfs_node *dnode = VP_TO_TMPFS_NODE(dvp);
	struct ucred *cred = v->a_cred;
	struct mount *mp;
	int error;

	*vpp = NULL;

	mp = dvp->v_mount;
	lwkt_gettoken(&mp->mnt_token);

	/* Check accessibility of requested node as a first step. */
	error = VOP_ACCESS(dvp, VEXEC, cred);
	if (error != 0) {
		lwkt_reltoken(&mp->mnt_token);
		return error;
	}

	/* tn_parent is NULL for the filesystem root, which has no "..". */
	if (dnode->tn_dir.tn_parent != NULL) {
		/* Allocate a new vnode on the matching entry. */
		error = tmpfs_alloc_vp(dvp->v_mount, dnode->tn_dir.tn_parent,
				       LK_EXCLUSIVE | LK_RETRY, vpp);

		if (*vpp)
			vn_unlock(*vpp);
	}

	lwkt_reltoken(&mp->mnt_token);

	/*
	 * NOTE(review): a tmpfs_alloc_vp() failure is reported as plain
	 * ENOENT here (the specific error code is discarded) — confirm
	 * this is the intended contract for nlookupdotdot.
	 */
	return (*vpp == NULL) ? ENOENT : 0;
}
167 
168 /* --------------------------------------------------------------------- */
169 
170 static int
171 tmpfs_ncreate(struct vop_ncreate_args *v)
172 {
173 	struct vnode *dvp = v->a_dvp;
174 	struct vnode **vpp = v->a_vpp;
175 	struct namecache *ncp = v->a_nch->ncp;
176 	struct vattr *vap = v->a_vap;
177 	struct ucred *cred = v->a_cred;
178 	struct mount *mp;
179 	int error;
180 
181 	mp = dvp->v_mount;
182 	lwkt_gettoken(&mp->mnt_token);
183 
184 	KKASSERT(vap->va_type == VREG || vap->va_type == VSOCK);
185 
186 	error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL);
187 	if (error == 0) {
188 		cache_setunresolved(v->a_nch);
189 		cache_setvp(v->a_nch, *vpp);
190 		tmpfs_knote(dvp, NOTE_WRITE);
191 	}
192 
193 	lwkt_reltoken(&mp->mnt_token);
194 
195 	return (error);
196 }
197 /* --------------------------------------------------------------------- */
198 
199 static int
200 tmpfs_nmknod(struct vop_nmknod_args *v)
201 {
202 	struct vnode *dvp = v->a_dvp;
203 	struct vnode **vpp = v->a_vpp;
204 	struct namecache *ncp = v->a_nch->ncp;
205 	struct vattr *vap = v->a_vap;
206 	struct ucred *cred = v->a_cred;
207 	struct mount *mp = dvp->v_mount;
208 	int error;
209 
210 	lwkt_gettoken(&mp->mnt_token);
211 
212 	if (vap->va_type != VBLK && vap->va_type != VCHR &&
213 	    vap->va_type != VFIFO) {
214 		lwkt_reltoken(&mp->mnt_token);
215 		return (EINVAL);
216 	}
217 
218 	error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL);
219 	if (error == 0) {
220 		cache_setunresolved(v->a_nch);
221 		cache_setvp(v->a_nch, *vpp);
222 		tmpfs_knote(dvp, NOTE_WRITE);
223 	}
224 
225 	lwkt_reltoken(&mp->mnt_token);
226 
227 	return error;
228 }
229 
230 /* --------------------------------------------------------------------- */
231 
232 static int
233 tmpfs_open(struct vop_open_args *v)
234 {
235 	struct vnode *vp = v->a_vp;
236 	int mode = v->a_mode;
237 	struct mount *mp = vp->v_mount;
238 	struct tmpfs_node *node;
239 	int error;
240 
241 	lwkt_gettoken(&mp->mnt_token);
242 	node = VP_TO_TMPFS_NODE(vp);
243 
244 #if 0
245 	/* The file is still active but all its names have been removed
246 	 * (e.g. by a "rmdir $(pwd)").  It cannot be opened any more as
247 	 * it is about to die. */
248 	if (node->tn_links < 1)
249 		return (ENOENT);
250 #endif
251 
252 	/* If the file is marked append-only, deny write requests. */
253 	if ((node->tn_flags & APPEND) &&
254 	    (mode & (FWRITE | O_APPEND)) == FWRITE) {
255 		error = EPERM;
256 	} else {
257 		error = (vop_stdopen(v));
258 	}
259 
260 	lwkt_reltoken(&mp->mnt_token);
261 	return (error);
262 }
263 
264 /* --------------------------------------------------------------------- */
265 
266 static int
267 tmpfs_close(struct vop_close_args *v)
268 {
269 	struct vnode *vp = v->a_vp;
270 	struct tmpfs_node *node;
271 	int error;
272 
273 	lwkt_gettoken(&vp->v_mount->mnt_token);
274 	node = VP_TO_TMPFS_NODE(vp);
275 
276 	if (node->tn_links > 0) {
277 		/*
278 		 * Update node times.  No need to do it if the node has
279 		 * been deleted, because it will vanish after we return.
280 		 */
281 		tmpfs_update(vp);
282 	}
283 
284 	error = vop_stdclose(v);
285 
286 	lwkt_reltoken(&vp->v_mount->mnt_token);
287 
288 	return (error);
289 }
290 
291 /* --------------------------------------------------------------------- */
292 
293 int
294 tmpfs_access(struct vop_access_args *v)
295 {
296 	struct vnode *vp = v->a_vp;
297 	int error;
298 	struct tmpfs_node *node;
299 
300 	lwkt_gettoken(&vp->v_mount->mnt_token);
301 	node = VP_TO_TMPFS_NODE(vp);
302 
303 	switch (vp->v_type) {
304 	case VDIR:
305 		/* FALLTHROUGH */
306 	case VLNK:
307 		/* FALLTHROUGH */
308 	case VREG:
309 		if ((v->a_mode & VWRITE) &&
310 	            (vp->v_mount->mnt_flag & MNT_RDONLY)) {
311 			error = EROFS;
312 			goto out;
313 		}
314 		break;
315 
316 	case VBLK:
317 		/* FALLTHROUGH */
318 	case VCHR:
319 		/* FALLTHROUGH */
320 	case VSOCK:
321 		/* FALLTHROUGH */
322 	case VFIFO:
323 		break;
324 
325 	default:
326 		error = EINVAL;
327 		goto out;
328 	}
329 
330 	if ((v->a_mode & VWRITE) && (node->tn_flags & IMMUTABLE)) {
331 		error = EPERM;
332 		goto out;
333 	}
334 
335 	error = vop_helper_access(v, node->tn_uid, node->tn_gid,
336 			          node->tn_mode, 0);
337 
338 out:
339 	lwkt_reltoken(&vp->v_mount->mnt_token);
340 	return error;
341 }
342 
343 /* --------------------------------------------------------------------- */
344 
345 int
346 tmpfs_getattr(struct vop_getattr_args *v)
347 {
348 	struct vnode *vp = v->a_vp;
349 	struct vattr *vap = v->a_vap;
350 	struct tmpfs_node *node;
351 
352 	lwkt_gettoken(&vp->v_mount->mnt_token);
353 	node = VP_TO_TMPFS_NODE(vp);
354 
355 	tmpfs_update(vp);
356 
357 	vap->va_type = vp->v_type;
358 	vap->va_mode = node->tn_mode;
359 	vap->va_nlink = node->tn_links;
360 	vap->va_uid = node->tn_uid;
361 	vap->va_gid = node->tn_gid;
362 	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
363 	vap->va_fileid = node->tn_id;
364 	vap->va_size = node->tn_size;
365 	vap->va_blocksize = PAGE_SIZE;
366 	vap->va_atime.tv_sec = node->tn_atime;
367 	vap->va_atime.tv_nsec = node->tn_atimensec;
368 	vap->va_mtime.tv_sec = node->tn_mtime;
369 	vap->va_mtime.tv_nsec = node->tn_mtimensec;
370 	vap->va_ctime.tv_sec = node->tn_ctime;
371 	vap->va_ctime.tv_nsec = node->tn_ctimensec;
372 	vap->va_gen = node->tn_gen;
373 	vap->va_flags = node->tn_flags;
374 	if (vp->v_type == VBLK || vp->v_type == VCHR)
375 	{
376 		vap->va_rmajor = umajor(node->tn_rdev);
377 		vap->va_rminor = uminor(node->tn_rdev);
378 	}
379 	vap->va_bytes = round_page(node->tn_size);
380 	vap->va_filerev = 0;
381 
382 	lwkt_reltoken(&vp->v_mount->mnt_token);
383 
384 	return 0;
385 }
386 
387 /* --------------------------------------------------------------------- */
388 
/*
 * Set attributes on a tmpfs vnode.  Each attribute group present in
 * *vap (i.e. not VNOVAL) is applied in turn; the chain stops at the
 * first failure because every step is guarded by (error == 0).
 * kqueue NOTE_* flags are accumulated and posted once at the end.
 */
int
tmpfs_setattr(struct vop_setattr_args *v)
{
	struct vnode *vp = v->a_vp;
	struct vattr *vap = v->a_vap;
	struct ucred *cred = v->a_cred;
	struct tmpfs_node *node = VP_TO_TMPFS_NODE(vp);
	int error = 0;
	int kflags = 0;

	lwkt_gettoken(&vp->v_mount->mnt_token);
	/* File flags (immutable, append-only, ...). */
	if (error == 0 && (vap->va_flags != VNOVAL)) {
		error = tmpfs_chflags(vp, vap->va_flags, cred);
		kflags |= NOTE_ATTRIB;
	}

	/* File size (truncate/extend). */
	if (error == 0 && (vap->va_size != VNOVAL)) {
		if (vap->va_size > node->tn_size)
			kflags |= NOTE_WRITE | NOTE_EXTEND;
		else
			kflags |= NOTE_WRITE;
		error = tmpfs_chsize(vp, vap->va_size, cred);
	}

	/* Ownership; either uid or gid alone may be set. */
	if (error == 0 && (vap->va_uid != (uid_t)VNOVAL ||
			   vap->va_gid != (gid_t)VNOVAL)) {
		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred);
		kflags |= NOTE_ATTRIB;
	}

	/* Permission bits. */
	if (error == 0 && (vap->va_mode != (mode_t)VNOVAL)) {
		error = tmpfs_chmod(vp, vap->va_mode, cred);
		kflags |= NOTE_ATTRIB;
	}

	/*
	 * Access/modification times.  A timestamp is applied only when
	 * both its tv_sec and tv_nsec fields are valid.
	 */
	if (error == 0 && ((vap->va_atime.tv_sec != VNOVAL &&
	    vap->va_atime.tv_nsec != VNOVAL) ||
	    (vap->va_mtime.tv_sec != VNOVAL &&
	    vap->va_mtime.tv_nsec != VNOVAL) )) {
		error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime,
				      vap->va_vaflags, cred);
		kflags |= NOTE_ATTRIB;
	}

	/* Update the node times.  We give preference to the error codes
	 * generated by this function rather than the ones that may arise
	 * from tmpfs_update. */
	tmpfs_update(vp);
	tmpfs_knote(vp, kflags);

	lwkt_reltoken(&vp->v_mount->mnt_token);

	return (error);
}
443 
444 /* --------------------------------------------------------------------- */
445 
446 /*
447  * fsync is usually a NOP, but we must take action when unmounting or
448  * when recycling.
449  */
450 static int
451 tmpfs_fsync(struct vop_fsync_args *v)
452 {
453 	struct tmpfs_node *node;
454 	struct vnode *vp = v->a_vp;
455 
456 	lwkt_gettoken(&vp->v_mount->mnt_token);
457 	node = VP_TO_TMPFS_NODE(vp);
458 
459 	tmpfs_update(vp);
460 	if (vp->v_type == VREG) {
461 		if (vp->v_flag & VRECLAIMED) {
462 			if (node->tn_links == 0)
463 				tmpfs_truncate(vp, 0);
464 			else
465 				vfsync(v->a_vp, v->a_waitfor, 1, NULL, NULL);
466 		}
467 	}
468 
469 	lwkt_reltoken(&vp->v_mount->mnt_token);
470 	return 0;
471 }
472 
473 /* --------------------------------------------------------------------- */
474 
/*
 * Read from a tmpfs regular file.  First tries the VM page-cache
 * shortcut; whatever remains is satisfied through the buffer cache
 * (backed by tmpfs_strategy).  Marks the node accessed on the way out.
 */
static int
tmpfs_read (struct vop_read_args *ap)
{
	struct buf *bp;
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	struct tmpfs_node *node;
	off_t base_offset;
	size_t offset;
	size_t len;
	size_t resid;
	int error;

	/*
	 * Check the basics
	 */
	if (uio->uio_offset < 0)
		return (EINVAL);
	if (vp->v_type != VREG)
		return (EINVAL);

	/*
	 * Extract node, try to shortcut the operation through
	 * the VM page cache, allowing us to avoid buffer cache
	 * overheads.
	 */
	node = VP_TO_TMPFS_NODE(vp);
        resid = uio->uio_resid;
        error = vop_helper_read_shortcut(ap);
        if (error)
                return error;
        if (uio->uio_resid == 0) {
		/* Shortcut satisfied the whole request (or it was empty). */
		if (resid)
			goto finished;
		return error;
	}

	/*
	 * Fall-through to our normal read code.
	 */
	while (uio->uio_resid > 0 && uio->uio_offset < node->tn_size) {
		/*
		 * Use buffer cache I/O (via tmpfs_strategy)
		 */
		offset = (size_t)uio->uio_offset & BMASK;
		base_offset = (off_t)uio->uio_offset - offset;
		/* Try a cheap cache hit first; fall back to bread(). */
		bp = getcacheblk(vp, base_offset, BSIZE, 0);
		if (bp == NULL) {
			lwkt_gettoken(&vp->v_mount->mnt_token);
			error = bread(vp, base_offset, BSIZE, &bp);
			if (error) {
				brelse(bp);
				lwkt_reltoken(&vp->v_mount->mnt_token);
				kprintf("tmpfs_read bread error %d\n", error);
				break;
			}
			lwkt_reltoken(&vp->v_mount->mnt_token);
		}

		/*
		 * Figure out how many bytes we can actually copy this loop.
		 * Bounded by the block, the caller's residual, and EOF.
		 */
		len = BSIZE - offset;
		if (len > uio->uio_resid)
			len = uio->uio_resid;
		if (len > node->tn_size - uio->uio_offset)
			len = (size_t)(node->tn_size - uio->uio_offset);

		error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio);
		bqrelse(bp);
		if (error) {
			kprintf("tmpfs_read uiomove error %d\n", error);
			break;
		}
	}

finished:
	/* Data was delivered; flag the node for an atime update. */
	TMPFS_NODE_LOCK(node);
	node->tn_status |= TMPFS_NODE_ACCESSED;
	TMPFS_NODE_UNLOCK(node);

	return (error);
}
558 
559 static int
560 tmpfs_write (struct vop_write_args *ap)
561 {
562 	struct buf *bp;
563 	struct vnode *vp = ap->a_vp;
564 	struct uio *uio = ap->a_uio;
565 	struct thread *td = uio->uio_td;
566 	struct tmpfs_node *node;
567 	boolean_t extended;
568 	off_t oldsize;
569 	int error;
570 	off_t base_offset;
571 	size_t offset;
572 	size_t len;
573 	struct rlimit limit;
574 	int trivial = 0;
575 	int kflags = 0;
576 
577 	error = 0;
578 	if (uio->uio_resid == 0) {
579 		return error;
580 	}
581 
582 	node = VP_TO_TMPFS_NODE(vp);
583 
584 	if (vp->v_type != VREG)
585 		return (EINVAL);
586 
587 	lwkt_gettoken(&vp->v_mount->mnt_token);
588 
589 	oldsize = node->tn_size;
590 	if (ap->a_ioflag & IO_APPEND)
591 		uio->uio_offset = node->tn_size;
592 
593 	/*
594 	 * Check for illegal write offsets.
595 	 */
596 	if (uio->uio_offset + uio->uio_resid >
597 	  VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize) {
598 		lwkt_reltoken(&vp->v_mount->mnt_token);
599 		return (EFBIG);
600 	}
601 
602 	/*
603 	 * NOTE: Ignore if UIO does not come from a user thread (e.g. VN).
604 	 */
605 	if (vp->v_type == VREG && td != NULL && td->td_lwp != NULL) {
606 		error = kern_getrlimit(RLIMIT_FSIZE, &limit);
607 		if (error != 0) {
608 			lwkt_reltoken(&vp->v_mount->mnt_token);
609 			return error;
610 		}
611 		if (uio->uio_offset + uio->uio_resid > limit.rlim_cur) {
612 			ksignal(td->td_proc, SIGXFSZ);
613 			lwkt_reltoken(&vp->v_mount->mnt_token);
614 			return (EFBIG);
615 		}
616 	}
617 
618 
619 	/*
620 	 * Extend the file's size if necessary
621 	 */
622 	extended = ((uio->uio_offset + uio->uio_resid) > node->tn_size);
623 
624 	while (uio->uio_resid > 0) {
625 		/*
626 		 * Use buffer cache I/O (via tmpfs_strategy)
627 		 */
628 		offset = (size_t)uio->uio_offset & BMASK;
629 		base_offset = (off_t)uio->uio_offset - offset;
630 		len = BSIZE - offset;
631 		if (len > uio->uio_resid)
632 			len = uio->uio_resid;
633 
634 		if ((uio->uio_offset + len) > node->tn_size) {
635 			trivial = (uio->uio_offset <= node->tn_size);
636 			error = tmpfs_reg_resize(vp, uio->uio_offset + len,  trivial);
637 			if (error)
638 				break;
639 		}
640 
641 		/*
642 		 * Read to fill in any gaps.  Theoretically we could
643 		 * optimize this if the write covers the entire buffer
644 		 * and is not a UIO_NOCOPY write, however this can lead
645 		 * to a security violation exposing random kernel memory
646 		 * (whatever junk was in the backing VM pages before).
647 		 *
648 		 * So just use bread() to do the right thing.
649 		 */
650 		error = bread(vp, base_offset, BSIZE, &bp);
651 		error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio);
652 		if (error) {
653 			kprintf("tmpfs_write uiomove error %d\n", error);
654 			brelse(bp);
655 			break;
656 		}
657 
658 		if (uio->uio_offset > node->tn_size) {
659 			node->tn_size = uio->uio_offset;
660 			kflags |= NOTE_EXTEND;
661 		}
662 		kflags |= NOTE_WRITE;
663 
664 		/*
665 		 * Always try to flush the page in the UIO_NOCOPY case.  This
666 		 * can come from the pageout daemon or during vnode eviction.
667 		 * It is not necessarily going to be marked IO_ASYNC/IO_SYNC.
668 		 *
669 		 * For the normal case we buwrite(), dirtying the underlying
670 		 * VM pages instead of dirtying the buffer and releasing the
671 		 * buffer as a clean buffer.  This allows tmpfs to use
672 		 * essentially all available memory to cache file data.
673 		 * If we used bdwrite() the buffer cache would wind up
674 		 * flushing the data to swap too quickly.
675 		 */
676 		bp->b_flags |= B_AGE;
677 		if (uio->uio_segflg == UIO_NOCOPY) {
678 			bawrite(bp);
679 		} else {
680 			buwrite(bp);
681 		}
682 
683 		if (bp->b_error) {
684 			kprintf("tmpfs_write bwrite error %d\n", bp->b_error);
685 			break;
686 		}
687 	}
688 
689 	if (error) {
690 		if (extended) {
691 			(void)tmpfs_reg_resize(vp, oldsize, trivial);
692 			kflags &= ~NOTE_EXTEND;
693 		}
694 		goto done;
695 	}
696 
697 	/*
698 	 * Currently we don't set the mtime on files modified via mmap()
699 	 * because we can't tell the difference between those modifications
700 	 * and an attempt by the pageout daemon to flush tmpfs pages to
701 	 * swap.
702 	 *
703 	 * This is because in order to defer flushes as long as possible
704 	 * buwrite() works by marking the underlying VM pages dirty in
705 	 * order to be able to dispose of the buffer cache buffer without
706 	 * flushing it.
707 	 */
708 	TMPFS_NODE_LOCK(node);
709 	if (uio->uio_segflg != UIO_NOCOPY)
710 		node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED;
711 	if (extended)
712 		node->tn_status |= TMPFS_NODE_CHANGED;
713 
714 	if (node->tn_mode & (S_ISUID | S_ISGID)) {
715 		if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID, 0))
716 			node->tn_mode &= ~(S_ISUID | S_ISGID);
717 	}
718 	TMPFS_NODE_UNLOCK(node);
719 done:
720 
721 	tmpfs_knote(vp, kflags);
722 
723 	lwkt_reltoken(&vp->v_mount->mnt_token);
724 	return(error);
725 }
726 
727 static int
728 tmpfs_advlock (struct vop_advlock_args *ap)
729 {
730 	struct tmpfs_node *node;
731 	struct vnode *vp = ap->a_vp;
732 	int error;
733 
734 	lwkt_gettoken(&vp->v_mount->mnt_token);
735 	node = VP_TO_TMPFS_NODE(vp);
736 
737 	error = (lf_advlock(ap, &node->tn_advlock, node->tn_size));
738 	lwkt_reltoken(&vp->v_mount->mnt_token);
739 
740 	return (error);
741 }
742 
743 /*
744  * The strategy function is typically only called when memory pressure
745  * forces the system to attempt to pageout pages.  It can also be called
746  * by [n]vtruncbuf() when a truncation cuts a page in half.  Normal write
747  * operations
748  */
static int
tmpfs_strategy(struct vop_strategy_args *ap)
{
	struct bio *bio = ap->a_bio;
	struct bio *nbio;
	struct buf *bp = bio->bio_buf;
	struct vnode *vp = ap->a_vp;
	struct tmpfs_node *node;
	vm_object_t uobj;
	vm_page_t m;
	int i;

	/* Only regular files have backing storage; fail anything else. */
	if (vp->v_type != VREG) {
		bp->b_resid = bp->b_bcount;
		bp->b_flags |= B_ERROR | B_INVAL;
		bp->b_error = EINVAL;
		biodone(bio);
		return(0);
	}

	lwkt_gettoken(&vp->v_mount->mnt_token);
	node = VP_TO_TMPFS_NODE(vp);

	/* The anonymous VM object holding the file's pages. */
	uobj = node->tn_reg.tn_aobj;

	/*
	 * Don't bother flushing to swap if there is no swap, just
	 * ensure that the pages are marked as needing a commit (still).
	 */
	if (bp->b_cmd == BUF_CMD_WRITE && vm_swap_size == 0) {
		for (i = 0; i < bp->b_xio.xio_npages; ++i) {
			m = bp->b_xio.xio_pages[i];
			vm_page_need_commit(m);
		}
		bp->b_resid = 0;
		bp->b_error = 0;
		biodone(bio);
	} else {
		/*
		 * Push a new bio layer so tmpfs_strategy_done can fix up
		 * the commit state after the swap pager finishes the I/O.
		 */
		nbio = push_bio(bio);
		nbio->bio_done = tmpfs_strategy_done;
		nbio->bio_offset = bio->bio_offset;
		swap_pager_strategy(uobj, nbio);
	}

	lwkt_reltoken(&vp->v_mount->mnt_token);
	return 0;
}
796 
797 /*
798  * If we were unable to commit the pages to swap make sure they are marked
799  * as needing a commit (again).  If we were, clear the flag to allow the
800  * pages to be freed.
801  */
802 static void
803 tmpfs_strategy_done(struct bio *bio)
804 {
805 	struct buf *bp;
806 	vm_page_t m;
807 	int i;
808 
809 	bp = bio->bio_buf;
810 
811 	if (bp->b_flags & B_ERROR) {
812 		bp->b_flags &= ~B_ERROR;
813 		bp->b_error = 0;
814 		bp->b_resid = 0;
815 		for (i = 0; i < bp->b_xio.xio_npages; ++i) {
816 			m = bp->b_xio.xio_pages[i];
817 			vm_page_need_commit(m);
818 		}
819 	} else {
820 		for (i = 0; i < bp->b_xio.xio_npages; ++i) {
821 			m = bp->b_xio.xio_pages[i];
822 			vm_page_clear_commit(m);
823 		}
824 	}
825 	bio = pop_bio(bio);
826 	biodone(bio);
827 }
828 
829 static int
830 tmpfs_bmap(struct vop_bmap_args *ap)
831 {
832 	if (ap->a_doffsetp != NULL)
833 		*ap->a_doffsetp = ap->a_loffset;
834 	if (ap->a_runp != NULL)
835 		*ap->a_runp = 0;
836 	if (ap->a_runb != NULL)
837 		*ap->a_runb = 0;
838 
839 	return 0;
840 }
841 
842 /* --------------------------------------------------------------------- */
843 
844 static int
845 tmpfs_nremove(struct vop_nremove_args *v)
846 {
847 	struct vnode *dvp = v->a_dvp;
848 	struct namecache *ncp = v->a_nch->ncp;
849 	struct vnode *vp;
850 	int error;
851 	struct tmpfs_dirent *de;
852 	struct tmpfs_mount *tmp;
853 	struct tmpfs_node *dnode;
854 	struct tmpfs_node *node;
855 	struct mount *mp;
856 
857 	mp = dvp->v_mount;
858 
859 	lwkt_gettoken(&mp->mnt_token);
860 
861 	/*
862 	 * We have to acquire the vp from v->a_nch because we will likely
863 	 * unresolve the namecache entry, and a vrele/vput is needed to
864 	 * trigger the tmpfs_inactive/tmpfs_reclaim sequence.
865 	 *
866 	 * We have to use vget to clear any inactive state on the vnode,
867 	 * otherwise the vnode may remain inactive and thus tmpfs_inactive
868 	 * will not get called when we release it.
869 	 */
870 	error = cache_vget(v->a_nch, v->a_cred, LK_SHARED, &vp);
871 	KKASSERT(vp->v_mount == dvp->v_mount);
872 	KKASSERT(error == 0);
873 	vn_unlock(vp);
874 
875 	if (vp->v_type == VDIR) {
876 		error = EISDIR;
877 		goto out;
878 	}
879 
880 	dnode = VP_TO_TMPFS_DIR(dvp);
881 	node = VP_TO_TMPFS_NODE(vp);
882 	tmp = VFS_TO_TMPFS(vp->v_mount);
883 	de = tmpfs_dir_lookup(dnode, node, ncp);
884 	if (de == NULL) {
885 		error = ENOENT;
886 		goto out;
887 	}
888 
889 	/* Files marked as immutable or append-only cannot be deleted. */
890 	if ((node->tn_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
891 	    (dnode->tn_flags & APPEND)) {
892 		error = EPERM;
893 		goto out;
894 	}
895 
896 	/* Remove the entry from the directory; as it is a file, we do not
897 	 * have to change the number of hard links of the directory. */
898 	tmpfs_dir_detach(dnode, de);
899 
900 	/* Free the directory entry we just deleted.  Note that the node
901 	 * referred by it will not be removed until the vnode is really
902 	 * reclaimed. */
903 	tmpfs_free_dirent(tmp, de);
904 
905 	if (node->tn_links > 0) {
906 	        TMPFS_NODE_LOCK(node);
907 		node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
908 	                TMPFS_NODE_MODIFIED;
909 	        TMPFS_NODE_UNLOCK(node);
910 	}
911 
912 	cache_unlink(v->a_nch);
913 	tmpfs_knote(vp, NOTE_DELETE);
914 	tmpfs_knote(dvp, NOTE_WRITE);
915 	error = 0;
916 
917 out:
918 	vrele(vp);
919 	lwkt_reltoken(&mp->mnt_token);
920 
921 	return error;
922 }
923 
924 /* --------------------------------------------------------------------- */
925 
926 static int
927 tmpfs_nlink(struct vop_nlink_args *v)
928 {
929 	struct vnode *dvp = v->a_dvp;
930 	struct vnode *vp = v->a_vp;
931 	struct namecache *ncp = v->a_nch->ncp;
932 	struct tmpfs_dirent *de;
933 	struct tmpfs_node *node;
934 	struct tmpfs_node *dnode;
935 	struct mount *mp;
936 	int error;
937 
938 	if (dvp->v_mount != vp->v_mount)
939 		return(EXDEV);
940 	mp = dvp->v_mount;
941 
942 	lwkt_gettoken(&mp->mnt_token);
943 	KKASSERT(dvp != vp); /* XXX When can this be false? */
944 
945 	node = VP_TO_TMPFS_NODE(vp);
946 	dnode = VP_TO_TMPFS_NODE(dvp);
947 
948 	/* XXX: Why aren't the following two tests done by the caller? */
949 
950 	/* Hard links of directories are forbidden. */
951 	if (vp->v_type == VDIR) {
952 		error = EPERM;
953 		goto out;
954 	}
955 
956 	/* Cannot create cross-device links. */
957 	if (dvp->v_mount != vp->v_mount) {
958 		error = EXDEV;
959 		goto out;
960 	}
961 
962 	/* Ensure that we do not overflow the maximum number of links imposed
963 	 * by the system. */
964 	KKASSERT(node->tn_links <= LINK_MAX);
965 	if (node->tn_links == LINK_MAX) {
966 		error = EMLINK;
967 		goto out;
968 	}
969 
970 	/* We cannot create links of files marked immutable or append-only. */
971 	if (node->tn_flags & (IMMUTABLE | APPEND)) {
972 		error = EPERM;
973 		goto out;
974 	}
975 
976 	/* Allocate a new directory entry to represent the node. */
977 	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), node,
978 				   ncp->nc_name, ncp->nc_nlen, &de);
979 	if (error != 0)
980 		goto out;
981 
982 	/* Insert the new directory entry into the appropriate directory. */
983 	tmpfs_dir_attach(dnode, de);
984 
985 	/* vp link count has changed, so update node times. */
986 
987 	TMPFS_NODE_LOCK(node);
988 	node->tn_status |= TMPFS_NODE_CHANGED;
989 	TMPFS_NODE_UNLOCK(node);
990 	tmpfs_update(vp);
991 
992 	tmpfs_knote(vp, NOTE_LINK);
993 	cache_setunresolved(v->a_nch);
994 	cache_setvp(v->a_nch, vp);
995 	tmpfs_knote(dvp, NOTE_WRITE);
996 	error = 0;
997 
998 out:
999 	lwkt_reltoken(&mp->mnt_token);
1000 	return error;
1001 }
1002 
1003 /* --------------------------------------------------------------------- */
1004 
1005 static int
1006 tmpfs_nrename(struct vop_nrename_args *v)
1007 {
1008 	struct vnode *fdvp = v->a_fdvp;
1009 	struct namecache *fncp = v->a_fnch->ncp;
1010 	struct vnode *fvp = fncp->nc_vp;
1011 	struct vnode *tdvp = v->a_tdvp;
1012 	struct namecache *tncp = v->a_tnch->ncp;
1013 	struct vnode *tvp;
1014 	struct tmpfs_dirent *de, *tde;
1015 	struct tmpfs_mount *tmp;
1016 	struct tmpfs_node *fdnode;
1017 	struct tmpfs_node *fnode;
1018 	struct tmpfs_node *tnode;
1019 	struct tmpfs_node *tdnode;
1020 	struct mount *mp;
1021 	char *newname;
1022 	char *oldname;
1023 	int error;
1024 
1025 	mp = fdvp->v_mount;
1026 	KKASSERT(fdvp->v_mount == fvp->v_mount);
1027 
1028 	lwkt_gettoken(&mp->mnt_token);
1029 	/*
1030 	 * Because tvp can get overwritten we have to vget it instead of
1031 	 * just vref or use it, otherwise it's VINACTIVE flag may not get
1032 	 * cleared and the node won't get destroyed.
1033 	 */
1034 	error = cache_vget(v->a_tnch, v->a_cred, LK_SHARED, &tvp);
1035 	if (error == 0) {
1036 		tnode = VP_TO_TMPFS_NODE(tvp);
1037 		vn_unlock(tvp);
1038 	} else {
1039 		tnode = NULL;
1040 	}
1041 
1042 	/* Disallow cross-device renames.
1043 	 * XXX Why isn't this done by the caller? */
1044 	if (fvp->v_mount != tdvp->v_mount ||
1045 	    (tvp != NULL && fvp->v_mount != tvp->v_mount)) {
1046 		error = EXDEV;
1047 		goto out;
1048 	}
1049 
1050 	tmp = VFS_TO_TMPFS(tdvp->v_mount);
1051 	tdnode = VP_TO_TMPFS_DIR(tdvp);
1052 
1053 	/* If source and target are the same file, there is nothing to do. */
1054 	if (fvp == tvp) {
1055 		error = 0;
1056 		goto out;
1057 	}
1058 
1059 	fdnode = VP_TO_TMPFS_DIR(fdvp);
1060 	fnode = VP_TO_TMPFS_NODE(fvp);
1061 	de = tmpfs_dir_lookup(fdnode, fnode, fncp);
1062 
1063 	/* Avoid manipulating '.' and '..' entries. */
1064 	if (de == NULL) {
1065 		error = ENOENT;
1066 		goto out_locked;
1067 	}
1068 	KKASSERT(de->td_node == fnode);
1069 
1070 	/*
1071 	 * If replacing an entry in the target directory and that entry
1072 	 * is a directory, it must be empty.
1073 	 *
1074 	 * Kern_rename gurantees the destination to be a directory
1075 	 * if the source is one (it does?).
1076 	 */
1077 	if (tvp != NULL) {
1078 		KKASSERT(tnode != NULL);
1079 
1080 		if ((tnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
1081 		    (tdnode->tn_flags & (APPEND | IMMUTABLE))) {
1082 			error = EPERM;
1083 			goto out_locked;
1084 		}
1085 
1086 		if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) {
1087 			if (tnode->tn_size > 0) {
1088 				error = ENOTEMPTY;
1089 				goto out_locked;
1090 			}
1091 		} else if (fnode->tn_type == VDIR && tnode->tn_type != VDIR) {
1092 			error = ENOTDIR;
1093 			goto out_locked;
1094 		} else if (fnode->tn_type != VDIR && tnode->tn_type == VDIR) {
1095 			error = EISDIR;
1096 			goto out_locked;
1097 		} else {
1098 			KKASSERT(fnode->tn_type != VDIR &&
1099 				tnode->tn_type != VDIR);
1100 		}
1101 	}
1102 
1103 	if ((fnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
1104 	    (fdnode->tn_flags & (APPEND | IMMUTABLE))) {
1105 		error = EPERM;
1106 		goto out_locked;
1107 	}
1108 
1109 	/*
1110 	 * Ensure that we have enough memory to hold the new name, if it
1111 	 * has to be changed.
1112 	 */
1113 	if (fncp->nc_nlen != tncp->nc_nlen ||
1114 	    bcmp(fncp->nc_name, tncp->nc_name, fncp->nc_nlen) != 0) {
1115 		newname = kmalloc(tncp->nc_nlen + 1, tmp->tm_name_zone,
1116 				  M_WAITOK | M_NULLOK);
1117 		if (newname == NULL) {
1118 			error = ENOSPC;
1119 			goto out_locked;
1120 		}
1121 		bcopy(tncp->nc_name, newname, tncp->nc_nlen);
1122 		newname[tncp->nc_nlen] = '\0';
1123 	} else {
1124 		newname = NULL;
1125 	}
1126 
1127 	/*
1128 	 * Unlink entry from source directory.  Note that the kernel has
1129 	 * already checked for illegal recursion cases (renaming a directory
1130 	 * into a subdirectory of itself).
1131 	 */
1132 	if (fdnode != tdnode) {
1133 		tmpfs_dir_detach(fdnode, de);
1134 	} else {
1135 		RB_REMOVE(tmpfs_dirtree, &fdnode->tn_dir.tn_dirtree, de);
1136 	}
1137 
1138 	/*
1139 	 * Handle any name change.  Swap with newname, we will
1140 	 * deallocate it at the end.
1141 	 */
1142 	if (newname != NULL) {
1143 #if 0
1144 		TMPFS_NODE_LOCK(fnode);
1145 		fnode->tn_status |= TMPFS_NODE_CHANGED;
1146 		TMPFS_NODE_UNLOCK(fnode);
1147 #endif
1148 		oldname = de->td_name;
1149 		de->td_name = newname;
1150 		de->td_namelen = (uint16_t)tncp->nc_nlen;
1151 		newname = oldname;
1152 	}
1153 
1154 	/*
1155 	 * If we are overwriting an entry, we have to remove the old one
1156 	 * from the target directory.
1157 	 */
1158 	if (tvp != NULL) {
1159 		/* Remove the old entry from the target directory. */
1160 		tde = tmpfs_dir_lookup(tdnode, tnode, tncp);
1161 		tmpfs_dir_detach(tdnode, tde);
1162 		tmpfs_knote(tdnode->tn_vnode, NOTE_DELETE);
1163 
1164 		/*
1165 		 * Free the directory entry we just deleted.  Note that the
1166 		 * node referred by it will not be removed until the vnode is
1167 		 * really reclaimed.
1168 		 */
1169 		tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), tde);
1170 		/*cache_inval_vp(tvp, CINV_DESTROY);*/
1171 	}
1172 
1173 	/*
1174 	 * Link entry to target directory.  If the entry
1175 	 * represents a directory move the parent linkage
1176 	 * as well.
1177 	 */
1178 	if (fdnode != tdnode) {
1179 		if (de->td_node->tn_type == VDIR) {
1180 			TMPFS_VALIDATE_DIR(fnode);
1181 		}
1182 		tmpfs_dir_attach(tdnode, de);
1183 	} else {
1184 		TMPFS_NODE_LOCK(tdnode);
1185 		tdnode->tn_status |= TMPFS_NODE_MODIFIED;
1186 		RB_INSERT(tmpfs_dirtree, &tdnode->tn_dir.tn_dirtree, de);
1187 		TMPFS_NODE_UNLOCK(tdnode);
1188 	}
1189 
1190 	/*
1191 	 * Finish up
1192 	 */
1193 	if (newname) {
1194 		kfree(newname, tmp->tm_name_zone);
1195 		newname = NULL;
1196 	}
1197 	cache_rename(v->a_fnch, v->a_tnch);
1198 	tmpfs_knote(v->a_fdvp, NOTE_WRITE);
1199 	tmpfs_knote(v->a_tdvp, NOTE_WRITE);
1200 	if (fnode->tn_vnode)
1201 		tmpfs_knote(fnode->tn_vnode, NOTE_RENAME);
1202 	error = 0;
1203 
1204 out_locked:
1205 	;
1206 
1207 out:
1208 	if (tvp)
1209 		vrele(tvp);
1210 
1211 	lwkt_reltoken(&mp->mnt_token);
1212 
1213 	return error;
1214 }
1215 
1216 /* --------------------------------------------------------------------- */
1217 
1218 static int
1219 tmpfs_nmkdir(struct vop_nmkdir_args *v)
1220 {
1221 	struct vnode *dvp = v->a_dvp;
1222 	struct vnode **vpp = v->a_vpp;
1223 	struct namecache *ncp = v->a_nch->ncp;
1224 	struct vattr *vap = v->a_vap;
1225 	struct ucred *cred = v->a_cred;
1226 	struct mount *mp;
1227 	int error;
1228 
1229 	mp = dvp->v_mount;
1230 
1231 	lwkt_gettoken(&mp->mnt_token);
1232 	KKASSERT(vap->va_type == VDIR);
1233 
1234 	error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL);
1235 	if (error == 0) {
1236 		cache_setunresolved(v->a_nch);
1237 		cache_setvp(v->a_nch, *vpp);
1238 		tmpfs_knote(dvp, NOTE_WRITE | NOTE_LINK);
1239 	}
1240 
1241 	lwkt_reltoken(&mp->mnt_token);
1242 
1243 	return error;
1244 }
1245 
1246 /* --------------------------------------------------------------------- */
1247 
1248 static int
1249 tmpfs_nrmdir(struct vop_nrmdir_args *v)
1250 {
1251 	struct vnode *dvp = v->a_dvp;
1252 	struct namecache *ncp = v->a_nch->ncp;
1253 	struct vnode *vp;
1254 	struct tmpfs_dirent *de;
1255 	struct tmpfs_mount *tmp;
1256 	struct tmpfs_node *dnode;
1257 	struct tmpfs_node *node;
1258 	struct mount *mp;
1259 	int error;
1260 
1261 	mp = dvp->v_mount;
1262 	lwkt_gettoken(&mp->mnt_token);
1263 
1264 	/*
1265 	 * We have to acquire the vp from v->a_nch because we will likely
1266 	 * unresolve the namecache entry, and a vrele/vput is needed to
1267 	 * trigger the tmpfs_inactive/tmpfs_reclaim sequence.
1268 	 *
1269 	 * We have to use vget to clear any inactive state on the vnode,
1270 	 * otherwise the vnode may remain inactive and thus tmpfs_inactive
1271 	 * will not get called when we release it.
1272 	 */
1273 	error = cache_vget(v->a_nch, v->a_cred, LK_SHARED, &vp);
1274 	KKASSERT(error == 0);
1275 	vn_unlock(vp);
1276 
1277 	/*
1278 	 * Prevalidate so we don't hit an assertion later
1279 	 */
1280 	if (vp->v_type != VDIR) {
1281 		error = ENOTDIR;
1282 		goto out;
1283 	}
1284 
1285 	tmp = VFS_TO_TMPFS(dvp->v_mount);
1286 	dnode = VP_TO_TMPFS_DIR(dvp);
1287 	node = VP_TO_TMPFS_DIR(vp);
1288 
1289 	/* Directories with more than two entries ('.' and '..') cannot be
1290 	 * removed. */
1291 	 if (node->tn_size > 0) {
1292 		 error = ENOTEMPTY;
1293 		 goto out;
1294 	 }
1295 
1296 	if ((dnode->tn_flags & APPEND)
1297 	    || (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
1298 		error = EPERM;
1299 		goto out;
1300 	}
1301 
1302 	/* This invariant holds only if we are not trying to remove "..".
1303 	  * We checked for that above so this is safe now. */
1304 	KKASSERT(node->tn_dir.tn_parent == dnode);
1305 
1306 	/* Get the directory entry associated with node (vp).  This was
1307 	 * filled by tmpfs_lookup while looking up the entry. */
1308 	de = tmpfs_dir_lookup(dnode, node, ncp);
1309 	KKASSERT(TMPFS_DIRENT_MATCHES(de,
1310 	    ncp->nc_name,
1311 	    ncp->nc_nlen));
1312 
1313 	/* Check flags to see if we are allowed to remove the directory. */
1314 	if ((dnode->tn_flags & APPEND) ||
1315 	    node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) {
1316 		error = EPERM;
1317 		goto out;
1318 	}
1319 
1320 
1321 	/* Detach the directory entry from the directory (dnode). */
1322 	tmpfs_dir_detach(dnode, de);
1323 
1324 	/* No vnode should be allocated for this entry from this point */
1325 	TMPFS_NODE_LOCK(node);
1326 	TMPFS_ASSERT_ELOCKED(node);
1327 	TMPFS_NODE_LOCK(dnode);
1328 	TMPFS_ASSERT_ELOCKED(dnode);
1329 
1330 	/*
1331 	 * Must set parent linkage to NULL (tested by ncreate to disallow
1332 	 * the creation of new files/dirs in a deleted directory)
1333 	 */
1334 	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
1335 	    TMPFS_NODE_MODIFIED;
1336 
1337 	dnode->tn_status |= TMPFS_NODE_ACCESSED | \
1338 	    TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
1339 
1340 	TMPFS_NODE_UNLOCK(dnode);
1341 	TMPFS_NODE_UNLOCK(node);
1342 
1343 	/* Free the directory entry we just deleted.  Note that the node
1344 	 * referred by it will not be removed until the vnode is really
1345 	 * reclaimed. */
1346 	tmpfs_free_dirent(tmp, de);
1347 
1348 	/* Release the deleted vnode (will destroy the node, notify
1349 	 * interested parties and clean it from the cache). */
1350 
1351 	TMPFS_NODE_LOCK(dnode);
1352 	dnode->tn_status |= TMPFS_NODE_CHANGED;
1353 	TMPFS_NODE_UNLOCK(dnode);
1354 	tmpfs_update(dvp);
1355 
1356 	cache_unlink(v->a_nch);
1357 	tmpfs_knote(dvp, NOTE_WRITE | NOTE_LINK);
1358 	error = 0;
1359 
1360 out:
1361 	vrele(vp);
1362 
1363 	lwkt_reltoken(&mp->mnt_token);
1364 
1365 	return error;
1366 }
1367 
1368 /* --------------------------------------------------------------------- */
1369 
1370 static int
1371 tmpfs_nsymlink(struct vop_nsymlink_args *v)
1372 {
1373 	struct vnode *dvp = v->a_dvp;
1374 	struct vnode **vpp = v->a_vpp;
1375 	struct namecache *ncp = v->a_nch->ncp;
1376 	struct vattr *vap = v->a_vap;
1377 	struct ucred *cred = v->a_cred;
1378 	char *target = v->a_target;
1379 	struct mount *mp = dvp->v_mount;
1380 	int error;
1381 
1382 	lwkt_gettoken(&mp->mnt_token);
1383 	vap->va_type = VLNK;
1384 	error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, target);
1385 	if (error == 0) {
1386 		tmpfs_knote(*vpp, NOTE_WRITE);
1387 		cache_setunresolved(v->a_nch);
1388 		cache_setvp(v->a_nch, *vpp);
1389 	}
1390 
1391 	lwkt_reltoken(&mp->mnt_token);
1392 
1393 	return error;
1394 }
1395 
1396 /* --------------------------------------------------------------------- */
1397 
/*
 * Read directory entries into the caller's uio (VOP_READDIR).
 *
 * The uio offset doubles as a directory cookie: TMPFS_DIRCOOKIE_DOT
 * and TMPFS_DIRCOOKIE_DOTDOT select the synthetic "." and ".."
 * entries, any other value addresses a real dirent in the RB tree.
 * If the (NFS-oriented) cookies/ncookies pointers are supplied, a
 * cookie array matching the entries just emitted is allocated and
 * returned to the caller, who owns and must free it.
 */
static int
tmpfs_readdir(struct vop_readdir_args *v)
{
	struct vnode *vp = v->a_vp;
	struct uio *uio = v->a_uio;
	int *eofflag = v->a_eofflag;
	off_t **cookies = v->a_cookies;
	int *ncookies = v->a_ncookies;
	struct tmpfs_mount *tmp;
	int error;
	off_t startoff;
	off_t cnt = 0;		/* number of entries emitted */
	struct tmpfs_node *node;
	struct mount *mp = vp->v_mount;

	lwkt_gettoken(&mp->mnt_token);

	/* This operation only makes sense on directory nodes. */
	if (vp->v_type != VDIR) {
		lwkt_reltoken(&mp->mnt_token);
		return ENOTDIR;
	}

	tmp = VFS_TO_TMPFS(vp->v_mount);
	node = VP_TO_TMPFS_DIR(vp);
	/* Remember where we started so the cookie walk below can replay it. */
	startoff = uio->uio_offset;

	/* Emit "." if the caller is positioned at the beginning. */
	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOT) {
		error = tmpfs_dir_getdotdent(node, uio);
		if (error != 0)
			goto outok;
		cnt++;
	}

	/* Emit ".." next. */
	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT) {
		error = tmpfs_dir_getdotdotdent(tmp, node, uio);
		if (error != 0)
			goto outok;
		cnt++;
	}

	/* Emit as many real entries as fit; may return -1 for "full". */
	error = tmpfs_dir_getdents(node, uio, &cnt);

outok:
	KKASSERT(error >= -1);

	/* -1 is the internal "buffer full" indicator, not an error. */
	if (error == -1)
		error = 0;

	if (eofflag != NULL)
		*eofflag =
		    (error == 0 && uio->uio_offset == TMPFS_DIRCOOKIE_EOF);

	/* Update NFS-related variables. */
	if (error == 0 && cookies != NULL && ncookies != NULL) {
		off_t i;
		off_t off = startoff;
		struct tmpfs_dirent *de = NULL;

		*ncookies = cnt;
		*cookies = kmalloc(cnt * sizeof(off_t), M_TEMP, M_WAITOK);

		/*
		 * Re-walk the entries just emitted, recording the cookie
		 * of each *next* entry so a subsequent readdir can resume
		 * after it.
		 */
		for (i = 0; i < cnt; i++) {
			KKASSERT(off != TMPFS_DIRCOOKIE_EOF);
			if (off == TMPFS_DIRCOOKIE_DOT) {
				off = TMPFS_DIRCOOKIE_DOTDOT;
			} else {
				if (off == TMPFS_DIRCOOKIE_DOTDOT) {
					/* First real entry in the tree. */
					de = RB_MIN(tmpfs_dirtree, &node->tn_dir.tn_dirtree);
				} else if (de != NULL) {
					/* Continue from the previous entry. */
					de = RB_NEXT(tmpfs_dirtree, &node->tn_dir.tn_dirtree, de);
				} else {
					/* Resuming mid-directory: locate the
					 * entry for this cookie first. */
					de = tmpfs_dir_lookupbycookie(node,
					    off);
					KKASSERT(de != NULL);
					de = RB_NEXT(tmpfs_dirtree, &node->tn_dir.tn_dirtree, de);
				}
				if (de == NULL)
					off = TMPFS_DIRCOOKIE_EOF;
				else
					off = tmpfs_dircookie(de);
			}

			(*cookies)[i] = off;
		}
		/* The walk must land exactly where getdents left the uio. */
		KKASSERT(uio->uio_offset == off);
	}

	lwkt_reltoken(&mp->mnt_token);

	return error;
}
1490 
1491 /* --------------------------------------------------------------------- */
1492 
1493 static int
1494 tmpfs_readlink(struct vop_readlink_args *v)
1495 {
1496 	struct vnode *vp = v->a_vp;
1497 	struct uio *uio = v->a_uio;
1498 	struct mount *mp = vp->v_mount;
1499 	int error;
1500 	struct tmpfs_node *node;
1501 
1502 	lwkt_gettoken(&mp->mnt_token);
1503 
1504 	KKASSERT(uio->uio_offset == 0);
1505 	KKASSERT(vp->v_type == VLNK);
1506 
1507 	node = VP_TO_TMPFS_NODE(vp);
1508 
1509 	error = uiomove(node->tn_link, MIN(node->tn_size, uio->uio_resid),
1510 	    uio);
1511 	TMPFS_NODE_LOCK(node);
1512 	node->tn_status |= TMPFS_NODE_ACCESSED;
1513 	TMPFS_NODE_UNLOCK(node);
1514 
1515 	lwkt_reltoken(&mp->mnt_token);
1516 
1517 	return error;
1518 }
1519 
1520 /* --------------------------------------------------------------------- */
1521 
/*
 * VOP_INACTIVE: the vnode has lost its last active reference.  If the
 * underlying node is fully unlinked (tn_links == 0) and not still being
 * allocated, doom it and recycle the vnode now so its memory is
 * recovered immediately.
 */
static int
tmpfs_inactive(struct vop_inactive_args *v)
{
	struct vnode *vp = v->a_vp;
	struct tmpfs_node *node;
	struct mount *mp;

	mp = vp->v_mount;
	lwkt_gettoken(&mp->mnt_token);
	node = VP_TO_TMPFS_NODE(vp);

	/*
	 * Degenerate case
	 */
	if (node == NULL) {
		vrecycle(vp);
		lwkt_reltoken(&mp->mnt_token);
		return(0);
	}

	/*
	 * Get rid of unreferenced deleted vnodes sooner rather than
	 * later so the data memory can be recovered immediately.
	 *
	 * We must truncate the vnode to prevent the normal reclamation
	 * path from flushing the data for the removed file to disk.
	 */
	TMPFS_NODE_LOCK(node);
	if ((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0 &&
	    node->tn_links == 0)
	{
		/* Mark doomed, then drop the lock before truncating;
		 * tmpfs_truncate() cannot run with the node locked. */
		node->tn_vpstate = TMPFS_VNODE_DOOMED;
		TMPFS_NODE_UNLOCK(node);
		if (node->tn_type == VREG)
			tmpfs_truncate(vp, 0);
		vrecycle(vp);
	} else {
		TMPFS_NODE_UNLOCK(node);
	}
	lwkt_reltoken(&mp->mnt_token);

	return 0;
}
1565 
1566 /* --------------------------------------------------------------------- */
1567 
/*
 * VOP_RECLAIM: disassociate the vnode from its tmpfs node.  If the node
 * has no remaining links (and is not mid-allocation) its data structures
 * are freed here as well.
 */
int
tmpfs_reclaim(struct vop_reclaim_args *v)
{
	struct vnode *vp = v->a_vp;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *node;
	struct mount *mp;

	mp = vp->v_mount;
	lwkt_gettoken(&mp->mnt_token);

	node = VP_TO_TMPFS_NODE(vp);
	tmp = VFS_TO_TMPFS(vp->v_mount);
	KKASSERT(mp == tmp->tm_mount);

	/* Sever the vp <-> node association (clears vp->v_data). */
	tmpfs_free_vp(vp);

	/*
	 * If the node referenced by this vnode was deleted by the
	 * user, we must free its associated data structures now that
	 * the vnode is being reclaimed.
	 *
	 * Directories have an extra link ref.
	 */
	TMPFS_NODE_LOCK(node);
	if ((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0 &&
	    node->tn_links == 0) {
		node->tn_vpstate = TMPFS_VNODE_DOOMED;
		/* NOTE: tmpfs_free_node() consumes the node lock. */
		tmpfs_free_node(tmp, node);
		/* eats the lock */
	} else {
		TMPFS_NODE_UNLOCK(node);
	}
	lwkt_reltoken(&mp->mnt_token);

	KKASSERT(vp->v_data == NULL);
	return 0;
}
1606 
1607 /* --------------------------------------------------------------------- */
1608 
1609 static int
1610 tmpfs_mountctl(struct vop_mountctl_args *ap)
1611 {
1612 	struct tmpfs_mount *tmp;
1613 	struct mount *mp;
1614 	int rc;
1615 
1616 	mp = ap->a_head.a_ops->head.vv_mount;
1617 	lwkt_gettoken(&mp->mnt_token);
1618 
1619 	switch (ap->a_op) {
1620 	case (MOUNTCTL_SET_EXPORT):
1621 		tmp = (struct tmpfs_mount *) mp->mnt_data;
1622 
1623 		if (ap->a_ctllen != sizeof(struct export_args))
1624 			rc = (EINVAL);
1625 		else
1626 			rc = vfs_export(mp, &tmp->tm_export,
1627 					(const struct export_args *) ap->a_ctl);
1628 		break;
1629 	default:
1630 		rc = vop_stdmountctl(ap);
1631 		break;
1632 	}
1633 
1634 	lwkt_reltoken(&mp->mnt_token);
1635 	return (rc);
1636 }
1637 
1638 /* --------------------------------------------------------------------- */
1639 
1640 static int
1641 tmpfs_print(struct vop_print_args *v)
1642 {
1643 	struct vnode *vp = v->a_vp;
1644 
1645 	struct tmpfs_node *node;
1646 
1647 	node = VP_TO_TMPFS_NODE(vp);
1648 
1649 	kprintf("tag VT_TMPFS, tmpfs_node %p, flags 0x%x, links %d\n",
1650 	    node, node->tn_flags, node->tn_links);
1651 	kprintf("\tmode 0%o, owner %d, group %d, size %ju, status 0x%x\n",
1652 	    node->tn_mode, node->tn_uid, node->tn_gid,
1653 	    (uintmax_t)node->tn_size, node->tn_status);
1654 
1655 	if (vp->v_type == VFIFO)
1656 		fifo_printinfo(vp);
1657 
1658 	kprintf("\n");
1659 
1660 	return 0;
1661 }
1662 
1663 /* --------------------------------------------------------------------- */
1664 
1665 static int
1666 tmpfs_pathconf(struct vop_pathconf_args *v)
1667 {
1668 	int name = v->a_name;
1669 	register_t *retval = v->a_retval;
1670 
1671 	int error;
1672 
1673 	error = 0;
1674 
1675 	switch (name) {
1676 	case _PC_LINK_MAX:
1677 		*retval = LINK_MAX;
1678 		break;
1679 
1680 	case _PC_NAME_MAX:
1681 		*retval = NAME_MAX;
1682 		break;
1683 
1684 	case _PC_PATH_MAX:
1685 		*retval = PATH_MAX;
1686 		break;
1687 
1688 	case _PC_PIPE_BUF:
1689 		*retval = PIPE_BUF;
1690 		break;
1691 
1692 	case _PC_CHOWN_RESTRICTED:
1693 		*retval = 1;
1694 		break;
1695 
1696 	case _PC_NO_TRUNC:
1697 		*retval = 1;
1698 		break;
1699 
1700 	case _PC_SYNC_IO:
1701 		*retval = 1;
1702 		break;
1703 
1704 	case _PC_FILESIZEBITS:
1705 		*retval = 0; /* XXX Don't know which value should I return. */
1706 		break;
1707 
1708 	default:
1709 		error = EINVAL;
1710 	}
1711 
1712 	return error;
1713 }
1714 
1715 /************************************************************************
1716  *                          KQFILTER OPS                                *
1717  ************************************************************************/
1718 
static void filt_tmpfsdetach(struct knote *kn);
static int filt_tmpfsread(struct knote *kn, long hint);
static int filt_tmpfswrite(struct knote *kn, long hint);
static int filt_tmpfsvnode(struct knote *kn, long hint);

/*
 * Filter-ops tables selected by tmpfs_kqfilter() for EVFILT_READ,
 * EVFILT_WRITE and EVFILT_VNODE knotes; all share the same detach hook.
 */
static struct filterops tmpfsread_filtops =
	{ FILTEROP_ISFD, NULL, filt_tmpfsdetach, filt_tmpfsread };
static struct filterops tmpfswrite_filtops =
	{ FILTEROP_ISFD, NULL, filt_tmpfsdetach, filt_tmpfswrite };
static struct filterops tmpfsvnode_filtops =
	{ FILTEROP_ISFD, NULL, filt_tmpfsdetach, filt_tmpfsvnode };
1730 
1731 static int
1732 tmpfs_kqfilter (struct vop_kqfilter_args *ap)
1733 {
1734 	struct vnode *vp = ap->a_vp;
1735 	struct knote *kn = ap->a_kn;
1736 
1737 	switch (kn->kn_filter) {
1738 	case EVFILT_READ:
1739 		kn->kn_fop = &tmpfsread_filtops;
1740 		break;
1741 	case EVFILT_WRITE:
1742 		kn->kn_fop = &tmpfswrite_filtops;
1743 		break;
1744 	case EVFILT_VNODE:
1745 		kn->kn_fop = &tmpfsvnode_filtops;
1746 		break;
1747 	default:
1748 		return (EOPNOTSUPP);
1749 	}
1750 
1751 	kn->kn_hook = (caddr_t)vp;
1752 
1753 	knote_insert(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
1754 
1755 	return(0);
1756 }
1757 
/*
 * Detach a knote from the vnode it was attached to in tmpfs_kqfilter().
 */
static void
filt_tmpfsdetach(struct knote *kn)
{
	struct vnode *vp = (void *)kn->kn_hook;

	knote_remove(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
}
1765 
1766 static int
1767 filt_tmpfsread(struct knote *kn, long hint)
1768 {
1769 	struct vnode *vp = (void *)kn->kn_hook;
1770 	struct tmpfs_node *node = VP_TO_TMPFS_NODE(vp);
1771 	off_t off;
1772 
1773 	if (hint == NOTE_REVOKE) {
1774 		kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
1775 		return(1);
1776 	}
1777 
1778 	/*
1779 	 * Interlock against MP races when performing this function.
1780 	 */
1781 	lwkt_gettoken(&vp->v_mount->mnt_token);
1782 	off = node->tn_size - kn->kn_fp->f_offset;
1783 	kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
1784 	if (kn->kn_sfflags & NOTE_OLDAPI) {
1785 		lwkt_reltoken(&vp->v_mount->mnt_token);
1786 		return(1);
1787 	}
1788 
1789 	if (kn->kn_data == 0) {
1790 		kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
1791 	}
1792 	lwkt_reltoken(&vp->v_mount->mnt_token);
1793 	return (kn->kn_data != 0);
1794 }
1795 
/*
 * EVFILT_WRITE: tmpfs vnodes are always considered writable; kn_data
 * is reported as 0 (no buffer-space metric).
 */
static int
filt_tmpfswrite(struct knote *kn, long hint)
{
	if (hint == NOTE_REVOKE)
		kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
	kn->kn_data = 0;
	return (1);
}
1804 
/*
 * EVFILT_VNODE: latch hinted vnode events the subscriber asked for;
 * NOTE_REVOKE forces EOF.
 */
static int
filt_tmpfsvnode(struct knote *kn, long hint)
{
	/* Accumulate the event only if the caller subscribed to it. */
	if (kn->kn_sfflags & hint)
		kn->kn_fflags |= hint;
	if (hint == NOTE_REVOKE) {
		kn->kn_flags |= (EV_EOF | EV_NODATA);
		return (1);
	}
	return (kn->kn_fflags != 0);
}
1816 
1817 
1818 /* --------------------------------------------------------------------- */
1819 
1820 /*
1821  * vnode operations vector used for files stored in a tmpfs file system.
1822  */
struct vop_ops tmpfs_vnode_vops = {
	.vop_default =			vop_defaultop,
	.vop_getpages = 		vop_stdgetpages,
	.vop_putpages = 		vop_stdputpages,
	/* Namecache operations (create/lookup). */
	.vop_ncreate =			tmpfs_ncreate,
	.vop_nresolve =			tmpfs_nresolve,
	.vop_nlookupdotdot =		tmpfs_nlookupdotdot,
	.vop_nmknod =			tmpfs_nmknod,
	/* Open/close, attributes and basic I/O. */
	.vop_open =			tmpfs_open,
	.vop_close =			tmpfs_close,
	.vop_access =			tmpfs_access,
	.vop_getattr =			tmpfs_getattr,
	.vop_setattr =			tmpfs_setattr,
	.vop_read =			tmpfs_read,
	.vop_write =			tmpfs_write,
	.vop_fsync =			tmpfs_fsync,
	.vop_mountctl =			tmpfs_mountctl,
	/* Namespace-modifying operations. */
	.vop_nremove =			tmpfs_nremove,
	.vop_nlink =			tmpfs_nlink,
	.vop_nrename =			tmpfs_nrename,
	.vop_nmkdir =			tmpfs_nmkdir,
	.vop_nrmdir =			tmpfs_nrmdir,
	.vop_nsymlink =			tmpfs_nsymlink,
	.vop_readdir =			tmpfs_readdir,
	.vop_readlink =			tmpfs_readlink,
	/* Vnode lifecycle. */
	.vop_inactive =			tmpfs_inactive,
	.vop_reclaim =			tmpfs_reclaim,
	.vop_print =			tmpfs_print,
	.vop_pathconf =			tmpfs_pathconf,
	.vop_bmap =			tmpfs_bmap,
	.vop_strategy =			tmpfs_strategy,
	.vop_advlock =			tmpfs_advlock,
	.vop_kqfilter =			tmpfs_kqfilter
};
1857