/*	$NetBSD: vfs_subr.c,v 1.126 2000/05/28 04:13:56 mycroft Exp $	*/

/*-
 * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * External virtual filesystem routines
 */

#include "opt_ddb.h"
#include "opt_compat_netbsd.h"
#include "opt_compat_43.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/syscallargs.h>
#include <sys/device.h>
#include <sys/dirent.h>

#include <vm/vm.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/syncfs/syncfs.h>

#include <uvm/uvm_extern.h>
#include <uvm/uvm.h>
#include <uvm/uvm_ddb.h>

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */

extern int dovfsusermount;	/* 1 => permit any user to mount filesystems */

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
}
/* TAILQ_HEAD(freelst, vnode) vnode_free_list =	vnode free list (in vnode.h) */
struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);

struct mntlist mountlist =			/* mounted filesystem list */
    CIRCLEQ_HEAD_INITIALIZER(mountlist);
struct vfs_list_head vfs_list =			/* vfs list */
    LIST_HEAD_INITIALIZER(vfs_list);

struct nfs_public nfs_pub;			/* publicly exported FS */

struct simplelock mountlist_slock;
static struct simplelock mntid_slock;
struct simplelock mntvnode_slock;
struct simplelock vnode_free_list_slock;
struct simplelock spechash_slock;

/*
 * These define the root filesystem and device.
 */
struct mount *rootfs;
struct vnode *rootvnode;
struct device *root_device;			/* root device */

struct pool vnode_pool;				/* memory pool for vnodes */

/*
 * Local declarations.
 */
void insmntque __P((struct vnode *, struct mount *));
int getdevvp __P((dev_t, struct vnode **, enum vtype));
void vgoneall __P((struct vnode *));

static int vfs_hang_addrlist __P((struct mount *, struct netexport *,
				  struct export_args *));
static int vfs_free_netcred __P((struct radix_node *, void *));
static void vfs_free_addrlist __P((struct netexport *));

#ifdef DEBUG
void printlockedvnodes __P((void));
#endif

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	simple_lock_init(&vnode_free_list_slock);

	pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
	    0, pool_page_alloc_nointr, pool_page_free_nointr, M_VNODE);

	/*
	 * Initialize the filesystem syncer.
	 */
	vn_initialize_syncerd();
}

/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting. Interlock is not released on failure.
 */
int
vfs_busy(mp, flags, interlkp)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
{
	int lkflags;

	while (mp->mnt_flag & MNT_UNMOUNT) {
		int gone;

		if (flags & LK_NOWAIT)
			return (ENOENT);
		if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL
		    && mp->mnt_unmounter == curproc)
			return (EDEADLK);
		if (interlkp)
			simple_unlock(interlkp);
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 *
		 * XXX MP: add spinlock protecting mnt_wcnt here once you
		 * can atomically unlock-and-sleep.
		 */
		mp->mnt_wcnt++;
		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
		mp->mnt_wcnt--;
		gone = mp->mnt_flag & MNT_GONE;

		if (mp->mnt_wcnt == 0)
			wakeup(&mp->mnt_wcnt);
		if (interlkp)
			simple_lock(interlkp);
		if (gone)
			return (ENOENT);
	}
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem.
 */
void
vfs_unbusy(mp)
	struct mount *mp;
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
}

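/*
 * Usage sketch (illustrative, not part of the original source): a
 * caller walking the mount list brackets its work on each mount
 * point with vfs_busy()/vfs_unbusy(), as sysctl_vnode() below does:
 *
 *	if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock) == 0) {
 *		... work on mp ...
 *		vfs_unbusy(mp);
 *	}
 */
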
/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct vfsops *vfsp = NULL;
	struct mount *mp;

	for (vfsp = LIST_FIRST(&vfs_list); vfsp != NULL;
	     vfsp = LIST_NEXT(vfsp, vfs_list))
		if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
			break;

	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	memset((char *)mp, 0, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	(void)vfs_busy(mp, LK_NOWAIT, 0);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_op = vfsp;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfs_refcount++;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

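/*
 * Usage sketch (illustrative): a file system's mountroot routine
 * would typically start with something like
 *
 *	struct mount *mp;
 *	int error;
 *
 *	if ((error = vfs_rootmountalloc("ffs", "root_device", &mp)) != 0)
 *		return (error);
 *	... perform fs-specific root mount setup on mp ...
 */
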
/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	struct mount *mp;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	     mp = mp->mnt_list.cqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *)0);
}

/*
 * Get a new unique fsid.
 */
void
vfs_getnewfsid(mp, fstypename)
	struct mount *mp;
	char *fstypename;
{
	static u_short xxxfs_mntid;
	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = makefstype(fstypename);
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev((nblkdev + mtype) & 0xff, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (mountlist.cqh_first != (void *)&mountlist) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Make a 'unique' number from a mount type name.
 */
long
makefstype(type)
	char *type;
{
	long rv;

	for (rv = 0; *type; type++) {
		rv <<= 2;
		rv ^= *type;
	}
	return rv;
}

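/*
 * Worked example for makefstype(): for the name "nfs" the loop
 * computes
 *
 *	rv = (0 << 2) ^ 'n'	= 0x06e
 *	rv = (0x06e << 2) ^ 'f'	= 0x1de
 *	rv = (0x1de << 2) ^ 's'	= 0x70b
 *
 * so makefstype("nfs") == 0x70b.  The result is 'unique' only in the
 * weak sense that distinct short type names rarely collide.
 */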

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	struct vattr *vap;
{

	vap->va_type = VNON;

	/*
	 * Assign individually so that it is safe even if size and
	 * sign of each member are varied.
	 */
	vap->va_mode = VNOVAL;
	vap->va_nlink = VNOVAL;
	vap->va_uid = VNOVAL;
	vap->va_gid = VNOVAL;
	vap->va_fsid = VNOVAL;
	vap->va_fileid = VNOVAL;
	vap->va_size = VNOVAL;
	vap->va_blocksize = VNOVAL;
	vap->va_atime.tv_sec =
	    vap->va_mtime.tv_sec =
	    vap->va_ctime.tv_sec = VNOVAL;
	vap->va_atime.tv_nsec =
	    vap->va_mtime.tv_nsec =
	    vap->va_ctime.tv_nsec = VNOVAL;
	vap->va_gen = VNOVAL;
	vap->va_flags = VNOVAL;
	vap->va_rdev = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_vaflags = 0;
}

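/*
 * Usage sketch (illustrative): a caller that wants to change just a
 * few attributes resets them all to VNOVAL first, so the file system
 * can tell which fields were actually supplied:
 *
 *	struct vattr va;
 *
 *	vattr_null(&va);
 *	va.va_size = 0;			(truncate to zero length)
 *	error = VOP_SETATTR(vp, &va, cred, p);
 */
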
/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p) __P((void *));
long numvnodes;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops) __P((void *));
	struct vnode **vpp;
{
	struct proc *p = curproc;	/* XXX */
	struct freelst *listhd;
	static int toggle;
	struct vnode *vp;
	int error = 0;
#ifdef DIAGNOSTIC
	int s;
#endif
	if (mp) {
		/*
		 * Mark filesystem busy while we're creating a vnode.
		 * If unmount is in progress, this will wait; if the
		 * unmount succeeds (only if umount -f), this will
		 * return an error.  If the unmount fails, we'll keep
		 * going afterwards.
		 * (This puts the per-mount vnode list logically under
		 * the protection of the vfs_busy lock).
		 */
		error = vfs_busy(mp, LK_RECURSEFAIL, 0);
		if (error && error != EDEADLK)
			return error;
	}

	/*
	 * We must choose whether to allocate a new vnode or recycle an
	 * existing one. The criterion for allocating a new one is that
	 * the total number of vnodes is less than the number desired or
	 * there are no vnodes on either free list. Generally we only
	 * want to recycle vnodes that have no buffers associated with
	 * them, so we look first on the vnode_free_list. If it is empty,
	 * we next consider vnodes with referencing buffers on the
	 * vnode_hold_list. The toggle ensures that half the time we
	 * will use a vnode from the vnode_hold_list, and half the time
	 * we will allocate a new one unless the list has grown to twice
	 * the desired size. We are reluctant to recycle vnodes from the
	 * vnode_hold_list because we will lose the identity of all its
	 * referencing buffers.
	 */
	toggle ^= 1;
	if (numvnodes > 2 * desiredvnodes)
		toggle = 0;

	simple_lock(&vnode_free_list_slock);
	if (numvnodes < desiredvnodes ||
	    (TAILQ_FIRST(listhd = &vnode_free_list) == NULL &&
	    (TAILQ_FIRST(listhd = &vnode_hold_list) == NULL || toggle))) {
		simple_unlock(&vnode_free_list_slock);
		vp = pool_get(&vnode_pool, PR_WAITOK);
		memset((char *)vp, 0, sizeof(*vp));
		simple_lock_init(&vp->v_interlock);
		numvnodes++;
	} else {
		for (vp = TAILQ_FIRST(listhd); vp != NULLVP;
		    vp = TAILQ_NEXT(vp, v_freelist)) {
			if (simple_lock_try(&vp->v_interlock)) {
				if ((vp->v_flag & VLAYER) == 0) {
					break;
				}
				if (VOP_ISLOCKED(vp) == 0)
					break;
				else
					simple_unlock(&vp->v_interlock);
			}
		}
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULLVP) {
			simple_unlock(&vnode_free_list_slock);
			if (mp && error != EDEADLK)
				vfs_unbusy(mp);
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't, vp %p", vp);
		TAILQ_REMOVE(listhd, vp, v_freelist);
		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		simple_unlock(&vnode_free_list_slock);
		vp->v_lease = NULL;
		if (vp->v_type != VBAD)
			vgonel(vp, p);
		else
			simple_unlock(&vp->v_interlock);
#ifdef DIAGNOSTIC
		if (vp->v_data)
			panic("cleaned vnode isn't, vp %p", vp);
		s = splbio();
		if (vp->v_numoutput)
			panic("clean vnode has pending I/O's, vp %p", vp);
		splx(s);
#endif
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_ralen = 0;
		vp->v_maxra = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	simple_lock_init(&vp->v_uvm.u_obj.vmobjlock);
	if (mp && error != EDEADLK)
		vfs_unbusy(mp);
	return (0);
}

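/*
 * Usage sketch (illustrative, in the style of an ffs-like vget): a
 * file system obtains a fresh vnode and attaches its private data:
 *
 *	struct vnode *vp;
 *	int error;
 *
 *	if ((error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp)) != 0)
 *		return (error);
 *	vp->v_data = ip;	(ip is a hypothetical fs-private node)
 */
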
/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
	struct vnode *vp;
	struct mount *mp;
{

#ifdef DIAGNOSTIC
	if ((mp != NULL) &&
	    (mp->mnt_flag & MNT_UNMOUNT) &&
	    !(mp->mnt_flag & MNT_SOFTDEP) &&
	    vp->v_tag != VT_VFS) {
		panic("insmntque into dying filesystem");
	}
#endif

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	struct buf *bp;
{
	struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if ((vp = bp->b_vp) != NULL) {
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput, vp %p", vp);
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	struct buf *bp, *nbp;
	int s, error;

	if (flags & V_SAVE) {
		error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, p);
		if (error)
			return (error);
#ifdef DIAGNOSTIC
		s = splbio();
		if (vp->v_numoutput > 0 || !LIST_EMPTY(&vp->v_dirtyblkhd))
			panic("vinvalbuf: dirty bufs, vp %p", vp);
		splx(s);
#endif
	}

	s = splbio();

restart:
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
			    "vinvalbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
			    "vinvalbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		/*
		 * XXX Since there are no node locks for NFS, I believe
		 * there is a slight chance that a delayed write will
		 * occur while sleeping just above, so check for it.
		 */
		if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
#ifdef DEBUG
			printf("buffer still DELWRI\n");
#endif
			bp->b_flags |= B_BUSY | B_VFLUSH;
			VOP_BWRITE(bp);
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

#ifdef DIAGNOSTIC
	if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
		panic("vinvalbuf: flush failed, vp %p", vp);
#endif

	splx(s);

	return (0);
}

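/*
 * Usage sketch (illustrative): callers pick the flags according to
 * whether dirty data must survive, for example
 *
 *	error = vinvalbuf(vp, V_SAVE, cred, p, 0, 0);	(write, then toss)
 *	error = vinvalbuf(vp, 0, cred, p, 0, 0);	(discard outright)
 */
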
/*
 * Destroy any in core blocks past the truncation length.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 */
int
vtruncbuf(vp, lbn, slpflag, slptimeo)
	struct vnode *vp;
	daddr_t lbn;
	int slpflag, slptimeo;
{
	struct buf *bp, *nbp;
	int s, error;

	s = splbio();

restart:
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
			    "vtruncbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
			    "vtruncbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

	splx(s);

	return (0);
}

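/*
 * Flush all dirty buffers associated with a vnode; if sync is set,
 * wait for the writes to complete before returning.
 */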
void
vflushbuf(vp, sync)
	struct vnode *vp;
	int sync;
{
	struct buf *bp, *nbp;
	int s;

loop:
	s = splbio();
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty, bp %p", bp);
		bp->b_flags |= B_BUSY | B_VFLUSH;
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0);
	}
	splx(s);
	if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	struct vnode *vp;
	struct buf *bp;
{
	int s;

	if (bp->b_vp)
		panic("bgetvp: not free, bp %p", bp);
	VHOLD(vp);
	s = splbio();
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
	splx(s);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	struct buf *bp;
{
	struct vnode *vp;
	int s;

	if (bp->b_vp == NULL)
		panic("brelvp: vp NULL, bp %p", bp);

	s = splbio();
	vp = bp->b_vp;
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	if ((vp->v_flag & VONWORKLST) && LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
		vp->v_flag &= ~VONWORKLST;
		LIST_REMOVE(vp, v_synclist);
	}
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
	splx(s);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 *
 * This function must be called at splbio().
 */
void
reassignbuf(bp, newvp)
	struct buf *bp;
	struct vnode *newvp;
{
	struct buflists *listheadp;
	int delay;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}

	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if ((bp->b_flags & B_DELWRI) == 0) {
		listheadp = &newvp->v_cleanblkhd;
		if ((newvp->v_flag & VONWORKLST) &&
		    LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) {
			newvp->v_flag &= ~VONWORKLST;
			LIST_REMOVE(newvp, v_synclist);
		}
	} else {
		listheadp = &newvp->v_dirtyblkhd;
		if ((newvp->v_flag & VONWORKLST) == 0) {
			switch (newvp->v_type) {
			case VDIR:
				delay = dirdelay;
				break;
			case VBLK:
				if (newvp->v_specmountpoint != NULL) {
					delay = metadelay;
					break;
				}
				/* fall through */
			default:
				delay = filedelay;
				break;
			}
			if (!newvp->v_mount ||
			    (newvp->v_mount->mnt_flag & MNT_ASYNC) == 0)
				vn_syncer_add_to_worklist(newvp, delay);
		}
	}
	bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VBLK));
}

/*
 * Create a vnode for a character device.
 * Used for kernfs and some console handling.
 */
int
cdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VCHR));
}

/*
 * Create a vnode for a device.
 * Used by bdevvp (block device) for root file system etc.,
 * and by cdevvp (character device) for console and kernfs.
 */
int
getdevvp(dev, vpp, type)
	dev_t dev;
	struct vnode **vpp;
	enum vtype type;
{
	struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (0);
	}
	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = type;
	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

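/*
 * Usage sketch (illustrative): root mounting and swap configuration
 * obtain their device vnodes this way:
 *
 *	struct vnode *devvp;
 *
 *	if (bdevvp(rootdev, &devvp))
 *		panic("bdevvp: cannot set up root device vnode");
 */
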
/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
			sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specmountpoint = NULL;
		simple_unlock(&spechash_slock);
		nvp->v_speclockf = NULL;
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

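/*
 * Usage sketch (illustrative): a file system initializing a vnode it
 * has just read from disk checks for an existing alias, in the style
 * of ufs_vinit():
 *
 *	vp->v_type = IFTOVT(ip->i_mode);	(ip is hypothetical)
 *	if ((nvp = checkalias(vp, ip->i_rdev, mp)) != NULL) {
 *		... adopt nvp, discard the private data of vp ...
 *	}
 */
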
/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. If the vnode lock bit is set the
 * vnode is being eliminated in vgone. In that case, we can not
 * grab the vnode, so the process is awakened when the transition is
 * completed, and an error returned to indicate that the vnode is no
 * longer usable (possibly having been changed to a new file system type).
 */
int
vget(vp, flags)
	struct vnode *vp;
	int flags;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */
	if ((flags & LK_INTERLOCK) == 0)
		simple_lock(&vp->v_interlock);
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (ENOENT);
	}
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		else
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vget", vp);
		panic("vget: usecount overflow, vp %p", vp);
	}
#endif
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags | LK_INTERLOCK))) {
			/*
			 * must expand vrele here because we do not want
			 * to call VOP_INACTIVE if the reference count
			 * drops back to zero since it was never really
			 * active. We must remove it from the free list
			 * before sleeping so that multiple processes do
			 * not try to recycle it.
			 */
			simple_lock(&vp->v_interlock);
			vp->v_usecount--;
			if (vp->v_usecount > 0) {
				simple_unlock(&vp->v_interlock);
				return (error);
			}
			/*
			 * insert at tail of LRU list
			 */
			simple_lock(&vnode_free_list_slock);
			if (vp->v_holdcnt > 0)
				TAILQ_INSERT_TAIL(&vnode_hold_list, vp,
				    v_freelist);
			else
				TAILQ_INSERT_TAIL(&vnode_free_list, vp,
				    v_freelist);
			simple_unlock(&vnode_free_list_slock);
			simple_unlock(&vp->v_interlock);
		}
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		VOP_UNLOCK(vp, 0);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vput: bad ref count", vp);
		panic("vput: ref cnt");
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	simple_unlock(&vp->v_interlock);
	VOP_INACTIVE(vp, p);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
		VOP_INACTIVE(vp, p);
}

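/*
 * Usage sketch (illustrative) of the reference life cycle: vget()
 * takes a reference (and, with LK_TYPE_MASK flags, the vnode lock);
 * vput() unlocks and releases, vrele() releases an unlocked vnode:
 *
 *	if (vget(vp, LK_EXCLUSIVE) == 0) {
 *		... use the locked vnode ...
 *		vput(vp);
 *	}
 */
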
#ifdef DIAGNOSTIC
/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	struct vnode *vp;
{

	/*
	 * If it is on the freelist and the hold count is currently
	 * zero, move it to the hold list. The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
	 */
	simple_lock(&vp->v_interlock);
	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_holdcnt++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt vp %p", vp);
	vp->v_holdcnt--;
	/*
	 * If it is on the holdlist and the hold count drops to
	 * zero, move it to the free list. The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
	 */
	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	simple_unlock(&vp->v_interlock);
}

/*
 * Vnode reference.
 */
void
vref(vp)
	struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount <= 0)
		panic("vref used where vget required, vp %p", vp);
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vref", vp);
		panic("vref: usecount overflow, vp %p", vp);
	}
#endif
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DEBUG
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}

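/*
 * Usage sketch (illustrative): an unmount routine flushes the vnodes
 * of its mount, keeping the device vnode alive by passing it as
 * skipvp:
 *
 *	flags = (mntflags & MNT_FORCE) ? FORCECLOSE : 0;
 *	error = vflush(mp, devvp, flags);
 */
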
/*
 * Disassociate the underlying file system from a vnode.
 */
void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount) != 0) {
		/* We have the vnode interlock. */
		vp->v_usecount++;
#ifdef DIAGNOSTIC
		if (vp->v_usecount == 0) {
			vprint("vclean", vp);
			panic("vclean: usecount overflow");
		}
#endif
	}

	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock, vp %p", vp);
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK);

	/*
	 * clean out any VM data associated with the vnode.
	 */
	uvm_vnp_terminate(vp);
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);

	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim, vp %p", vp);

	if (active) {
		/*
		 * Inline copy of vrele() since VOP_INACTIVE
		 * has already been called.
		 */
		simple_lock(&vp->v_interlock);
		if (--vp->v_usecount <= 0) {
#ifdef DIAGNOSTIC
			if (vp->v_usecount < 0 || vp->v_writecount != 0) {
				vprint("vclean: bad ref count", vp);
				panic("vclean: ref cnt");
			}
#endif
			/*
			 * Insert at tail of LRU list.
			 */
			simple_unlock(&vp->v_interlock);
			simple_lock(&vnode_free_list_slock);
#ifdef DIAGNOSTIC
			if (vp->v_vnlock) {
				if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
					vprint("vclean: lock not drained", vp);
			}
			if (vp->v_holdcnt > 0)
				panic("vclean: not clean, vp %p", vp);
#endif
			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
			simple_unlock(&vnode_free_list_slock);
		} else
			simple_unlock(&vp->v_interlock);
	}

	cache_purge(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp)
			simple_unlock(inter_lkp);
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

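/*
 * Usage sketch (illustrative): a file system's inactive routine may
 * use vrecycle() to discard a vnode whose file has been removed:
 *
 *	if (ip->i_mode == 0)		(hypothetical fs-private check)
 *		vrecycle(vp, NULL, p);
 */
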
/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from the special device alias
	 * list, if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (vp->v_hashchain != NULL) {
			if (*vp->v_hashchain == vp) {
				*vp->v_hashchain = vp->v_specnext;
			} else {
				for (vq = *vp->v_hashchain; vq;
							vq = vq->v_specnext) {
					if (vq->v_specnext != vp)
						continue;
					vq->v_specnext = vp->v_specnext;
					break;
				}
				if (vq == NULL)
					panic("missing bdev");
			}
			if (vp->v_flag & VALIASED) {
				vx = NULL;
				for (vq = *vp->v_hashchain; vq;
							vq = vq->v_specnext) {
					if (vq->v_rdev != vp->v_rdev ||
					    vq->v_type != vp->v_type)
						continue;
					if (vx)
						break;
					vx = vq;
				}
				if (vx == NULL)
					panic("missing alias");
				if (vq == NULL)
					vx->v_flag &= ~VALIASED;
				vp->v_flag &= ~VALIASED;
			}
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			panic("vgonel: not clean, vp %p", vp);
		if (vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
		    TAILQ_FIRST(&vnode_free_list) != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Revoke all the vnodes corresponding to the specified minor number
 * range (endpoints inclusive) of the specified major.
 */
void
vdevgone(maj, minl, minh, type)
	int maj, minl, minh;
	enum vtype type;
{
	struct vnode *vp;
	int mn;

	for (mn = minl; mn <= minh; mn++)
		if (vfinddev(makedev(maj, mn), type, &vp))
			VOP_REVOKE(vp, REVOKEALL);
}

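/*
 * Usage sketch (illustrative): a device driver being detached can
 * revoke any vnodes referring to its device nodes:
 *
 *	vdevgone(bmaj, 0, 0, VBLK);	(bmaj/cmaj are hypothetical
 *	vdevgone(cmaj, 0, 0, VCHR);	 major numbers of the driver)
 */
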
/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
	char *label;
	struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("tag %d type %s, usecount %ld, writecount %ld, refcount %ld,",
	    vp->v_tag, typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);
}
#endif

extern const char *mountcompatnames[];
extern const int nmountcompatnames;

/*
 * Top level filesystem related information gathering.
 */
int
vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	struct vfsconf vfc;
#endif
	struct vfsops *vfsp;

	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */

	/* Not generic: goes to file system. */
	if (name[0] != VFS_GENERIC) {
		if (name[0] >= nmountcompatnames || name[0] < 0 ||
		    mountcompatnames[name[0]] == NULL)
			return (EOPNOTSUPP);
		vfsp = vfs_getopsbyname(mountcompatnames[name[0]]);
		if (vfsp == NULL || vfsp->vfs_sysctl == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}

	/* The rest are generic vfs sysctls. */
	switch (name[1]) {
	case VFS_USERMOUNT:
		return sysctl_int(oldp, oldlenp, newp, newlen, &dovfsusermount);
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	case VFS_MAXTYPENUM:
		/*
		 * Provided for 4.4BSD-Lite2 compatibility.
		 */
		return (sysctl_rdint(oldp, oldlenp, newp, nmountcompatnames));
	case VFS_CONF:
		/*
		 * Special: a node, next is a file system name.
		 * Provided for 4.4BSD-Lite2 compatibility.
		 */
		if (namelen < 3)
			return (ENOTDIR);	/* overloaded */
		if (name[2] >= nmountcompatnames || name[2] < 0 ||
		    mountcompatnames[name[2]] == NULL)
			return (EOPNOTSUPP);
		vfsp = vfs_getopsbyname(mountcompatnames[name[2]]);
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		vfc.vfc_vfsops = vfsp;
		strncpy(vfc.vfc_name, vfsp->vfs_name, MFSNAMELEN);
		vfc.vfc_typenum = name[2];
		vfc.vfc_refcount = vfsp->vfs_refcount;
		vfc.vfc_flags = 0;
		vfc.vfc_mountroot = vfsp->vfs_mountroot;
		vfc.vfc_next = NULL;
		return (sysctl_rdstruct(oldp, oldlenp, newp, &vfc,
		    sizeof(struct vfsconf)));
#endif
	default:
		break;
	}
	return (EOPNOTSUPP);
}

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep, p)
	char *where;
	size_t *sizep;
	struct proc *p;
{
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof(struct vnode *)
#define VNODESZ	sizeof(struct vnode)
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		savebp = bp;
again:
		simple_lock(&mntvnode_slock);
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			nvp = vp->v_mntvnodes.le_next;
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				simple_unlock(&mntvnode_slock);
				*sizep = bp - where;
				return (ENOMEM);
			}
			simple_unlock(&mntvnode_slock);
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			   (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);

	*sizep = bp - where;
	return (0);
}

1929 /*
1930  * Check to see if a filesystem is mounted on a block device.
1931  */
1932 int
1933 vfs_mountedon(vp)
1934 	struct vnode *vp;
1935 {
1936 	struct vnode *vq;
1937 	int error = 0;
1938 
1939 	if (vp->v_specmountpoint != NULL)
1940 		return (EBUSY);
1941 	if (vp->v_flag & VALIASED) {
1942 		simple_lock(&spechash_slock);
1943 		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1944 			if (vq->v_rdev != vp->v_rdev ||
1945 			    vq->v_type != vp->v_type)
1946 				continue;
1947 			if (vq->v_specmountpoint != NULL) {
1948 				error = EBUSY;
1949 				break;
1950 			}
1951 		}
1952 		simple_unlock(&spechash_slock);
1953 	}
1954 	return (error);
1955 }
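
/*
 * Typical use (a sketch with hypothetical names): a file system's
 * mountfs routine refuses a device vnode that already carries a
 * mounted file system.
 */
#ifdef notdef
static int
example_premount_check(devvp)
	struct vnode *devvp;
{
	int error;

	if ((error = vfs_mountedon(devvp)) != 0)
		return (error);		/* EBUSY: already mounted */
	return (0);
}
#endif /* notdef */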
1956 
1957 /*
 * Build address lists (radix trees keyed by net address) and hang them
 * off the mount point.  Called by vfs_export() to set up the lists of
 * export addresses.
1960  */
1961 static int
1962 vfs_hang_addrlist(mp, nep, argp)
1963 	struct mount *mp;
1964 	struct netexport *nep;
1965 	struct export_args *argp;
1966 {
1967 	struct netcred *np, *enp;
1968 	struct radix_node_head *rnh;
1969 	int i;
1970 	struct radix_node *rn;
1971 	struct sockaddr *saddr, *smask = 0;
1972 	struct domain *dom;
1973 	int error;
1974 
1975 	if (argp->ex_addrlen == 0) {
1976 		if (mp->mnt_flag & MNT_DEFEXPORTED)
1977 			return (EPERM);
1978 		np = &nep->ne_defexported;
1979 		np->netc_exflags = argp->ex_flags;
1980 		np->netc_anon = argp->ex_anon;
1981 		np->netc_anon.cr_ref = 1;
1982 		mp->mnt_flag |= MNT_DEFEXPORTED;
1983 		return (0);
1984 	}
1985 	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
1986 	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
1987 	memset((caddr_t)np, 0, i);
1988 	saddr = (struct sockaddr *)(np + 1);
1989 	error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
1990 	if (error)
1991 		goto out;
1992 	if (saddr->sa_len > argp->ex_addrlen)
1993 		saddr->sa_len = argp->ex_addrlen;
1994 	if (argp->ex_masklen) {
1995 		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
1996 		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
1997 		if (error)
1998 			goto out;
1999 		if (smask->sa_len > argp->ex_masklen)
2000 			smask->sa_len = argp->ex_masklen;
2001 	}
2002 	i = saddr->sa_family;
2003 	if ((rnh = nep->ne_rtable[i]) == 0) {
2004 		/*
		 * It seems silly to initialize every AF when most are
		 * never used, so attach the routing tables on demand here.
2007 		 */
2008 		for (dom = domains; dom; dom = dom->dom_next)
2009 			if (dom->dom_family == i && dom->dom_rtattach) {
2010 				dom->dom_rtattach((void **)&nep->ne_rtable[i],
2011 					dom->dom_rtoffset);
2012 				break;
2013 			}
2014 		if ((rnh = nep->ne_rtable[i]) == 0) {
2015 			error = ENOBUFS;
2016 			goto out;
2017 		}
2018 	}
2019 	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
2020 		np->netc_rnodes);
2021 	if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
2022 		if (rn == 0) {
2023 			enp = (struct netcred *)(*rnh->rnh_lookup)(saddr,
2024 				smask, rnh);
2025 			if (enp == 0) {
2026 				error = EPERM;
2027 				goto out;
2028 			}
2029 		} else
2030 			enp = (struct netcred *)rn;
2031 
2032 		if (enp->netc_exflags != argp->ex_flags ||
2033 		    enp->netc_anon.cr_uid != argp->ex_anon.cr_uid ||
2034 		    enp->netc_anon.cr_gid != argp->ex_anon.cr_gid ||
2035 		    enp->netc_anon.cr_ngroups != argp->ex_anon.cr_ngroups ||
		    memcmp(&enp->netc_anon.cr_groups, &argp->ex_anon.cr_groups,
			enp->netc_anon.cr_ngroups * sizeof(gid_t)))
2038 				error = EPERM;
2039 		else
2040 			error = 0;
2041 		goto out;
2042 	}
2043 	np->netc_exflags = argp->ex_flags;
2044 	np->netc_anon = argp->ex_anon;
2045 	np->netc_anon.cr_ref = 1;
2046 	return (0);
2047 out:
2048 	free(np, M_NETADDR);
2049 	return (error);
2050 }
2051 
2052 /* ARGSUSED */
2053 static int
2054 vfs_free_netcred(rn, w)
2055 	struct radix_node *rn;
2056 	void *w;
2057 {
2058 	struct radix_node_head *rnh = (struct radix_node_head *)w;
2059 
2060 	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
2061 	free((caddr_t)rn, M_NETADDR);
2062 	return (0);
2063 }
2064 
2065 /*
2066  * Free the net address hash lists that are hanging off the mount points.
2067  */
2068 static void
2069 vfs_free_addrlist(nep)
2070 	struct netexport *nep;
2071 {
2072 	int i;
2073 	struct radix_node_head *rnh;
2074 
2075 	for (i = 0; i <= AF_MAX; i++)
2076 		if ((rnh = nep->ne_rtable[i]) != NULL) {
2077 			(*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
2078 			free((caddr_t)rnh, M_RTABLE);
2079 			nep->ne_rtable[i] = 0;
2080 		}
2081 }
2082 
2083 int
2084 vfs_export(mp, nep, argp)
2085 	struct mount *mp;
2086 	struct netexport *nep;
2087 	struct export_args *argp;
2088 {
2089 	int error;
2090 
2091 	if (argp->ex_flags & MNT_DELEXPORT) {
2092 		if (mp->mnt_flag & MNT_EXPUBLIC) {
2093 			vfs_setpublicfs(NULL, NULL, NULL);
2094 			mp->mnt_flag &= ~MNT_EXPUBLIC;
2095 		}
2096 		vfs_free_addrlist(nep);
2097 		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
2098 	}
2099 	if (argp->ex_flags & MNT_EXPORTED) {
2100 		if (argp->ex_flags & MNT_EXPUBLIC) {
2101 			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
2102 				return (error);
2103 			mp->mnt_flag |= MNT_EXPUBLIC;
2104 		}
2105 		if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
2106 			return (error);
2107 		mp->mnt_flag |= MNT_EXPORTED;
2108 	}
2109 	return (0);
2110 }
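
/*
 * Sketch of the usual caller (hypothetical names): a file system's
 * VFS_MOUNT routine simply forwards the export_args from its mount
 * arguments; vfs_export() then interprets MNT_DELEXPORT, MNT_EXPORTED
 * and MNT_EXPUBLIC in ex_flags as above.
 */
#ifdef notdef
static int
examplefs_update_exports(mp, nep, args)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *args;
{
	return (vfs_export(mp, nep, args));
}
#endif /* notdef */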
2111 
2112 /*
 * Set the publicly exported filesystem (WebNFS).  Currently, only
 * one public filesystem is allowed by the spec (RFC 2054 and RFC 2055).
2115  */
2116 int
2117 vfs_setpublicfs(mp, nep, argp)
2118 	struct mount *mp;
2119 	struct netexport *nep;
2120 	struct export_args *argp;
2121 {
2122 	int error;
2123 	struct vnode *rvp;
2124 	char *cp;
2125 
2126 	/*
	 * mp == NULL -> invalidate the current info; the FS is
	 * no longer exported.  May be called from either vfs_export
	 * or unmount, so check whether it has already been done.
2130 	 */
2131 	if (mp == NULL) {
2132 		if (nfs_pub.np_valid) {
2133 			nfs_pub.np_valid = 0;
2134 			if (nfs_pub.np_index != NULL) {
2135 				FREE(nfs_pub.np_index, M_TEMP);
2136 				nfs_pub.np_index = NULL;
2137 			}
2138 		}
2139 		return (0);
2140 	}
2141 
2142 	/*
2143 	 * Only one allowed at a time.
2144 	 */
2145 	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
2146 		return (EBUSY);
2147 
2148 	/*
2149 	 * Get real filehandle for root of exported FS.
2150 	 */
2151 	memset((caddr_t)&nfs_pub.np_handle, 0, sizeof(nfs_pub.np_handle));
2152 	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;
2153 
2154 	if ((error = VFS_ROOT(mp, &rvp)))
2155 		return (error);
2156 
	error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid);
	vput(rvp);
	if (error)
		return (error);
2161 
2162 	/*
2163 	 * If an indexfile was specified, pull it in.
2164 	 */
2165 	if (argp->ex_indexfile != NULL) {
2166 		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
2167 		    M_WAITOK);
2168 		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
2169 		    MAXNAMLEN, (size_t *)0);
2170 		if (!error) {
2171 			/*
2172 			 * Check for illegal filenames.
2173 			 */
2174 			for (cp = nfs_pub.np_index; *cp; cp++) {
2175 				if (*cp == '/') {
2176 					error = EINVAL;
2177 					break;
2178 				}
2179 			}
2180 		}
		if (error) {
			FREE(nfs_pub.np_index, M_TEMP);
			nfs_pub.np_index = NULL;
			return (error);
		}
2185 	}
2186 
2187 	nfs_pub.np_mount = mp;
2188 	nfs_pub.np_valid = 1;
2189 	return (0);
2190 }
2191 
2192 struct netcred *
2193 vfs_export_lookup(mp, nep, nam)
2194 	struct mount *mp;
2195 	struct netexport *nep;
2196 	struct mbuf *nam;
2197 {
2198 	struct netcred *np;
2199 	struct radix_node_head *rnh;
2200 	struct sockaddr *saddr;
2201 
2202 	np = NULL;
2203 	if (mp->mnt_flag & MNT_EXPORTED) {
2204 		/*
2205 		 * Lookup in the export list first.
2206 		 */
2207 		if (nam != NULL) {
2208 			saddr = mtod(nam, struct sockaddr *);
2209 			rnh = nep->ne_rtable[saddr->sa_family];
2210 			if (rnh != NULL) {
2211 				np = (struct netcred *)
2212 					(*rnh->rnh_matchaddr)((caddr_t)saddr,
2213 							      rnh);
2214 				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
2215 					np = NULL;
2216 			}
2217 		}
2218 		/*
2219 		 * If no address match, use the default if it exists.
2220 		 */
2221 		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
2222 			np = &nep->ne_defexported;
2223 	}
2224 	return (np);
2225 }
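
/*
 * Sketch of an NFS-server style caller (hypothetical names): look up
 * the client's address and apply the per-export flags found in the
 * returned netcred.
 */
#ifdef notdef
static int
example_check_client(mp, nep, nam, wantwrite)
	struct mount *mp;
	struct netexport *nep;
	struct mbuf *nam;
	int wantwrite;
{
	struct netcred *np;

	np = vfs_export_lookup(mp, nep, nam);
	if (np == NULL)
		return (EACCES);	/* not exported to this host */
	if (wantwrite && (np->netc_exflags & MNT_EXRDONLY))
		return (EROFS);		/* exported read-only */
	return (0);
}
#endif /* notdef */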
2226 
2227 /*
2228  * Do the usual access checking.
 * file_mode, uid, and gid are from the vnode in question,
 * while acc_mode and cred are from the VOP_ACCESS parameter list.
2231  */
2232 int
2233 vaccess(type, file_mode, uid, gid, acc_mode, cred)
2234 	enum vtype type;
2235 	mode_t file_mode;
2236 	uid_t uid;
2237 	gid_t gid;
2238 	mode_t acc_mode;
2239 	struct ucred *cred;
2240 {
2241 	mode_t mask;
2242 
2243 	/*
2244 	 * Super-user always gets read/write access, but execute access depends
2245 	 * on at least one execute bit being set.
2246 	 */
2247 	if (cred->cr_uid == 0) {
2248 		if ((acc_mode & VEXEC) && type != VDIR &&
2249 		    (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
2250 			return (EACCES);
2251 		return (0);
2252 	}
2253 
2254 	mask = 0;
2255 
2256 	/* Otherwise, check the owner. */
2257 	if (cred->cr_uid == uid) {
2258 		if (acc_mode & VEXEC)
2259 			mask |= S_IXUSR;
2260 		if (acc_mode & VREAD)
2261 			mask |= S_IRUSR;
2262 		if (acc_mode & VWRITE)
2263 			mask |= S_IWUSR;
2264 		return ((file_mode & mask) == mask ? 0 : EACCES);
2265 	}
2266 
2267 	/* Otherwise, check the groups. */
2268 	if (cred->cr_gid == gid || groupmember(gid, cred)) {
2269 		if (acc_mode & VEXEC)
2270 			mask |= S_IXGRP;
2271 		if (acc_mode & VREAD)
2272 			mask |= S_IRGRP;
2273 		if (acc_mode & VWRITE)
2274 			mask |= S_IWGRP;
2275 		return ((file_mode & mask) == mask ? 0 : EACCES);
2276 	}
2277 
2278 	/* Otherwise, check everyone else. */
2279 	if (acc_mode & VEXEC)
2280 		mask |= S_IXOTH;
2281 	if (acc_mode & VREAD)
2282 		mask |= S_IROTH;
2283 	if (acc_mode & VWRITE)
2284 		mask |= S_IWOTH;
2285 	return ((file_mode & mask) == mask ? 0 : EACCES);
2286 }
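
/*
 * Sketch (hypothetical file system): a VOP_ACCESS implementation
 * typically fetches mode, uid, and gid from its private inode and
 * defers the actual permission check to vaccess().
 */
#ifdef notdef
static int
examplefs_access(vp, file_mode, uid, gid, acc_mode, cred)
	struct vnode *vp;
	mode_t file_mode;
	uid_t uid;
	gid_t gid;
	mode_t acc_mode;
	struct ucred *cred;
{
	return (vaccess(vp->v_type, file_mode & ALLPERMS, uid, gid,
	    acc_mode, cred));
}
#endif /* notdef */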
2287 
2288 /*
2289  * Unmount all file systems.
 * We traverse the list in reverse order so that file systems mounted
 * later, which may depend on earlier mounts, are unmounted first.
2292  */
2293 void
2294 vfs_unmountall()
2295 {
2296 	struct mount *mp, *nmp;
2297 	int allerror, error;
2298 	struct proc *p = curproc;	/* XXX */
2299 
2300 	/*
2301 	 * Unmounting a file system blocks the requesting process.
2302 	 * However, it's possible for this routine to be called when
2303 	 * curproc is NULL (e.g. panic situation, or via the debugger).
2304 	 * If we get stuck in this situation, just abort, since any
2305 	 * attempts to sleep will fault.
2306 	 */
2307 	if (p == NULL) {
2308 		printf("vfs_unmountall: no context, aborting\n");
2309 		return;
2310 	}
2311 
2312 	for (allerror = 0,
2313 	     mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
2314 		nmp = mp->mnt_list.cqe_prev;
2315 #ifdef DEBUG
2316 		printf("unmounting %s (%s)...\n",
2317 		    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
2318 #endif
2319 		if (vfs_busy(mp, 0, 0))
2320 			continue;
2321 		if ((error = dounmount(mp, MNT_FORCE, p)) != 0) {
2322 			printf("unmount of %s failed with error %d\n",
2323 			    mp->mnt_stat.f_mntonname, error);
2324 			allerror = 1;
2325 		}
2326 	}
2327 	if (allerror)
2328 		printf("WARNING: some file systems would not unmount\n");
2329 }
2330 
2331 /*
2332  * Sync and unmount file systems before shutting down.
2333  */
2334 void
2335 vfs_shutdown()
2336 {
2337 	struct buf *bp;
2338 	int iter, nbusy, dcount, s;
2339 
2340 	printf("syncing disks... ");
2341 
2342 	/* XXX Should suspend scheduling. */
2343 	(void) spl0();
2344 
2345 	sys_sync(&proc0, (void *)0, (register_t *)0);
2346 
2347 	/* Wait for sync to finish. */
2348 	dcount = 10000;
2349 	for (iter = 0; iter < 20; iter++) {
2350 		nbusy = 0;
2351 		for (bp = &buf[nbuf]; --bp >= buf; ) {
2352 			if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY)
2353 				nbusy++;
2354 			/*
			 * With soft updates, some buffers that have been
			 * written will be re-marked as dirty until other
2357 			 * buffers are written.
2358 			 */
2359 			if (bp->b_vp && bp->b_vp->v_mount
2360 			    && (bp->b_vp->v_mount->mnt_flag & MNT_SOFTDEP)
2361 			    && (bp->b_flags & B_DELWRI)) {
2362 				s = splbio();
2363 				bremfree(bp);
2364 				bp->b_flags |= B_BUSY;
2365 				splx(s);
2366 				nbusy++;
2367 				bawrite(bp);
2368 				if (dcount-- <= 0) {
2369 					printf("softdep ");
2370 					goto fail;
2371 				}
2372 			}
2373 		}
2374 		if (nbusy == 0)
2375 			break;
2376 		printf("%d ", nbusy);
2377 		DELAY(40000 * iter);
2378 	}
2379 	if (nbusy) {
2380 fail:
2381 #if defined(DEBUG) || defined(DEBUG_HALT_BUSY)
2382 		printf("giving up\nPrinting vnodes for busy buffers\n");
2383 		for (bp = &buf[nbuf]; --bp >= buf; )
2384 			if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY)
2385 				vprint(NULL, bp->b_vp);
2386 
2387 #if defined(DDB) && defined(DEBUG_HALT_BUSY)
2388 		Debugger();
2389 #endif
2390 
2391 #else  /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */
2392 		printf("giving up\n");
2393 #endif /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */
2394 		return;
2395 	} else
2396 		printf("done\n");
2397 
2398 	/*
2399 	 * If we've panic'd, don't make the situation potentially
2400 	 * worse by unmounting the file systems.
2401 	 */
2402 	if (panicstr != NULL)
2403 		return;
2404 
2405 	/* Release inodes held by texts before update. */
2406 #ifdef notdef
2407 	vnshutdown();
2408 #endif
2409 	/* Unmount file systems. */
2410 	vfs_unmountall();
2411 }
2412 
2413 /*
2414  * Mount the root file system.  If the operator didn't specify a
2415  * file system to use, try all possible file systems until one
2416  * succeeds.
2417  */
2418 int
2419 vfs_mountroot()
2420 {
2421 	extern int (*mountroot) __P((void));
2422 	struct vfsops *v;
2423 
2424 	if (root_device == NULL)
2425 		panic("vfs_mountroot: root device unknown");
2426 
2427 	switch (root_device->dv_class) {
2428 	case DV_IFNET:
2429 		if (rootdev != NODEV)
2430 			panic("vfs_mountroot: rootdev set for DV_IFNET");
2431 		break;
2432 
2433 	case DV_DISK:
2434 		if (rootdev == NODEV)
2435 			panic("vfs_mountroot: rootdev not set for DV_DISK");
2436 		break;
2437 
2438 	default:
2439 		printf("%s: inappropriate for root file system\n",
2440 		    root_device->dv_xname);
2441 		return (ENODEV);
2442 	}
2443 
2444 	/*
2445 	 * If user specified a file system, use it.
2446 	 */
2447 	if (mountroot != NULL)
2448 		return ((*mountroot)());
2449 
2450 	/*
2451 	 * Try each file system currently configured into the kernel.
2452 	 */
2453 	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
2454 		if (v->vfs_mountroot == NULL)
2455 			continue;
2456 #ifdef DEBUG
2457 		printf("mountroot: trying %s...\n", v->vfs_name);
2458 #endif
2459 		if ((*v->vfs_mountroot)() == 0) {
2460 			printf("root file system type: %s\n", v->vfs_name);
2461 			break;
2462 		}
2463 	}
2464 
2465 	if (v == NULL) {
2466 		printf("no file system for %s", root_device->dv_xname);
2467 		if (root_device->dv_class == DV_DISK)
2468 			printf(" (dev 0x%x)", rootdev);
2469 		printf("\n");
2470 		return (EFTYPE);
2471 	}
2472 	return (0);
2473 }
2474 
2475 /*
2476  * Given a file system name, look up the vfsops for that
 * file system, or return NULL if the file system isn't present
2478  * in the kernel.
2479  */
2480 struct vfsops *
2481 vfs_getopsbyname(name)
2482 	const char *name;
2483 {
2484 	struct vfsops *v;
2485 
2486 	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
2487 		if (strcmp(v->vfs_name, name) == 0)
2488 			break;
2489 	}
2490 
2491 	return (v);
2492 }
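
/*
 * Sketch of a hypothetical caller (e.g. mount(2) handling): resolve a
 * user-supplied file system type name to its vfsops.
 */
#ifdef notdef
static struct vfsops *
example_resolve_fstype(fstypename)
	const char *fstypename;
{
	struct vfsops *v;

	if ((v = vfs_getopsbyname(fstypename)) == NULL)
		printf("%s: file system not configured\n", fstypename);
	return (v);
}
#endif /* notdef */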
2493 
2494 /*
2495  * Establish a file system and initialize it.
2496  */
2497 int
2498 vfs_attach(vfs)
2499 	struct vfsops *vfs;
2500 {
2501 	struct vfsops *v;
2502 	int error = 0;
2505 	/*
2506 	 * Make sure this file system doesn't already exist.
2507 	 */
2508 	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
2509 		if (strcmp(vfs->vfs_name, v->vfs_name) == 0) {
2510 			error = EEXIST;
2511 			goto out;
2512 		}
2513 	}
2514 
2515 	/*
2516 	 * Initialize the vnode operations for this file system.
2517 	 */
2518 	vfs_opv_init(vfs->vfs_opv_descs);
2519 
2520 	/*
2521 	 * Now initialize the file system itself.
2522 	 */
2523 	(*vfs->vfs_init)();
2524 
2525 	/*
2526 	 * ...and link it into the kernel's list.
2527 	 */
2528 	LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list);
2529 
2530 	/*
2531 	 * Sanity: make sure the reference count is 0.
2532 	 */
2533 	vfs->vfs_refcount = 0;
2534 
2535  out:
2536 	return (error);
2537 }
2538 
2539 /*
2540  * Remove a file system from the kernel.
2541  */
2542 int
2543 vfs_detach(vfs)
2544 	struct vfsops *vfs;
2545 {
2546 	struct vfsops *v;
2547 
2548 	/*
2549 	 * Make sure no one is using the filesystem.
2550 	 */
2551 	if (vfs->vfs_refcount != 0)
2552 		return (EBUSY);
2553 
2554 	/*
2555 	 * ...and remove it from the kernel's list.
2556 	 */
2557 	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
2558 		if (v == vfs) {
2559 			LIST_REMOVE(v, vfs_list);
2560 			break;
2561 		}
2562 	}
2563 
2564 	if (v == NULL)
2565 		return (ESRCH);
2566 
2567 	/*
2568 	 * Now run the file system-specific cleanups.
2569 	 */
2570 	(*vfs->vfs_done)();
2571 
2572 	/*
2573 	 * Free the vnode operations vector.
2574 	 */
2575 	vfs_opv_free(vfs->vfs_opv_descs);
2576 	return (0);
2577 }
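
/*
 * Sketch (hypothetical): a loadable file system module would register
 * itself with vfs_attach() at load time and undo the registration with
 * vfs_detach() at unload time; examplefs_vfsops is illustrative only.
 */
#ifdef notdef
extern struct vfsops examplefs_vfsops;

static int
examplefs_load()
{
	return (vfs_attach(&examplefs_vfsops));	/* EEXIST if already known */
}

static int
examplefs_unload()
{
	return (vfs_detach(&examplefs_vfsops));	/* EBUSY if still in use */
}
#endif /* notdef */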
2578 
2579 #ifdef DDB
2580 const char buf_flagbits[] =
2581 	"\20\1AGE\2NEEDCOMMIT\3ASYNC\4BAD\5BUSY\6CACHE\7CALL\10DELWRI"
2582 	"\11DIRTY\12DONE\13EINTR\14ERROR\15GATHERED\16INVAL\17LOCKED\20NOCACHE"
2583 	"\21PAGET\22PGIN\23PHYS\24RAW\25READ\26TAPE\27UAREA\30WANTED"
2584 	"\31WRITEINPROG\32XXX\33VFLUSH";
2585 
2586 void
2587 vfs_buf_print(bp, full, pr)
2588 	struct buf *bp;
2589 	int full;
2590 	void (*pr) __P((const char *, ...));
2591 {
2592 	char buf[1024];
2593 
2594 	(*pr)("  vp %p lblkno 0x%x blkno 0x%x dev 0x%x\n",
2595 		  bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_dev);
2596 
2597 	bitmask_snprintf(bp->b_flags, buf_flagbits, buf, sizeof(buf));
2598 	(*pr)("  error %d flags 0x%s\n", bp->b_error, buf);
2599 
2600 	(*pr)("  bufsize 0x%x bcount 0x%x resid 0x%x\n",
2601 		  bp->b_bufsize, bp->b_bcount, bp->b_resid);
2602 	(*pr)("  data %p saveaddr %p\n",
2603 		  bp->b_data, bp->b_saveaddr);
2604 	(*pr)("  iodone %p\n", bp->b_iodone);
2605 
2606 	(*pr)("  dirtyoff 0x%x dirtyend 0x%x validoff 0x%x validend 0x%x\n",
2607 		  bp->b_dirtyoff, bp->b_dirtyend,
2608 		  bp->b_validoff, bp->b_validend);
2609 
2610 	(*pr)("  rcred %p wcred %p\n", bp->b_rcred, bp->b_wcred);
2611 }
2612 
2613 
2614 const char vnode_flagbits[] =
2615 	"\20\1ROOT\2TEXT\3SYSTEM\4ISTTY\11XLOCK\12XWANT\13BWAIT\14ALIASED"
2616 	"\15DIROP\17DIRTY";
2617 
2618 const char *vnode_types[] = {
2619 	"VNON",
2620 	"VREG",
2621 	"VDIR",
2622 	"VBLK",
2623 	"VCHR",
2624 	"VLNK",
2625 	"VSOCK",
2626 	"VFIFO",
2627 	"VBAD",
2628 };
2629 
2630 const char *vnode_tags[] = {
2631 	"VT_NON",
2632 	"VT_UFS",
2633 	"VT_NFS",
2634 	"VT_MFS",
2635 	"VT_MSDOSFS",
2636 	"VT_LFS",
2637 	"VT_LOFS",
2638 	"VT_FDESC",
2639 	"VT_PORTAL",
2640 	"VT_NULL",
2641 	"VT_UMAP",
2642 	"VT_KERNFS",
2643 	"VT_PROCFS",
2644 	"VT_AFS",
2645 	"VT_ISOFS",
2646 	"VT_UNION",
2647 	"VT_ADOSFS",
2648 	"VT_EXT2FS",
2649 	"VT_CODA",
2650 	"VT_FILECORE",
2651 	"VT_NTFS",
2652 	"VT_VFS",
2653 	"VT_OVERLAY"
2654 };
2655 
2656 void
2657 vfs_vnode_print(vp, full, pr)
2658 	struct vnode *vp;
2659 	int full;
2660 	void (*pr) __P((const char *, ...));
2661 {
2662 	char buf[1024];
2663 
2664 	const char *vtype, *vtag;
2665 
2666 	uvm_object_printit(&vp->v_uvm.u_obj, full, pr);
2667 	bitmask_snprintf(vp->v_flag, vnode_flagbits, buf, sizeof(buf));
2668 	(*pr)("\nVNODE flags %s\n", buf);
2669 	(*pr)("nio %d size 0x%x wlist %s\n",
2670 	      vp->v_uvm.u_nio, vp->v_uvm.u_size,
2671 	      vp->v_uvm.u_wlist.le_next ? "YES" : "NO");
2672 
2673 	(*pr)("data %p usecount %d writecount %d holdcnt %d numoutput %d\n",
2674 	      vp->v_data, vp->v_usecount, vp->v_writecount,
2675 	      vp->v_holdcnt, vp->v_numoutput);
2676 
2677 	vtype = (vp->v_type >= 0 &&
2678 		 vp->v_type < sizeof(vnode_types) / sizeof(vnode_types[0])) ?
2679 		vnode_types[vp->v_type] : "UNKNOWN";
2680 	vtag = (vp->v_tag >= 0 &&
2681 		vp->v_tag < sizeof(vnode_tags) / sizeof(vnode_tags[0])) ?
2682 		vnode_tags[vp->v_tag] : "UNKNOWN";
2683 
2684 	(*pr)("type %s(%d) tag %s(%d) id 0x%x mount %p typedata %p\n",
2685 	      vtype, vp->v_type, vtag, vp->v_tag,
2686 	      vp->v_id, vp->v_mount, vp->v_mountedhere);
2687 	(*pr)("lastr 0x%x lastw 0x%x lasta 0x%x\n",
2688 	      vp->v_lastr, vp->v_lastw, vp->v_lasta);
2689 	(*pr)("cstart 0x%x clen 0x%x ralen 0x%x maxra 0x%x\n",
2690 	      vp->v_cstart, vp->v_clen, vp->v_ralen, vp->v_maxra);
2691 
2692 	if (full) {
2693 		struct buf *bp;
2694 
2695 		(*pr)("clean bufs:\n");
2696 		for (bp = LIST_FIRST(&vp->v_cleanblkhd);
2697 		     bp != NULL;
2698 		     bp = LIST_NEXT(bp, b_vnbufs)) {
2699 			vfs_buf_print(bp, full, pr);
2700 		}
2701 
2702 		(*pr)("dirty bufs:\n");
2703 		for (bp = LIST_FIRST(&vp->v_dirtyblkhd);
2704 		     bp != NULL;
2705 		     bp = LIST_NEXT(bp, b_vnbufs)) {
2706 			vfs_buf_print(bp, full, pr);
2707 		}
2708 	}
2709 }
2710 #endif
2711