/*	$NetBSD: vfs_subr.c,v 1.132 2000/07/04 15:33:32 jdolecek Exp $	*/

/*-
 * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * External virtual filesystem routines
 */

#include "opt_ddb.h"
#include "opt_compat_netbsd.h"
#include "opt_compat_43.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/syscallargs.h>
#include <sys/device.h>
#include <sys/dirent.h>

#include <miscfs/specfs/specdev.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/syncfs/syncfs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_ddb.h>

#include <sys/sysctl.h>

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};
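
/*
 * Editorial note (not part of the original source): these tables back
 * the IFTOVT()/VTTOIF() mapping macros in <sys/vnode.h>, e.g.:
 *
 *	IFTOVT(S_IFDIR | 0755) == iftovt_tab[(S_IFDIR & S_IFMT) >> 12]
 *			       == iftovt_tab[4] == VDIR
 *	VTTOIF(VREG)	       == vttoif_tab[VREG] == S_IFREG
 */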

int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */

extern int dovfsusermount;	/* 1 => permit any user to mount filesystems */

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
}
/* TAILQ_HEAD(freelst, vnode) vnode_free_list =	vnode free list (in vnode.h) */
struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);

struct mntlist mountlist =			/* mounted filesystem list */
    CIRCLEQ_HEAD_INITIALIZER(mountlist);
struct vfs_list_head vfs_list =			/* vfs list */
    LIST_HEAD_INITIALIZER(vfs_list);

struct nfs_public nfs_pub;			/* publicly exported FS */

struct simplelock mountlist_slock;
static struct simplelock mntid_slock;
struct simplelock mntvnode_slock;
struct simplelock vnode_free_list_slock;
struct simplelock spechash_slock;

/*
 * These define the root filesystem and device.
 */
struct mount *rootfs;
struct vnode *rootvnode;
struct device *root_device;			/* root device */

struct pool vnode_pool;				/* memory pool for vnodes */

/*
 * Local declarations.
 */
void insmntque __P((struct vnode *, struct mount *));
int getdevvp __P((dev_t, struct vnode **, enum vtype));
void vgoneall __P((struct vnode *));

static int vfs_hang_addrlist __P((struct mount *, struct netexport *,
				  struct export_args *));
static int vfs_free_netcred __P((struct radix_node *, void *));
static void vfs_free_addrlist __P((struct netexport *));

#ifdef DEBUG
void printlockedvnodes __P((void));
#endif

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	simple_lock_init(&vnode_free_list_slock);

	pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
	    0, pool_page_alloc_nointr, pool_page_free_nointr, M_VNODE);

	/*
	 * Initialize the filesystem syncer.
	 */
	vn_initialize_syncerd();
}

/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting. Interlock is not released on failure.
 */
int
vfs_busy(mp, flags, interlkp)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
{
	int lkflags;

	while (mp->mnt_flag & MNT_UNMOUNT) {
		int gone;

		if (flags & LK_NOWAIT)
			return (ENOENT);
		if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL
		    && mp->mnt_unmounter == curproc)
			return (EDEADLK);
		if (interlkp)
			simple_unlock(interlkp);
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 *
		 * XXX MP: add spinlock protecting mnt_wcnt here once you
		 * can atomically unlock-and-sleep.
		 */
		mp->mnt_wcnt++;
		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
		mp->mnt_wcnt--;
		gone = mp->mnt_flag & MNT_GONE;

		if (mp->mnt_wcnt == 0)
			wakeup(&mp->mnt_wcnt);
		if (interlkp)
			simple_lock(interlkp);
		if (gone)
			return (ENOENT);
	}
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem.
 */
void
vfs_unbusy(mp)
	struct mount *mp;
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
}
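
/*
 * Usage sketch (illustrative, mirrors printlockedvnodes() below):
 * callers walking the mount list hold mountlist_slock, let vfs_busy()
 * release it via LK_INTERLOCK on success, and pair with vfs_unbusy():
 *
 *	simple_lock(&mountlist_slock);
 *	if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock) == 0) {
 *		... examine mp; the interlock has been released ...
 *		vfs_unbusy(mp);
 *	}
 */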

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct vfsops *vfsp = NULL;
	struct mount *mp;

	for (vfsp = LIST_FIRST(&vfs_list); vfsp != NULL;
	     vfsp = LIST_NEXT(vfsp, vfs_list))
		if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
			break;

	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	memset((char *)mp, 0, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	(void)vfs_busy(mp, LK_NOWAIT, 0);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_op = vfsp;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfs_refcount++;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	struct mount *mp;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	     mp = mp->mnt_list.cqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *)0);
}
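
/*
 * Usage sketch (illustrative): the NFS server resolves a file handle's
 * filesystem id this way before translating the rest of the handle:
 *
 *	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
 *		return (ESTALE);
 */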

/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;
	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = makefstype(mp->mnt_op->vfs_name);
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev((nblkdev + mtype) & 0xff, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (mountlist.cqh_first != (void *)&mountlist) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Make a 'unique' number from a mount type name.
 */
long
makefstype(type)
	const char *type;
{
	long rv;

	for (rv = 0; *type; type++) {
		rv <<= 2;
		rv ^= *type;
	}
	return rv;
}
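
/*
 * Worked example (editorial): for the name "ffs" the loop computes
 *
 *	rv = (0x000 << 2) ^ 'f' (0x66) = 0x066
 *	rv = (0x066 << 2) ^ 'f' (0x66) = 0x1fe
 *	rv = (0x1fe << 2) ^ 's' (0x73) = 0x78b
 *
 * so makefstype("ffs") == 0x78b.  Distinct names can still collide,
 * which is why 'unique' is quoted above.
 */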


/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	struct vattr *vap;
{

	vap->va_type = VNON;

	/*
	 * Assign each member individually, so this stays correct even
	 * if the size and sign of the members vary.
	 */
	vap->va_mode = VNOVAL;
	vap->va_nlink = VNOVAL;
	vap->va_uid = VNOVAL;
	vap->va_gid = VNOVAL;
	vap->va_fsid = VNOVAL;
	vap->va_fileid = VNOVAL;
	vap->va_size = VNOVAL;
	vap->va_blocksize = VNOVAL;
	vap->va_atime.tv_sec =
	    vap->va_mtime.tv_sec =
	    vap->va_ctime.tv_sec = VNOVAL;
	vap->va_atime.tv_nsec =
	    vap->va_mtime.tv_nsec =
	    vap->va_ctime.tv_nsec = VNOVAL;
	vap->va_gen = VNOVAL;
	vap->va_flags = VNOVAL;
	vap->va_rdev = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_vaflags = 0;
}
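
/*
 * Usage sketch (illustrative): callers clear a vattr and then set only
 * the fields they mean to change, e.g. to truncate a file to zero:
 *
 *	struct vattr va;
 *
 *	vattr_null(&va);
 *	va.va_size = 0;
 *	error = VOP_SETATTR(vp, &va, cred, p);
 */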

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p) __P((void *));
long numvnodes;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops) __P((void *));
	struct vnode **vpp;
{
	struct proc *p = curproc;	/* XXX */
	struct freelst *listhd;
	static int toggle;
	struct vnode *vp;
	int error = 0;
#ifdef DIAGNOSTIC
	int s;
#endif
	if (mp) {
		/*
		 * Mark filesystem busy while we're creating a vnode.
		 * If unmount is in progress, this will wait; if the
		 * unmount succeeds (only if umount -f), this will
		 * return an error.  If the unmount fails, we'll keep
		 * going afterwards.
		 * (This puts the per-mount vnode list logically under
		 * the protection of the vfs_busy lock).
		 */
		error = vfs_busy(mp, LK_RECURSEFAIL, 0);
		if (error && error != EDEADLK)
			return error;
	}
	/*
	 * We must choose whether to allocate a new vnode or recycle an
	 * existing one. The criterion for allocating a new one is that
	 * the total number of vnodes is less than the number desired or
	 * there are no vnodes on either free list. Generally we only
	 * want to recycle vnodes that have no buffers associated with
	 * them, so we look first on the vnode_free_list. If it is empty,
	 * we next consider vnodes with referencing buffers on the
	 * vnode_hold_list. The toggle ensures that half the time we
	 * will recycle a vnode from the vnode_hold_list, and half the
	 * time we will allocate a new one unless the list has grown to
	 * twice the desired size. We are reluctant to recycle vnodes
	 * from the vnode_hold_list because doing so loses the identity
	 * of all their referencing buffers.
	 */
	toggle ^= 1;
	if (numvnodes > 2 * desiredvnodes)
		toggle = 0;

	simple_lock(&vnode_free_list_slock);
	if (numvnodes < desiredvnodes ||
	    (TAILQ_FIRST(listhd = &vnode_free_list) == NULL &&
	    (TAILQ_FIRST(listhd = &vnode_hold_list) == NULL || toggle))) {
		simple_unlock(&vnode_free_list_slock);
		vp = pool_get(&vnode_pool, PR_WAITOK);
		memset((char *)vp, 0, sizeof(*vp));
		simple_lock_init(&vp->v_interlock);
		numvnodes++;
	} else {
		for (vp = TAILQ_FIRST(listhd); vp != NULLVP;
		    vp = TAILQ_NEXT(vp, v_freelist)) {
			if (simple_lock_try(&vp->v_interlock)) {
				if ((vp->v_flag & VLAYER) == 0) {
					break;
				}
				if (VOP_ISLOCKED(vp) == 0)
					break;
				else
					simple_unlock(&vp->v_interlock);
			}
		}
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULLVP) {
			simple_unlock(&vnode_free_list_slock);
			if (mp && error != EDEADLK)
				vfs_unbusy(mp);
			tablefull("vnode", "increase kern.maxvnodes or NVNODE");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't, vp %p", vp);
		TAILQ_REMOVE(listhd, vp, v_freelist);
		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		simple_unlock(&vnode_free_list_slock);
		vp->v_lease = NULL;
		if (vp->v_type != VBAD)
			vgonel(vp, p);
		else
			simple_unlock(&vp->v_interlock);
#ifdef DIAGNOSTIC
		if (vp->v_data)
			panic("cleaned vnode isn't, vp %p", vp);
		s = splbio();
		if (vp->v_numoutput)
			panic("clean vnode has pending I/O's, vp %p", vp);
		splx(s);
#endif
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_ralen = 0;
		vp->v_maxra = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	simple_lock_init(&vp->v_uvm.u_obj.vmobjlock);
	if (mp && error != EDEADLK)
		vfs_unbusy(mp);
	return (0);
}

/*
 * This is really just the reverse of getnewvnode(). Needed for
 * VFS_VGET functions that may need to push back a vnode in case
 * of a locking race.
 */
void
ungetnewvnode(vp)
	struct vnode *vp;
{
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 1)
		panic("ungetnewvnode: busy vnode");
#endif
	vp->v_usecount--;
	insmntque(vp, NULL);
	vp->v_type = VBAD;

	simple_lock(&vp->v_interlock);
	/*
	 * Insert at head of LRU list
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_HEAD(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	simple_unlock(&vp->v_interlock);
}
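
/*
 * Usage sketch (hypothetical file system code; the tag, vnodeop vector
 * and race condition shown are placeholders): a VFS_VGET-style routine
 * allocates a vnode, and pushes it back if it loses a lookup race:
 *
 *	error = getnewvnode(VT_UFS, mp, ufs_vnodeop_p, &vp);
 *	if (error)
 *		return (error);
 *	if (lost_the_race) {
 *		ungetnewvnode(vp);
 *		goto retry;
 *	}
 *	vp->v_data = private_data;
 */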

/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
	struct vnode *vp;
	struct mount *mp;
{

#ifdef DIAGNOSTIC
	if ((mp != NULL) &&
	    (mp->mnt_flag & MNT_UNMOUNT) &&
	    !(mp->mnt_flag & MNT_SOFTDEP) &&
	    vp->v_tag != VT_VFS) {
		panic("insmntque into dying filesystem");
	}
#endif

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	struct buf *bp;
{
	struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if ((vp = bp->b_vp) != NULL) {
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput, vp %p", vp);
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	struct buf *bp, *nbp;
	int s, error;

	if (flags & V_SAVE) {
		error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, p);
		if (error)
			return (error);
#ifdef DIAGNOSTIC
		s = splbio();
		if (vp->v_numoutput > 0 || !LIST_EMPTY(&vp->v_dirtyblkhd))
			panic("vinvalbuf: dirty bufs, vp %p", vp);
		splx(s);
#endif
	}

	s = splbio();

restart:
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
			    "vinvalbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
			    "vinvalbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		/*
		 * XXX Since there are no node locks for NFS, I believe
		 * there is a slight chance that a delayed write will
		 * occur while sleeping just above, so check for it.
		 */
		if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
#ifdef DEBUG
			printf("buffer still DELWRI\n");
#endif
			bp->b_flags |= B_BUSY | B_VFLUSH;
			VOP_BWRITE(bp);
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

#ifdef DIAGNOSTIC
	if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
		panic("vinvalbuf: flush failed, vp %p", vp);
#endif

	splx(s);

	return (0);
}

/*
 * Destroy any in core blocks past the truncation length.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 */
int
vtruncbuf(vp, lbn, slpflag, slptimeo)
	struct vnode *vp;
	daddr_t lbn;
	int slpflag, slptimeo;
{
	struct buf *bp, *nbp;
	int s, error;

	s = splbio();

restart:
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
			    "vtruncbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
			    "vtruncbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

	splx(s);

	return (0);
}

void
vflushbuf(vp, sync)
	struct vnode *vp;
	int sync;
{
	struct buf *bp, *nbp;
	int s;

loop:
	s = splbio();
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_flags & B_BUSY)
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty, bp %p", bp);
		bp->b_flags |= B_BUSY | B_VFLUSH;
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0);
	}
	splx(s);
	if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	struct vnode *vp;
	struct buf *bp;
{
	int s;

	if (bp->b_vp)
		panic("bgetvp: not free, bp %p", bp);
	VHOLD(vp);
	s = splbio();
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
	splx(s);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	struct buf *bp;
{
	struct vnode *vp;
	int s;

	if (bp->b_vp == NULL)
		panic("brelvp: vp NULL, bp %p", bp);

	s = splbio();
	vp = bp->b_vp;
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	if ((vp->v_flag & VONWORKLST) && LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
		vp->v_flag &= ~VONWORKLST;
		LIST_REMOVE(vp, v_synclist);
	}
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
	splx(s);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 *
 * This function must be called at splbio().
 */
void
reassignbuf(bp, newvp)
	struct buf *bp;
	struct vnode *newvp;
{
	struct buflists *listheadp;
	int delay;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}

	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if ((bp->b_flags & B_DELWRI) == 0) {
		listheadp = &newvp->v_cleanblkhd;
		if ((newvp->v_flag & VONWORKLST) &&
		    LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) {
			newvp->v_flag &= ~VONWORKLST;
			LIST_REMOVE(newvp, v_synclist);
		}
	} else {
		listheadp = &newvp->v_dirtyblkhd;
		if ((newvp->v_flag & VONWORKLST) == 0) {
			switch (newvp->v_type) {
			case VDIR:
				delay = dirdelay;
				break;
			case VBLK:
				if (newvp->v_specmountpoint != NULL) {
					delay = metadelay;
					break;
				}
				/* fall through */
			default:
				delay = filedelay;
				break;
			}
			if (!newvp->v_mount ||
			    (newvp->v_mount->mnt_flag & MNT_ASYNC) == 0)
				vn_syncer_add_to_worklist(newvp, delay);
		}
	}
	bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VBLK));
}

/*
 * Create a vnode for a character device.
 * Used for kernfs and some console handling.
 */
int
cdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VCHR));
}
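
/*
 * Usage sketch (illustrative): machine-dependent root configuration
 * typically creates the root device vnode with
 *
 *	if (bdevvp(rootdev, &rootvp))
 *		panic("can't set up root vnode");
 *
 * and console setup uses cdevvp() in the same way.
 */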

/*
 * Create a vnode for a device.
 * Used by bdevvp (block device) for root file system etc.,
 * and by cdevvp (character device) for console and kernfs.
 */
int
getdevvp(dev, vpp, type)
	dev_t dev;
	struct vnode **vpp;
	enum vtype type;
{
	struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (0);
	}
	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = type;
	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;       /* XXX */
	struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
			sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specmountpoint = NULL;
		simple_unlock(&spechash_slock);
		nvp->v_speclockf = NULL;
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count, and lock it. If the vnode lock bit is set the
 * vnode is being eliminated in vgone. In that case, we cannot
 * grab the vnode; instead, the process is awakened when the
 * transition is completed, and an error is returned to indicate
 * that the vnode is no longer usable (possibly having been changed
 * to a new file system type).
 */
int
vget(vp, flags)
	struct vnode *vp;
	int flags;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */
	if ((flags & LK_INTERLOCK) == 0)
		simple_lock(&vp->v_interlock);
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (ENOENT);
	}
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		else
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vget", vp);
		panic("vget: usecount overflow, vp %p", vp);
	}
#endif
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags | LK_INTERLOCK))) {
			/*
			 * must expand vrele here because we do not want
			 * to call VOP_INACTIVE if the reference count
			 * drops back to zero since it was never really
			 * active. We must remove it from the free list
			 * before sleeping so that multiple processes do
			 * not try to recycle it.
			 */
			simple_lock(&vp->v_interlock);
			vp->v_usecount--;
			if (vp->v_usecount > 0) {
				simple_unlock(&vp->v_interlock);
				return (error);
			}
			/*
			 * insert at tail of LRU list
			 */
			simple_lock(&vnode_free_list_slock);
			if (vp->v_holdcnt > 0)
				TAILQ_INSERT_TAIL(&vnode_hold_list, vp,
				    v_freelist);
			else
				TAILQ_INSERT_TAIL(&vnode_free_list, vp,
				    v_freelist);
			simple_unlock(&vnode_free_list_slock);
			simple_unlock(&vp->v_interlock);
		}
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}
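
/*
 * Usage sketch (illustrative, mirrors the loop in checkalias() above):
 * hash-lookup code takes the interlock, then upgrades to a locked
 * reference, rescanning if the vnode died underneath it:
 *
 *	simple_lock(&vp->v_interlock);
 *	if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK))
 *		goto loop;
 */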

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		VOP_UNLOCK(vp, 0);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vput: bad ref count", vp);
		panic("vput: ref cnt");
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	simple_unlock(&vp->v_interlock);
	VOP_INACTIVE(vp, p);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
		VOP_INACTIVE(vp, p);
}
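
/*
 * Pairing note (editorial): vput() releases a locked reference, e.g.
 * one obtained with vget(vp, LK_EXCLUSIVE) or returned locked by a
 * lookup; vrele() releases an unlocked one:
 *
 *	if (vget(vp, LK_EXCLUSIVE) == 0) {
 *		... vp is locked and referenced ...
 *		vput(vp);
 *	}
 */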

#ifdef DIAGNOSTIC
/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	struct vnode *vp;
{

	/*
	 * If it is on the freelist and the hold count is currently
	 * zero, move it to the hold list. The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
	 */
	simple_lock(&vp->v_interlock);
	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_holdcnt++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt vp %p", vp);
	vp->v_holdcnt--;
	/*
	 * If it is on the holdlist and the hold count drops to
	 * zero, move it to the free list. The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
	 */
	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	simple_unlock(&vp->v_interlock);
}

/*
 * Vnode reference.
 */
void
vref(vp)
	struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount <= 0)
		panic("vref used where vget required, vp %p", vp);
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vref", vp);
		panic("vref: usecount overflow, vp %p", vp);
	}
#endif
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active vnodes;
 * an error is returned if any are found (nb: this is a user error, not
 * a system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DEBUG
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}
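
/*
 * Usage sketch (illustrative): a file system's unmount entry point
 * typically flushes its vnodes with
 *
 *	if (mntflags & MNT_FORCE)
 *		flags |= FORCECLOSE;
 *	if ((error = vflush(mp, NULLVP, flags)) != 0)
 *		return (error);
 *
 * passing its device vnode as skipvp instead of NULLVP when that
 * vnode must survive the flush.
 */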

/*
 * Disassociate the underlying file system from a vnode.
 */
void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount) != 0) {
		/* We have the vnode interlock. */
		vp->v_usecount++;
#ifdef DIAGNOSTIC
		if (vp->v_usecount == 0) {
			vprint("vclean", vp);
			panic("vclean: usecount overflow");
		}
#endif
	}

	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock, vp %p", vp);
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK);

	/*
	 * clean out any VM data associated with the vnode.
	 */
	uvm_vnp_terminate(vp);
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);

	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim, vp %p", vp);

	if (active) {
		/*
		 * Inline copy of vrele() since VOP_INACTIVE
		 * has already been called.
		 */
		simple_lock(&vp->v_interlock);
		if (--vp->v_usecount <= 0) {
#ifdef DIAGNOSTIC
			if (vp->v_usecount < 0 || vp->v_writecount != 0) {
				vprint("vclean: bad ref count", vp);
				panic("vclean: ref cnt");
			}
#endif
			/*
			 * Insert at tail of LRU list.
			 */
			simple_unlock(&vp->v_interlock);
			simple_lock(&vnode_free_list_slock);
#ifdef DIAGNOSTIC
			if (vp->v_vnlock) {
				if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
					vprint("vclean: lock not drained", vp);
			}
			if (vp->v_holdcnt > 0)
				panic("vclean: not clean, vp %p", vp);
#endif
			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
			simple_unlock(&vnode_free_list_slock);
		} else
			simple_unlock(&vp->v_interlock);
	}

	cache_purge(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp)
			simple_unlock(inter_lkp);
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If it is a special device, remove it from the special
	 * device alias list if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (vp->v_hashchain != NULL) {
			if (*vp->v_hashchain == vp) {
				*vp->v_hashchain = vp->v_specnext;
			} else {
				for (vq = *vp->v_hashchain; vq;
							vq = vq->v_specnext) {
					if (vq->v_specnext != vp)
						continue;
					vq->v_specnext = vp->v_specnext;
					break;
				}
				if (vq == NULL)
					panic("missing bdev");
			}
			if (vp->v_flag & VALIASED) {
				vx = NULL;
				for (vq = *vp->v_hashchain; vq;
							vq = vq->v_specnext) {
					if (vq->v_rdev != vp->v_rdev ||
					    vq->v_type != vp->v_type)
						continue;
					if (vx)
						break;
					vx = vq;
				}
				if (vx == NULL)
					panic("missing alias");
				if (vq == NULL)
					vx->v_flag &= ~VALIASED;
				vp->v_flag &= ~VALIASED;
			}
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			panic("vgonel: not clean, vp %p", vp);
		if (vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
		    TAILQ_FIRST(&vnode_free_list) != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Revoke all the vnodes corresponding to the specified minor number
 * range (endpoints inclusive) of the specified major.
 */
void
vdevgone(maj, minl, minh, type)
	int maj, minl, minh;
	enum vtype type;
{
	struct vnode *vp;
	int mn;

	for (mn = minl; mn <= minh; mn++)
		if (vfinddev(makedev(maj, mn), type, &vp))
			VOP_REVOKE(vp, REVOKEALL);
}
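
/*
 * Usage sketch (illustrative; bmaj/cmaj stand for a driver's block and
 * character majors): a device driver's detach routine revokes any
 * vnodes still referring to its unit:
 *
 *	vdevgone(bmaj, unit, unit, VBLK);
 *	vdevgone(cmaj, unit, unit, VCHR);
 */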

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
	char *label;
	struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("tag %d type %s, usecount %ld, writecount %ld, refcount %ld,",
	    vp->v_tag, typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);
}
#endif

extern const char *mountcompatnames[];
extern const int nmountcompatnames;

/*
 * Top level filesystem related information gathering.
 */
int
vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	struct vfsconf vfc;
#endif
	struct vfsops *vfsp;

	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */

	/* Not generic: goes to file system. */
	if (name[0] != VFS_GENERIC) {
		if (name[0] >= nmountcompatnames || name[0] < 0 ||
		    mountcompatnames[name[0]] == NULL)
			return (EOPNOTSUPP);
		vfsp = vfs_getopsbyname(mountcompatnames[name[0]]);
		if (vfsp == NULL || vfsp->vfs_sysctl == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}

	/* The rest are generic vfs sysctls. */
	switch (name[1]) {
	case VFS_USERMOUNT:
		return sysctl_int(oldp, oldlenp, newp, newlen, &dovfsusermount);
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	case VFS_MAXTYPENUM:
		/*
		 * Provided for 4.4BSD-Lite2 compatibility.
		 */
		return (sysctl_rdint(oldp, oldlenp, newp, nmountcompatnames));
	case VFS_CONF:
		/*
		 * Special: a node, next is a file system name.
		 * Provided for 4.4BSD-Lite2 compatibility.
		 */
		if (namelen < 3)
			return (ENOTDIR);	/* overloaded */
		if (name[2] >= nmountcompatnames || name[2] < 0 ||
		    mountcompatnames[name[2]] == NULL)
			return (EOPNOTSUPP);
		vfsp = vfs_getopsbyname(mountcompatnames[name[2]]);
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		vfc.vfc_vfsops = vfsp;
		strncpy(vfc.vfc_name, vfsp->vfs_name, MFSNAMELEN);
		vfc.vfc_typenum = name[2];
		vfc.vfc_refcount = vfsp->vfs_refcount;
		vfc.vfc_flags = 0;
		vfc.vfc_mountroot = vfsp->vfs_mountroot;
		vfc.vfc_next = NULL;
		return (sysctl_rdstruct(oldp, oldlenp, newp, &vfc,
		    sizeof(struct vfsconf)));
#endif
	default:
		break;
	}
	return (EOPNOTSUPP);
}
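
/*
 * Example (editorial): the VFS_USERMOUNT case above is what userland
 * reaches with
 *
 *	sysctl -w vfs.generic.usermount=1
 *
 * toggling dovfsusermount.
 */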

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep, p)
	char *where;
	size_t *sizep;
	struct proc *p;
{
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof(struct vnode *)
#define VNODESZ	sizeof(struct vnode)
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		savebp = bp;
again:
		simple_lock(&mntvnode_slock);
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			nvp = vp->v_mntvnodes.le_next;
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				simple_unlock(&mntvnode_slock);
				*sizep = bp - where;
				return (ENOMEM);
			}
			simple_unlock(&mntvnode_slock);
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			   (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);

	*sizep = bp - where;
	return (0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	struct vnode *vp;
{
	struct vnode *vq;
	int error = 0;

	if (vp->v_specmountpoint != NULL)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specmountpoint != NULL) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}
1983 
1984 /*
1985  * Build hash lists of net addresses and hang them off the mount point.
1986  * Called by vfs_export() to set up the lists of export addresses.
1987  */
1988 static int
1989 vfs_hang_addrlist(mp, nep, argp)
1990 	struct mount *mp;
1991 	struct netexport *nep;
1992 	struct export_args *argp;
1993 {
1994 	struct netcred *np, *enp;
1995 	struct radix_node_head *rnh;
1996 	int i;
1997 	struct radix_node *rn;
1998 	struct sockaddr *saddr, *smask = 0;
1999 	struct domain *dom;
2000 	int error;
2001 
2002 	if (argp->ex_addrlen == 0) {
2003 		if (mp->mnt_flag & MNT_DEFEXPORTED)
2004 			return (EPERM);
2005 		np = &nep->ne_defexported;
2006 		np->netc_exflags = argp->ex_flags;
2007 		np->netc_anon = argp->ex_anon;
2008 		np->netc_anon.cr_ref = 1;
2009 		mp->mnt_flag |= MNT_DEFEXPORTED;
2010 		return (0);
2011 	}
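	/*
	 * A single allocation below holds the netcred followed by the
	 * export address and (optionally) its mask, copied in from
	 * userland:
	 *
	 *	+----------------+----------------------+-------------------+
	 *	| struct netcred | address (ex_addrlen) | mask (ex_masklen) |
	 *	+----------------+----------------------+-------------------+
	 */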
2012 	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
2013 	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
2014 	memset((caddr_t)np, 0, i);
2015 	saddr = (struct sockaddr *)(np + 1);
2016 	error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
2017 	if (error)
2018 		goto out;
2019 	if (saddr->sa_len > argp->ex_addrlen)
2020 		saddr->sa_len = argp->ex_addrlen;
2021 	if (argp->ex_masklen) {
2022 		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
2023 		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
2024 		if (error)
2025 			goto out;
2026 		if (smask->sa_len > argp->ex_masklen)
2027 			smask->sa_len = argp->ex_masklen;
2028 	}
2029 	i = saddr->sa_family;
2030 	if ((rnh = nep->ne_rtable[i]) == 0) {
2031 		/*
2032 		 * It seems silly to initialize every address family when
2033 		 * most are not used; attach the radix table on demand here.
2034 		 */
2035 		for (dom = domains; dom; dom = dom->dom_next)
2036 			if (dom->dom_family == i && dom->dom_rtattach) {
2037 				dom->dom_rtattach((void **)&nep->ne_rtable[i],
2038 					dom->dom_rtoffset);
2039 				break;
2040 			}
2041 		if ((rnh = nep->ne_rtable[i]) == 0) {
2042 			error = ENOBUFS;
2043 			goto out;
2044 		}
2045 	}
2046 	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
2047 		np->netc_rnodes);
2048 	if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
2049 		if (rn == 0) {
2050 			enp = (struct netcred *)(*rnh->rnh_lookup)(saddr,
2051 				smask, rnh);
2052 			if (enp == 0) {
2053 				error = EPERM;
2054 				goto out;
2055 			}
2056 		} else
2057 			enp = (struct netcred *)rn;
2058 
2059 		if (enp->netc_exflags != argp->ex_flags ||
2060 		    enp->netc_anon.cr_uid != argp->ex_anon.cr_uid ||
2061 		    enp->netc_anon.cr_gid != argp->ex_anon.cr_gid ||
2062 		    enp->netc_anon.cr_ngroups != argp->ex_anon.cr_ngroups ||
2063 		    memcmp(&enp->netc_anon.cr_groups, &argp->ex_anon.cr_groups,
2064 			enp->netc_anon.cr_ngroups * sizeof(gid_t)))
2065 				error = EPERM;
2066 		else
2067 			error = 0;
2068 		goto out;
2069 	}
2070 	np->netc_exflags = argp->ex_flags;
2071 	np->netc_anon = argp->ex_anon;
2072 	np->netc_anon.cr_ref = 1;
2073 	return (0);
2074 out:
2075 	free(np, M_NETADDR);
2076 	return (error);
2077 }
2078 
2079 /* ARGSUSED */
2080 static int
2081 vfs_free_netcred(rn, w)
2082 	struct radix_node *rn;
2083 	void *w;
2084 {
2085 	struct radix_node_head *rnh = (struct radix_node_head *)w;
2086 
2087 	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
2088 	free((caddr_t)rn, M_NETADDR);
2089 	return (0);
2090 }
2091 
2092 /*
2093  * Free the net address hash lists that are hanging off the mount points.
2094  */
2095 static void
2096 vfs_free_addrlist(nep)
2097 	struct netexport *nep;
2098 {
2099 	int i;
2100 	struct radix_node_head *rnh;
2101 
2102 	for (i = 0; i <= AF_MAX; i++)
2103 		if ((rnh = nep->ne_rtable[i]) != NULL) {
2104 			(*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
2105 			free((caddr_t)rnh, M_RTABLE);
2106 			nep->ne_rtable[i] = 0;
2107 		}
2108 }
2109 
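/*
 * Update the export list of a mount point according to argp->ex_flags:
 * MNT_DELEXPORT tears down the existing exports (including the WebNFS
 * public filesystem, if set); MNT_EXPORTED installs a new entry.
 *
 * (A hypothetical caller sketch, names illustrative only -- a file
 * system's mount routine handling an export update might do:
 *
 *	if (args.fspec == NULL)
 *		return (vfs_export(mp, &ump->um_export, &args.export));
 * )
 */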
2110 int
2111 vfs_export(mp, nep, argp)
2112 	struct mount *mp;
2113 	struct netexport *nep;
2114 	struct export_args *argp;
2115 {
2116 	int error;
2117 
2118 	if (argp->ex_flags & MNT_DELEXPORT) {
2119 		if (mp->mnt_flag & MNT_EXPUBLIC) {
2120 			vfs_setpublicfs(NULL, NULL, NULL);
2121 			mp->mnt_flag &= ~MNT_EXPUBLIC;
2122 		}
2123 		vfs_free_addrlist(nep);
2124 		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
2125 	}
2126 	if (argp->ex_flags & MNT_EXPORTED) {
2127 		if (argp->ex_flags & MNT_EXPUBLIC) {
2128 			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
2129 				return (error);
2130 			mp->mnt_flag |= MNT_EXPUBLIC;
2131 		}
2132 		if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
2133 			return (error);
2134 		mp->mnt_flag |= MNT_EXPORTED;
2135 	}
2136 	return (0);
2137 }
2138 
2139 /*
2140  * Set the publicly exported filesystem (WebNFS).  The specification
2141  * (RFC 2054 and RFC 2055) currently allows only one public filesystem.
2142  */
2143 int
2144 vfs_setpublicfs(mp, nep, argp)
2145 	struct mount *mp;
2146 	struct netexport *nep;
2147 	struct export_args *argp;
2148 {
2149 	int error;
2150 	struct vnode *rvp;
2151 	char *cp;
2152 
2153 	/*
2154 	 * mp == NULL means invalidate the current info; the FS is
2155 	 * no longer exported.  May be called from either vfs_export()
2156 	 * or unmount, so check whether it has already been done.
2157 	 */
2158 	if (mp == NULL) {
2159 		if (nfs_pub.np_valid) {
2160 			nfs_pub.np_valid = 0;
2161 			if (nfs_pub.np_index != NULL) {
2162 				FREE(nfs_pub.np_index, M_TEMP);
2163 				nfs_pub.np_index = NULL;
2164 			}
2165 		}
2166 		return (0);
2167 	}
2168 
2169 	/*
2170 	 * Only one allowed at a time.
2171 	 */
2172 	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
2173 		return (EBUSY);
2174 
2175 	/*
2176 	 * Get real filehandle for root of exported FS.
2177 	 */
2178 	memset((caddr_t)&nfs_pub.np_handle, 0, sizeof(nfs_pub.np_handle));
2179 	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;
2180 
2181 	if ((error = VFS_ROOT(mp, &rvp)))
2182 		return (error);
2183 
2184 	if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
2185 		return (error);
2186 
2187 	vput(rvp);
2188 
2189 	/*
2190 	 * If an indexfile was specified, pull it in.
2191 	 */
2192 	if (argp->ex_indexfile != NULL) {
2193 		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
2194 		    M_WAITOK);
2195 		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
2196 		    MAXNAMLEN, (size_t *)0);
2197 		if (!error) {
2198 			/*
2199 			 * Check for illegal filenames.
2200 			 */
2201 			for (cp = nfs_pub.np_index; *cp; cp++) {
2202 				if (*cp == '/') {
2203 					error = EINVAL;
2204 					break;
2205 				}
2206 			}
2207 		}
2208 		if (error) {
2209 			FREE(nfs_pub.np_index, M_TEMP);
2210 			return (error);
2211 		}
2212 	}
2213 
2214 	nfs_pub.np_mount = mp;
2215 	nfs_pub.np_valid = 1;
2216 	return (0);
2217 }
2218 
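/*
 * Look up the export permissions (netcred) that apply to the client
 * address in the mbuf `nam' on the given mount point.  Fall back to
 * the default export entry if one exists; return NULL if the client
 * has no access.
 */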
2219 struct netcred *
2220 vfs_export_lookup(mp, nep, nam)
2221 	struct mount *mp;
2222 	struct netexport *nep;
2223 	struct mbuf *nam;
2224 {
2225 	struct netcred *np;
2226 	struct radix_node_head *rnh;
2227 	struct sockaddr *saddr;
2228 
2229 	np = NULL;
2230 	if (mp->mnt_flag & MNT_EXPORTED) {
2231 		/*
2232 		 * Look up the address in the export list first.
2233 		 */
2234 		if (nam != NULL) {
2235 			saddr = mtod(nam, struct sockaddr *);
2236 			rnh = nep->ne_rtable[saddr->sa_family];
2237 			if (rnh != NULL) {
2238 				np = (struct netcred *)
2239 					(*rnh->rnh_matchaddr)((caddr_t)saddr,
2240 							      rnh);
2241 				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
2242 					np = NULL;
2243 			}
2244 		}
2245 		/*
2246 		 * If no address matched, use the default if it exists.
2247 		 */
2248 		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
2249 			np = &nep->ne_defexported;
2250 	}
2251 	return (np);
2252 }
2253 
2254 /*
2255  * Do the usual access checking.
2256  * file_mode, uid, and gid are from the vnode in question,
2257  * while acc_mode and cred are from the VOP_ACCESS parameter list.
2258  */
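/*
 * (A worked example: for a regular file with file_mode 0754 whose owner
 * requests VREAD|VWRITE, mask becomes S_IRUSR|S_IWUSR (0600) and
 * (0754 & 0600) == 0600, so access is granted; a VWRITE request from a
 * mere group member builds mask S_IWGRP (0020), and (0754 & 0020) != 0020,
 * so EACCES is returned.)
 */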
2259 int
2260 vaccess(type, file_mode, uid, gid, acc_mode, cred)
2261 	enum vtype type;
2262 	mode_t file_mode;
2263 	uid_t uid;
2264 	gid_t gid;
2265 	mode_t acc_mode;
2266 	struct ucred *cred;
2267 {
2268 	mode_t mask;
2269 
2270 	/*
2271 	 * The super-user always gets read/write access; execute access
2272 	 * requires at least one execute bit to be set, except on directories.
2273 	 */
2274 	if (cred->cr_uid == 0) {
2275 		if ((acc_mode & VEXEC) && type != VDIR &&
2276 		    (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
2277 			return (EACCES);
2278 		return (0);
2279 	}
2280 
2281 	mask = 0;
2282 
2283 	/* Otherwise, check the owner. */
2284 	if (cred->cr_uid == uid) {
2285 		if (acc_mode & VEXEC)
2286 			mask |= S_IXUSR;
2287 		if (acc_mode & VREAD)
2288 			mask |= S_IRUSR;
2289 		if (acc_mode & VWRITE)
2290 			mask |= S_IWUSR;
2291 		return ((file_mode & mask) == mask ? 0 : EACCES);
2292 	}
2293 
2294 	/* Otherwise, check the groups. */
2295 	if (cred->cr_gid == gid || groupmember(gid, cred)) {
2296 		if (acc_mode & VEXEC)
2297 			mask |= S_IXGRP;
2298 		if (acc_mode & VREAD)
2299 			mask |= S_IRGRP;
2300 		if (acc_mode & VWRITE)
2301 			mask |= S_IWGRP;
2302 		return ((file_mode & mask) == mask ? 0 : EACCES);
2303 	}
2304 
2305 	/* Otherwise, check everyone else. */
2306 	if (acc_mode & VEXEC)
2307 		mask |= S_IXOTH;
2308 	if (acc_mode & VREAD)
2309 		mask |= S_IROTH;
2310 	if (acc_mode & VWRITE)
2311 		mask |= S_IWOTH;
2312 	return ((file_mode & mask) == mask ? 0 : EACCES);
2313 }
2314 
2315 /*
2316  * Unmount all file systems.
2317  * Traverse the list in reverse order so that recently mounted file
2318  * systems (possibly stacked on earlier ones) are unmounted first.
2319  */
2320 void
2321 vfs_unmountall(p)
2322 	struct proc *p;
2323 {
2324 	struct mount *mp, *nmp;
2325 	int allerror, error;
2326 
2327 	for (allerror = 0,
2328 	     mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
2329 		nmp = mp->mnt_list.cqe_prev;
2330 #ifdef DEBUG
2331 		printf("unmounting %s (%s)...\n",
2332 		    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
2333 #endif
2334 		if (vfs_busy(mp, 0, 0))
2335 			continue;
2336 		if ((error = dounmount(mp, MNT_FORCE, p)) != 0) {
2337 			printf("unmount of %s failed with error %d\n",
2338 			    mp->mnt_stat.f_mntonname, error);
2339 			allerror = 1;
2340 		}
2341 	}
2342 	if (allerror)
2343 		printf("WARNING: some file systems would not unmount\n");
2344 }
2345 
2346 /*
2347  * Sync and unmount file systems before shutting down.
2348  */
2349 void
2350 vfs_shutdown()
2351 {
2352 	struct buf *bp;
2353 	int iter, nbusy, dcount, s;
2354 	struct proc *p = curproc;
2355 
2356 	/* XXX we're certainly not running in proc0's context! */
2357 	if (p == NULL)
2358 		p = &proc0;
2359 
2360 	printf("syncing disks... ");
2361 
2362 	/* XXX Should suspend scheduling. */
2363 	(void) spl0();
2364 
2365 	/* avoid coming back this way again if we panic. */
2366 	doing_shutdown = 1;
2367 
2368 	sys_sync(p, (void *)0, (register_t *)0);
2369 
2370 	/* Wait for sync to finish. */
2371 	dcount = 10000;
2372 	for (iter = 0; iter < 20; iter++) {
2373 		nbusy = 0;
2374 		for (bp = &buf[nbuf]; --bp >= buf; ) {
2375 			if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY)
2376 				nbusy++;
2377 			/*
2378 			 * With soft updates, some buffers that have been
2379 			 * written will be re-marked dirty until other
2380 			 * buffers are written.
2381 			 */
2382 			if (bp->b_vp && bp->b_vp->v_mount
2383 			    && (bp->b_vp->v_mount->mnt_flag & MNT_SOFTDEP)
2384 			    && (bp->b_flags & B_DELWRI)) {
2385 				s = splbio();
2386 				bremfree(bp);
2387 				bp->b_flags |= B_BUSY;
2388 				splx(s);
2389 				nbusy++;
2390 				bawrite(bp);
2391 				if (dcount-- <= 0) {
2392 					printf("softdep ");
2393 					goto fail;
2394 				}
2395 			}
2396 		}
2397 		if (nbusy == 0)
2398 			break;
2399 		printf("%d ", nbusy);
2400 		DELAY(40000 * iter);
2401 	}
2402 	if (nbusy) {
2403 fail:
2404 #if defined(DEBUG) || defined(DEBUG_HALT_BUSY)
2405 		printf("giving up\nPrinting vnodes for busy buffers\n");
2406 		for (bp = &buf[nbuf]; --bp >= buf; )
2407 			if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY)
2408 				vprint(NULL, bp->b_vp);
2409 
2410 #if defined(DDB) && defined(DEBUG_HALT_BUSY)
2411 		Debugger();
2412 #endif
2413 
2414 #else  /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */
2415 		printf("giving up\n");
2416 #endif /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */
2417 		return;
2418 	} else
2419 		printf("done\n");
2420 
2421 	/*
2422 	 * If we've panic'd, don't make the situation potentially
2423 	 * worse by unmounting the file systems.
2424 	 */
2425 	if (panicstr != NULL)
2426 		return;
2427 
2428 	/* Release inodes held by texts before update. */
2429 #ifdef notdef
2430 	vnshutdown();
2431 #endif
2432 	/* Unmount file systems. */
2433 	vfs_unmountall(p);
2434 }
2435 
2436 /*
2437  * Mount the root file system.  If the operator didn't specify a
2438  * file system to use, try all possible file systems until one
2439  * succeeds.
2440  */
2441 int
2442 vfs_mountroot()
2443 {
2444 	extern int (*mountroot) __P((void));
2445 	struct vfsops *v;
2446 
2447 	if (root_device == NULL)
2448 		panic("vfs_mountroot: root device unknown");
2449 
2450 	switch (root_device->dv_class) {
2451 	case DV_IFNET:
2452 		if (rootdev != NODEV)
2453 			panic("vfs_mountroot: rootdev set for DV_IFNET");
2454 		break;
2455 
2456 	case DV_DISK:
2457 		if (rootdev == NODEV)
2458 			panic("vfs_mountroot: rootdev not set for DV_DISK");
2459 		break;
2460 
2461 	default:
2462 		printf("%s: inappropriate for root file system\n",
2463 		    root_device->dv_xname);
2464 		return (ENODEV);
2465 	}
2466 
2467 	/*
2468 	 * If the operator specified a file system, use it.
2469 	 */
2470 	if (mountroot != NULL)
2471 		return ((*mountroot)());
2472 
2473 	/*
2474 	 * Try each file system currently configured into the kernel.
2475 	 */
2476 	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
2477 		if (v->vfs_mountroot == NULL)
2478 			continue;
2479 #ifdef DEBUG
2480 		printf("mountroot: trying %s...\n", v->vfs_name);
2481 #endif
2482 		if ((*v->vfs_mountroot)() == 0) {
2483 			printf("root file system type: %s\n", v->vfs_name);
2484 			break;
2485 		}
2486 	}
2487 
2488 	if (v == NULL) {
2489 		printf("no file system for %s", root_device->dv_xname);
2490 		if (root_device->dv_class == DV_DISK)
2491 			printf(" (dev 0x%x)", rootdev);
2492 		printf("\n");
2493 		return (EFTYPE);
2494 	}
2495 	return (0);
2496 }
2497 
2498 /*
2499  * Given a file system name, look up the vfsops for that
2500  * file system, or return NULL if the file system isn't
2501  * configured into the kernel.
2502  */
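/*
 * (E.g., vfs_getopsbyname("nfs") yields the NFS vfsops when that file
 * system is configured into the kernel, and NULL otherwise.)
 */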
2503 struct vfsops *
2504 vfs_getopsbyname(name)
2505 	const char *name;
2506 {
2507 	struct vfsops *v;
2508 
2509 	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
2510 		if (strcmp(v->vfs_name, name) == 0)
2511 			break;
2512 	}
2513 
2514 	return (v);
2515 }
2516 
2517 /*
2518  * Establish a file system and initialize it.
2519  */
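/*
 * (A hypothetical caller sketch -- e.g. a loadable module registering a
 * file system; the "myfs" names are illustrative only:
 *
 *	extern struct vfsops myfs_vfsops;
 *
 *	if ((error = vfs_attach(&myfs_vfsops)) != 0)
 *		return (error);
 *
 * The module's unload path would undo this with vfs_detach().)
 */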
2520 int
2521 vfs_attach(vfs)
2522 	struct vfsops *vfs;
2523 {
2524 	struct vfsops *v;
2525 	int error = 0;
2526 
2527 
2528 	/*
2529 	 * Make sure this file system doesn't already exist.
2530 	 */
2531 	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
2532 		if (strcmp(vfs->vfs_name, v->vfs_name) == 0) {
2533 			error = EEXIST;
2534 			goto out;
2535 		}
2536 	}
2537 
2538 	/*
2539 	 * Initialize the vnode operations for this file system.
2540 	 */
2541 	vfs_opv_init(vfs->vfs_opv_descs);
2542 
2543 	/*
2544 	 * Now initialize the file system itself.
2545 	 */
2546 	(*vfs->vfs_init)();
2547 
2548 	/*
2549 	 * ...and link it into the kernel's list.
2550 	 */
2551 	LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list);
2552 
2553 	/*
2554 	 * Sanity: make sure the reference count is 0.
2555 	 */
2556 	vfs->vfs_refcount = 0;
2557 
2558  out:
2559 	return (error);
2560 }
2561 
2562 /*
2563  * Remove a file system from the kernel.
2564  */
2565 int
2566 vfs_detach(vfs)
2567 	struct vfsops *vfs;
2568 {
2569 	struct vfsops *v;
2570 
2571 	/*
2572 	 * Make sure no one is using the filesystem.
2573 	 */
2574 	if (vfs->vfs_refcount != 0)
2575 		return (EBUSY);
2576 
2577 	/*
2578 	 * ...and remove it from the kernel's list.
2579 	 */
2580 	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
2581 		if (v == vfs) {
2582 			LIST_REMOVE(v, vfs_list);
2583 			break;
2584 		}
2585 	}
2586 
2587 	if (v == NULL)
2588 		return (ESRCH);
2589 
2590 	/*
2591 	 * Now run the file system-specific cleanups.
2592 	 */
2593 	(*vfs->vfs_done)();
2594 
2595 	/*
2596 	 * Free the vnode operations vector.
2597 	 */
2598 	vfs_opv_free(vfs->vfs_opv_descs);
2599 	return (0);
2600 }
2601 
2602 #ifdef DDB
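/*
 * The flag-name tables below use the historic kernel "%b" encoding
 * consumed by bitmask_snprintf(): the first character is the output
 * radix ('\20' == 020 == hexadecimal) and each subsequent entry is a
 * bit number (origin 1) followed by the name to print when that bit
 * is set.  (E.g., a b_flags word with B_BUSY and B_READ set would
 * render roughly as the value in hex followed by "<BUSY,READ>"; the
 * exact rendering is bitmask_snprintf()'s business.)
 */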
2603 const char buf_flagbits[] =
2604 	"\20\1AGE\2NEEDCOMMIT\3ASYNC\4BAD\5BUSY\6CACHE\7CALL\10DELWRI"
2605 	"\11DIRTY\12DONE\13EINTR\14ERROR\15GATHERED\16INVAL\17LOCKED\20NOCACHE"
2606 	"\21PAGET\22PGIN\23PHYS\24RAW\25READ\26TAPE\27UAREA\30WANTED"
2607 	"\31WRITEINPROG\32XXX\33VFLUSH";
2608 
2609 void
2610 vfs_buf_print(bp, full, pr)
2611 	struct buf *bp;
2612 	int full;
2613 	void (*pr) __P((const char *, ...));
2614 {
2615 	char buf[1024];
2616 
2617 	(*pr)("  vp %p lblkno 0x%x blkno 0x%x dev 0x%x\n",
2618 		  bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_dev);
2619 
2620 	bitmask_snprintf(bp->b_flags, buf_flagbits, buf, sizeof(buf));
2621 	(*pr)("  error %d flags 0x%s\n", bp->b_error, buf);
2622 
2623 	(*pr)("  bufsize 0x%x bcount 0x%x resid 0x%x\n",
2624 		  bp->b_bufsize, bp->b_bcount, bp->b_resid);
2625 	(*pr)("  data %p saveaddr %p\n",
2626 		  bp->b_data, bp->b_saveaddr);
2627 	(*pr)("  iodone %p\n", bp->b_iodone);
2628 
2629 	(*pr)("  dirtyoff 0x%x dirtyend 0x%x validoff 0x%x validend 0x%x\n",
2630 		  bp->b_dirtyoff, bp->b_dirtyend,
2631 		  bp->b_validoff, bp->b_validend);
2632 
2633 	(*pr)("  rcred %p wcred %p\n", bp->b_rcred, bp->b_wcred);
2634 }
2635 
2636 
2637 const char vnode_flagbits[] =
2638 	"\20\1ROOT\2TEXT\3SYSTEM\4ISTTY\11XLOCK\12XWANT\13BWAIT\14ALIASED"
2639 	"\15DIROP\17DIRTY";
2640 
2641 const char *vnode_types[] = {
2642 	"VNON",
2643 	"VREG",
2644 	"VDIR",
2645 	"VBLK",
2646 	"VCHR",
2647 	"VLNK",
2648 	"VSOCK",
2649 	"VFIFO",
2650 	"VBAD",
2651 };
2652 
2653 const char *vnode_tags[] = {
2654 	"VT_NON",
2655 	"VT_UFS",
2656 	"VT_NFS",
2657 	"VT_MFS",
2658 	"VT_MSDOSFS",
2659 	"VT_LFS",
2660 	"VT_LOFS",
2661 	"VT_FDESC",
2662 	"VT_PORTAL",
2663 	"VT_NULL",
2664 	"VT_UMAP",
2665 	"VT_KERNFS",
2666 	"VT_PROCFS",
2667 	"VT_AFS",
2668 	"VT_ISOFS",
2669 	"VT_UNION",
2670 	"VT_ADOSFS",
2671 	"VT_EXT2FS",
2672 	"VT_CODA",
2673 	"VT_FILECORE",
2674 	"VT_NTFS",
2675 	"VT_VFS",
2676 	"VT_OVERLAY"
2677 };
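/*
 * (Both tables above are indexed directly by the corresponding enums,
 * enum vtype and enum vtagtype from <sys/vnode.h>, so their order must
 * match the enum declarations.)
 */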
2678 
2679 void
2680 vfs_vnode_print(vp, full, pr)
2681 	struct vnode *vp;
2682 	int full;
2683 	void (*pr) __P((const char *, ...));
2684 {
2685 	char buf[1024];
2686 
2687 	const char *vtype, *vtag;
2688 
2689 	uvm_object_printit(&vp->v_uvm.u_obj, full, pr);
2690 	bitmask_snprintf(vp->v_flag, vnode_flagbits, buf, sizeof(buf));
2691 	(*pr)("\nVNODE flags %s\n", buf);
2692 	(*pr)("nio %d size 0x%x wlist %s\n",
2693 	      vp->v_uvm.u_nio, vp->v_uvm.u_size,
2694 	      vp->v_uvm.u_wlist.le_next ? "YES" : "NO");
2695 
2696 	(*pr)("data %p usecount %d writecount %d holdcnt %d numoutput %d\n",
2697 	      vp->v_data, vp->v_usecount, vp->v_writecount,
2698 	      vp->v_holdcnt, vp->v_numoutput);
2699 
2700 	vtype = (vp->v_type >= 0 &&
2701 		 vp->v_type < sizeof(vnode_types) / sizeof(vnode_types[0])) ?
2702 		vnode_types[vp->v_type] : "UNKNOWN";
2703 	vtag = (vp->v_tag >= 0 &&
2704 		vp->v_tag < sizeof(vnode_tags) / sizeof(vnode_tags[0])) ?
2705 		vnode_tags[vp->v_tag] : "UNKNOWN";
2706 
2707 	(*pr)("type %s(%d) tag %s(%d) id 0x%x mount %p typedata %p\n",
2708 	      vtype, vp->v_type, vtag, vp->v_tag,
2709 	      vp->v_id, vp->v_mount, vp->v_mountedhere);
2710 	(*pr)("lastr 0x%x lastw 0x%x lasta 0x%x\n",
2711 	      vp->v_lastr, vp->v_lastw, vp->v_lasta);
2712 	(*pr)("cstart 0x%x clen 0x%x ralen 0x%x maxra 0x%x\n",
2713 	      vp->v_cstart, vp->v_clen, vp->v_ralen, vp->v_maxra);
2714 
2715 	if (full) {
2716 		struct buf *bp;
2717 
2718 		(*pr)("clean bufs:\n");
2719 		for (bp = LIST_FIRST(&vp->v_cleanblkhd);
2720 		     bp != NULL;
2721 		     bp = LIST_NEXT(bp, b_vnbufs)) {
2722 			vfs_buf_print(bp, full, pr);
2723 		}
2724 
2725 		(*pr)("dirty bufs:\n");
2726 		for (bp = LIST_FIRST(&vp->v_dirtyblkhd);
2727 		     bp != NULL;
2728 		     bp = LIST_NEXT(bp, b_vnbufs)) {
2729 			vfs_buf_print(bp, full, pr);
2730 		}
2731 	}
2732 }
2733 #endif
2734