xref: /netbsd-src/sys/kern/vfs_subr.c (revision 93086a3d8d0057acb6d878df423458ce6cbc1fc5)
1 /*	$NetBSD: vfs_subr.c,v 1.235 2004/09/22 11:47:23 lukem Exp $	*/
2 
3 /*-
4  * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9  * NASA Ames Research Center.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by the NetBSD
22  *	Foundation, Inc. and its contributors.
23  * 4. Neither the name of The NetBSD Foundation nor the names of its
24  *    contributors may be used to endorse or promote products derived
25  *    from this software without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37  * POSSIBILITY OF SUCH DAMAGE.
38  */
39 
40 /*
41  * Copyright (c) 1989, 1993
42  *	The Regents of the University of California.  All rights reserved.
43  * (c) UNIX System Laboratories, Inc.
44  * All or some portions of this file are derived from material licensed
45  * to the University of California by American Telephone and Telegraph
46  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
47  * the permission of UNIX System Laboratories, Inc.
48  *
49  * Redistribution and use in source and binary forms, with or without
50  * modification, are permitted provided that the following conditions
51  * are met:
52  * 1. Redistributions of source code must retain the above copyright
53  *    notice, this list of conditions and the following disclaimer.
54  * 2. Redistributions in binary form must reproduce the above copyright
55  *    notice, this list of conditions and the following disclaimer in the
56  *    documentation and/or other materials provided with the distribution.
57  * 3. Neither the name of the University nor the names of its contributors
58  *    may be used to endorse or promote products derived from this software
59  *    without specific prior written permission.
60  *
61  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
62  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
63  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
64  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
65  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
66  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
67  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
68  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
69  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
70  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
71  * SUCH DAMAGE.
72  *
73  *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
74  */
75 
76 /*
77  * External virtual filesystem routines
78  */
79 
80 #include <sys/cdefs.h>
81 __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.235 2004/09/22 11:47:23 lukem Exp $");
82 
83 #include "opt_inet.h"
84 #include "opt_ddb.h"
85 #include "opt_compat_netbsd.h"
86 #include "opt_compat_43.h"
87 
88 #include <sys/param.h>
89 #include <sys/systm.h>
90 #include <sys/proc.h>
91 #include <sys/kernel.h>
92 #include <sys/mount.h>
93 #include <sys/time.h>
94 #include <sys/event.h>
95 #include <sys/fcntl.h>
96 #include <sys/vnode.h>
97 #include <sys/stat.h>
98 #include <sys/namei.h>
99 #include <sys/ucred.h>
100 #include <sys/buf.h>
101 #include <sys/errno.h>
102 #include <sys/malloc.h>
103 #include <sys/domain.h>
104 #include <sys/mbuf.h>
105 #include <sys/sa.h>
106 #include <sys/syscallargs.h>
107 #include <sys/device.h>
108 #include <sys/dirent.h>
109 #include <sys/filedesc.h>
110 
111 #include <miscfs/specfs/specdev.h>
112 #include <miscfs/genfs/genfs.h>
113 #include <miscfs/syncfs/syncfs.h>
114 
115 #include <netinet/in.h>
116 
117 #include <uvm/uvm.h>
118 #include <uvm/uvm_ddb.h>
121 
122 #include <sys/sysctl.h>
123 
124 const enum vtype iftovt_tab[16] = {
125 	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
126 	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
127 };
128 const int	vttoif_tab[9] = {
129 	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
130 	S_IFSOCK, S_IFIFO, S_IFMT,
131 };
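
/*
 * Example (illustrative): these tables back the IFTOVT()/VTTOIF()
 * conversions between inode mode bits and vnode types, e.g.:
 *
 *	enum vtype vt = iftovt_tab[(mode & S_IFMT) >> 12];
 *	int ifmt = vttoif_tab[(int)VREG];		yields S_IFREG
 *
 * The indexing assumes the traditional encoding of the file type in
 * the top four bits of the 16-bit mode.
 */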
132 
133 int doforce = 1;		/* 1 => permit forcible unmounting */
134 int prtactive = 0;		/* 1 => print out reclaim of active vnodes */
135 
136 extern int dovfsusermount;	/* 1 => permit any user to mount filesystems */
137 
138 /*
139  * Insq/Remq for the vnode usage lists.
140  */
141 #define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
142 #define	bufremvn(bp) {							\
143 	LIST_REMOVE(bp, b_vnbufs);					\
144 	(bp)->b_vnbufs.le_next = NOLIST;				\
145 }
146 /* TAILQ_HEAD(freelst, vnode) vnode_free_list: the vnode free list (in vnode.h) */
147 struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
148 struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);
149 
150 struct mntlist mountlist =			/* mounted filesystem list */
151     CIRCLEQ_HEAD_INITIALIZER(mountlist);
152 struct vfs_list_head vfs_list =			/* vfs list */
153     LIST_HEAD_INITIALIZER(vfs_list);
154 
155 struct nfs_public nfs_pub;			/* publicly exported FS */
156 
157 struct simplelock mountlist_slock = SIMPLELOCK_INITIALIZER;
158 static struct simplelock mntid_slock = SIMPLELOCK_INITIALIZER;
159 struct simplelock mntvnode_slock = SIMPLELOCK_INITIALIZER;
160 struct simplelock vnode_free_list_slock = SIMPLELOCK_INITIALIZER;
161 struct simplelock spechash_slock = SIMPLELOCK_INITIALIZER;
162 
163 /* XXX - gross; single global lock to protect v_numoutput */
164 struct simplelock global_v_numoutput_slock = SIMPLELOCK_INITIALIZER;
165 
166 /*
167  * These define the root filesystem and device.
168  */
169 struct mount *rootfs;
170 struct vnode *rootvnode;
171 struct device *root_device;			/* root device */
172 
173 POOL_INIT(vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
174     &pool_allocator_nointr);
175 
176 MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes");
177 
178 /*
179  * Local declarations.
180  */
181 void insmntque(struct vnode *, struct mount *);
182 int getdevvp(dev_t, struct vnode **, enum vtype);
183 void vgoneall(struct vnode *);
184 
185 void vclean(struct vnode *, int, struct proc *);
186 
187 static int vfs_hang_addrlist(struct mount *, struct netexport *,
188 			     struct export_args *);
189 static int vfs_free_netcred(struct radix_node *, void *);
190 static void vfs_free_addrlist(struct netexport *);
191 static struct vnode *getcleanvnode(struct proc *);
192 
193 #ifdef DEBUG
194 void printlockedvnodes(void);
195 #endif
196 
197 /*
198  * Initialize the vnode management data structures.
199  */
200 void
201 vntblinit()
202 {
203 
204 	/*
205 	 * Initialize the filesystem syncer.
206 	 */
207 	vn_initialize_syncerd();
208 }
209 
210 int
211 vfs_drainvnodes(long target, struct proc *p)
212 {
213 
214 	simple_lock(&vnode_free_list_slock);
215 	while (numvnodes > target) {
216 		struct vnode *vp;
217 
218 		vp = getcleanvnode(p);
219 		if (vp == NULL)
220 			return EBUSY; /* give up */
221 		pool_put(&vnode_pool, vp);
222 		simple_lock(&vnode_free_list_slock);
223 		numvnodes--;
224 	}
225 	simple_unlock(&vnode_free_list_slock);
226 
227 	return 0;
228 }
229 
230 /*
231  * Grab a vnode from the freelist and clean it.
232  */
233 struct vnode *
234 getcleanvnode(p)
235 	struct proc *p;
236 {
237 	struct vnode *vp;
238 	struct mount *mp;
239 	struct freelst *listhd;
240 
241 	LOCK_ASSERT(simple_lock_held(&vnode_free_list_slock));
242 
243 	listhd = &vnode_free_list;
244 try_nextlist:
245 	TAILQ_FOREACH(vp, listhd, v_freelist) {
246 		if (!simple_lock_try(&vp->v_interlock))
247 			continue;
248 		/*
249 		 * As our LWP might hold the underlying vnode locked,
250 		 * don't try to reclaim a VLAYER vnode while it is locked.
251 		 */
252 		if ((vp->v_flag & VXLOCK) == 0 &&
253 		    ((vp->v_flag & VLAYER) == 0 || VOP_ISLOCKED(vp) == 0)) {
254 			if (vn_start_write(vp, &mp, V_NOWAIT) == 0)
255 				break;
256 		}
257 		mp = NULL;
258 		simple_unlock(&vp->v_interlock);
259 	}
260 
261 	if (vp == NULLVP) {
262 		if (listhd == &vnode_free_list) {
263 			listhd = &vnode_hold_list;
264 			goto try_nextlist;
265 		}
266 		simple_unlock(&vnode_free_list_slock);
267 		return NULLVP;
268 	}
269 
270 	if (vp->v_usecount)
271 		panic("free vnode isn't, vp %p", vp);
272 	TAILQ_REMOVE(listhd, vp, v_freelist);
273 	/* see comment on why 0xdeadb is set at end of vgone (below) */
274 	vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
275 	simple_unlock(&vnode_free_list_slock);
276 	vp->v_lease = NULL;
277 
278 	if (vp->v_type != VBAD)
279 		vgonel(vp, p);
280 	else
281 		simple_unlock(&vp->v_interlock);
282 	vn_finished_write(mp, 0);
283 #ifdef DIAGNOSTIC
284 	if (vp->v_data || vp->v_uobj.uo_npages ||
285 	    TAILQ_FIRST(&vp->v_uobj.memq))
286 		panic("cleaned vnode isn't, vp %p", vp);
287 	if (vp->v_numoutput)
288 		panic("clean vnode has pending I/O's, vp %p", vp);
289 #endif
290 	KASSERT((vp->v_flag & VONWORKLST) == 0);
291 
292 	return vp;
293 }
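
/*
 * Example caller contract (sketch): getcleanvnode() must be entered
 * with vnode_free_list_slock held, and the lock has been released by
 * the time it returns, whether or not a vnode was found.  Compare
 * vfs_drainvnodes() above:
 *
 *	simple_lock(&vnode_free_list_slock);
 *	vp = getcleanvnode(p);
 *	if (vp == NULL)
 *		return EBUSY;
 *	pool_put(&vnode_pool, vp);
 */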
294 
295 /*
296  * Mark a mount point as busy. Used to synchronize access and to delay
297  * unmounting. Interlock is not released on failure.
298  */
299 int
300 vfs_busy(mp, flags, interlkp)
301 	struct mount *mp;
302 	int flags;
303 	struct simplelock *interlkp;
304 {
305 	int lkflags;
306 
307 	while (mp->mnt_iflag & IMNT_UNMOUNT) {
308 		int gone, n;
309 
310 		if (flags & LK_NOWAIT)
311 			return (ENOENT);
312 		if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL
313 		    && mp->mnt_unmounter == curproc)
314 			return (EDEADLK);
315 		if (interlkp)
316 			simple_unlock(interlkp);
317 		/*
318 		 * Since all busy locks are shared except the exclusive
319 		 * lock granted when unmounting, the only place that a
320 		 * wakeup needs to be done is at the release of the
321 		 * exclusive lock at the end of dounmount.
322 		 */
323 		simple_lock(&mp->mnt_slock);
324 		mp->mnt_wcnt++;
325 		ltsleep((caddr_t)mp, PVFS, "vfs_busy", 0, &mp->mnt_slock);
326 		n = --mp->mnt_wcnt;
327 		simple_unlock(&mp->mnt_slock);
328 		gone = mp->mnt_iflag & IMNT_GONE;
329 
330 		if (n == 0)
331 			wakeup(&mp->mnt_wcnt);
332 		if (interlkp)
333 			simple_lock(interlkp);
334 		if (gone)
335 			return (ENOENT);
336 	}
337 	lkflags = LK_SHARED;
338 	if (interlkp)
339 		lkflags |= LK_INTERLOCK;
340 	if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
341 		panic("vfs_busy: unexpected lock failure");
342 	return (0);
343 }
344 
345 /*
346  * Free a busy filesystem.
347  */
348 void
349 vfs_unbusy(mp)
350 	struct mount *mp;
351 {
352 
353 	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
354 }
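
/*
 * Example (sketch): the canonical mount list walk pairs vfs_busy()
 * with vfs_unbusy(), skipping mounts that are in the middle of being
 * unmounted; compare printlockedvnodes() below:
 *
 *	if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock) == 0) {
 *		... examine mp ...
 *		vfs_unbusy(mp);
 *	}
 */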
355 
356 /*
357  * Lookup a filesystem type, and if found allocate and initialize
358  * a mount structure for it.
359  *
360  * Devname is usually updated by mount(8) after booting.
361  */
362 int
363 vfs_rootmountalloc(fstypename, devname, mpp)
364 	char *fstypename;
365 	char *devname;
366 	struct mount **mpp;
367 {
368 	struct vfsops *vfsp = NULL;
369 	struct mount *mp;
370 
371 	LIST_FOREACH(vfsp, &vfs_list, vfs_list)
372 		if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
373 			break;
374 
375 	if (vfsp == NULL)
376 		return (ENODEV);
377 	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
378 	memset((char *)mp, 0, (u_long)sizeof(struct mount));
379 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
380 	simple_lock_init(&mp->mnt_slock);
381 	(void)vfs_busy(mp, LK_NOWAIT, 0);
382 	LIST_INIT(&mp->mnt_vnodelist);
383 	mp->mnt_op = vfsp;
384 	mp->mnt_flag = MNT_RDONLY;
385 	mp->mnt_vnodecovered = NULLVP;
386 	mp->mnt_leaf = mp;
387 	vfsp->vfs_refcount++;
388 	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN);
389 	mp->mnt_stat.f_mntonname[0] = '/';
390 	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
391 	*mpp = mp;
392 	return (0);
393 }
394 
395 /*
396  * Lookup a mount point by filesystem identifier.
397  */
398 struct mount *
399 vfs_getvfs(fsid)
400 	fsid_t *fsid;
401 {
402 	struct mount *mp;
403 
404 	simple_lock(&mountlist_slock);
405 	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
406 		if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] &&
407 		    mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) {
408 			simple_unlock(&mountlist_slock);
409 			return (mp);
410 		}
411 	}
412 	simple_unlock(&mountlist_slock);
413 	return ((struct mount *)0);
414 }
415 
416 /*
417  * Get a new unique fsid
418  */
419 void
420 vfs_getnewfsid(mp)
421 	struct mount *mp;
422 {
423 	static u_short xxxfs_mntid;
424 	fsid_t tfsid;
425 	int mtype;
426 
427 	simple_lock(&mntid_slock);
428 	mtype = makefstype(mp->mnt_op->vfs_name);
429 	mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0);
430 	mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype;
431 	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
432 	if (xxxfs_mntid == 0)
433 		++xxxfs_mntid;
434 	tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid);
435 	tfsid.__fsid_val[1] = mtype;
436 	if (!CIRCLEQ_EMPTY(&mountlist)) {
437 		while (vfs_getvfs(&tfsid)) {
438 			tfsid.__fsid_val[0]++;
439 			xxxfs_mntid++;
440 		}
441 	}
442 	mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0];
443 	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
444 	simple_unlock(&mntid_slock);
445 }
446 
447 /*
448  * Make a 'unique' number from a mount type name.
449  */
450 long
451 makefstype(type)
452 	const char *type;
453 {
454 	long rv;
455 
456 	for (rv = 0; *type; type++) {
457 		rv <<= 2;
458 		rv ^= *type;
459 	}
460 	return rv;
461 }
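
/*
 * Worked example: for the type name "ffs" the hash accumulates as
 *
 *	rv = (0x000 << 2) ^ 'f'		-> 0x066
 *	rv = (0x066 << 2) ^ 'f'		-> 0x1fe
 *	rv = (0x1fe << 2) ^ 's'		-> 0x78b
 *
 * so makefstype("ffs") == 0x78b.  Distinct names may collide, hence
 * the quotes around 'unique' above.
 */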
462 
463 
464 /*
465  * Set vnode attributes to VNOVAL
466  */
467 void
468 vattr_null(vap)
469 	struct vattr *vap;
470 {
471 
472 	vap->va_type = VNON;
473 
474 	/*
475 	 * Assign each member individually, so that this remains correct
476 	 * even if the members differ in size and signedness.
477 	 */
478 	vap->va_mode = VNOVAL;
479 	vap->va_nlink = VNOVAL;
480 	vap->va_uid = VNOVAL;
481 	vap->va_gid = VNOVAL;
482 	vap->va_fsid = VNOVAL;
483 	vap->va_fileid = VNOVAL;
484 	vap->va_size = VNOVAL;
485 	vap->va_blocksize = VNOVAL;
486 	vap->va_atime.tv_sec =
487 	    vap->va_mtime.tv_sec =
488 	    vap->va_ctime.tv_sec =
489 	    vap->va_birthtime.tv_sec = VNOVAL;
490 	vap->va_atime.tv_nsec =
491 	    vap->va_mtime.tv_nsec =
492 	    vap->va_ctime.tv_nsec =
493 	    vap->va_birthtime.tv_nsec = VNOVAL;
494 	vap->va_gen = VNOVAL;
495 	vap->va_flags = VNOVAL;
496 	vap->va_rdev = VNOVAL;
497 	vap->va_bytes = VNOVAL;
498 	vap->va_vaflags = 0;
499 }
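
/*
 * Example use (sketch): callers clear a struct vattr and then set only
 * the fields they mean to change; everything left at VNOVAL is ignored
 * by the filesystem.  E.g. to truncate a file to zero length:
 *
 *	struct vattr va;
 *
 *	vattr_null(&va);
 *	va.va_size = 0;
 *	error = VOP_SETATTR(vp, &va, cred, p);
 */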
500 
501 /*
502  * Routines having to do with the management of the vnode table.
503  */
504 extern int (**dead_vnodeop_p)(void *);
505 long numvnodes;
506 
507 /*
508  * Return the next vnode from the free list.
509  */
510 int
511 getnewvnode(tag, mp, vops, vpp)
512 	enum vtagtype tag;
513 	struct mount *mp;
514 	int (**vops)(void *);
515 	struct vnode **vpp;
516 {
517 	extern struct uvm_pagerops uvm_vnodeops;
518 	struct uvm_object *uobj;
519 	struct proc *p = curproc;	/* XXX */
520 	static int toggle;
521 	struct vnode *vp;
522 	int error = 0, tryalloc;
523 
524  try_again:
525 	if (mp) {
526 		/*
527 		 * Mark filesystem busy while we're creating a vnode.
528 		 * If unmount is in progress, this will wait; if the
529 		 * unmount succeeds (only if umount -f), this will
530 		 * return an error.  If the unmount fails, we'll keep
531 		 * going afterwards.
532 		 * (This puts the per-mount vnode list logically under
533 		 * the protection of the vfs_busy lock).
534 		 */
535 		error = vfs_busy(mp, LK_RECURSEFAIL, 0);
536 		if (error && error != EDEADLK)
537 			return error;
538 	}
539 
540 	/*
541 	 * We must choose whether to allocate a new vnode or recycle an
542 	 * existing one. The criterion for allocating a new one is that
543 	 * the total number of vnodes is less than the number desired or
544 	 * there are no vnodes on either free list. Generally we only
545 	 * want to recycle vnodes that have no buffers associated with
546 	 * them, so we look first on the vnode_free_list. If it is empty,
547 	 * we next consider vnodes with referencing buffers on the
548 	 * vnode_hold_list. The toggle ensures that half the time we
549 	 * will recycle a vnode from the vnode_hold_list, and half the time
550 	 * we will allocate a new one unless the list has grown to twice
551 	 * the desired size. We are reluctant to recycle vnodes from the
552 	 * vnode_hold_list because we will lose the identity of all its
553 	 * referencing buffers.
554 	 */
555 
556 	vp = NULL;
557 
558 	simple_lock(&vnode_free_list_slock);
559 
560 	toggle ^= 1;
561 	if (numvnodes > 2 * desiredvnodes)
562 		toggle = 0;
563 
564 	tryalloc = numvnodes < desiredvnodes ||
565 	    (TAILQ_FIRST(&vnode_free_list) == NULL &&
566 	     (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));
567 
568 	if (tryalloc &&
569 	    (vp = pool_get(&vnode_pool, PR_NOWAIT)) != NULL) {
570 		numvnodes++;
571 		simple_unlock(&vnode_free_list_slock);
572 		memset(vp, 0, sizeof(*vp));
573 		simple_lock_init(&vp->v_interlock);
574 		uobj = &vp->v_uobj;
575 		uobj->pgops = &uvm_vnodeops;
576 		TAILQ_INIT(&uobj->memq);
577 		/*
578 		 * done by memset() above.
579 		 *	uobj->uo_npages = 0;
580 		 *	LIST_INIT(&vp->v_nclist);
581 		 *	LIST_INIT(&vp->v_dnclist);
582 		 */
583 	} else {
584 		vp = getcleanvnode(p);
585 		/*
586 		 * Unless this is a bad time of the month, at most
587 		 * the first NCPUS items on the free list are
588 		 * locked, so this is close enough to being empty.
589 		 */
590 		if (vp == NULLVP) {
591 			if (mp && error != EDEADLK)
592 				vfs_unbusy(mp);
593 			if (tryalloc) {
594 				printf("WARNING: unable to allocate new "
595 				    "vnode, retrying...\n");
596 				(void) tsleep(&lbolt, PRIBIO, "newvn", hz);
597 				goto try_again;
598 			}
599 			tablefull("vnode", "increase kern.maxvnodes or NVNODE");
600 			*vpp = 0;
601 			return (ENFILE);
602 		}
603 		vp->v_flag = 0;
604 		vp->v_socket = NULL;
605 #ifdef VERIFIED_EXEC
606 		vp->fp_status = FINGERPRINT_INVALID;
607 #endif
608 	}
609 	vp->v_type = VNON;
610 	vp->v_vnlock = &vp->v_lock;
611 	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
612 	KASSERT(LIST_EMPTY(&vp->v_nclist));
613 	KASSERT(LIST_EMPTY(&vp->v_dnclist));
614 	vp->v_tag = tag;
615 	vp->v_op = vops;
616 	insmntque(vp, mp);
617 	*vpp = vp;
618 	vp->v_usecount = 1;
619 	vp->v_data = 0;
620 	simple_lock_init(&vp->v_uobj.vmobjlock);
621 
622 	/*
623 	 * initialize uvm_object within vnode.
624 	 */
625 
626 	uobj = &vp->v_uobj;
627 	KASSERT(uobj->pgops == &uvm_vnodeops);
628 	KASSERT(uobj->uo_npages == 0);
629 	KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
630 	vp->v_size = VSIZENOTSET;
631 
632 	if (mp && error != EDEADLK)
633 		vfs_unbusy(mp);
634 	return (0);
635 }
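
/*
 * Example (sketch): a filesystem's vget/inode-allocation path obtains
 * a fresh vnode along these lines (ffs shown for illustration):
 *
 *	error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp);
 *	if (error)
 *		return error;
 *	... set vp->v_data, and later vp->v_type, from the inode ...
 */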
636 
637 /*
638  * This is really just the reverse of getnewvnode(). Needed for
639 	 * VFS_VGET functions that may need to push back a vnode in case
640  * of a locking race.
641  */
642 void
643 ungetnewvnode(vp)
644 	struct vnode *vp;
645 {
646 #ifdef DIAGNOSTIC
647 	if (vp->v_usecount != 1)
648 		panic("ungetnewvnode: busy vnode");
649 #endif
650 	vp->v_usecount--;
651 	insmntque(vp, NULL);
652 	vp->v_type = VBAD;
653 
654 	simple_lock(&vp->v_interlock);
655 	/*
656 	 * Insert at head of LRU list
657 	 */
658 	simple_lock(&vnode_free_list_slock);
659 	if (vp->v_holdcnt > 0)
660 		TAILQ_INSERT_HEAD(&vnode_hold_list, vp, v_freelist);
661 	else
662 		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
663 	simple_unlock(&vnode_free_list_slock);
664 	simple_unlock(&vp->v_interlock);
665 }
666 
667 /*
668  * Move a vnode from one mount queue to another.
669  */
670 void
671 insmntque(vp, mp)
672 	struct vnode *vp;
673 	struct mount *mp;
674 {
675 
676 #ifdef DIAGNOSTIC
677 	if ((mp != NULL) &&
678 	    (mp->mnt_iflag & IMNT_UNMOUNT) &&
679 	    !(mp->mnt_flag & MNT_SOFTDEP) &&
680 	    vp->v_tag != VT_VFS) {
681 		panic("insmntque into dying filesystem");
682 	}
683 #endif
684 
685 	simple_lock(&mntvnode_slock);
686 	/*
687 	 * Delete from old mount point vnode list, if on one.
688 	 */
689 	if (vp->v_mount != NULL)
690 		LIST_REMOVE(vp, v_mntvnodes);
691 	/*
692 	 * Insert into list of vnodes for the new mount point, if available.
693 	 */
694 	if ((vp->v_mount = mp) != NULL)
695 		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
696 	simple_unlock(&mntvnode_slock);
697 }
698 
699 /*
700  * Update outstanding I/O count and do wakeup if requested.
701  */
702 void
703 vwakeup(bp)
704 	struct buf *bp;
705 {
706 	struct vnode *vp;
707 
708 	if ((vp = bp->b_vp) != NULL) {
709 		/* XXX global lock hack
710 		 * can't use v_interlock here since this is called
711 		 * in interrupt context from biodone().
712 		 */
713 		simple_lock(&global_v_numoutput_slock);
714 		if (--vp->v_numoutput < 0)
715 			panic("vwakeup: neg numoutput, vp %p", vp);
716 		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
717 			vp->v_flag &= ~VBWAIT;
718 			wakeup((caddr_t)&vp->v_numoutput);
719 		}
720 		simple_unlock(&global_v_numoutput_slock);
721 	}
722 }
723 
724 /*
725  * Flush out and invalidate all buffers associated with a vnode.
726  * Called with the underlying vnode locked, which should prevent new dirty
727  * buffers from being queued.
728  */
729 int
730 vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
731 	struct vnode *vp;
732 	int flags;
733 	struct ucred *cred;
734 	struct proc *p;
735 	int slpflag, slptimeo;
736 {
737 	struct buf *bp, *nbp;
738 	int s, error;
739 	int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO |
740 		(flags & V_SAVE ? PGO_CLEANIT : 0);
741 
742 	/* XXXUBC this doesn't look at flags or slp* */
743 	simple_lock(&vp->v_interlock);
744 	error = VOP_PUTPAGES(vp, 0, 0, flushflags);
745 	if (error) {
746 		return error;
747 	}
748 
749 	if (flags & V_SAVE) {
750 		error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0, p);
751 		if (error)
752 			return (error);
753 #ifdef DIAGNOSTIC
754 		s = splbio();
755 		if (vp->v_numoutput > 0 || !LIST_EMPTY(&vp->v_dirtyblkhd))
756 			panic("vinvalbuf: dirty bufs, vp %p", vp);
757 		splx(s);
758 #endif
759 	}
760 
761 	s = splbio();
762 
763 restart:
764 	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
765 		nbp = LIST_NEXT(bp, b_vnbufs);
766 		simple_lock(&bp->b_interlock);
767 		if (bp->b_flags & B_BUSY) {
768 			bp->b_flags |= B_WANTED;
769 			error = ltsleep((caddr_t)bp,
770 				    slpflag | (PRIBIO + 1) | PNORELOCK,
771 				    "vinvalbuf", slptimeo, &bp->b_interlock);
772 			if (error) {
773 				splx(s);
774 				return (error);
775 			}
776 			goto restart;
777 		}
778 		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
779 		simple_unlock(&bp->b_interlock);
780 		brelse(bp);
781 	}
782 
783 	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
784 		nbp = LIST_NEXT(bp, b_vnbufs);
785 		simple_lock(&bp->b_interlock);
786 		if (bp->b_flags & B_BUSY) {
787 			bp->b_flags |= B_WANTED;
788 			error = ltsleep((caddr_t)bp,
789 				    slpflag | (PRIBIO + 1) | PNORELOCK,
790 				    "vinvalbuf", slptimeo, &bp->b_interlock);
791 			if (error) {
792 				splx(s);
793 				return (error);
794 			}
795 			goto restart;
796 		}
797 		/*
798 		 * XXX Since there are no node locks for NFS, I believe
799 		 * there is a slight chance that a delayed write will
800 		 * occur while sleeping just above, so check for it.
801 		 */
802 		if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
803 #ifdef DEBUG
804 			printf("buffer still DELWRI\n");
805 #endif
806 			bp->b_flags |= B_BUSY | B_VFLUSH;
807 			simple_unlock(&bp->b_interlock);
808 			VOP_BWRITE(bp);
809 			goto restart;
810 		}
811 		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
812 		simple_unlock(&bp->b_interlock);
813 		brelse(bp);
814 	}
815 
816 #ifdef DIAGNOSTIC
817 	if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
818 		panic("vinvalbuf: flush failed, vp %p", vp);
819 #endif
820 
821 	splx(s);
822 
823 	return (0);
824 }
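
/*
 * Example (sketch): revocation paths discard everything, while close
 * paths that must preserve data pass V_SAVE to force a sync first:
 *
 *	error = vinvalbuf(vp, V_SAVE, cred, p, 0, 0);	sync, then purge
 *	error = vinvalbuf(vp, 0, cred, p, 0, 0);	purge unconditionally
 */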
825 
826 /*
827  * Destroy any in core blocks past the truncation length.
828  * Called with the underlying vnode locked, which should prevent new dirty
829  * buffers from being queued.
830  */
831 int
832 vtruncbuf(vp, lbn, slpflag, slptimeo)
833 	struct vnode *vp;
834 	daddr_t lbn;
835 	int slpflag, slptimeo;
836 {
837 	struct buf *bp, *nbp;
838 	int s, error;
839 	voff_t off;
840 
841 	off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
842 	simple_lock(&vp->v_interlock);
843 	error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
844 	if (error) {
845 		return error;
846 	}
847 
848 	s = splbio();
849 
850 restart:
851 	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
852 		nbp = LIST_NEXT(bp, b_vnbufs);
853 		if (bp->b_lblkno < lbn)
854 			continue;
855 		simple_lock(&bp->b_interlock);
856 		if (bp->b_flags & B_BUSY) {
857 			bp->b_flags |= B_WANTED;
858 			error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK,
859 			    "vtruncbuf", slptimeo, &bp->b_interlock);
860 			if (error) {
861 				splx(s);
862 				return (error);
863 			}
864 			goto restart;
865 		}
866 		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
867 		simple_unlock(&bp->b_interlock);
868 		brelse(bp);
869 	}
870 
871 	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
872 		nbp = LIST_NEXT(bp, b_vnbufs);
873 		if (bp->b_lblkno < lbn)
874 			continue;
875 		simple_lock(&bp->b_interlock);
876 		if (bp->b_flags & B_BUSY) {
877 			bp->b_flags |= B_WANTED;
878 			error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK,
879 			    "vtruncbuf", slptimeo, &bp->b_interlock);
880 			if (error) {
881 				splx(s);
882 				return (error);
883 			}
884 			goto restart;
885 		}
886 		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
887 		simple_unlock(&bp->b_interlock);
888 		brelse(bp);
889 	}
890 
891 	splx(s);
892 
893 	return (0);
894 }
895 
896 void
897 vflushbuf(vp, sync)
898 	struct vnode *vp;
899 	int sync;
900 {
901 	struct buf *bp, *nbp;
902 	int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0);
903 	int s;
904 
905 	simple_lock(&vp->v_interlock);
906 	(void) VOP_PUTPAGES(vp, 0, 0, flags);
907 
908 loop:
909 	s = splbio();
910 	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
911 		nbp = LIST_NEXT(bp, b_vnbufs);
912 		simple_lock(&bp->b_interlock);
913 		if ((bp->b_flags & B_BUSY)) {
914 			simple_unlock(&bp->b_interlock);
915 			continue;
916 		}
917 		if ((bp->b_flags & B_DELWRI) == 0)
918 			panic("vflushbuf: not dirty, bp %p", bp);
919 		bp->b_flags |= B_BUSY | B_VFLUSH;
920 		simple_unlock(&bp->b_interlock);
921 		splx(s);
922 		/*
923 		 * Wait for I/O associated with indirect blocks to complete,
924 		 * since there is no way to quickly wait for them below.
925 		 */
926 		if (bp->b_vp == vp || sync == 0)
927 			(void) bawrite(bp);
928 		else
929 			(void) bwrite(bp);
930 		goto loop;
931 	}
932 	if (sync == 0) {
933 		splx(s);
934 		return;
935 	}
936 	simple_lock(&global_v_numoutput_slock);
937 	while (vp->v_numoutput) {
938 		vp->v_flag |= VBWAIT;
939 		ltsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0,
940 			&global_v_numoutput_slock);
941 	}
942 	simple_unlock(&global_v_numoutput_slock);
943 	splx(s);
944 	if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
945 		vprint("vflushbuf: dirty", vp);
946 		goto loop;
947 	}
948 }
949 
950 /*
951  * Associate a buffer with a vnode.
952  */
953 void
954 bgetvp(vp, bp)
955 	struct vnode *vp;
956 	struct buf *bp;
957 {
958 	int s;
959 
960 	if (bp->b_vp)
961 		panic("bgetvp: not free, bp %p", bp);
962 	VHOLD(vp);
963 	s = splbio();
964 	bp->b_vp = vp;
965 	if (vp->v_type == VBLK || vp->v_type == VCHR)
966 		bp->b_dev = vp->v_rdev;
967 	else
968 		bp->b_dev = NODEV;
969 	/*
970 	 * Insert onto list for new vnode.
971 	 */
972 	bufinsvn(bp, &vp->v_cleanblkhd);
973 	splx(s);
974 }
975 
976 /*
977  * Disassociate a buffer from a vnode.
978  */
979 void
980 brelvp(bp)
981 	struct buf *bp;
982 {
983 	struct vnode *vp;
984 	int s;
985 
986 	if (bp->b_vp == NULL)
987 		panic("brelvp: vp NULL, bp %p", bp);
988 
989 	s = splbio();
990 	vp = bp->b_vp;
991 	/*
992 	 * Delete from old vnode list, if on one.
993 	 */
994 	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
995 		bufremvn(bp);
996 
997 	if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_flag & VONWORKLST) &&
998 	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
999 		vp->v_flag &= ~VONWORKLST;
1000 		LIST_REMOVE(vp, v_synclist);
1001 	}
1002 
1003 	bp->b_vp = NULL;
1004 	HOLDRELE(vp);
1005 	splx(s);
1006 }
1007 
1008 /*
1009  * Reassign a buffer from one vnode to another.
1010  * Used to assign file specific control information
1011  * (indirect blocks) to the vnode to which they belong.
1012  *
1013  * This function must be called at splbio().
1014  */
1015 void
1016 reassignbuf(bp, newvp)
1017 	struct buf *bp;
1018 	struct vnode *newvp;
1019 {
1020 	struct buflists *listheadp;
1021 	int delay;
1022 
1023 	/*
1024 	 * Delete from old vnode list, if on one.
1025 	 */
1026 	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
1027 		bufremvn(bp);
1028 	/*
1029 	 * If dirty, put on list of dirty buffers;
1030 	 * otherwise insert onto list of clean buffers.
1031 	 */
1032 	if ((bp->b_flags & B_DELWRI) == 0) {
1033 		listheadp = &newvp->v_cleanblkhd;
1034 		if (TAILQ_EMPTY(&newvp->v_uobj.memq) &&
1035 		    (newvp->v_flag & VONWORKLST) &&
1036 		    LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) {
1037 			newvp->v_flag &= ~VONWORKLST;
1038 			LIST_REMOVE(newvp, v_synclist);
1039 		}
1040 	} else {
1041 		listheadp = &newvp->v_dirtyblkhd;
1042 		if ((newvp->v_flag & VONWORKLST) == 0) {
1043 			switch (newvp->v_type) {
1044 			case VDIR:
1045 				delay = dirdelay;
1046 				break;
1047 			case VBLK:
1048 				if (newvp->v_specmountpoint != NULL) {
1049 					delay = metadelay;
1050 					break;
1051 				}
1052 				/* fall through */
1053 			default:
1054 				delay = filedelay;
1055 				break;
1056 			}
1057 			if (!newvp->v_mount ||
1058 			    (newvp->v_mount->mnt_flag & MNT_ASYNC) == 0)
1059 				vn_syncer_add_to_worklist(newvp, delay);
1060 		}
1061 	}
1062 	bufinsvn(bp, listheadp);
1063 }
1064 
1065 /*
1066  * Create a vnode for a block device.
1067  * Used for root filesystem and swap areas.
1068  * Also used for memory file system special devices.
1069  */
1070 int
1071 bdevvp(dev, vpp)
1072 	dev_t dev;
1073 	struct vnode **vpp;
1074 {
1075 
1076 	return (getdevvp(dev, vpp, VBLK));
1077 }
1078 
1079 /*
1080  * Create a vnode for a character device.
1081  * Used for kernfs and some console handling.
1082  */
1083 int
1084 cdevvp(dev, vpp)
1085 	dev_t dev;
1086 	struct vnode **vpp;
1087 {
1088 
1089 	return (getdevvp(dev, vpp, VCHR));
1090 }
1091 
1092 /*
1093  * Create a vnode for a device.
1094  * Used by bdevvp (block device) for root file system etc.,
1095  * and by cdevvp (character device) for console and kernfs.
1096  */
1097 int
1098 getdevvp(dev, vpp, type)
1099 	dev_t dev;
1100 	struct vnode **vpp;
1101 	enum vtype type;
1102 {
1103 	struct vnode *vp;
1104 	struct vnode *nvp;
1105 	int error;
1106 
1107 	if (dev == NODEV) {
1108 		*vpp = NULLVP;
1109 		return (0);
1110 	}
1111 	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
1112 	if (error) {
1113 		*vpp = NULLVP;
1114 		return (error);
1115 	}
1116 	vp = nvp;
1117 	vp->v_type = type;
1118 	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
1119 		vput(vp);
1120 		vp = nvp;
1121 	}
1122 	*vpp = vp;
1123 	return (0);
1124 }
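
/*
 * Example (sketch): early root filesystem setup uses bdevvp() roughly
 * like this (compare the various *_mountroot() implementations):
 *
 *	if (bdevvp(rootdev, &rootvp))
 *		panic("mountroot: can't set up rootvp");
 */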
1125 
1126 /*
1127  * Check to see if the new vnode represents a special device
1128  * for which we already have a vnode (either because of
1129  * bdevvp() or because of a different vnode representing
1130  * the same block device). If such an alias exists, deallocate
1131  * the existing contents and return the aliased vnode. The
1132  * caller is responsible for filling it with its new contents.
1133  */
1134 struct vnode *
1135 checkalias(nvp, nvp_rdev, mp)
1136 	struct vnode *nvp;
1137 	dev_t nvp_rdev;
1138 	struct mount *mp;
1139 {
1140 	struct proc *p = curproc;       /* XXX */
1141 	struct vnode *vp;
1142 	struct vnode **vpp;
1143 
1144 	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
1145 		return (NULLVP);
1146 
1147 	vpp = &speclisth[SPECHASH(nvp_rdev)];
1148 loop:
1149 	simple_lock(&spechash_slock);
1150 	for (vp = *vpp; vp; vp = vp->v_specnext) {
1151 		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
1152 			continue;
1153 		/*
1154 		 * Alias, but not in use, so flush it out.
1155 		 */
1156 		simple_lock(&vp->v_interlock);
1157 		simple_unlock(&spechash_slock);
1158 		if (vp->v_usecount == 0) {
1159 			vgonel(vp, p);
1160 			goto loop;
1161 		}
1162 		/*
1163 		 * What we want to know here is whether someone else has
1164 		 * removed this vnode from the device hash list while we were
1165 		 * waiting.  This can only happen if vclean() did it, and
1166 		 * this requires the vnode to be locked.  Therefore, we use
1167 		 * LK_SLEEPFAIL and retry.
1168 		 */
1169 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_SLEEPFAIL))
1170 			goto loop;
1171 		simple_lock(&spechash_slock);
1172 		break;
1173 	}
1174 	if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
1175 		MALLOC(nvp->v_specinfo, struct specinfo *,
1176 			sizeof(struct specinfo), M_VNODE, M_NOWAIT);
1177 		/* XXX Erg. */
1178 		if (nvp->v_specinfo == NULL) {
1179 			simple_unlock(&spechash_slock);
1180 			uvm_wait("checkalias");
1181 			goto loop;
1182 		}
1183 
1184 		nvp->v_rdev = nvp_rdev;
1185 		nvp->v_hashchain = vpp;
1186 		nvp->v_specnext = *vpp;
1187 		nvp->v_specmountpoint = NULL;
1188 		simple_unlock(&spechash_slock);
1189 		nvp->v_speclockf = NULL;
1190 		simple_lock_init(&nvp->v_spec_cow_slock);
1191 		SLIST_INIT(&nvp->v_spec_cow_head);
1192 		nvp->v_spec_cow_req = 0;
1193 		nvp->v_spec_cow_count = 0;
1194 
1195 		*vpp = nvp;
1196 		if (vp != NULLVP) {
1197 			nvp->v_flag |= VALIASED;
1198 			vp->v_flag |= VALIASED;
1199 			vput(vp);
1200 		}
1201 		return (NULLVP);
1202 	}
1203 	simple_unlock(&spechash_slock);
1204 	VOP_UNLOCK(vp, 0);
1205 	simple_lock(&vp->v_interlock);
1206 	vclean(vp, 0, p);
1207 	vp->v_op = nvp->v_op;
1208 	vp->v_tag = nvp->v_tag;
1209 	vp->v_vnlock = &vp->v_lock;
1210 	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
1211 	nvp->v_type = VNON;
1212 	insmntque(vp, mp);
1213 	return (vp);
1214 }
1215 
1216 /*
1217  * Grab a particular vnode from the free list, increment its
1218  * reference count and lock it. If the vnode lock bit is set the
1219  * vnode is being eliminated in vgone. In that case, we cannot
1220  * grab the vnode, so the process is awakened when the transition is
1221  * completed, and an error returned to indicate that the vnode is no
1222  * longer usable (possibly having been changed to a new file system type).
1223  */
1224 int
1225 vget(vp, flags)
1226 	struct vnode *vp;
1227 	int flags;
1228 {
1229 	int error;
1230 
1231 	/*
1232 	 * If the vnode is in the process of being cleaned out for
1233 	 * another use, we wait for the cleaning to finish and then
1234 	 * return failure. Cleaning is determined by checking that
1235 	 * the VXLOCK flag is set.
1236 	 */
1237 
1238 	if ((flags & LK_INTERLOCK) == 0)
1239 		simple_lock(&vp->v_interlock);
1240 	if (vp->v_flag & VXLOCK) {
1241 		if (flags & LK_NOWAIT) {
1242 			simple_unlock(&vp->v_interlock);
1243 			return EBUSY;
1244 		}
1245 		vp->v_flag |= VXWANT;
1246 		ltsleep(vp, PINOD|PNORELOCK, "vget", 0, &vp->v_interlock);
1247 		return (ENOENT);
1248 	}
1249 	if (vp->v_usecount == 0) {
1250 		simple_lock(&vnode_free_list_slock);
1251 		if (vp->v_holdcnt > 0)
1252 			TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
1253 		else
1254 			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1255 		simple_unlock(&vnode_free_list_slock);
1256 	}
1257 	vp->v_usecount++;
1258 #ifdef DIAGNOSTIC
1259 	if (vp->v_usecount == 0) {
1260 		vprint("vget", vp);
1261 		panic("vget: usecount overflow, vp %p", vp);
1262 	}
1263 #endif
1264 	if (flags & LK_TYPE_MASK) {
1265 		if ((error = vn_lock(vp, flags | LK_INTERLOCK))) {
1266 			/*
1267 			 * must expand vrele here because we do not want
1268 			 * to call VOP_INACTIVE if the reference count
1269 			 * drops back to zero since it was never really
1270 			 * active. We must remove it from the free list
1271 			 * before sleeping so that multiple processes do
1272 			 * not try to recycle it.
1273 			 */
1274 			simple_lock(&vp->v_interlock);
1275 			vp->v_usecount--;
1276 			if (vp->v_usecount > 0) {
1277 				simple_unlock(&vp->v_interlock);
1278 				return (error);
1279 			}
1280 			/*
1281 			 * insert at tail of LRU list
1282 			 */
1283 			simple_lock(&vnode_free_list_slock);
1284 			if (vp->v_holdcnt > 0)
1285 				TAILQ_INSERT_TAIL(&vnode_hold_list, vp,
1286 				    v_freelist);
1287 			else
1288 				TAILQ_INSERT_TAIL(&vnode_free_list, vp,
1289 				    v_freelist);
1290 			simple_unlock(&vnode_free_list_slock);
1291 			simple_unlock(&vp->v_interlock);
1292 		}
1293 		return (error);
1294 	}
1295 	simple_unlock(&vp->v_interlock);
1296 	return (0);
1297 }
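
/*
 * Example (sketch): an inode hash lookup takes the interlock and then
 * upgrades to a full, locked reference with vget(), retrying if the
 * vnode was being recycled out from under it (compare ufs_ihashget()):
 *
 *	simple_lock(&vp->v_interlock);
 *	if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK))
 *		goto retry;
 */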
1298 
1299 /*
1300  * vput(), just unlock and vrele()
1301  */
1302 void
1303 vput(vp)
1304 	struct vnode *vp;
1305 {
1306 	struct proc *p = curproc;	/* XXX */
1307 
1308 #ifdef DIAGNOSTIC
1309 	if (vp == NULL)
1310 		panic("vput: null vp");
1311 #endif
1312 	simple_lock(&vp->v_interlock);
1313 	vp->v_usecount--;
1314 	if (vp->v_usecount > 0) {
1315 		simple_unlock(&vp->v_interlock);
1316 		VOP_UNLOCK(vp, 0);
1317 		return;
1318 	}
1319 #ifdef DIAGNOSTIC
1320 	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1321 		vprint("vput: bad ref count", vp);
1322 		panic("vput: ref cnt");
1323 	}
1324 #endif
1325 	/*
1326 	 * Insert at tail of LRU list.
1327 	 */
1328 	simple_lock(&vnode_free_list_slock);
1329 	if (vp->v_holdcnt > 0)
1330 		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
1331 	else
1332 		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1333 	simple_unlock(&vnode_free_list_slock);
1334 	if (vp->v_flag & VEXECMAP) {
1335 		uvmexp.execpages -= vp->v_uobj.uo_npages;
1336 		uvmexp.filepages += vp->v_uobj.uo_npages;
1337 	}
1338 	vp->v_flag &= ~(VTEXT|VEXECMAP);
1339 	simple_unlock(&vp->v_interlock);
1340 	VOP_INACTIVE(vp, p);
1341 }
1342 
1343 /*
1344  * Vnode release.
1345  * If count drops to zero, call inactive routine and return to freelist.
1346  */
1347 void
1348 vrele(vp)
1349 	struct vnode *vp;
1350 {
1351 	struct proc *p = curproc;	/* XXX */
1352 
1353 #ifdef DIAGNOSTIC
1354 	if (vp == NULL)
1355 		panic("vrele: null vp");
1356 #endif
1357 	simple_lock(&vp->v_interlock);
1358 	vp->v_usecount--;
1359 	if (vp->v_usecount > 0) {
1360 		simple_unlock(&vp->v_interlock);
1361 		return;
1362 	}
1363 #ifdef DIAGNOSTIC
1364 	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1365 		vprint("vrele: bad ref count", vp);
1366 		panic("vrele: ref cnt vp %p", vp);
1367 	}
1368 #endif
1369 	/*
1370 	 * Insert at tail of LRU list.
1371 	 */
1372 	simple_lock(&vnode_free_list_slock);
1373 	if (vp->v_holdcnt > 0)
1374 		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
1375 	else
1376 		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1377 	simple_unlock(&vnode_free_list_slock);
1378 	if (vp->v_flag & VEXECMAP) {
1379 		uvmexp.execpages -= vp->v_uobj.uo_npages;
1380 		uvmexp.filepages += vp->v_uobj.uo_npages;
1381 	}
1382 	vp->v_flag &= ~(VTEXT|VEXECMAP);
1383 	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
1384 		VOP_INACTIVE(vp, p);
1385 }
1386 
1387 #ifdef DIAGNOSTIC
1388 /*
1389  * Page or buffer structure gets a reference.
1390  */
1391 void
1392 vholdl(vp)
1393 	struct vnode *vp;
1394 {
1395 
1396 	/*
1397 	 * If it is on the freelist and the hold count is currently
1398 	 * zero, move it to the hold list. The test of the back
1399 	 * pointer and the use reference count of zero is because
1400 	 * it will be removed from a free list by getnewvnode,
1401 	 * but will not have its reference count incremented until
1402 	 * after calling vgone. If the reference count were
1403 	 * incremented first, vgone would (incorrectly) try to
1404 	 * close the previous instance of the underlying object.
1405 	 * So, the back pointer is explicitly set to `0xdeadb' in
1406 	 * getcleanvnode after removing it from a freelist to ensure
1407 	 * that we do not try to move it here.
1408 	 */
1409 	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
1410 	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
1411 		simple_lock(&vnode_free_list_slock);
1412 		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1413 		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
1414 		simple_unlock(&vnode_free_list_slock);
1415 	}
1416 	vp->v_holdcnt++;
1417 }
1418 
1419 /*
1420  * Page or buffer structure frees a reference.
1421  */
1422 void
1423 holdrelel(vp)
1424 	struct vnode *vp;
1425 {
1426 
1427 	if (vp->v_holdcnt <= 0)
1428 		panic("holdrelel: holdcnt vp %p", vp);
1429 	vp->v_holdcnt--;
1430 
1431 	/*
1432 	 * If it is on the holdlist and the hold count drops to
1433 	 * zero, move it to the free list. The test of the back
1434 	 * pointer and the use reference count of zero is because
1435 	 * it will be removed from a free list by getnewvnode,
1436 	 * but will not have its reference count incremented until
1437 	 * after calling vgone. If the reference count were
1438 	 * incremented first, vgone would (incorrectly) try to
1439 	 * close the previous instance of the underlying object.
1440 	 * So, the back pointer is explicitly set to `0xdeadb' in
1441 	 * getcleanvnode after removing it from a freelist to ensure
1442 	 * that we do not try to move it here.
1443 	 */
1444 
1445 	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
1446 	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
1447 		simple_lock(&vnode_free_list_slock);
1448 		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
1449 		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1450 		simple_unlock(&vnode_free_list_slock);
1451 	}
1452 }
1453 
1454 /*
1455  * Vnode reference.
1456  */
1457 void
1458 vref(vp)
1459 	struct vnode *vp;
1460 {
1461 
1462 	simple_lock(&vp->v_interlock);
1463 	if (vp->v_usecount <= 0)
1464 		panic("vref used where vget required, vp %p", vp);
1465 	vp->v_usecount++;
1466 #ifdef DIAGNOSTIC
1467 	if (vp->v_usecount == 0) {
1468 		vprint("vref", vp);
1469 		panic("vref: usecount overflow, vp %p", vp);
1470 	}
1471 #endif
1472 	simple_unlock(&vp->v_interlock);
1473 }
1474 #endif /* DIAGNOSTIC */
1475 
1476 /*
1477  * Remove any vnodes in the vnode table belonging to mount point mp.
1478  *
1479  * If FORCECLOSE is not specified, there should not be any active ones,
1480  * return error if any are found (nb: this is a user error, not a
1481  * system error). If FORCECLOSE is specified, detach any active vnodes
1482  * that are found.
1483  *
1484  * If WRITECLOSE is set, only flush out regular file vnodes open for
1485  * writing.
1486  *
1487  * SKIPSYSTEM causes any vnodes marked V_SYSTEM to be skipped.
1488  */
1489 #ifdef DEBUG
1490 int busyprt = 0;	/* print out busy vnodes */
1491 struct ctldebug debug1 = { "busyprt", &busyprt };
1492 #endif
1493 
1494 int
1495 vflush(mp, skipvp, flags)
1496 	struct mount *mp;
1497 	struct vnode *skipvp;
1498 	int flags;
1499 {
1500 	struct proc *p = curproc;	/* XXX */
1501 	struct vnode *vp, *nvp;
1502 	int busy = 0;
1503 
1504 	simple_lock(&mntvnode_slock);
1505 loop:
1506 	for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
1507 		if (vp->v_mount != mp)
1508 			goto loop;
1509 		nvp = LIST_NEXT(vp, v_mntvnodes);
1510 		/*
1511 		 * Skip over a selected vnode.
1512 		 */
1513 		if (vp == skipvp)
1514 			continue;
1515 		simple_lock(&vp->v_interlock);
1516 		/*
1517 		 * Skip over a vnodes marked VSYSTEM.
1518 		 */
1519 		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
1520 			simple_unlock(&vp->v_interlock);
1521 			continue;
1522 		}
1523 		/*
1524 		 * If WRITECLOSE is set, only flush out regular file
1525 		 * vnodes open for writing.
1526 		 */
1527 		if ((flags & WRITECLOSE) &&
1528 		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
1529 			simple_unlock(&vp->v_interlock);
1530 			continue;
1531 		}
1532 		/*
1533 		 * With v_usecount == 0, all we need to do is clear
1534 		 * out the vnode data structures and we are done.
1535 		 */
1536 		if (vp->v_usecount == 0) {
1537 			simple_unlock(&mntvnode_slock);
1538 			vgonel(vp, p);
1539 			simple_lock(&mntvnode_slock);
1540 			continue;
1541 		}
1542 		/*
1543 		 * If FORCECLOSE is set, forcibly close the vnode.
1544 		 * For block or character devices, revert to an
1545 		 * anonymous device. For all other files, just kill them.
1546 		 */
1547 		if (flags & FORCECLOSE) {
1548 			simple_unlock(&mntvnode_slock);
1549 			if (vp->v_type != VBLK && vp->v_type != VCHR) {
1550 				vgonel(vp, p);
1551 			} else {
1552 				vclean(vp, 0, p);
1553 				vp->v_op = spec_vnodeop_p;
1554 				insmntque(vp, (struct mount *)0);
1555 			}
1556 			simple_lock(&mntvnode_slock);
1557 			continue;
1558 		}
1559 #ifdef DEBUG
1560 		if (busyprt)
1561 			vprint("vflush: busy vnode", vp);
1562 #endif
1563 		simple_unlock(&vp->v_interlock);
1564 		busy++;
1565 	}
1566 	simple_unlock(&mntvnode_slock);
1567 	if (busy)
1568 		return (EBUSY);
1569 	return (0);
1570 }
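
/*
 * Example (sketch): a filesystem's unmount path flushes its vnodes
 * with vflush(), passing any fs-internal vnode as skipvp so it
 * survives the sweep:
 *
 *	if (mntflags & MNT_FORCE)
 *		flags |= FORCECLOSE;
 *	error = vflush(mp, NULLVP, flags);
 */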
1571 
1572 /*
1573  * Disassociate the underlying file system from a vnode.
1574  */
1575 void
1576 vclean(vp, flags, p)
1577 	struct vnode *vp;
1578 	int flags;
1579 	struct proc *p;
1580 {
1581 	struct mount *mp;
1582 	int active;
1583 
1584 	LOCK_ASSERT(simple_lock_held(&vp->v_interlock));
1585 
1586 	/*
1587 	 * Check to see if the vnode is in use.
1588 	 * If so we have to reference it before we clean it out
1589 	 * so that its count cannot fall to zero and generate a
1590 	 * race against ourselves to recycle it.
1591 	 */
1592 
1593 	if ((active = vp->v_usecount) != 0) {
1594 		vp->v_usecount++;
1595 #ifdef DIAGNOSTIC
1596 		if (vp->v_usecount == 0) {
1597 			vprint("vclean", vp);
1598 			panic("vclean: usecount overflow");
1599 		}
1600 #endif
1601 	}
1602 
1603 	/*
1604 	 * Prevent the vnode from being recycled or
1605 	 * brought into use while we clean it out.
1606 	 */
1607 	if (vp->v_flag & VXLOCK)
1608 		panic("vclean: deadlock, vp %p", vp);
1609 	vp->v_flag |= VXLOCK;
1610 	if (vp->v_flag & VEXECMAP) {
1611 		uvmexp.execpages -= vp->v_uobj.uo_npages;
1612 		uvmexp.filepages += vp->v_uobj.uo_npages;
1613 	}
1614 	vp->v_flag &= ~(VTEXT|VEXECMAP);
1615 
1616 	/*
1617 	 * Even if the count is zero, the VOP_INACTIVE routine may still
1618 	 * have the object locked while it cleans it out. The VOP_LOCK
1619 	 * ensures that the VOP_INACTIVE routine is done with its work.
1620 	 * For active vnodes, it ensures that no other activity can
1621 	 * occur while the underlying object is being cleaned out.
1622 	 */
1623 	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK);
1624 
1625 	/*
1626 	 * Clean out any cached data associated with the vnode.
1627 	 * If special device, remove it from special device alias list.
1628 	 * if it is on one.
1629 	 */
1630 	if (flags & DOCLOSE) {
1631 		int error;
1632 		struct vnode *vq, *vx;
1633 
1634 		vn_start_write(vp, &mp, V_WAIT | V_LOWER);
1635 		error = vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
1636 		vn_finished_write(mp, V_LOWER);
1637 		if (error)
1638 			error = vinvalbuf(vp, 0, NOCRED, p, 0, 0);
1639 		KASSERT(error == 0);
1640 		KASSERT((vp->v_flag & VONWORKLST) == 0);
1641 
1642 		if (active)
1643 			VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
1644 
1645 		if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
1646 		    vp->v_specinfo != 0) {
1647 			simple_lock(&spechash_slock);
1648 			if (vp->v_hashchain != NULL) {
1649 				if (*vp->v_hashchain == vp) {
1650 					*vp->v_hashchain = vp->v_specnext;
1651 				} else {
1652 					for (vq = *vp->v_hashchain; vq;
1653 					     vq = vq->v_specnext) {
1654 						if (vq->v_specnext != vp)
1655 							continue;
1656 						vq->v_specnext = vp->v_specnext;
1657 						break;
1658 					}
1659 					if (vq == NULL)
1660 						panic("missing bdev");
1661 				}
1662 				if (vp->v_flag & VALIASED) {
1663 					vx = NULL;
1664 					for (vq = *vp->v_hashchain; vq;
1665 					     vq = vq->v_specnext) {
1666 						if (vq->v_rdev != vp->v_rdev ||
1667 						    vq->v_type != vp->v_type)
1668 							continue;
1669 						if (vx)
1670 							break;
1671 						vx = vq;
1672 					}
1673 					if (vx == NULL)
1674 						panic("missing alias");
1675 					if (vq == NULL)
1676 						vx->v_flag &= ~VALIASED;
1677 					vp->v_flag &= ~VALIASED;
1678 				}
1679 			}
1680 			simple_unlock(&spechash_slock);
1681 			FREE(vp->v_specinfo, M_VNODE);
1682 			vp->v_specinfo = NULL;
1683 		}
1684 	}
1685 	LOCK_ASSERT(!simple_lock_held(&vp->v_interlock));
1686 
1687 	/*
1688 	 * If purging an active vnode, it must be closed and
1689 	 * deactivated before being reclaimed. Note that the
1690 	 * VOP_INACTIVE will unlock the vnode.
1691 	 */
1692 	if (active) {
1693 		VOP_INACTIVE(vp, p);
1694 	} else {
1695 		/*
1696 		 * Any other processes trying to obtain this lock must first
1697 		 * wait for VXLOCK to clear, then call the new lock operation.
1698 		 */
1699 		VOP_UNLOCK(vp, 0);
1700 	}
1701 	/*
1702 	 * Reclaim the vnode.
1703 	 */
1704 	if (VOP_RECLAIM(vp, p))
1705 		panic("vclean: cannot reclaim, vp %p", vp);
1706 	if (active) {
1707 		/*
1708 		 * Inline copy of vrele() since VOP_INACTIVE
1709 		 * has already been called.
1710 		 */
1711 		simple_lock(&vp->v_interlock);
1712 		if (--vp->v_usecount <= 0) {
1713 #ifdef DIAGNOSTIC
1714 			if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1715 				vprint("vclean: bad ref count", vp);
1716 				panic("vclean: ref cnt");
1717 			}
1718 #endif
1719 			/*
1720 			 * Insert at tail of LRU list.
1721 			 */
1722 
1723 			simple_unlock(&vp->v_interlock);
1724 			simple_lock(&vnode_free_list_slock);
1725 #ifdef DIAGNOSTIC
1726 			if (vp->v_holdcnt > 0)
1727 				panic("vclean: not clean, vp %p", vp);
1728 #endif
1729 			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1730 			simple_unlock(&vnode_free_list_slock);
1731 		} else
1732 			simple_unlock(&vp->v_interlock);
1733 	}
1734 
1735 	KASSERT(vp->v_uobj.uo_npages == 0);
1736 	cache_purge(vp);
1737 
1738 	/*
1739 	 * Done with purge, notify sleepers of the grim news.
1740 	 */
1741 	vp->v_op = dead_vnodeop_p;
1742 	vp->v_tag = VT_NON;
1743 	simple_lock(&vp->v_interlock);
1744 	VN_KNOTE(vp, NOTE_REVOKE);	/* FreeBSD has this in vn_pollgone() */
1745 	vp->v_flag &= ~(VXLOCK|VLOCKSWORK);
1746 	if (vp->v_flag & VXWANT) {
1747 		vp->v_flag &= ~VXWANT;
1748 		simple_unlock(&vp->v_interlock);
1749 		wakeup((caddr_t)vp);
1750 	} else
1751 		simple_unlock(&vp->v_interlock);
1752 }
1753 
1754 /*
1755  * Recycle an unused vnode to the front of the free list.
1756  * Release the passed interlock if the vnode will be recycled.
1757  */
1758 int
1759 vrecycle(vp, inter_lkp, p)
1760 	struct vnode *vp;
1761 	struct simplelock *inter_lkp;
1762 	struct proc *p;
1763 {
1764 
1765 	simple_lock(&vp->v_interlock);
1766 	if (vp->v_usecount == 0) {
1767 		if (inter_lkp)
1768 			simple_unlock(inter_lkp);
1769 		vgonel(vp, p);
1770 		return (1);
1771 	}
1772 	simple_unlock(&vp->v_interlock);
1773 	return (0);
1774 }
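
/*
 * Example (sketch): VOP_INACTIVE implementations use vrecycle() to
 * retire a vnode whose backing object has been deleted (compare
 * ufs_inactive()):
 *
 *	if (ip->i_mode == 0)
 *		vrecycle(vp, NULL, p);
 */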
1775 
1776 /*
1777  * Eliminate all activity associated with a vnode
1778  * in preparation for reuse.
1779  */
1780 void
1781 vgone(vp)
1782 	struct vnode *vp;
1783 {
1784 	struct proc *p = curproc;	/* XXX */
1785 
1786 	simple_lock(&vp->v_interlock);
1787 	vgonel(vp, p);
1788 }
1789 
1790 /*
1791  * vgone, with the vp interlock held.
1792  */
1793 void
1794 vgonel(vp, p)
1795 	struct vnode *vp;
1796 	struct proc *p;
1797 {
1798 
1799 	LOCK_ASSERT(simple_lock_held(&vp->v_interlock));
1800 
1801 	/*
1802 	 * If a vgone (or vclean) is already in progress,
1803 	 * wait until it is done and return.
1804 	 */
1805 
1806 	if (vp->v_flag & VXLOCK) {
1807 		vp->v_flag |= VXWANT;
1808 		ltsleep(vp, PINOD | PNORELOCK, "vgone", 0, &vp->v_interlock);
1809 		return;
1810 	}
1811 
1812 	/*
1813 	 * Clean out the filesystem specific data.
1814 	 */
1815 
1816 	vclean(vp, DOCLOSE, p);
1817 	KASSERT((vp->v_flag & VONWORKLST) == 0);
1818 
1819 	/*
1820 	 * Delete from old mount point vnode list, if on one.
1821 	 */
1822 
1823 	if (vp->v_mount != NULL)
1824 		insmntque(vp, (struct mount *)0);
1825 
1826 	/*
1827 	 * The test of the back pointer and the reference count of
1828 	 * zero is because it will be removed from the free list by
1829 	 * getcleanvnode, but will not have its reference count
1830 	 * incremented until after calling vgone. If the reference
1831 	 * count were incremented first, vgone would (incorrectly)
1832 	 * try to close the previous instance of the underlying object.
1833 	 * So, the back pointer is explicitly set to `0xdeadb' in
1834 	 * getcleanvnode after removing it from the freelist to ensure
1835 	 * that we do not try to move it here.
1836 	 */
1837 
1838 	vp->v_type = VBAD;
1839 	if (vp->v_usecount == 0) {
1840 		boolean_t dofree;
1841 
1842 		simple_lock(&vnode_free_list_slock);
1843 		if (vp->v_holdcnt > 0)
1844 			panic("vgonel: not clean, vp %p", vp);
1845 		/*
1846 		 * If it isn't on the freelist, we were called by getcleanvnode
1847 		 * and the vnode is being re-used; otherwise, we'll free it.
1848 		 */
1849 		dofree = vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb;
1850 		if (dofree) {
1851 			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1852 			numvnodes--;
1853 		}
1854 		simple_unlock(&vnode_free_list_slock);
1855 		if (dofree)
1856 			pool_put(&vnode_pool, vp);
1857 	}
1858 }
1859 
1860 /*
1861  * Lookup a vnode by device number.
1862  */
1863 int
1864 vfinddev(dev, type, vpp)
1865 	dev_t dev;
1866 	enum vtype type;
1867 	struct vnode **vpp;
1868 {
1869 	struct vnode *vp;
1870 	int rc = 0;
1871 
1872 	simple_lock(&spechash_slock);
1873 	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1874 		if (dev != vp->v_rdev || type != vp->v_type)
1875 			continue;
1876 		*vpp = vp;
1877 		rc = 1;
1878 		break;
1879 	}
1880 	simple_unlock(&spechash_slock);
1881 	return (rc);
1882 }
1883 
1884 /*
1885  * Revoke all the vnodes corresponding to the specified minor number
1886  * range (endpoints inclusive) of the specified major.
1887  */
1888 void
1889 vdevgone(maj, minl, minh, type)
1890 	int maj, minl, minh;
1891 	enum vtype type;
1892 {
1893 	struct vnode *vp;
1894 	int mn;
1895 
1896 	for (mn = minl; mn <= minh; mn++)
1897 		if (vfinddev(makedev(maj, mn), type, &vp))
1898 			VOP_REVOKE(vp, REVOKEALL);
1899 }
1900 
1901 /*
1902  * Calculate the total number of references to a special device.
1903  */
1904 int
1905 vcount(vp)
1906 	struct vnode *vp;
1907 {
1908 	struct vnode *vq, *vnext;
1909 	int count;
1910 
1911 loop:
1912 	if ((vp->v_flag & VALIASED) == 0)
1913 		return (vp->v_usecount);
1914 	simple_lock(&spechash_slock);
1915 	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1916 		vnext = vq->v_specnext;
1917 		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1918 			continue;
1919 		/*
1920 		 * Alias, but not in use, so flush it out.
1921 		 */
1922 		if (vq->v_usecount == 0 && vq != vp &&
1923 		    (vq->v_flag & VXLOCK) == 0) {
1924 			simple_unlock(&spechash_slock);
1925 			vgone(vq);
1926 			goto loop;
1927 		}
1928 		count += vq->v_usecount;
1929 	}
1930 	simple_unlock(&spechash_slock);
1931 	return (count);
1932 }
1933 
1934 /*
1935  * Print out a description of a vnode.
1936  */
1937 const char * const vnode_types[] = {
1938 	"VNON",
1939 	"VREG",
1940 	"VDIR",
1941 	"VBLK",
1942 	"VCHR",
1943 	"VLNK",
1944 	"VSOCK",
1945 	"VFIFO",
1946 	"VBAD"
1947 };
1948 
1949 void
1950 vprint(label, vp)
1951 	char *label;
1952 	struct vnode *vp;
1953 {
1954 	char buf[96];
1955 
1956 	if (label != NULL)
1957 		printf("%s: ", label);
1958 	printf("tag %d type %s, usecount %d, writecount %ld, refcount %ld,",
1959 	    vp->v_tag, vnode_types[vp->v_type],
1960 	    vp->v_usecount, vp->v_writecount, vp->v_holdcnt);
1961 	buf[0] = '\0';
1962 	if (vp->v_flag & VROOT)
1963 		strlcat(buf, "|VROOT", sizeof(buf));
1964 	if (vp->v_flag & VTEXT)
1965 		strlcat(buf, "|VTEXT", sizeof(buf));
1966 	if (vp->v_flag & VEXECMAP)
1967 		strlcat(buf, "|VEXECMAP", sizeof(buf));
1968 	if (vp->v_flag & VSYSTEM)
1969 		strlcat(buf, "|VSYSTEM", sizeof(buf));
1970 	if (vp->v_flag & VXLOCK)
1971 		strlcat(buf, "|VXLOCK", sizeof(buf));
1972 	if (vp->v_flag & VXWANT)
1973 		strlcat(buf, "|VXWANT", sizeof(buf));
1974 	if (vp->v_flag & VBWAIT)
1975 		strlcat(buf, "|VBWAIT", sizeof(buf));
1976 	if (vp->v_flag & VALIASED)
1977 		strlcat(buf, "|VALIASED", sizeof(buf));
1978 	if (buf[0] != '\0')
1979 		printf(" flags (%s)", &buf[1]);
1980 	if (vp->v_data == NULL) {
1981 		printf("\n");
1982 	} else {
1983 		printf("\n\t");
1984 		VOP_PRINT(vp);
1985 	}
1986 }
1987 
1988 #ifdef DEBUG
1989 /*
1990  * List all of the locked vnodes in the system.
1991  * Called when debugging the kernel.
1992  */
1993 void
1994 printlockedvnodes()
1995 {
1996 	struct mount *mp, *nmp;
1997 	struct vnode *vp;
1998 
1999 	printf("Locked vnodes\n");
2000 	simple_lock(&mountlist_slock);
2001 	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
2002 	     mp = nmp) {
2003 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
2004 			nmp = CIRCLEQ_NEXT(mp, mnt_list);
2005 			continue;
2006 		}
2007 		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
2008 			if (VOP_ISLOCKED(vp))
2009 				vprint(NULL, vp);
2010 		}
2011 		simple_lock(&mountlist_slock);
2012 		nmp = CIRCLEQ_NEXT(mp, mnt_list);
2013 		vfs_unbusy(mp);
2014 	}
2015 	simple_unlock(&mountlist_slock);
2016 }
2017 #endif
2018 
2019 /*
2020  * sysctl helper routine for vfs.generic.conf lookups.
2021  */
2022 #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
2023 static int
2024 sysctl_vfs_generic_conf(SYSCTLFN_ARGS)
2025 {
2026 	struct vfsconf vfc;
2027 	extern const char * const mountcompatnames[];
2028 	extern int nmountcompatnames;
2029 	struct sysctlnode node;
2030 	struct vfsops *vfsp;
2031 	u_int vfsnum;
2032 
2033 	if (namelen != 1)
2034 		return (ENOTDIR);
2035 	vfsnum = name[0];
2036 	if (vfsnum >= nmountcompatnames ||
2037 	    mountcompatnames[vfsnum] == NULL)
2038 		return (EOPNOTSUPP);
2039 	vfsp = vfs_getopsbyname(mountcompatnames[vfsnum]);
2040 	if (vfsp == NULL)
2041 		return (EOPNOTSUPP);
2042 
2043 	vfc.vfc_vfsops = vfsp;
2044 	strncpy(vfc.vfc_name, vfsp->vfs_name, MFSNAMELEN);
2045 	vfc.vfc_typenum = vfsnum;
2046 	vfc.vfc_refcount = vfsp->vfs_refcount;
2047 	vfc.vfc_flags = 0;
2048 	vfc.vfc_mountroot = vfsp->vfs_mountroot;
2049 	vfc.vfc_next = NULL;
2050 
2051 	node = *rnode;
2052 	node.sysctl_data = &vfc;
2053 	return (sysctl_lookup(SYSCTLFN_CALL(&node)));
2054 }
2055 #endif
2056 
2057 /*
2058  * sysctl helper routine to return list of supported fstypes
2059  */
2060 static int
2061 sysctl_vfs_generic_fstypes(SYSCTLFN_ARGS)
2062 {
2063 	char buf[MFSNAMELEN];
2064 	char *where = oldp;
2065 	struct vfsops *v;
2066 	size_t needed, left, slen;
2067 	int error, first;
2068 
2069 	if (newp != NULL)
2070 		return (EPERM);
2071 	if (namelen != 0)
2072 		return (EINVAL);
2073 
2074 	first = 1;
2075 	error = 0;
2076 	needed = 0;
2077 	left = *oldlenp;
2078 
2079 	LIST_FOREACH(v, &vfs_list, vfs_list) {
2080 		if (where == NULL)
2081 			needed += strlen(v->vfs_name) + 1;
2082 		else {
2083 			memset(buf, 0, sizeof(buf));
2084 			if (first) {
2085 				strncpy(buf, v->vfs_name, sizeof(buf));
2086 				first = 0;
2087 			} else {
2088 				buf[0] = ' ';
2089 				strncpy(buf + 1, v->vfs_name, sizeof(buf) - 1);
2090 			}
2091 			buf[sizeof(buf)-1] = '\0';
2092 			slen = strlen(buf);
2093 			if (left < slen + 1)
2094 				break;
2095 			/* +1 to copy out the trailing NUL byte */
2096 			error = copyout(buf, where, slen + 1);
2097 			if (error)
2098 				break;
2099 			where += slen;
2100 			needed += slen;
2101 			left -= slen;
2102 		}
2103 	}
2104 	*oldlenp = needed;
2105 	return (error);
2106 }
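
/*
 * The space-separated list built above is registered below (in
 * SYSCTL_SETUP) as the string sysctl variable vfs.generic.fstypes.
 * A minimal userland reader (a sketch, assuming the standard
 * sysctlbyname(3) interface):
 *
 *	char buf[1024];
 *	size_t len = sizeof(buf);
 *
 *	if (sysctlbyname("vfs.generic.fstypes", buf, &len, NULL, 0) == 0)
 *		printf("%s\n", buf);
 */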
2107 
2108 /*
2109  * Top level filesystem related information gathering.
2110  */
2111 SYSCTL_SETUP(sysctl_vfs_setup, "sysctl vfs subtree setup")
2112 {
2113 #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
2114 	extern int nmountcompatnames;
2115 #endif
2116 
2117 	sysctl_createv(clog, 0, NULL, NULL,
2118 		       CTLFLAG_PERMANENT,
2119 		       CTLTYPE_NODE, "vfs", NULL,
2120 		       NULL, 0, NULL, 0,
2121 		       CTL_VFS, CTL_EOL);
2122 	sysctl_createv(clog, 0, NULL, NULL,
2123 		       CTLFLAG_PERMANENT,
2124 		       CTLTYPE_NODE, "generic",
2125 		       SYSCTL_DESCR("Non-specific vfs related information"),
2126 		       NULL, 0, NULL, 0,
2127 		       CTL_VFS, VFS_GENERIC, CTL_EOL);
2128 
2129 #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
2130 	sysctl_createv(clog, 0, NULL, NULL,
2131 		       CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
2132 		       CTLTYPE_INT, "maxtypenum",
2133 		       SYSCTL_DESCR("Highest valid filesystem type number"),
2134 		       NULL, nmountcompatnames, NULL, 0,
2135 		       CTL_VFS, VFS_GENERIC, VFS_MAXTYPENUM, CTL_EOL);
2136 #endif
2137 	sysctl_createv(clog, 0, NULL, NULL,
2138 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2139 		       CTLTYPE_INT, "usermount",
2140 		       SYSCTL_DESCR("Whether unprivileged users may mount "
2141 				    "filesystems"),
2142 		       NULL, 0, &dovfsusermount, 0,
2143 		       CTL_VFS, VFS_GENERIC, VFS_USERMOUNT, CTL_EOL);
2144 	sysctl_createv(clog, 0, NULL, NULL,
2145 		       CTLFLAG_PERMANENT,
2146 		       CTLTYPE_STRING, "fstypes",
2147 		       SYSCTL_DESCR("List of file systems present"),
2148 		       sysctl_vfs_generic_fstypes, 0, NULL, 0,
2149 		       CTL_VFS, VFS_GENERIC, CTL_CREATE, CTL_EOL);
2150 #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
2151 	sysctl_createv(clog, 0, NULL, NULL,
2152 		       CTLFLAG_PERMANENT,
2153 		       CTLTYPE_STRUCT, "conf",
2154 		       SYSCTL_DESCR("Filesystem configuration information"),
2155 		       sysctl_vfs_generic_conf, 0, NULL,
2156 		       sizeof(struct vfsconf),
2157 		       CTL_VFS, VFS_GENERIC, VFS_CONF, CTL_EOL);
2158 #endif
2159 }
2160 
2161 
2162 int kinfo_vdebug = 1;
2163 int kinfo_vgetfailed;
2164 #define KINFO_VNODESLOP	10
2165 /*
2166  * Dump vnode list (via sysctl).
2167  * Copyout address of vnode followed by vnode.
2168  */
2169 /* ARGSUSED */
2170 int
2171 sysctl_kern_vnode(SYSCTLFN_ARGS)
2172 {
2173 	char *where = oldp;
2174 	size_t *sizep = oldlenp;
2175 	struct mount *mp, *nmp;
2176 	struct vnode *nvp, *vp;
2177 	char *bp = where, *savebp;
2178 	char *ewhere;
2179 	int error;
2180 
2181 	if (namelen != 0)
2182 		return (EOPNOTSUPP);
2183 	if (newp != NULL)
2184 		return (EPERM);
2185 
2186 #define VPTRSZ	sizeof(struct vnode *)
2187 #define VNODESZ	sizeof(struct vnode)
2188 	if (where == NULL) {
2189 		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
2190 		return (0);
2191 	}
2192 	ewhere = where + *sizep;
2193 
2194 	simple_lock(&mountlist_slock);
2195 	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
2196 	     mp = nmp) {
2197 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
2198 			nmp = CIRCLEQ_NEXT(mp, mnt_list);
2199 			continue;
2200 		}
2201 		savebp = bp;
2202 again:
2203 		simple_lock(&mntvnode_slock);
2204 		for (vp = LIST_FIRST(&mp->mnt_vnodelist);
2205 		     vp != NULL;
2206 		     vp = nvp) {
2207 			/*
2208 			 * Check that the vp is still associated with
2209 			 * this filesystem.  RACE: could have been
2210 			 * recycled onto the same filesystem.
2211 			 */
2212 			if (vp->v_mount != mp) {
2213 				simple_unlock(&mntvnode_slock);
2214 				if (kinfo_vdebug)
2215 					printf("kinfo: vp changed\n");
2216 				bp = savebp;
2217 				goto again;
2218 			}
2219 			nvp = LIST_NEXT(vp, v_mntvnodes);
2220 			if (bp + VPTRSZ + VNODESZ > ewhere) {
2221 				simple_unlock(&mntvnode_slock);
2222 				*sizep = bp - where;
2223 				return (ENOMEM);
2224 			}
2225 			simple_unlock(&mntvnode_slock);
2226 			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
2227 			   (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
2228 				return (error);
2229 			bp += VPTRSZ + VNODESZ;
2230 			simple_lock(&mntvnode_slock);
2231 		}
2232 		simple_unlock(&mntvnode_slock);
2233 		simple_lock(&mountlist_slock);
2234 		nmp = CIRCLEQ_NEXT(mp, mnt_list);
2235 		vfs_unbusy(mp);
2236 	}
2237 	simple_unlock(&mountlist_slock);
2238 
2239 	*sizep = bp - where;
2240 	return (0);
2241 }
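
/*
 * Consumer sketch (illustrative; error handling omitted): the data
 * copied out above is a packed array of (struct vnode *, struct
 * vnode) pairs, so a userland reader sizes the buffer first and then
 * walks it in VPTRSZ + VNODESZ steps:
 *
 *	int mib[2] = { CTL_KERN, KERN_VNODE };
 *	size_t len;
 *	char *buf, *p;
 *
 *	sysctl(mib, 2, NULL, &len, NULL, 0);
 *	buf = malloc(len);
 *	sysctl(mib, 2, buf, &len, NULL, 0);
 *	for (p = buf; p < buf + len;
 *	    p += sizeof(struct vnode *) + sizeof(struct vnode))
 *		(inspect the pair at p)
 */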
2242 
2243 /*
2244  * Check to see if a filesystem is mounted on a block device.
2245  */
2246 int
2247 vfs_mountedon(vp)
2248 	struct vnode *vp;
2249 {
2250 	struct vnode *vq;
2251 	int error = 0;
2252 
2253 	if (vp->v_specmountpoint != NULL)
2254 		return (EBUSY);
2255 	if (vp->v_flag & VALIASED) {
2256 		simple_lock(&spechash_slock);
2257 		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
2258 			if (vq->v_rdev != vp->v_rdev ||
2259 			    vq->v_type != vp->v_type)
2260 				continue;
2261 			if (vq->v_specmountpoint != NULL) {
2262 				error = EBUSY;
2263 				break;
2264 			}
2265 		}
2266 		simple_unlock(&spechash_slock);
2267 	}
2268 	return (error);
2269 }
2270 
2271 static int
2272 sacheck(struct sockaddr *sa)
2273 {
2274 	switch (sa->sa_family) {
2275 #ifdef INET
2276 	case AF_INET: {
2277 		struct sockaddr_in *sin = (struct sockaddr_in *)sa;
2278 		char *p = (char *)sin->sin_zero;
2279 		size_t i;
2280 
2281 		if (sin->sin_len != sizeof(*sin))
2282 			return -1;
2283 		if (sin->sin_port != 0)
2284 			return -1;
2285 		for (i = 0; i < sizeof(sin->sin_zero); i++)
2286 			if (*p++ != '\0')
2287 				return -1;
2288 		return 0;
2289 	}
2290 #endif
2291 #ifdef INET6
2292 	case AF_INET6: {
2293 		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
2294 
2295 		if (sin6->sin6_len != sizeof(*sin6))
2296 			return -1;
2297 		if (sin6->sin6_port != 0)
2298 			return -1;
2299 		return 0;
2300 	}
2301 #endif
2302 	default:
2303 		return -1;
2304 	}
2305 }
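
/*
 * For reference, a sketch (not from the original file) of an AF_INET
 * address that satisfies sacheck(): exact length, zero port, and a
 * fully zeroed sin_zero pad:
 *
 *	struct sockaddr_in sin;
 *
 *	memset(&sin, 0, sizeof(sin));
 *	sin.sin_family = AF_INET;
 *	sin.sin_len = sizeof(sin);
 *	sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
 *	(now sacheck((struct sockaddr *)&sin) returns 0)
 */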
2306 
2307 /*
2308  * Build hash lists of net addresses and hang them off the mount point.
2309  * Called by ufs_mount() to set up the lists of export addresses.
2310  */
2311 static int
2312 vfs_hang_addrlist(mp, nep, argp)
2313 	struct mount *mp;
2314 	struct netexport *nep;
2315 	struct export_args *argp;
2316 {
2317 	struct netcred *np, *enp;
2318 	struct radix_node_head *rnh;
2319 	int i;
2320 	struct sockaddr *saddr, *smask = 0;
2321 	struct domain *dom;
2322 	int error;
2323 
2324 	if (argp->ex_addrlen == 0) {
2325 		if (mp->mnt_flag & MNT_DEFEXPORTED)
2326 			return (EPERM);
2327 		np = &nep->ne_defexported;
2328 		np->netc_exflags = argp->ex_flags;
2329 		crcvt(&np->netc_anon, &argp->ex_anon);
2330 		np->netc_anon.cr_ref = 1;
2331 		mp->mnt_flag |= MNT_DEFEXPORTED;
2332 		return (0);
2333 	}
2334 
2335 	if (argp->ex_addrlen > MLEN || argp->ex_masklen > MLEN)
2336 		return (EINVAL);
2337 
2338 	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
2339 	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
2340 	memset((caddr_t)np, 0, i);
2341 	saddr = (struct sockaddr *)(np + 1);
2342 	error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
2343 	if (error)
2344 		goto out;
2345 	if (saddr->sa_len > argp->ex_addrlen)
2346 		saddr->sa_len = argp->ex_addrlen;
2347 	if (sacheck(saddr) == -1) {
2348 		error = EINVAL;	/* a bare return here would leak np */
 		goto out;
 	}
2349 	if (argp->ex_masklen) {
2350 		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
2351 		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
2352 		if (error)
2353 			goto out;
2354 		if (smask->sa_len > argp->ex_masklen)
2355 			smask->sa_len = argp->ex_masklen;
2356 		if (smask->sa_family != saddr->sa_family ||
2357 		    sacheck(smask) == -1) {
2358 			error = EINVAL;	/* don't leak np */
2359 			goto out;
 		}
2360 	}
2361 	i = saddr->sa_family;
2362 	if ((rnh = nep->ne_rtable[i]) == 0) {
2363 		/*
2364 		 * It seems silly to initialize every AF when most are
2365 		 * not used; do so on demand here.
2366 		 */
2367 		for (dom = domains; dom; dom = dom->dom_next)
2368 			if (dom->dom_family == i && dom->dom_rtattach) {
2369 				dom->dom_rtattach((void **)&nep->ne_rtable[i],
2370 					dom->dom_rtoffset);
2371 				break;
2372 			}
2373 		if ((rnh = nep->ne_rtable[i]) == 0) {
2374 			error = ENOBUFS;
2375 			goto out;
2376 		}
2377 	}
2378 
2379 	enp = (struct netcred *)(*rnh->rnh_addaddr)(saddr, smask, rnh,
2380 	    np->netc_rnodes);
2381 	if (enp != np) {
2382 		if (enp == NULL) {
2383 			enp = (struct netcred *)(*rnh->rnh_lookup)(saddr,
2384 			    smask, rnh);
2385 			if (enp == NULL) {
2386 				error = EPERM;
2387 				goto out;
2388 			}
2389 		} else
2390 			enp->netc_refcnt++;
2391 
2392 		goto check;
2393 	} else
2394 		enp->netc_refcnt = 1;
2395 
2396 	np->netc_exflags = argp->ex_flags;
2397 	crcvt(&np->netc_anon, &argp->ex_anon);
2398 	np->netc_anon.cr_ref = 1;
2399 	return 0;
2400 check:
2401 	if (enp->netc_exflags != argp->ex_flags ||
2402 	    crcmp(&enp->netc_anon, &argp->ex_anon) != 0)
2403 		error = EPERM;
2404 	else
2405 		error = 0;
2406 out:
2407 	free(np, M_NETADDR);
2408 	return error;
2409 }
2410 
2411 /* ARGSUSED */
2412 static int
2413 vfs_free_netcred(rn, w)
2414 	struct radix_node *rn;
2415 	void *w;
2416 {
2417 	struct radix_node_head *rnh = (struct radix_node_head *)w;
2418 	struct netcred *np = (struct netcred *)(void *)rn;
2419 
2420 	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
2421 	if (--(np->netc_refcnt) <= 0)
2422 		free(np, M_NETADDR);
2423 	return (0);
2424 }
2425 
2426 /*
2427  * Free the net address hash lists that are hanging off the mount points.
2428  */
2429 static void
2430 vfs_free_addrlist(nep)
2431 	struct netexport *nep;
2432 {
2433 	int i;
2434 	struct radix_node_head *rnh;
2435 
2436 	for (i = 0; i <= AF_MAX; i++)
2437 		if ((rnh = nep->ne_rtable[i]) != NULL) {
2438 			(*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
2439 			free((caddr_t)rnh, M_RTABLE);
2440 			nep->ne_rtable[i] = 0;
2441 		}
2442 }
2443 
2444 int
2445 vfs_export(mp, nep, argp)
2446 	struct mount *mp;
2447 	struct netexport *nep;
2448 	struct export_args *argp;
2449 {
2450 	int error;
2451 
2452 	if (argp->ex_flags & MNT_DELEXPORT) {
2453 		if (mp->mnt_flag & MNT_EXPUBLIC) {
2454 			vfs_setpublicfs(NULL, NULL, NULL);
2455 			mp->mnt_flag &= ~MNT_EXPUBLIC;
2456 		}
2457 		vfs_free_addrlist(nep);
2458 		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
2459 	}
2460 	if (argp->ex_flags & MNT_EXPORTED) {
2461 		if (argp->ex_flags & MNT_EXPUBLIC) {
2462 			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
2463 				return (error);
2464 			mp->mnt_flag |= MNT_EXPUBLIC;
2465 		}
2466 		if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
2467 			return (error);
2468 		mp->mnt_flag |= MNT_EXPORTED;
2469 	}
2470 	return (0);
2471 }
2472 
2473 /*
2474  * Set the publicly exported filesystem (WebNFS). Currently, only
2475  * one public filesystem is possible in the spec (RFC 2054 and 2055)
2476  */
2477 int
2478 vfs_setpublicfs(mp, nep, argp)
2479 	struct mount *mp;
2480 	struct netexport *nep;
2481 	struct export_args *argp;
2482 {
2483 	int error;
2484 	struct vnode *rvp;
2485 	char *cp;
2486 
2487 	/*
2488 	 * mp == NULL means invalidate the current info; the FS is
2489 	 * no longer exported.  This may be called from either vfs_export
2490 	 * or unmount, so check whether the work has already been done.
2491 	 */
2492 	if (mp == NULL) {
2493 		if (nfs_pub.np_valid) {
2494 			nfs_pub.np_valid = 0;
2495 			if (nfs_pub.np_index != NULL) {
2496 				FREE(nfs_pub.np_index, M_TEMP);
2497 				nfs_pub.np_index = NULL;
2498 			}
2499 		}
2500 		return (0);
2501 	}
2502 
2503 	/*
2504 	 * Only one allowed at a time.
2505 	 */
2506 	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
2507 		return (EBUSY);
2508 
2509 	/*
2510 	 * Get real filehandle for root of exported FS.
2511 	 */
2512 	memset((caddr_t)&nfs_pub.np_handle, 0, sizeof(nfs_pub.np_handle));
2513 	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsidx;
2514 
2515 	if ((error = VFS_ROOT(mp, &rvp)))
2516 		return (error);
2517 
2518 	if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
2519 		return (error);
2520 
2521 	vput(rvp);
2522 
2523 	/*
2524 	 * If an indexfile was specified, pull it in.
2525 	 */
2526 	if (argp->ex_indexfile != NULL) {
2527 		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
2528 		    M_WAITOK);
2529 		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
2530 		    MAXNAMLEN, (size_t *)0);
2531 		if (!error) {
2532 			/*
2533 			 * Check for illegal filenames.
2534 			 */
2535 			for (cp = nfs_pub.np_index; *cp; cp++) {
2536 				if (*cp == '/') {
2537 					error = EINVAL;
2538 					break;
2539 				}
2540 			}
2541 		}
2542 		if (error) {
2543 			FREE(nfs_pub.np_index, M_TEMP);
2544 			return (error);
2545 		}
2546 	}
2547 
2548 	nfs_pub.np_mount = mp;
2549 	nfs_pub.np_valid = 1;
2550 	return (0);
2551 }
2552 
2553 struct netcred *
2554 vfs_export_lookup(mp, nep, nam)
2555 	struct mount *mp;
2556 	struct netexport *nep;
2557 	struct mbuf *nam;
2558 {
2559 	struct netcred *np;
2560 	struct radix_node_head *rnh;
2561 	struct sockaddr *saddr;
2562 
2563 	np = NULL;
2564 	if (mp->mnt_flag & MNT_EXPORTED) {
2565 		/*
2566 		 * Lookup in the export list first.
2567 		 */
2568 		if (nam != NULL) {
2569 			saddr = mtod(nam, struct sockaddr *);
2570 			rnh = nep->ne_rtable[saddr->sa_family];
2571 			if (rnh != NULL) {
2572 				np = (struct netcred *)
2573 					(*rnh->rnh_matchaddr)((caddr_t)saddr,
2574 							      rnh);
2575 				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
2576 					np = NULL;
2577 			}
2578 		}
2579 		/*
2580 		 * If no address match, use the default if it exists.
2581 		 */
2582 		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
2583 			np = &nep->ne_defexported;
2584 	}
2585 	return (np);
2586 }
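
/*
 * Caller sketch (illustrative of how an NFS-style server consumes
 * the result; the surrounding code is hypothetical): look up the
 * client's address and deny access when nothing matches:
 *
 *	struct netcred *np;
 *
 *	np = vfs_export_lookup(mp, nep, nam);
 *	if (np == NULL)
 *		return (EACCES);	(host not exported to)
 *	(np->netc_exflags and np->netc_anon then supply the export
 *	flags and anonymous credentials for the request)
 */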
2587 
2588 /*
2589  * Do the usual access checking.
2590  * file_mode, uid and gid are from the vnode in question,
2591  * while acc_mode and cred are from the VOP_ACCESS parameter list
2592  */
2593 int
2594 vaccess(type, file_mode, uid, gid, acc_mode, cred)
2595 	enum vtype type;
2596 	mode_t file_mode;
2597 	uid_t uid;
2598 	gid_t gid;
2599 	mode_t acc_mode;
2600 	struct ucred *cred;
2601 {
2602 	mode_t mask;
2603 
2604 	/*
2605 	 * Super-user always gets read/write access, but execute access depends
2606 	 * on at least one execute bit being set.
2607 	 */
2608 	if (cred->cr_uid == 0) {
2609 		if ((acc_mode & VEXEC) && type != VDIR &&
2610 		    (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
2611 			return (EACCES);
2612 		return (0);
2613 	}
2614 
2615 	mask = 0;
2616 
2617 	/* Otherwise, check the owner. */
2618 	if (cred->cr_uid == uid) {
2619 		if (acc_mode & VEXEC)
2620 			mask |= S_IXUSR;
2621 		if (acc_mode & VREAD)
2622 			mask |= S_IRUSR;
2623 		if (acc_mode & VWRITE)
2624 			mask |= S_IWUSR;
2625 		return ((file_mode & mask) == mask ? 0 : EACCES);
2626 	}
2627 
2628 	/* Otherwise, check the groups. */
2629 	if (cred->cr_gid == gid || groupmember(gid, cred)) {
2630 		if (acc_mode & VEXEC)
2631 			mask |= S_IXGRP;
2632 		if (acc_mode & VREAD)
2633 			mask |= S_IRGRP;
2634 		if (acc_mode & VWRITE)
2635 			mask |= S_IWGRP;
2636 		return ((file_mode & mask) == mask ? 0 : EACCES);
2637 	}
2638 
2639 	/* Otherwise, check everyone else. */
2640 	if (acc_mode & VEXEC)
2641 		mask |= S_IXOTH;
2642 	if (acc_mode & VREAD)
2643 		mask |= S_IROTH;
2644 	if (acc_mode & VWRITE)
2645 		mask |= S_IWOTH;
2646 	return ((file_mode & mask) == mask ? 0 : EACCES);
2647 }
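
/*
 * Worked example (not in the original source): for a file with
 * file_mode 0750 whose group the caller belongs to, a VREAD|VEXEC
 * request builds mask = S_IRGRP|S_IXGRP = 0050, and since
 * (0750 & 0050) == 0050 the call returns 0.  Adding VWRITE would
 * set S_IWGRP as well (mask 0070), and (0750 & 0070) != 0070, so
 * the call would return EACCES.
 */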
2648 
2649 /*
2650  * Unmount all file systems.
2651  * We traverse the list in reverse order under the assumption that doing so
2652  * will avoid needing to worry about dependencies.
2653  */
2654 void
2655 vfs_unmountall(p)
2656 	struct proc *p;
2657 {
2658 	struct mount *mp, *nmp;
2659 	int allerror, error;
2660 
2661 	printf("unmounting file systems...");
2662 	for (allerror = 0,
2663 	     mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
2664 		nmp = mp->mnt_list.cqe_prev;
2665 #ifdef DEBUG
2666 		printf("\nunmounting %s (%s)...",
2667 		    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
2668 #endif
2669 		/*
2670 		 * XXX Freeze syncer.  Must do this before locking the
2671 		 * mount point.  See dounmount() for details.
2672 		 */
2673 		lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
2674 		if (vfs_busy(mp, 0, 0)) {
2675 			lockmgr(&syncer_lock, LK_RELEASE, NULL);
2676 			continue;
2677 		}
2678 		if ((error = dounmount(mp, MNT_FORCE, p)) != 0) {
2679 			printf("unmount of %s failed with error %d\n",
2680 			    mp->mnt_stat.f_mntonname, error);
2681 			allerror = 1;
2682 		}
2683 	}
2684 	printf(" done\n");
2685 	if (allerror)
2686 		printf("WARNING: some file systems would not unmount\n");
2687 }
2688 
2689 extern struct simplelock bqueue_slock; /* XXX */
2690 
2691 /*
2692  * Sync and unmount file systems before shutting down.
2693  */
2694 void
2695 vfs_shutdown()
2696 {
2697 	struct lwp *l = curlwp;
2698 	struct proc *p;
2699 
2700 	/* XXX we're certainly not running in proc0's context! */
2701 	if (l == NULL || (p = l->l_proc) == NULL)
2702 		p = &proc0;
2703 
2704 	printf("syncing disks... ");
2705 
2706 	/* remove user process from run queue */
2707 	suspendsched();
2708 	(void) spl0();
2709 
2710 	/* avoid coming back this way again if we panic. */
2711 	doing_shutdown = 1;
2712 
2713 	sys_sync(l, NULL, NULL);
2714 
2715 	/* Wait for sync to finish. */
2716 	if (buf_syncwait() != 0) {
2717 #if defined(DDB) && defined(DEBUG_HALT_BUSY)
2718 		Debugger();
2719 #endif
2720 		printf("giving up\n");
2721 		return;
2722 	} else
2723 		printf("done\n");
2724 
2725 	/*
2726 	 * If we've panic'd, don't make the situation potentially
2727 	 * worse by unmounting the file systems.
2728 	 */
2729 	if (panicstr != NULL)
2730 		return;
2731 
2732 	/* Release inodes held by texts before update. */
2733 #ifdef notdef
2734 	vnshutdown();
2735 #endif
2736 	/* Unmount file systems. */
2737 	vfs_unmountall(p);
2738 }
2739 
2740 /*
2741  * Mount the root file system.  If the operator didn't specify a
2742  * file system to use, try all possible file systems until one
2743  * succeeds.
2744  */
2745 int
2746 vfs_mountroot()
2747 {
2748 	struct vfsops *v;
2749 
2750 	if (root_device == NULL)
2751 		panic("vfs_mountroot: root device unknown");
2752 
2753 	switch (root_device->dv_class) {
2754 	case DV_IFNET:
2755 		if (rootdev != NODEV)
2756 			panic("vfs_mountroot: rootdev set for DV_IFNET "
2757 			    "(0x%08x -> %d,%d)", rootdev,
2758 			    major(rootdev), minor(rootdev));
2759 		break;
2760 
2761 	case DV_DISK:
2762 		if (rootdev == NODEV)
2763 			panic("vfs_mountroot: rootdev not set for DV_DISK");
2764 		break;
2765 
2766 	default:
2767 		printf("%s: inappropriate for root file system\n",
2768 		    root_device->dv_xname);
2769 		return (ENODEV);
2770 	}
2771 
2772 	/*
2773 	 * If user specified a file system, use it.
2774 	 */
2775 	if (mountroot != NULL)
2776 		return ((*mountroot)());
2777 
2778 	/*
2779 	 * Try each file system currently configured into the kernel.
2780 	 */
2781 	LIST_FOREACH(v, &vfs_list, vfs_list) {
2782 		if (v->vfs_mountroot == NULL)
2783 			continue;
2784 #ifdef DEBUG
2785 		aprint_normal("mountroot: trying %s...\n", v->vfs_name);
2786 #endif
2787 		if ((*v->vfs_mountroot)() == 0) {
2788 			aprint_normal("root file system type: %s\n",
2789 			    v->vfs_name);
2790 			break;
2791 		}
2792 	}
2793 
2794 	if (v == NULL) {
2795 		printf("no file system for %s", root_device->dv_xname);
2796 		if (root_device->dv_class == DV_DISK)
2797 			printf(" (dev 0x%x)", rootdev);
2798 		printf("\n");
2799 		return (EFTYPE);
2800 	}
2801 	return (0);
2802 }
2803 
2804 /*
2805  * Given a file system name, look up the vfsops for that
2806  * file system, or return NULL if file system isn't present
2807  * in the kernel.
2808  */
2809 struct vfsops *
2810 vfs_getopsbyname(name)
2811 	const char *name;
2812 {
2813 	struct vfsops *v;
2814 
2815 	LIST_FOREACH(v, &vfs_list, vfs_list) {
2816 		if (strcmp(v->vfs_name, name) == 0)
2817 			break;
2818 	}
2819 
2820 	return (v);
2821 }
2822 
2823 /*
2824  * Establish a file system and initialize it.
2825  */
2826 int
2827 vfs_attach(vfs)
2828 	struct vfsops *vfs;
2829 {
2830 	struct vfsops *v;
2831 	int error = 0;
2832 
2834 	/*
2835 	 * Make sure this file system doesn't already exist.
2836 	 */
2837 	LIST_FOREACH(v, &vfs_list, vfs_list) {
2838 		if (strcmp(vfs->vfs_name, v->vfs_name) == 0) {
2839 			error = EEXIST;
2840 			goto out;
2841 		}
2842 	}
2843 
2844 	/*
2845 	 * Initialize the vnode operations for this file system.
2846 	 */
2847 	vfs_opv_init(vfs->vfs_opv_descs);
2848 
2849 	/*
2850 	 * Now initialize the file system itself.
2851 	 */
2852 	(*vfs->vfs_init)();
2853 
2854 	/*
2855 	 * ...and link it into the kernel's list.
2856 	 */
2857 	LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list);
2858 
2859 	/*
2860 	 * Sanity: make sure the reference count is 0.
2861 	 */
2862 	vfs->vfs_refcount = 0;
2863 
2864  out:
2865 	return (error);
2866 }
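
/*
 * Example (a hypothetical file system, shown only for illustration):
 * a file system module registers its vfsops with vfs_attach() and
 * withdraws them with vfs_detach() below, which refuses with EBUSY
 * while any mount still holds a reference:
 *
 *	extern struct vfsops examplefs_vfsops;	(hypothetical)
 *
 *	error = vfs_attach(&examplefs_vfsops);
 *	...
 *	error = vfs_detach(&examplefs_vfsops);
 */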
2867 
2868 /*
2869  * Remove a file system from the kernel.
2870  */
2871 int
2872 vfs_detach(vfs)
2873 	struct vfsops *vfs;
2874 {
2875 	struct vfsops *v;
2876 
2877 	/*
2878 	 * Make sure no one is using the filesystem.
2879 	 */
2880 	if (vfs->vfs_refcount != 0)
2881 		return (EBUSY);
2882 
2883 	/*
2884 	 * ...and remove it from the kernel's list.
2885 	 */
2886 	LIST_FOREACH(v, &vfs_list, vfs_list) {
2887 		if (v == vfs) {
2888 			LIST_REMOVE(v, vfs_list);
2889 			break;
2890 		}
2891 	}
2892 
2893 	if (v == NULL)
2894 		return (ESRCH);
2895 
2896 	/*
2897 	 * Now run the file system-specific cleanups.
2898 	 */
2899 	(*vfs->vfs_done)();
2900 
2901 	/*
2902 	 * Free the vnode operations vector.
2903 	 */
2904 	vfs_opv_free(vfs->vfs_opv_descs);
2905 	return (0);
2906 }
2907 
2908 void
2909 vfs_reinit(void)
2910 {
2911 	struct vfsops *vfs;
2912 
2913 	LIST_FOREACH(vfs, &vfs_list, vfs_list) {
2914 		if (vfs->vfs_reinit) {
2915 			(*vfs->vfs_reinit)();
2916 		}
2917 	}
2918 }
2919 
2920 /*
2921  * Request a filesystem to suspend write operations.
2922  */
2923 int
2924 vfs_write_suspend(struct mount *mp, int slpflag, int slptimeo)
2925 {
2926 	struct proc *p = curproc;	/* XXX */
2927 	int error;
2928 
2929 	while ((mp->mnt_iflag & IMNT_SUSPEND)) {
2930 		if (slptimeo < 0)
2931 			return EWOULDBLOCK;
2932 		error = tsleep(&mp->mnt_flag, slpflag, "suspwt1", slptimeo);
2933 		if (error)
2934 			return error;
2935 	}
2936 	mp->mnt_iflag |= IMNT_SUSPEND;
2937 
2938 	simple_lock(&mp->mnt_slock);
2939 	if (mp->mnt_writeopcountupper > 0)
2940 		ltsleep(&mp->mnt_writeopcountupper, PUSER - 1, "suspwt",
2941 			0, &mp->mnt_slock);
2942 	simple_unlock(&mp->mnt_slock);
2943 
2944 	error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p);
2945 	if (error) {
2946 		vfs_write_resume(mp);
2947 		return error;
2948 	}
2949 	mp->mnt_iflag |= IMNT_SUSPENDLOW;
2950 
2951 	simple_lock(&mp->mnt_slock);
2952 	if (mp->mnt_writeopcountlower > 0)
2953 		ltsleep(&mp->mnt_writeopcountlower, PUSER - 1, "suspwt",
2954 			0, &mp->mnt_slock);
2955 	mp->mnt_iflag |= IMNT_SUSPENDED;
2956 	simple_unlock(&mp->mnt_slock);
2957 
2958 	return 0;
2959 }
2960 
2961 /*
2962  * Request a filesystem to resume write operations.
2963  */
2964 void
2965 vfs_write_resume(struct mount *mp)
2966 {
2967 
2968 	if ((mp->mnt_iflag & IMNT_SUSPEND) == 0)
2969 		return;
2970 	mp->mnt_iflag &= ~(IMNT_SUSPEND | IMNT_SUSPENDLOW | IMNT_SUSPENDED);
2971 	wakeup(&mp->mnt_flag);
2972 }
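
/*
 * The two routines above are meant to bracket work that needs a
 * quiescent file system.  A sketch of the expected calling pattern
 * (illustrative only):
 *
 *	error = vfs_write_suspend(mp, PUSER | PCATCH, 0);
 *	if (error == 0) {
 *		(operate on the file system with all writes drained,
 *		e.g. take a snapshot)
 *		vfs_write_resume(mp);
 *	}
 */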
2973 
2974 void
2975 copy_statvfs_info(struct statvfs *sbp, const struct mount *mp)
2976 {
2977 	const struct statvfs *mbp;
2978 
2979 	if (sbp == (mbp = &mp->mnt_stat))
2980 		return;
2981 
2982 	(void)memcpy(&sbp->f_fsidx, &mbp->f_fsidx, sizeof(sbp->f_fsidx));
2983 	sbp->f_fsid = mbp->f_fsid;
2984 	sbp->f_owner = mbp->f_owner;
2985 	sbp->f_flag = mbp->f_flag;
2986 	sbp->f_syncwrites = mbp->f_syncwrites;
2987 	sbp->f_asyncwrites = mbp->f_asyncwrites;
2988 	sbp->f_syncreads = mbp->f_syncreads;
2989 	sbp->f_asyncreads = mbp->f_asyncreads;
2990 	(void)memcpy(sbp->f_spare, mbp->f_spare, sizeof(mbp->f_spare));
2991 	(void)memcpy(sbp->f_fstypename, mbp->f_fstypename,
2992 	    sizeof(sbp->f_fstypename));
2993 	(void)memcpy(sbp->f_mntonname, mbp->f_mntonname,
2994 	    sizeof(sbp->f_mntonname));
2995 	(void)memcpy(sbp->f_mntfromname, mbp->f_mntfromname,
2996 	    sizeof(sbp->f_mntfromname));
2997 	sbp->f_namemax = mbp->f_namemax;
2998 }
2999 
3000 int
3001 set_statvfs_info(const char *onp, int ukon, const char *fromp, int ukfrom,
3002     struct mount *mp, struct proc *p)
3003 {
3004 	int error;
3005 	size_t size;
3006 	struct statvfs *sfs = &mp->mnt_stat;
3007 	int (*fun)(const void *, void *, size_t, size_t *);
3008 
3009 	(void)strncpy(mp->mnt_stat.f_fstypename, mp->mnt_op->vfs_name,
3010 	    sizeof(mp->mnt_stat.f_fstypename));
3011 
3012 	if (onp) {
3013 		struct cwdinfo *cwdi = p->p_cwdi;
3014 		fun = (ukon == UIO_SYSSPACE) ? copystr : copyinstr;
3015 		if (cwdi->cwdi_rdir != NULL) {
3016 			size_t len;
3017 			char *bp;
3018 			char *path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
3019 
3020 			if (!path) /* XXX can't happen with M_WAITOK */
3021 				return ENOMEM;
3022 
3023 			bp = path + MAXPATHLEN;
3024 			*--bp = '\0';
3025 			error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp,
3026 			    path, MAXPATHLEN / 2, 0, p);
3027 			if (error) {
3028 				free(path, M_TEMP);
3029 				return error;
3030 			}
3031 
3032 			len = strlen(bp);
3033 			if (len > sizeof(sfs->f_mntonname) - 1)
3034 				len = sizeof(sfs->f_mntonname) - 1;
3035 			(void)strncpy(sfs->f_mntonname, bp, len);
3036 			free(path, M_TEMP);
3037 
3038 			if (len < sizeof(sfs->f_mntonname) - 1) {
3039 				error = (*fun)(onp, &sfs->f_mntonname[len],
3040 				    sizeof(sfs->f_mntonname) - len - 1, &size);
3041 				if (error)
3042 					return error;
3043 				size += len;
3044 			} else {
3045 				size = len;
3046 			}
3047 		} else {
3048 			error = (*fun)(onp, &sfs->f_mntonname,
3049 			    sizeof(sfs->f_mntonname) - 1, &size);
3050 			if (error)
3051 				return error;
3052 		}
3053 		(void)memset(sfs->f_mntonname + size, 0,
3054 		    sizeof(sfs->f_mntonname) - size);
3055 	}
3056 
3057 	if (fromp) {
3058 		fun = (ukfrom == UIO_SYSSPACE) ? copystr : copyinstr;
3059 		error = (*fun)(fromp, sfs->f_mntfromname,
3060 		    sizeof(sfs->f_mntfromname) - 1, &size);
3061 		if (error)
3062 			return error;
3063 		(void)memset(sfs->f_mntfromname + size, 0,
3064 		    sizeof(sfs->f_mntfromname) - size);
3065 	}
3066 	return 0;
3067 }
3068 
3069 #ifdef DDB
3070 const char buf_flagbits[] =
3071 	"\20\1AGE\2NEEDCOMMIT\3ASYNC\4BAD\5BUSY\6SCANNED\7CALL\10DELWRI"
3072 	"\11DIRTY\12DONE\13EINTR\14ERROR\15GATHERED\16INVAL\17LOCKED\20NOCACHE"
3073 	"\21ORDERED\22CACHE\23PHYS\24RAW\25READ\26TAPE\30WANTED"
3074 	"\32XXX\33VFLUSH";
3075 
3076 void
3077 vfs_buf_print(bp, full, pr)
3078 	struct buf *bp;
3079 	int full;
3080 	void (*pr)(const char *, ...);
3081 {
3082 	char buf[1024];
3083 
3084 	(*pr)("  vp %p lblkno 0x%"PRIx64" blkno 0x%"PRIx64" dev 0x%x\n",
3085 		  bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_dev);
3086 
3087 	bitmask_snprintf(bp->b_flags, buf_flagbits, buf, sizeof(buf));
3088 	(*pr)("  error %d flags 0x%s\n", bp->b_error, buf);
3089 
3090 	(*pr)("  bufsize 0x%lx bcount 0x%lx resid 0x%lx\n",
3091 		  bp->b_bufsize, bp->b_bcount, bp->b_resid);
3092 	(*pr)("  data %p saveaddr %p dep %p\n",
3093 		  bp->b_data, bp->b_saveaddr, LIST_FIRST(&bp->b_dep));
3094 	(*pr)("  iodone %p\n", bp->b_iodone);
3095 }
3096 
3097 
3098 const char vnode_flagbits[] =
3099 	"\20\1ROOT\2TEXT\3SYSTEM\4ISTTY\5EXECMAP"
3100 	"\11XLOCK\12XWANT\13BWAIT\14ALIASED"
3101 	"\15DIROP\16LAYER\17ONWORKLIST\20DIRTY";
3102 
3103 const char * const vnode_tags[] = {
3104 	"VT_NON",
3105 	"VT_UFS",
3106 	"VT_NFS",
3107 	"VT_MFS",
3108 	"VT_MSDOSFS",
3109 	"VT_LFS",
3110 	"VT_LOFS",
3111 	"VT_FDESC",
3112 	"VT_PORTAL",
3113 	"VT_NULL",
3114 	"VT_UMAP",
3115 	"VT_KERNFS",
3116 	"VT_PROCFS",
3117 	"VT_AFS",
3118 	"VT_ISOFS",
3119 	"VT_UNION",
3120 	"VT_ADOSFS",
3121 	"VT_EXT2FS",
3122 	"VT_CODA",
3123 	"VT_FILECORE",
3124 	"VT_NTFS",
3125 	"VT_VFS",
3126 	"VT_OVERLAY",
3127 	"VT_SMBFS"
3128 };
3129 
3130 void
3131 vfs_vnode_print(vp, full, pr)
3132 	struct vnode *vp;
3133 	int full;
3134 	void (*pr)(const char *, ...);
3135 {
3136 	char buf[256];
3137 	const char *vtype, *vtag;
3138 
3139 	uvm_object_printit(&vp->v_uobj, full, pr);
3140 	bitmask_snprintf(vp->v_flag, vnode_flagbits, buf, sizeof(buf));
3141 	(*pr)("\nVNODE flags %s\n", buf);
3142 	(*pr)("mp %p numoutput %d size 0x%"PRIx64"\n",
3143 	      vp->v_mount, vp->v_numoutput, vp->v_size);
3144 
3145 	(*pr)("data %p usecount %d writecount %ld holdcnt %ld numoutput %d\n",
3146 	      vp->v_data, vp->v_usecount, vp->v_writecount,
3147 	      vp->v_holdcnt, vp->v_numoutput);
3148 
3149 	vtype = (vp->v_type >= 0 &&
3150 		 vp->v_type < sizeof(vnode_types) / sizeof(vnode_types[0])) ?
3151 		vnode_types[vp->v_type] : "UNKNOWN";
3152 	vtag = (vp->v_tag >= 0 &&
3153 		vp->v_tag < sizeof(vnode_tags) / sizeof(vnode_tags[0])) ?
3154 		vnode_tags[vp->v_tag] : "UNKNOWN";
3155 
3156 	(*pr)("type %s(%d) tag %s(%d) mount %p typedata %p\n",
3157 	      vtype, vp->v_type, vtag, vp->v_tag,
3158 	      vp->v_mount, vp->v_mountedhere);
3159 
3160 	if (full) {
3161 		struct buf *bp;
3162 
3163 		(*pr)("clean bufs:\n");
3164 		LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
3165 			(*pr)(" bp %p\n", bp);
3166 			vfs_buf_print(bp, full, pr);
3167 		}
3168 
3169 		(*pr)("dirty bufs:\n");
3170 		LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
3171 			(*pr)(" bp %p\n", bp);
3172 			vfs_buf_print(bp, full, pr);
3173 		}
3174 	}
3175 }
3176 
3177 void
3178 vfs_mount_print(mp, full, pr)
3179 	struct mount *mp;
3180 	int full;
3181 	void (*pr)(const char *, ...);
3182 {
3183 	char sbuf[256];
3184 
3185 	(*pr)("vnodecovered = %p syncer = %p data = %p\n",
3186 			mp->mnt_vnodecovered,mp->mnt_syncer,mp->mnt_data);
3187 
3188 	(*pr)("fs_bshift %d dev_bshift = %d\n",
3189 			mp->mnt_fs_bshift,mp->mnt_dev_bshift);
3190 
3191 	bitmask_snprintf(mp->mnt_flag, __MNT_FLAG_BITS, sbuf, sizeof(sbuf));
3192 	(*pr)("flag = %s\n", sbuf);
3193 
3194 	bitmask_snprintf(mp->mnt_iflag, __IMNT_FLAG_BITS, sbuf, sizeof(sbuf));
3195 	(*pr)("iflag = %s\n", sbuf);
3196 
3197 	/* XXX use lockmgr_printinfo */
3198 	if (mp->mnt_lock.lk_sharecount)
3199 		(*pr)(" lock type %s: SHARED (count %d)", mp->mnt_lock.lk_wmesg,
3200 		    mp->mnt_lock.lk_sharecount);
3201 	else if (mp->mnt_lock.lk_flags & LK_HAVE_EXCL) {
3202 		(*pr)(" lock type %s: EXCL (count %d) by ",
3203 		    mp->mnt_lock.lk_wmesg, mp->mnt_lock.lk_exclusivecount);
3204 		if (mp->mnt_lock.lk_flags & LK_SPIN)
3205 			(*pr)("processor %lu", mp->mnt_lock.lk_cpu);
3206 		else
3207 			(*pr)("pid %d.%d", mp->mnt_lock.lk_lockholder,
3208 			    mp->mnt_lock.lk_locklwp);
3209 	} else
3210 		(*pr)(" not locked");
3211 	if ((mp->mnt_lock.lk_flags & LK_SPIN) == 0 && mp->mnt_lock.lk_waitcount > 0)
3212 		(*pr)(" with %d pending", mp->mnt_lock.lk_waitcount);
3213 
3214 	(*pr)("\n");
3215 
3216 	if (mp->mnt_unmounter) {
3217 		(*pr)("unmounter pid = %d ",mp->mnt_unmounter->p_pid);
3218 	}
3219 	(*pr)("wcnt = %d, writeopcountupper = %d, writeopcountlower = %d\n",
3220 		mp->mnt_wcnt,mp->mnt_writeopcountupper,mp->mnt_writeopcountlower);
3221 
3222 	(*pr)("statvfs cache:\n");
3223 	(*pr)("\tbsize = %lu\n",mp->mnt_stat.f_bsize);
3224 	(*pr)("\tfrsize = %lu\n",mp->mnt_stat.f_frsize);
3225 	(*pr)("\tiosize = %lu\n",mp->mnt_stat.f_iosize);
3226 
3227 	(*pr)("\tblocks = %"PRIu64"\n",mp->mnt_stat.f_blocks);
3228 	(*pr)("\tbfree = %"PRIu64"\n",mp->mnt_stat.f_bfree);
3229 	(*pr)("\tbavail = %"PRIu64"\n",mp->mnt_stat.f_bavail);
3230 	(*pr)("\tbresvd = %"PRIu64"\n",mp->mnt_stat.f_bresvd);
3231 
3232 	(*pr)("\tfiles = %"PRIu64"\n",mp->mnt_stat.f_files);
3233 	(*pr)("\tffree = %"PRIu64"\n",mp->mnt_stat.f_ffree);
3234 	(*pr)("\tfavail = %"PRIu64"\n",mp->mnt_stat.f_favail);
3235 	(*pr)("\tfresvd = %"PRIu64"\n",mp->mnt_stat.f_fresvd);
3236 
3237 	(*pr)("\tf_fsidx = { 0x%"PRIx32", 0x%"PRIx32" }\n",
3238 			mp->mnt_stat.f_fsidx.__fsid_val[0],
3239 			mp->mnt_stat.f_fsidx.__fsid_val[1]);
3240 
3241 	(*pr)("\towner = %"PRIu32"\n",mp->mnt_stat.f_owner);
3242 	(*pr)("\tnamemax = %lu\n",mp->mnt_stat.f_namemax);
3243 
3244 	bitmask_snprintf(mp->mnt_stat.f_flag, __MNT_FLAG_BITS, sbuf,
3245 	    sizeof(sbuf));
3246 	(*pr)("\tflag = %s\n",sbuf);
3247 	(*pr)("\tsyncwrites = %" PRIu64 "\n",mp->mnt_stat.f_syncwrites);
3248 	(*pr)("\tasyncwrites = %" PRIu64 "\n",mp->mnt_stat.f_asyncwrites);
3249 	(*pr)("\tsyncreads = %" PRIu64 "\n",mp->mnt_stat.f_syncreads);
3250 	(*pr)("\tasyncreads = %" PRIu64 "\n",mp->mnt_stat.f_asyncreads);
3251 	(*pr)("\tfstypename = %s\n",mp->mnt_stat.f_fstypename);
3252 	(*pr)("\tmntonname = %s\n",mp->mnt_stat.f_mntonname);
3253 	(*pr)("\tmntfromname = %s\n",mp->mnt_stat.f_mntfromname);
3254 
3255 	{
3256 		int cnt = 0;
3257 		struct vnode *vp;
3258 		(*pr)("locked vnodes =");
3259 		/* XXX would take mountlist lock, except ddb may not have context */
3260 		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
3261 			if (VOP_ISLOCKED(vp)) {
3262 				if ((++cnt % 6) == 0) {
3263 					(*pr)(" %p,\n\t", vp);
3264 				} else {
3265 					(*pr)(" %p,", vp);
3266 				}
3267 			}
3268 		}
3269 		(*pr)("\n");
3270 	}
3271 
3272 	if (full) {
3273 		int cnt = 0;
3274 		struct vnode *vp;
3275 		(*pr)("all vnodes =");
3276 		/* XXX would take mountlist lock, except ddb may not have context */
3277 		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
3278 			if (!LIST_NEXT(vp, v_mntvnodes)) {
3279 				(*pr)(" %p", vp);
3280 			} else if ((++cnt % 6) == 0) {
3281 				(*pr)(" %p,\n\t", vp);
3282 			} else {
3283 				(*pr)(" %p,", vp);
3284 			}
3285 		}
3286 		(*pr)("\n");
3287 	}
3288 }
3289 
3290 #endif
3291