/*	$NetBSD: vfs_subr.c,v 1.226 2004/05/25 04:44:44 atatat Exp $	*/

/*-
 * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * External virtual filesystem routines
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.226 2004/05/25 04:44:44 atatat Exp $");

#include "opt_inet.h"
#include "opt_ddb.h"
#include "opt_compat_netbsd.h"
#include "opt_compat_43.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/event.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/sa.h>
#include <sys/syscallargs.h>
#include <sys/device.h>
#include <sys/dirent.h>
#include <sys/filedesc.h>

#include <miscfs/specfs/specdev.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/syncfs/syncfs.h>

#include <netinet/in.h>

#include <uvm/uvm.h>
#include <uvm/uvm_ddb.h>

#include <sys/sysctl.h>

const enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
const int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */

extern int dovfsusermount;	/* 1 => permit any user to mount filesystems */

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
}
/* TAILQ_HEAD(freelst, vnode) vnode_free_list =	vnode free list (in vnode.h) */
struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);

struct mntlist mountlist =			/* mounted filesystem list */
    CIRCLEQ_HEAD_INITIALIZER(mountlist);
struct vfs_list_head vfs_list =			/* vfs list */
    LIST_HEAD_INITIALIZER(vfs_list);

struct nfs_public nfs_pub;			/* publicly exported FS */

struct simplelock mountlist_slock = SIMPLELOCK_INITIALIZER;
static struct simplelock mntid_slock = SIMPLELOCK_INITIALIZER;
struct simplelock mntvnode_slock = SIMPLELOCK_INITIALIZER;
struct simplelock vnode_free_list_slock = SIMPLELOCK_INITIALIZER;
struct simplelock spechash_slock = SIMPLELOCK_INITIALIZER;

/* XXX - gross; single global lock to protect v_numoutput */
struct simplelock global_v_numoutput_slock = SIMPLELOCK_INITIALIZER;

/*
 * These define the root filesystem and device.
 */
struct mount *rootfs;
struct vnode *rootvnode;
struct device *root_device;			/* root device */

POOL_INIT(vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
    &pool_allocator_nointr);

MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes");

/*
 * Local declarations.
 */
void insmntque(struct vnode *, struct mount *);
int getdevvp(dev_t, struct vnode **, enum vtype);
void vgoneall(struct vnode *);

void vclean(struct vnode *, int, struct proc *);

static int vfs_hang_addrlist(struct mount *, struct netexport *,
			     struct export_args *);
static int vfs_free_netcred(struct radix_node *, void *);
static void vfs_free_addrlist(struct netexport *);
static struct vnode *getcleanvnode(struct proc *);

#ifdef DEBUG
void printlockedvnodes(void);
#endif

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	/*
	 * Initialize the filesystem syncer.
	 */
	vn_initialize_syncerd();
}

int
vfs_drainvnodes(long target, struct proc *p)
{

	simple_lock(&vnode_free_list_slock);
	while (numvnodes > target) {
		struct vnode *vp;

		vp = getcleanvnode(p);
		if (vp == NULL)
			return EBUSY; /* give up */
		pool_put(&vnode_pool, vp);
		simple_lock(&vnode_free_list_slock);
		numvnodes--;
	}
	simple_unlock(&vnode_free_list_slock);

	return 0;
}
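
/*
 * Example (illustrative only, not part of the original file): a minimal
 * sketch of a caller shrinking the vnode pool with vfs_drainvnodes(),
 * as a sysctl handler might when kern.maxvnodes is lowered.  The
 * function name and how the new value arrives are hypothetical; only
 * vfs_drainvnodes() itself is the interface defined above.
 */
#if 0
static int
example_set_maxvnodes(long newmax, struct proc *p)
{
	int error;

	if (newmax < desiredvnodes) {
		/* Shrink the vnode pool down to the new target. */
		error = vfs_drainvnodes(newmax, p);
		if (error)
			return error;	/* EBUSY: could not free enough */
	}
	desiredvnodes = newmax;
	return 0;
}
#endif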

/*
 * grab a vnode from freelist and clean it.
 */
struct vnode *
getcleanvnode(p)
	struct proc *p;
{
	struct vnode *vp;
	struct mount *mp;
	struct freelst *listhd;

	LOCK_ASSERT(simple_lock_held(&vnode_free_list_slock));
	if ((vp = TAILQ_FIRST(listhd = &vnode_free_list)) == NULL)
		vp = TAILQ_FIRST(listhd = &vnode_hold_list);
	for (; vp != NULL; vp = TAILQ_NEXT(vp, v_freelist)) {
		if (!simple_lock_try(&vp->v_interlock))
			continue;
		if ((vp->v_flag & VLAYER) == 0) {
			if (vn_start_write(vp, &mp, V_NOWAIT) == 0)
				break;
		} else if (VOP_ISLOCKED(vp) == 0) {
			if (vn_start_write(vp, &mp, V_NOWAIT) == 0)
				break;
		}
		mp = NULL;
		simple_unlock(&vp->v_interlock);
	}

	if (vp == NULLVP) {
		simple_unlock(&vnode_free_list_slock);
		return NULLVP;
	}

	if (vp->v_usecount)
		panic("free vnode isn't, vp %p", vp);
	TAILQ_REMOVE(listhd, vp, v_freelist);
	/* see comment on why 0xdeadb is set at end of vgone (below) */
	vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
	simple_unlock(&vnode_free_list_slock);
	vp->v_lease = NULL;

	if (vp->v_type != VBAD)
		vgonel(vp, p);
	else
		simple_unlock(&vp->v_interlock);
	vn_finished_write(mp, 0);
#ifdef DIAGNOSTIC
	if (vp->v_data || vp->v_uobj.uo_npages ||
	    TAILQ_FIRST(&vp->v_uobj.memq))
		panic("cleaned vnode isn't, vp %p", vp);
	if (vp->v_numoutput)
		panic("clean vnode has pending I/O's, vp %p", vp);
#endif
	KASSERT((vp->v_flag & VONWORKLST) == 0);

	return vp;
}

/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting. Interlock is not released on failure.
 */
int
vfs_busy(mp, flags, interlkp)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
{
	int lkflags;

	while (mp->mnt_iflag & IMNT_UNMOUNT) {
		int gone, n;

		if (flags & LK_NOWAIT)
			return (ENOENT);
		if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL
		    && mp->mnt_unmounter == curproc)
			return (EDEADLK);
		if (interlkp)
			simple_unlock(interlkp);
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 */
		simple_lock(&mp->mnt_slock);
		mp->mnt_wcnt++;
		ltsleep((caddr_t)mp, PVFS, "vfs_busy", 0, &mp->mnt_slock);
		n = --mp->mnt_wcnt;
		simple_unlock(&mp->mnt_slock);
		gone = mp->mnt_iflag & IMNT_GONE;

		if (n == 0)
			wakeup(&mp->mnt_wcnt);
		if (interlkp)
			simple_lock(interlkp);
		if (gone)
			return (ENOENT);
	}
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem.
 */
void
vfs_unbusy(mp)
	struct mount *mp;
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
}
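
/*
 * Example (illustrative only): the canonical pattern for walking the
 * mount list under vfs_busy()/vfs_unbusy(), mirroring what
 * printlockedvnodes() below does.  "example_visit" is a hypothetical
 * per-mount callback.
 */
#if 0
static void
example_foreach_mount(void (*example_visit)(struct mount *))
{
	struct mount *mp, *nmp;

	simple_lock(&mountlist_slock);
	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
	     mp = nmp) {
		/* Skip mounts that are being unmounted right now. */
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = CIRCLEQ_NEXT(mp, mnt_list);
			continue;
		}
		(*example_visit)(mp);
		simple_lock(&mountlist_slock);
		nmp = CIRCLEQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);
}
#endif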

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct vfsops *vfsp = NULL;
	struct mount *mp;

	LIST_FOREACH(vfsp, &vfs_list, vfs_list)
		if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
			break;

	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	memset((char *)mp, 0, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	simple_lock_init(&mp->mnt_slock);
	(void)vfs_busy(mp, LK_NOWAIT, 0);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_op = vfsp;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfs_refcount++;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	struct mount *mp;

	simple_lock(&mountlist_slock);
	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
		if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] &&
		    mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *)0);
}

/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;
	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = makefstype(mp->mnt_op->vfs_name);
	mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0);
	mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype;
	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid);
	tfsid.__fsid_val[1] = mtype;
	if (!CIRCLEQ_EMPTY(&mountlist)) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.__fsid_val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0];
	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Make a 'unique' number from a mount type name.
 */
long
makefstype(type)
	const char *type;
{
	long rv;

	for (rv = 0; *type; type++) {
		rv <<= 2;
		rv ^= *type;
	}
	return rv;
}
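
/*
 * Worked example (illustrative only): makefstype("ffs") folds each
 * character into the hash as ((0 << 2 ^ 'f') << 2 ^ 'f') << 2 ^ 's',
 * i.e. 0x66 -> 0x1fe -> 0x78b, so the "unique" type number is 0x78b.
 * Distinct names can collide, which is why the result is only used as
 * a hint when constructing fsids in vfs_getnewfsid() above.
 */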


/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	struct vattr *vap;
{

	vap->va_type = VNON;

	/*
	 * Assign individually so that it is safe even if size and
	 * sign of each member are varied.
	 */
	vap->va_mode = VNOVAL;
	vap->va_nlink = VNOVAL;
	vap->va_uid = VNOVAL;
	vap->va_gid = VNOVAL;
	vap->va_fsid = VNOVAL;
	vap->va_fileid = VNOVAL;
	vap->va_size = VNOVAL;
	vap->va_blocksize = VNOVAL;
	vap->va_atime.tv_sec =
	    vap->va_mtime.tv_sec =
	    vap->va_ctime.tv_sec =
	    vap->va_birthtime.tv_sec = VNOVAL;
	vap->va_atime.tv_nsec =
	    vap->va_mtime.tv_nsec =
	    vap->va_ctime.tv_nsec =
	    vap->va_birthtime.tv_nsec = VNOVAL;
	vap->va_gen = VNOVAL;
	vap->va_flags = VNOVAL;
	vap->va_rdev = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_vaflags = 0;
}
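
/*
 * Example (illustrative only): the usual vattr_null() idiom.  A caller
 * clears the whole structure to VNOVAL and then sets just the fields it
 * wants changed, so the filesystem's VOP_SETATTR acts only on those.
 * "example_truncate" is a hypothetical helper.
 */
#if 0
static int
example_truncate(struct vnode *vp, struct ucred *cred, struct proc *p)
{
	struct vattr va;

	vattr_null(&va);
	va.va_size = 0;			/* only the size is being set */
	return VOP_SETATTR(vp, &va, cred, p);
}
#endif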

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p)(void *);
long numvnodes;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops)(void *);
	struct vnode **vpp;
{
	extern struct uvm_pagerops uvm_vnodeops;
	struct uvm_object *uobj;
	struct proc *p = curproc;	/* XXX */
	static int toggle;
	struct vnode *vp;
	int error = 0, tryalloc;

 try_again:
	if (mp) {
		/*
		 * Mark filesystem busy while we're creating a vnode.
		 * If unmount is in progress, this will wait; if the
		 * unmount succeeds (only if umount -f), this will
		 * return an error.  If the unmount fails, we'll keep
		 * going afterwards.
		 * (This puts the per-mount vnode list logically under
		 * the protection of the vfs_busy lock).
		 */
		error = vfs_busy(mp, LK_RECURSEFAIL, 0);
		if (error && error != EDEADLK)
			return error;
	}

	/*
	 * We must choose whether to allocate a new vnode or recycle an
	 * existing one. The criterion for allocating a new one is that
	 * the total number of vnodes is less than the number desired or
	 * there are no vnodes on either free list. Generally we only
	 * want to recycle vnodes that have no buffers associated with
	 * them, so we look first on the vnode_free_list. If it is empty,
	 * we next consider vnodes with referencing buffers on the
	 * vnode_hold_list. The toggle ensures that half the time we
	 * will use a buffer from the vnode_hold_list, and half the time
	 * we will allocate a new one unless the list has grown to twice
	 * the desired size. We are reluctant to recycle vnodes from the
	 * vnode_hold_list because we will lose the identity of all its
	 * referencing buffers.
	 */

	vp = NULL;

	simple_lock(&vnode_free_list_slock);

	toggle ^= 1;
	if (numvnodes > 2 * desiredvnodes)
		toggle = 0;

	tryalloc = numvnodes < desiredvnodes ||
	    (TAILQ_FIRST(&vnode_free_list) == NULL &&
	     (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));

	if (tryalloc &&
	    (vp = pool_get(&vnode_pool, PR_NOWAIT)) != NULL) {
		numvnodes++;
		simple_unlock(&vnode_free_list_slock);
		memset(vp, 0, sizeof(*vp));
		simple_lock_init(&vp->v_interlock);
		uobj = &vp->v_uobj;
		uobj->pgops = &uvm_vnodeops;
		TAILQ_INIT(&uobj->memq);
		/*
		 * done by memset() above.
		 *	uobj->uo_npages = 0;
		 *	LIST_INIT(&vp->v_nclist);
		 *	LIST_INIT(&vp->v_dnclist);
		 */
	} else {
		vp = getcleanvnode(p);
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULLVP) {
			if (mp && error != EDEADLK)
				vfs_unbusy(mp);
			if (tryalloc) {
				printf("WARNING: unable to allocate new "
				    "vnode, retrying...\n");
				(void) tsleep(&lbolt, PRIBIO, "newvn", hz);
				goto try_again;
			}
			tablefull("vnode", "increase kern.maxvnodes or NVNODE");
			*vpp = 0;
			return (ENFILE);
		}
		vp->v_flag = 0;
		vp->v_socket = NULL;
#ifdef VERIFIED_EXEC
		vp->fp_status = FINGERPRINT_INVALID;
#endif
	}
	vp->v_type = VNON;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	KASSERT(LIST_EMPTY(&vp->v_nclist));
	KASSERT(LIST_EMPTY(&vp->v_dnclist));
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	simple_lock_init(&vp->v_uobj.vmobjlock);

	/*
	 * initialize uvm_object within vnode.
	 */

	uobj = &vp->v_uobj;
	KASSERT(uobj->pgops == &uvm_vnodeops);
	KASSERT(uobj->uo_npages == 0);
	KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
	vp->v_size = VSIZENOTSET;

	if (mp && error != EDEADLK)
		vfs_unbusy(mp);
	return (0);
}
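
/*
 * Example (illustrative only): a compressed sketch of how a
 * filesystem's VFS_VGET typically uses getnewvnode(), including the
 * ungetnewvnode() push-back on a lookup race described below.
 * "example_node", "example_vnodeop_p" and "example_hash_insert" are
 * hypothetical; a real filesystem also passes its own vtag instead of
 * VT_NON.
 */
#if 0
static int
example_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
{
	struct vnode *vp;
	struct example_node *np;
	int error;

	error = getnewvnode(VT_NON, mp, example_vnodeop_p, &vp);
	if (error) {
		*vpp = NULL;
		return error;
	}
	np = malloc(sizeof(*np), M_TEMP, M_WAITOK);
	vp->v_data = np;
	if (example_hash_insert(vp, ino)) {
		/* Lost a race: another thread set this inode up first. */
		vp->v_data = NULL;
		free(np, M_TEMP);
		ungetnewvnode(vp);
		return EEXIST;
	}
	*vpp = vp;
	return 0;
}
#endif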

/*
 * This is really just the reverse of getnewvnode(). Needed for
 * VFS_VGET functions who may need to push back a vnode in case
 * of a locking race.
 */
void
ungetnewvnode(vp)
	struct vnode *vp;
{
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 1)
		panic("ungetnewvnode: busy vnode");
#endif
	vp->v_usecount--;
	insmntque(vp, NULL);
	vp->v_type = VBAD;

	simple_lock(&vp->v_interlock);
	/*
	 * Insert at head of LRU list
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_HEAD(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	simple_unlock(&vp->v_interlock);
}

/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
	struct vnode *vp;
	struct mount *mp;
{

#ifdef DIAGNOSTIC
	if ((mp != NULL) &&
	    (mp->mnt_iflag & IMNT_UNMOUNT) &&
	    !(mp->mnt_flag & MNT_SOFTDEP) &&
	    vp->v_tag != VT_VFS) {
		panic("insmntque into dying filesystem");
	}
#endif

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	struct buf *bp;
{
	struct vnode *vp;

	if ((vp = bp->b_vp) != NULL) {
		/* XXX global lock hack
		 * can't use v_interlock here since this is called
		 * in interrupt context from biodone().
		 */
		simple_lock(&global_v_numoutput_slock);
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput, vp %p", vp);
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
		simple_unlock(&global_v_numoutput_slock);
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	struct buf *bp, *nbp;
	int s, error;
	int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO |
		(flags & V_SAVE ? PGO_CLEANIT : 0);

	/* XXXUBC this doesn't look at flags or slp* */
	simple_lock(&vp->v_interlock);
	error = VOP_PUTPAGES(vp, 0, 0, flushflags);
	if (error) {
		return error;
	}

	if (flags & V_SAVE) {
		error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0, p);
		if (error)
		        return (error);
#ifdef DIAGNOSTIC
		s = splbio();
		if (vp->v_numoutput > 0 || !LIST_EMPTY(&vp->v_dirtyblkhd))
		        panic("vinvalbuf: dirty bufs, vp %p", vp);
		splx(s);
#endif
	}

	s = splbio();

restart:
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = ltsleep((caddr_t)bp,
				    slpflag | (PRIBIO + 1) | PNORELOCK,
				    "vinvalbuf", slptimeo, &bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = ltsleep((caddr_t)bp,
				    slpflag | (PRIBIO + 1) | PNORELOCK,
				    "vinvalbuf", slptimeo, &bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		/*
		 * XXX Since there are no node locks for NFS, I believe
		 * there is a slight chance that a delayed write will
		 * occur while sleeping just above, so check for it.
		 */
		if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
#ifdef DEBUG
			printf("buffer still DELWRI\n");
#endif
			bp->b_flags |= B_BUSY | B_VFLUSH;
			simple_unlock(&bp->b_interlock);
			VOP_BWRITE(bp);
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

#ifdef DIAGNOSTIC
	if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
		panic("vinvalbuf: flush failed, vp %p", vp);
#endif

	splx(s);

	return (0);
}
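
/*
 * Example (illustrative only): a typical vinvalbuf() call, as a
 * filesystem makes when unmounting and it must write back and discard
 * everything cached on its block device vnode.  "devvp" is assumed to
 * be locked by the caller, per the comment above; the helper name is
 * hypothetical.
 */
#if 0
static int
example_flush_devvp(struct vnode *devvp, struct ucred *cred, struct proc *p)
{
	/* V_SAVE: fsync dirty data first; 0, 0: default sleep behavior. */
	return vinvalbuf(devvp, V_SAVE, cred, p, 0, 0);
}
#endif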

/*
 * Destroy any in core blocks past the truncation length.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 */
int
vtruncbuf(vp, lbn, slpflag, slptimeo)
	struct vnode *vp;
	daddr_t lbn;
	int slpflag, slptimeo;
{
	struct buf *bp, *nbp;
	int s, error;
	voff_t off;

	off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
	simple_lock(&vp->v_interlock);
	error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
	if (error) {
		return error;
	}

	s = splbio();

restart:
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK,
			    "vtruncbuf", slptimeo, &bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK,
			    "vtruncbuf", slptimeo, &bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

	splx(s);

	return (0);
}

void
vflushbuf(vp, sync)
	struct vnode *vp;
	int sync;
{
	struct buf *bp, *nbp;
	int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0);
	int s;

	simple_lock(&vp->v_interlock);
	(void) VOP_PUTPAGES(vp, 0, 0, flags);

loop:
	s = splbio();
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		simple_lock(&bp->b_interlock);
		if ((bp->b_flags & B_BUSY)) {
			simple_unlock(&bp->b_interlock);
			continue;
		}
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty, bp %p", bp);
		bp->b_flags |= B_BUSY | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	simple_lock(&global_v_numoutput_slock);
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		ltsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0,
			&global_v_numoutput_slock);
	}
	simple_unlock(&global_v_numoutput_slock);
	splx(s);
	if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	struct vnode *vp;
	struct buf *bp;
{
	int s;

	if (bp->b_vp)
		panic("bgetvp: not free, bp %p", bp);
	VHOLD(vp);
	s = splbio();
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
	splx(s);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	struct buf *bp;
{
	struct vnode *vp;
	int s;

	if (bp->b_vp == NULL)
		panic("brelvp: vp NULL, bp %p", bp);

	s = splbio();
	vp = bp->b_vp;
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
		bufremvn(bp);

	if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_flag & VONWORKLST) &&
	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
		vp->v_flag &= ~VONWORKLST;
		LIST_REMOVE(vp, v_synclist);
	}

	bp->b_vp = NULL;
	HOLDRELE(vp);
	splx(s);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 *
 * This function must be called at splbio().
 */
void
reassignbuf(bp, newvp)
	struct buf *bp;
	struct vnode *newvp;
{
	struct buflists *listheadp;
	int delay;

	/*
	 * Delete from old vnode list, if on one.
	 */
	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if ((bp->b_flags & B_DELWRI) == 0) {
		listheadp = &newvp->v_cleanblkhd;
		if (TAILQ_EMPTY(&newvp->v_uobj.memq) &&
		    (newvp->v_flag & VONWORKLST) &&
		    LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) {
			newvp->v_flag &= ~VONWORKLST;
			LIST_REMOVE(newvp, v_synclist);
		}
	} else {
		listheadp = &newvp->v_dirtyblkhd;
		if ((newvp->v_flag & VONWORKLST) == 0) {
			switch (newvp->v_type) {
			case VDIR:
				delay = dirdelay;
				break;
			case VBLK:
				if (newvp->v_specmountpoint != NULL) {
					delay = metadelay;
					break;
				}
				/* fall through */
			default:
				delay = filedelay;
				break;
			}
			if (!newvp->v_mount ||
			    (newvp->v_mount->mnt_flag & MNT_ASYNC) == 0)
				vn_syncer_add_to_worklist(newvp, delay);
		}
	}
	bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VBLK));
}

/*
 * Create a vnode for a character device.
 * Used for kernfs and some console handling.
 */
int
cdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VCHR));
}
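
/*
 * Example (illustrative only): how early root setup might obtain a
 * vnode for the root block device with bdevvp().  "rootdev" is the
 * usual kernel global; the function name and where the result is kept
 * are hypothetical.
 */
#if 0
static void
example_setup_root(void)
{
	struct vnode *example_rootvp;

	if (bdevvp(rootdev, &example_rootvp))
		panic("example: can't set up root device vnode");
}
#endif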

/*
 * Create a vnode for a device.
 * Used by bdevvp (block device) for root file system etc.,
 * and by cdevvp (character device) for console and kernfs.
 */
int
getdevvp(dev, vpp, type)
	dev_t dev;
	struct vnode **vpp;
	enum vtype type;
{
	struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (0);
	}
	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = type;
	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;       /* XXX */
	struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_NOWAIT)) {
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
			sizeof(struct specinfo), M_VNODE, M_NOWAIT);
		/* XXX Erg. */
		if (nvp->v_specinfo == NULL) {
			simple_unlock(&spechash_slock);
			uvm_wait("checkalias");
			goto loop;
		}

		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specmountpoint = NULL;
		simple_unlock(&spechash_slock);
		nvp->v_speclockf = NULL;
		simple_lock_init(&nvp->v_spec_cow_slock);
		SLIST_INIT(&nvp->v_spec_cow_head);
		nvp->v_spec_cow_req = 0;
		nvp->v_spec_cow_count = 0;

		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. If the vnode lock bit is set the
 * vnode is being eliminated in vgone. In that case, we can not
 * grab the vnode, so the process is awakened when the transition is
 * completed, and an error returned to indicate that the vnode is no
 * longer usable (possibly having been changed to a new file system type).
 */
int
vget(vp, flags)
	struct vnode *vp;
	int flags;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */

	if ((flags & LK_INTERLOCK) == 0)
		simple_lock(&vp->v_interlock);
	if (vp->v_flag & VXLOCK) {
		if (flags & LK_NOWAIT) {
			simple_unlock(&vp->v_interlock);
			return EBUSY;
		}
		vp->v_flag |= VXWANT;
		ltsleep(vp, PINOD|PNORELOCK, "vget", 0, &vp->v_interlock);
		return (ENOENT);
	}
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		else
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vget", vp);
		panic("vget: usecount overflow, vp %p", vp);
	}
#endif
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags | LK_INTERLOCK))) {
			/*
			 * must expand vrele here because we do not want
			 * to call VOP_INACTIVE if the reference count
			 * drops back to zero since it was never really
			 * active. We must remove it from the free list
			 * before sleeping so that multiple processes do
			 * not try to recycle it.
			 */
			simple_lock(&vp->v_interlock);
			vp->v_usecount--;
			if (vp->v_usecount > 0) {
				simple_unlock(&vp->v_interlock);
				return (error);
			}
			/*
			 * insert at tail of LRU list
			 */
			simple_lock(&vnode_free_list_slock);
			if (vp->v_holdcnt > 0)
				TAILQ_INSERT_TAIL(&vnode_hold_list, vp,
				    v_freelist);
			else
				TAILQ_INSERT_TAIL(&vnode_free_list, vp,
				    v_freelist);
			simple_unlock(&vnode_free_list_slock);
			simple_unlock(&vp->v_interlock);
		}
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}
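
/*
 * Example (illustrative only): the hash-lookup retry loop most
 * filesystems build around vget().  If the vnode is being cleaned out
 * (VXLOCK), vget() sleeps and then fails, and the caller simply
 * retries the lookup.  "example_hash_lookup" is hypothetical.
 */
#if 0
static struct vnode *
example_lookup(ino_t ino)
{
	struct vnode *vp;

 loop:
	vp = example_hash_lookup(ino);
	if (vp != NULL && vget(vp, LK_EXCLUSIVE))
		goto loop;	/* it was reclaimed under us; retry */
	return vp;		/* NULL, or a locked, referenced vnode */
}
#endif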

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		VOP_UNLOCK(vp, 0);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vput: bad ref count", vp);
		panic("vput: ref cnt");
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	if (vp->v_flag & VEXECMAP) {
		uvmexp.execpages -= vp->v_uobj.uo_npages;
		uvmexp.filepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~(VTEXT|VEXECMAP);
	simple_unlock(&vp->v_interlock);
	VOP_INACTIVE(vp, p);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt vp %p", vp);
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	if (vp->v_flag & VEXECMAP) {
		uvmexp.execpages -= vp->v_uobj.uo_npages;
		uvmexp.filepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~(VTEXT|VEXECMAP);
	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
		VOP_INACTIVE(vp, p);
}
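
/*
 * Example (illustrative only): choosing between vput() and vrele().
 * vput() is for a vnode the caller holds locked, vrele() for one held
 * only by reference; both drop one reference.
 */
#if 0
	/* Got vp locked, e.g. from vget(vp, LK_EXCLUSIVE) or namei(): */
	vput(vp);		/* unlock + release in one call */

	/* Got vp unlocked, e.g. via VREF() or a saved pointer: */
	vrele(vp);		/* release only */
#endif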

#ifdef DIAGNOSTIC
/*
 * Page or buffer structure gets a reference.
 */
void
vholdl(vp)
	struct vnode *vp;
{

	/*
	 * If it is on the freelist and the hold count is currently
	 * zero, move it to the hold list. The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
	 */
	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_holdcnt++;
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrelel(vp)
	struct vnode *vp;
{

	if (vp->v_holdcnt <= 0)
		panic("holdrelel: holdcnt vp %p", vp);
	vp->v_holdcnt--;

	/*
	 * If it is on the holdlist and the hold count drops to
	 * zero, move it to the free list. The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
	 */

	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
}

/*
 * Vnode reference.
 */
void
vref(vp)
	struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount <= 0)
		panic("vref used where vget required, vp %p", vp);
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vref", vp);
		panic("vref: usecount overflow, vp %p", vp);
	}
#endif
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If FORCECLOSE is not specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If FORCECLOSE is specified, detach any active vnodes
 * that are found.
 *
 * If WRITECLOSE is set, only flush out regular file vnodes open for
 * writing.
 *
 * SKIPSYSTEM causes any vnodes marked V_SYSTEM to be skipped.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = LIST_NEXT(vp, v_mntvnodes);
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DEBUG
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 */
void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	struct mount *mp;
	int active;

	LOCK_ASSERT(simple_lock_held(&vp->v_interlock));

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */

	if ((active = vp->v_usecount) != 0) {
		vp->v_usecount++;
#ifdef DIAGNOSTIC
		if (vp->v_usecount == 0) {
			vprint("vclean", vp);
			panic("vclean: usecount overflow");
		}
#endif
	}

	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock, vp %p", vp);
	vp->v_flag |= VXLOCK;
	if (vp->v_flag & VEXECMAP) {
		uvmexp.execpages -= vp->v_uobj.uo_npages;
		uvmexp.filepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~(VTEXT|VEXECMAP);

	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK);

	/*
	 * Clean out any cached data associated with the vnode.
	 */
	if (flags & DOCLOSE) {
		int error;
		vn_start_write(vp, &mp, V_WAIT | V_LOWER);
		error = vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
		vn_finished_write(mp, V_LOWER);
		if (error)
			error = vinvalbuf(vp, 0, NOCRED, p, 0, 0);
		KASSERT(error == 0);
		KASSERT((vp->v_flag & VONWORKLST) == 0);
	}
	LOCK_ASSERT(!simple_lock_held(&vp->v_interlock));

	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim, vp %p", vp);
	if (active) {
		/*
		 * Inline copy of vrele() since VOP_INACTIVE
		 * has already been called.
		 */
		simple_lock(&vp->v_interlock);
		if (--vp->v_usecount <= 0) {
#ifdef DIAGNOSTIC
			if (vp->v_usecount < 0 || vp->v_writecount != 0) {
				vprint("vclean: bad ref count", vp);
				panic("vclean: ref cnt");
			}
#endif
			/*
			 * Insert at tail of LRU list.
			 */

			simple_unlock(&vp->v_interlock);
			simple_lock(&vnode_free_list_slock);
#ifdef DIAGNOSTIC
			if (vp->v_holdcnt > 0)
				panic("vclean: not clean, vp %p", vp);
#endif
			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
			simple_unlock(&vnode_free_list_slock);
		} else
			simple_unlock(&vp->v_interlock);
	}

	KASSERT(vp->v_uobj.uo_npages == 0);
	cache_purge(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	simple_lock(&vp->v_interlock);
	VN_KNOTE(vp, NOTE_REVOKE);	/* FreeBSD has this in vn_pollgone() */
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		simple_unlock(&vp->v_interlock);
		wakeup((caddr_t)vp);
	} else
		simple_unlock(&vp->v_interlock);
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp)
			simple_unlock(inter_lkp);
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}
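
/*
 * Example (illustrative only): the classic vrecycle() caller, modeled
 * on how a UFS-style VOP_INACTIVE discards a vnode whose file has been
 * removed.  "example_node" and its "deleted" flag are hypothetical.
 */
#if 0
static int
example_inactive(struct vnode *vp, struct proc *p)
{
	struct example_node *np = vp->v_data;

	VOP_UNLOCK(vp, 0);
	if (np->deleted) {
		/* No links left: push the vnode straight to reuse. */
		vrecycle(vp, NULL, p);
	}
	return 0;
}
#endif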

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	struct vnode *vq;
	struct vnode *vx;

	LOCK_ASSERT(simple_lock_held(&vp->v_interlock));

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */

	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		ltsleep(vp, PINOD | PNORELOCK, "vgone", 0, &vp->v_interlock);
		return;
	}

	/*
	 * Clean out the filesystem specific data.
	 */

	vclean(vp, DOCLOSE, p);
	KASSERT((vp->v_flag & VONWORKLST) == 0);

	/*
	 * Delete from old mount point vnode list, if on one.
	 */

	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);

	/*
	 * If special device, remove it from the special device alias
	 * list, if it is on one.
	 */

	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (vp->v_hashchain != NULL) {
			if (*vp->v_hashchain == vp) {
				*vp->v_hashchain = vp->v_specnext;
			} else {
				for (vq = *vp->v_hashchain; vq;
							vq = vq->v_specnext) {
					if (vq->v_specnext != vp)
						continue;
					vq->v_specnext = vp->v_specnext;
					break;
				}
				if (vq == NULL)
					panic("missing bdev");
			}
			if (vp->v_flag & VALIASED) {
				vx = NULL;
				for (vq = *vp->v_hashchain; vq;
							vq = vq->v_specnext) {
					if (vq->v_rdev != vp->v_rdev ||
					    vq->v_type != vp->v_type)
						continue;
					if (vx)
						break;
					vx = vq;
				}
				if (vx == NULL)
					panic("missing alias");
				if (vq == NULL)
					vx->v_flag &= ~VALIASED;
				vp->v_flag &= ~VALIASED;
			}
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}

	/*
	 * The test of the back pointer and the reference count of
	 * zero is because it will be removed from the free list by
	 * getcleanvnode, but will not have its reference count
	 * incremented until after calling vgone. If the reference
	 * count were incremented first, vgone would (incorrectly)
	 * try to close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */

	vp->v_type = VBAD;
	if (vp->v_usecount == 0) {
		boolean_t dofree;

		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			panic("vgonel: not clean, vp %p", vp);
		/*
		 * If it isn't on the freelist, we're called by getcleanvnode
		 * and the vnode is being re-used; otherwise, we'll free it.
		 */
		dofree = vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb;
		if (dofree) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			numvnodes--;
		}
		simple_unlock(&vnode_free_list_slock);
		if (dofree)
			pool_put(&vnode_pool, vp);
	}
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Revoke all the vnodes corresponding to the specified minor number
 * range (endpoints inclusive) of the specified major.
 */
void
vdevgone(maj, minl, minh, type)
	int maj, minl, minh;
	enum vtype type;
{
	struct vnode *vp;
	int mn;

	for (mn = minl; mn <= minh; mn++)
		if (vfinddev(makedev(maj, mn), type, &vp))
			VOP_REVOKE(vp, REVOKEALL);
}
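
/*
 * Example (illustrative only): a device driver's detach routine
 * revoking every vnode that refers to its units via vdevgone().
 * "example_cmajor", "example_bmajor" and the unit range are
 * hypothetical.
 */
#if 0
	/* Revoke minors 0..7 of both device types for this major. */
	vdevgone(example_cmajor, 0, 7, VCHR);
	vdevgone(example_bmajor, 0, 7, VBLK);
#endif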

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp &&
		    (vq->v_flag & VXLOCK) == 0) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}
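
/*
 * Example (illustrative only): the common device last-close test built
 * on vcount().  A driver only shuts the hardware down when the final
 * reference to any alias of the device is closed.
 */
#if 0
	if (vcount(vp) > 1)
		return 0;	/* other aliases still hold it open */
	/* ... last close: really shut the device down ... */
#endif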
1920 
1921 /*
1922  * Print out a description of a vnode.
1923  */
1924 const char * const vnode_types[] = {
1925 	"VNON",
1926 	"VREG",
1927 	"VDIR",
1928 	"VBLK",
1929 	"VCHR",
1930 	"VLNK",
1931 	"VSOCK",
1932 	"VFIFO",
1933 	"VBAD"
1934 };
1935 
1936 void
1937 vprint(label, vp)
1938 	char *label;
1939 	struct vnode *vp;
1940 {
1941 	char buf[96];
1942 
1943 	if (label != NULL)
1944 		printf("%s: ", label);
1945 	printf("tag %d type %s, usecount %d, writecount %ld, refcount %ld,",
1946 	    vp->v_tag, vnode_types[vp->v_type],
1947 	    vp->v_usecount, vp->v_writecount, vp->v_holdcnt);
1948 	buf[0] = '\0';
1949 	if (vp->v_flag & VROOT)
1950 		strlcat(buf, "|VROOT", sizeof(buf));
1951 	if (vp->v_flag & VTEXT)
1952 		strlcat(buf, "|VTEXT", sizeof(buf));
1953 	if (vp->v_flag & VEXECMAP)
1954 		strlcat(buf, "|VEXECMAP", sizeof(buf));
1955 	if (vp->v_flag & VSYSTEM)
1956 		strlcat(buf, "|VSYSTEM", sizeof(buf));
1957 	if (vp->v_flag & VXLOCK)
1958 		strlcat(buf, "|VXLOCK", sizeof(buf));
1959 	if (vp->v_flag & VXWANT)
1960 		strlcat(buf, "|VXWANT", sizeof(buf));
1961 	if (vp->v_flag & VBWAIT)
1962 		strlcat(buf, "|VBWAIT", sizeof(buf));
1963 	if (vp->v_flag & VALIASED)
1964 		strlcat(buf, "|VALIASED", sizeof(buf));
1965 	if (buf[0] != '\0')
1966 		printf(" flags (%s)", &buf[1]);
1967 	if (vp->v_data == NULL) {
1968 		printf("\n");
1969 	} else {
1970 		printf("\n\t");
1971 		VOP_PRINT(vp);
1972 	}
1973 }
1974 
1975 #ifdef DEBUG
1976 /*
1977  * List all of the locked vnodes in the system.
1978  * Called when debugging the kernel.
1979  */
1980 void
1981 printlockedvnodes()
1982 {
1983 	struct mount *mp, *nmp;
1984 	struct vnode *vp;
1985 
1986 	printf("Locked vnodes\n");
1987 	simple_lock(&mountlist_slock);
1988 	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
1989 	     mp = nmp) {
1990 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
1991 			nmp = CIRCLEQ_NEXT(mp, mnt_list);
1992 			continue;
1993 		}
1994 		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
1995 			if (VOP_ISLOCKED(vp))
1996 				vprint(NULL, vp);
1997 		}
1998 		simple_lock(&mountlist_slock);
1999 		nmp = CIRCLEQ_NEXT(mp, mnt_list);
2000 		vfs_unbusy(mp);
2001 	}
2002 	simple_unlock(&mountlist_slock);
2003 }
2004 #endif
2005 
2006 /*
2007  * sysctl helper routine for vfs.generic.conf lookups.
2008  */
2009 #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
2010 static int
2011 sysctl_vfs_generic_conf(SYSCTLFN_ARGS)
2012 {
	struct vfsconf vfc;
	extern const char * const mountcompatnames[];
	extern int nmountcompatnames;
2016 	struct sysctlnode node;
2017 	struct vfsops *vfsp;
2018 	u_int vfsnum;
2019 
2020 	if (namelen != 1)
2021 		return (ENOTDIR);
2022 	vfsnum = name[0];
2023 	if (vfsnum >= nmountcompatnames ||
2024 	    mountcompatnames[vfsnum] == NULL)
2025 		return (EOPNOTSUPP);
2026 	vfsp = vfs_getopsbyname(mountcompatnames[vfsnum]);
2027 	if (vfsp == NULL)
2028 		return (EOPNOTSUPP);
2029 
2030 	vfc.vfc_vfsops = vfsp;
2031 	strncpy(vfc.vfc_name, vfsp->vfs_name, MFSNAMELEN);
2032 	vfc.vfc_typenum = vfsnum;
2033 	vfc.vfc_refcount = vfsp->vfs_refcount;
2034 	vfc.vfc_flags = 0;
2035 	vfc.vfc_mountroot = vfsp->vfs_mountroot;
2036 	vfc.vfc_next = NULL;
2037 
2038 	node = *rnode;
2039 	node.sysctl_data = &vfc;
2040 	return (sysctl_lookup(SYSCTLFN_CALL(&node)));
2041 }
2042 #endif
2043 
2044 /*
2045  * sysctl helper routine to return list of supported fstypes
2046  */
2047 static int
2048 sysctl_vfs_generic_fstypes(SYSCTLFN_ARGS)
2049 {
2050 	char buf[MFSNAMELEN];
2051 	char *where = oldp;
2052 	struct vfsops *v;
2053 	size_t needed, left, slen;
2054 	int error, first;
2055 
2056 	if (newp != NULL)
2057 		return (EPERM);
2058 	if (namelen != 0)
2059 		return (EINVAL);
2060 
2061 	first = 1;
2062 	error = 0;
2063 	needed = 0;
2064 	left = *oldlenp;
2065 
2066 	LIST_FOREACH(v, &vfs_list, vfs_list) {
2067 		if (where == NULL)
2068 			needed += strlen(v->vfs_name) + 1;
2069 		else {
2070 			memset(buf, 0, sizeof(buf));
2071 			if (first) {
2072 				strncpy(buf, v->vfs_name, sizeof(buf));
2073 				first = 0;
2074 			} else {
2075 				buf[0] = ' ';
2076 				strncpy(buf + 1, v->vfs_name, sizeof(buf) - 1);
2077 			}
2078 			buf[sizeof(buf)-1] = '\0';
2079 			slen = strlen(buf);
2080 			if (left < slen + 1)
2081 				break;
2082 			/* +1 to copy out the trailing NUL byte */
2083 			error = copyout(buf, where, slen + 1);
2084 			if (error)
2085 				break;
2086 			where += slen;
2087 			needed += slen;
2088 			left -= slen;
2089 		}
2090 	}
2091 	*oldlenp = needed;
2092 	return (error);
2093 }
2094 
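/*
 * Illustrative only: the list built above can be read from userland
 * with sysctlbyname(3); it arrives as a single space-separated string.
 * This is a standalone userland sketch, not kernel code.
 */
#if 0	/* example sketch, not compiled */
#include <sys/param.h>
#include <sys/sysctl.h>
#include <err.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	size_t len;
	char *names;

	/* First call sizes the buffer, second fills it. */
	if (sysctlbyname("vfs.generic.fstypes", NULL, &len, NULL, 0) == -1)
		err(1, "sysctlbyname");
	if ((names = malloc(len)) == NULL)
		err(1, "malloc");
	if (sysctlbyname("vfs.generic.fstypes", names, &len, NULL, 0) == -1)
		err(1, "sysctlbyname");
	printf("%s\n", names);
	free(names);
	return (0);
}
#endif
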
2095 /*
2096  * Top level filesystem related information gathering.
2097  */
2098 SYSCTL_SETUP(sysctl_vfs_setup, "sysctl vfs subtree setup")
2099 {
2100 #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
2101 	extern int nmountcompatnames;
2102 #endif
2103 
2104 	sysctl_createv(clog, 0, NULL, NULL,
2105 		       CTLFLAG_PERMANENT,
2106 		       CTLTYPE_NODE, "vfs", NULL,
2107 		       NULL, 0, NULL, 0,
2108 		       CTL_VFS, CTL_EOL);
2109 	sysctl_createv(clog, 0, NULL, NULL,
2110 		       CTLFLAG_PERMANENT,
2111 		       CTLTYPE_NODE, "generic",
2112 		       SYSCTL_DESCR("Non-specific vfs related information"),
2113 		       NULL, 0, NULL, 0,
2114 		       CTL_VFS, VFS_GENERIC, CTL_EOL);
2115 
2116 #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
2117 	sysctl_createv(clog, 0, NULL, NULL,
2118 		       CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
2119 		       CTLTYPE_INT, "maxtypenum",
2120 		       SYSCTL_DESCR("Highest valid filesystem type number"),
2121 		       NULL, nmountcompatnames, NULL, 0,
2122 		       CTL_VFS, VFS_GENERIC, VFS_MAXTYPENUM, CTL_EOL);
2123 #endif
2124 	sysctl_createv(clog, 0, NULL, NULL,
2125 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2126 		       CTLTYPE_INT, "usermount",
2127 		       SYSCTL_DESCR("Whether unprivileged users may mount "
2128 				    "filesystems"),
2129 		       NULL, 0, &dovfsusermount, 0,
2130 		       CTL_VFS, VFS_GENERIC, VFS_USERMOUNT, CTL_EOL);
2131 	sysctl_createv(clog, 0, NULL, NULL,
2132 		       CTLFLAG_PERMANENT,
2133 		       CTLTYPE_STRING, "fstypes",
2134 		       SYSCTL_DESCR("List of file systems present"),
2135 		       sysctl_vfs_generic_fstypes, 0, NULL, 0,
2136 		       CTL_VFS, VFS_GENERIC, CTL_CREATE, CTL_EOL);
2137 #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
2138 	sysctl_createv(clog, 0, NULL, NULL,
2139 		       CTLFLAG_PERMANENT,
2140 		       CTLTYPE_STRUCT, "conf",
2141 		       SYSCTL_DESCR("Filesystem configuration information"),
2142 		       sysctl_vfs_generic_conf, 0, NULL,
2143 		       sizeof(struct vfsconf),
2144 		       CTL_VFS, VFS_GENERIC, VFS_CONF, CTL_EOL);
2145 #endif
2146 }
2147 
2148 
2149 int kinfo_vdebug = 1;
2150 int kinfo_vgetfailed;
2151 #define KINFO_VNODESLOP	10
2152 /*
2153  * Dump vnode list (via sysctl).
2154  * Copyout address of vnode followed by vnode.
2155  */
2156 /* ARGSUSED */
2157 int
2158 sysctl_kern_vnode(SYSCTLFN_ARGS)
2159 {
2160 	char *where = oldp;
2161 	size_t *sizep = oldlenp;
2162 	struct mount *mp, *nmp;
2163 	struct vnode *nvp, *vp;
2164 	char *bp = where, *savebp;
2165 	char *ewhere;
2166 	int error;
2167 
2168 	if (namelen != 0)
2169 		return (EOPNOTSUPP);
2170 	if (newp != NULL)
2171 		return (EPERM);
2172 
2173 #define VPTRSZ	sizeof(struct vnode *)
2174 #define VNODESZ	sizeof(struct vnode)
2175 	if (where == NULL) {
2176 		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
2177 		return (0);
2178 	}
2179 	ewhere = where + *sizep;
2180 
2181 	simple_lock(&mountlist_slock);
2182 	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
2183 	     mp = nmp) {
2184 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
2185 			nmp = CIRCLEQ_NEXT(mp, mnt_list);
2186 			continue;
2187 		}
2188 		savebp = bp;
2189 again:
2190 		simple_lock(&mntvnode_slock);
2191 		for (vp = LIST_FIRST(&mp->mnt_vnodelist);
2192 		     vp != NULL;
2193 		     vp = nvp) {
2194 			/*
2195 			 * Check that the vp is still associated with
2196 			 * this filesystem.  RACE: could have been
2197 			 * recycled onto the same filesystem.
2198 			 */
2199 			if (vp->v_mount != mp) {
2200 				simple_unlock(&mntvnode_slock);
2201 				if (kinfo_vdebug)
2202 					printf("kinfo: vp changed\n");
2203 				bp = savebp;
2204 				goto again;
2205 			}
2206 			nvp = LIST_NEXT(vp, v_mntvnodes);
2207 			if (bp + VPTRSZ + VNODESZ > ewhere) {
2208 				simple_unlock(&mntvnode_slock);
2209 				*sizep = bp - where;
2210 				return (ENOMEM);
2211 			}
2212 			simple_unlock(&mntvnode_slock);
2213 			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
2214 			   (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
2215 				return (error);
2216 			bp += VPTRSZ + VNODESZ;
2217 			simple_lock(&mntvnode_slock);
2218 		}
2219 		simple_unlock(&mntvnode_slock);
2220 		simple_lock(&mountlist_slock);
2221 		nmp = CIRCLEQ_NEXT(mp, mnt_list);
2222 		vfs_unbusy(mp);
2223 	}
2224 	simple_unlock(&mountlist_slock);
2225 
2226 	*sizep = bp - where;
2227 	return (0);
2228 }
2229 
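/*
 * Illustrative only: a userland consumer of the kern.vnode dump.
 * Each record is a kernel vnode address followed by a copy of the
 * vnode, as produced above; the required size can grow between the
 * two sysctl calls, which real consumers must tolerate.
 */
#if 0	/* example sketch, not compiled */
#include <sys/param.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <err.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	int mib[2] = { CTL_KERN, KERN_VNODE };
	size_t len, reclen = sizeof(struct vnode *) + sizeof(struct vnode);
	char *buf, *bp;

	if (sysctl(mib, 2, NULL, &len, NULL, 0) == -1)
		err(1, "sysctl");
	if ((buf = malloc(len)) == NULL)
		err(1, "malloc");
	if (sysctl(mib, 2, buf, &len, NULL, 0) == -1)
		err(1, "sysctl");
	for (bp = buf; bp + reclen <= buf + len; bp += reclen)
		printf("vnode at %p\n", *(struct vnode **)bp);
	free(buf);
	return (0);
}
#endif
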
2230 /*
2231  * Check to see if a filesystem is mounted on a block device.
2232  */
2233 int
2234 vfs_mountedon(vp)
2235 	struct vnode *vp;
2236 {
2237 	struct vnode *vq;
2238 	int error = 0;
2239 
2240 	if (vp->v_specmountpoint != NULL)
2241 		return (EBUSY);
2242 	if (vp->v_flag & VALIASED) {
2243 		simple_lock(&spechash_slock);
2244 		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
2245 			if (vq->v_rdev != vp->v_rdev ||
2246 			    vq->v_type != vp->v_type)
2247 				continue;
2248 			if (vq->v_specmountpoint != NULL) {
2249 				error = EBUSY;
2250 				break;
2251 			}
2252 		}
2253 		simple_unlock(&spechash_slock);
2254 	}
2255 	return (error);
2256 }
2257 
2258 static int
2259 sacheck(struct sockaddr *sa)
2260 {
2261 	switch (sa->sa_family) {
2262 #ifdef INET
2263 	case AF_INET: {
2264 		struct sockaddr_in *sin = (struct sockaddr_in *)sa;
2265 		char *p = (char *)sin->sin_zero;
2266 		size_t i;
2267 
2268 		if (sin->sin_len != sizeof(*sin))
2269 			return -1;
2270 		if (sin->sin_port != 0)
2271 			return -1;
2272 		for (i = 0; i < sizeof(sin->sin_zero); i++)
2273 			if (*p++ != '\0')
2274 				return -1;
2275 		return 0;
2276 	}
2277 #endif
2278 #ifdef INET6
2279 	case AF_INET6: {
2280 		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
2281 
2282 		if (sin6->sin6_len != sizeof(*sin6))
2283 			return -1;
2284 		if (sin6->sin6_port != 0)
2285 			return -1;
2286 		return 0;
2287 	}
2288 #endif
2289 	default:
2290 		return -1;
2291 	}
2292 }
2293 
2294 /*
2295  * Build hash lists of net addresses and hang them off the mount point.
 * Called by vfs_export() to set up the lists of export addresses.
2297  */
2298 static int
2299 vfs_hang_addrlist(mp, nep, argp)
2300 	struct mount *mp;
2301 	struct netexport *nep;
2302 	struct export_args *argp;
2303 {
2304 	struct netcred *np, *enp;
2305 	struct radix_node_head *rnh;
2306 	int i;
2307 	struct sockaddr *saddr, *smask = 0;
2308 	struct domain *dom;
2309 	int error;
2310 
2311 	if (argp->ex_addrlen == 0) {
2312 		if (mp->mnt_flag & MNT_DEFEXPORTED)
2313 			return (EPERM);
2314 		np = &nep->ne_defexported;
2315 		np->netc_exflags = argp->ex_flags;
2316 		crcvt(&np->netc_anon, &argp->ex_anon);
2317 		np->netc_anon.cr_ref = 1;
2318 		mp->mnt_flag |= MNT_DEFEXPORTED;
2319 		return (0);
2320 	}
2321 
2322 	if (argp->ex_addrlen > MLEN || argp->ex_masklen > MLEN)
2323 		return (EINVAL);
2324 
2325 	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
2326 	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
2327 	memset((caddr_t)np, 0, i);
2328 	saddr = (struct sockaddr *)(np + 1);
2329 	error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
2330 	if (error)
2331 		goto out;
2332 	if (saddr->sa_len > argp->ex_addrlen)
2333 		saddr->sa_len = argp->ex_addrlen;
	if (sacheck(saddr) == -1) {
		/* don't leak np on a malformed address */
		error = EINVAL;
		goto out;
	}
2336 	if (argp->ex_masklen) {
2337 		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
2338 		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
2339 		if (error)
2340 			goto out;
2341 		if (smask->sa_len > argp->ex_masklen)
2342 			smask->sa_len = argp->ex_masklen;
		if (smask->sa_family != saddr->sa_family ||
		    sacheck(smask) == -1) {
			/* again, free np via the out path */
			error = EINVAL;
			goto out;
		}
2347 	}
2348 	i = saddr->sa_family;
2349 	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * It seems silly to initialize every address family when
		 * most are never used, so attach the routing table on
		 * demand here.
		 */
2354 		for (dom = domains; dom; dom = dom->dom_next)
2355 			if (dom->dom_family == i && dom->dom_rtattach) {
2356 				dom->dom_rtattach((void **)&nep->ne_rtable[i],
2357 					dom->dom_rtoffset);
2358 				break;
2359 			}
2360 		if ((rnh = nep->ne_rtable[i]) == 0) {
2361 			error = ENOBUFS;
2362 			goto out;
2363 		}
2364 	}
2365 
2366 	enp = (struct netcred *)(*rnh->rnh_addaddr)(saddr, smask, rnh,
2367 	    np->netc_rnodes);
2368 	if (enp != np) {
2369 		if (enp == NULL) {
2370 			enp = (struct netcred *)(*rnh->rnh_lookup)(saddr,
2371 			    smask, rnh);
2372 			if (enp == NULL) {
2373 				error = EPERM;
2374 				goto out;
2375 			}
2376 		} else
2377 			enp->netc_refcnt++;
2378 
2379 		goto check;
2380 	} else
2381 		enp->netc_refcnt = 1;
2382 
2383 	np->netc_exflags = argp->ex_flags;
2384 	crcvt(&np->netc_anon, &argp->ex_anon);
2385 	np->netc_anon.cr_ref = 1;
2386 	return 0;
2387 check:
2388 	if (enp->netc_exflags != argp->ex_flags ||
2389 	    crcmp(&enp->netc_anon, &argp->ex_anon) != 0)
2390 		error = EPERM;
2391 	else
2392 		error = 0;
2393 out:
2394 	free(np, M_NETADDR);
2395 	return error;
2396 }
2397 
2398 /* ARGSUSED */
2399 static int
2400 vfs_free_netcred(rn, w)
2401 	struct radix_node *rn;
2402 	void *w;
2403 {
2404 	struct radix_node_head *rnh = (struct radix_node_head *)w;
2405 	struct netcred *np = (struct netcred *)(void *)rn;
2406 
2407 	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
2408 	if (--(np->netc_refcnt) <= 0)
2409 		free(np, M_NETADDR);
2410 	return (0);
2411 }
2412 
2413 /*
2414  * Free the net address hash lists that are hanging off the mount points.
2415  */
2416 static void
2417 vfs_free_addrlist(nep)
2418 	struct netexport *nep;
2419 {
2420 	int i;
2421 	struct radix_node_head *rnh;
2422 
2423 	for (i = 0; i <= AF_MAX; i++)
2424 		if ((rnh = nep->ne_rtable[i]) != NULL) {
2425 			(*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
2426 			free((caddr_t)rnh, M_RTABLE);
2427 			nep->ne_rtable[i] = 0;
2428 		}
2429 }
2430 
2431 int
2432 vfs_export(mp, nep, argp)
2433 	struct mount *mp;
2434 	struct netexport *nep;
2435 	struct export_args *argp;
2436 {
2437 	int error;
2438 
2439 	if (argp->ex_flags & MNT_DELEXPORT) {
2440 		if (mp->mnt_flag & MNT_EXPUBLIC) {
2441 			vfs_setpublicfs(NULL, NULL, NULL);
2442 			mp->mnt_flag &= ~MNT_EXPUBLIC;
2443 		}
2444 		vfs_free_addrlist(nep);
2445 		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
2446 	}
2447 	if (argp->ex_flags & MNT_EXPORTED) {
2448 		if (argp->ex_flags & MNT_EXPUBLIC) {
2449 			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
2450 				return (error);
2451 			mp->mnt_flag |= MNT_EXPUBLIC;
2452 		}
2453 		if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
2454 			return (error);
2455 		mp->mnt_flag |= MNT_EXPORTED;
2456 	}
2457 	return (0);
2458 }
2459 
/*
 * Set the publicly exported filesystem (WebNFS).  The specification
 * (RFC 2054 and RFC 2055) currently allows only one public filesystem.
 */
2464 int
2465 vfs_setpublicfs(mp, nep, argp)
2466 	struct mount *mp;
2467 	struct netexport *nep;
2468 	struct export_args *argp;
2469 {
2470 	int error;
2471 	struct vnode *rvp;
2472 	char *cp;
2473 
2474 	/*
2475 	 * mp == NULL -> invalidate the current info, the FS is
2476 	 * no longer exported. May be called from either vfs_export
2477 	 * or unmount, so check if it hasn't already been done.
2478 	 */
2479 	if (mp == NULL) {
2480 		if (nfs_pub.np_valid) {
2481 			nfs_pub.np_valid = 0;
2482 			if (nfs_pub.np_index != NULL) {
2483 				FREE(nfs_pub.np_index, M_TEMP);
2484 				nfs_pub.np_index = NULL;
2485 			}
2486 		}
2487 		return (0);
2488 	}
2489 
2490 	/*
2491 	 * Only one allowed at a time.
2492 	 */
2493 	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
2494 		return (EBUSY);
2495 
2496 	/*
2497 	 * Get real filehandle for root of exported FS.
2498 	 */
2499 	memset((caddr_t)&nfs_pub.np_handle, 0, sizeof(nfs_pub.np_handle));
2500 	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsidx;
2501 
2502 	if ((error = VFS_ROOT(mp, &rvp)))
2503 		return (error);
2504 
2505 	if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
2506 		return (error);
2507 
2508 	vput(rvp);
2509 
2510 	/*
2511 	 * If an indexfile was specified, pull it in.
2512 	 */
2513 	if (argp->ex_indexfile != NULL) {
2514 		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
2515 		    M_WAITOK);
2516 		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
2517 		    MAXNAMLEN, (size_t *)0);
2518 		if (!error) {
2519 			/*
2520 			 * Check for illegal filenames.
2521 			 */
2522 			for (cp = nfs_pub.np_index; *cp; cp++) {
2523 				if (*cp == '/') {
2524 					error = EINVAL;
2525 					break;
2526 				}
2527 			}
2528 		}
2529 		if (error) {
2530 			FREE(nfs_pub.np_index, M_TEMP);
2531 			return (error);
2532 		}
2533 	}
2534 
2535 	nfs_pub.np_mount = mp;
2536 	nfs_pub.np_valid = 1;
2537 	return (0);
2538 }
2539 
2540 struct netcred *
2541 vfs_export_lookup(mp, nep, nam)
2542 	struct mount *mp;
2543 	struct netexport *nep;
2544 	struct mbuf *nam;
2545 {
2546 	struct netcred *np;
2547 	struct radix_node_head *rnh;
2548 	struct sockaddr *saddr;
2549 
2550 	np = NULL;
2551 	if (mp->mnt_flag & MNT_EXPORTED) {
2552 		/*
2553 		 * Lookup in the export list first.
2554 		 */
2555 		if (nam != NULL) {
2556 			saddr = mtod(nam, struct sockaddr *);
2557 			rnh = nep->ne_rtable[saddr->sa_family];
2558 			if (rnh != NULL) {
2559 				np = (struct netcred *)
2560 					(*rnh->rnh_matchaddr)((caddr_t)saddr,
2561 							      rnh);
				if (np != NULL &&
				    (np->netc_rnodes->rn_flags & RNF_ROOT))
					np = NULL;
2564 			}
2565 		}
2566 		/*
2567 		 * If no address match, use the default if it exists.
2568 		 */
		if (np == NULL && (mp->mnt_flag & MNT_DEFEXPORTED))
2570 			np = &nep->ne_defexported;
2571 	}
2572 	return (np);
2573 }
2574 
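/*
 * Illustrative only: an NFS-server style check that maps a client's
 * address to export credentials for this mount.  This fragment assumes
 * mp/nep/nam from the caller; "wantwrite" is a hypothetical flag for
 * whether the request would modify the file system.
 */
#if 0	/* example sketch, not compiled */
	struct netcred *np;

	np = vfs_export_lookup(mp, nep, nam);
	if (np == NULL)
		return (EACCES);	/* host is not in the export list */
	if (wantwrite && (np->netc_exflags & MNT_EXRDONLY))
		return (EROFS);		/* exported read-only to this host */
	/* otherwise proceed, using np->netc_anon as the anon credential */
#endif
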
2575 /*
2576  * Do the usual access checking.
2577  * file_mode, uid and gid are from the vnode in question,
2578  * while acc_mode and cred are from the VOP_ACCESS parameter list
2579  */
2580 int
2581 vaccess(type, file_mode, uid, gid, acc_mode, cred)
2582 	enum vtype type;
2583 	mode_t file_mode;
2584 	uid_t uid;
2585 	gid_t gid;
2586 	mode_t acc_mode;
2587 	struct ucred *cred;
2588 {
2589 	mode_t mask;
2590 
2591 	/*
2592 	 * Super-user always gets read/write access, but execute access depends
2593 	 * on at least one execute bit being set.
2594 	 */
2595 	if (cred->cr_uid == 0) {
2596 		if ((acc_mode & VEXEC) && type != VDIR &&
2597 		    (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
2598 			return (EACCES);
2599 		return (0);
2600 	}
2601 
2602 	mask = 0;
2603 
2604 	/* Otherwise, check the owner. */
2605 	if (cred->cr_uid == uid) {
2606 		if (acc_mode & VEXEC)
2607 			mask |= S_IXUSR;
2608 		if (acc_mode & VREAD)
2609 			mask |= S_IRUSR;
2610 		if (acc_mode & VWRITE)
2611 			mask |= S_IWUSR;
2612 		return ((file_mode & mask) == mask ? 0 : EACCES);
2613 	}
2614 
2615 	/* Otherwise, check the groups. */
2616 	if (cred->cr_gid == gid || groupmember(gid, cred)) {
2617 		if (acc_mode & VEXEC)
2618 			mask |= S_IXGRP;
2619 		if (acc_mode & VREAD)
2620 			mask |= S_IRGRP;
2621 		if (acc_mode & VWRITE)
2622 			mask |= S_IWGRP;
2623 		return ((file_mode & mask) == mask ? 0 : EACCES);
2624 	}
2625 
2626 	/* Otherwise, check everyone else. */
2627 	if (acc_mode & VEXEC)
2628 		mask |= S_IXOTH;
2629 	if (acc_mode & VREAD)
2630 		mask |= S_IROTH;
2631 	if (acc_mode & VWRITE)
2632 		mask |= S_IWOTH;
2633 	return ((file_mode & mask) == mask ? 0 : EACCES);
2634 }
2635 
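/*
 * Illustrative only: a worked example of the mask computation above.
 * For a regular file with mode 0640 checked by a credential whose gid
 * matches the file's group, a VREAD check builds mask = S_IRGRP and
 * succeeds; VREAD|VWRITE also needs S_IWGRP and fails.  uid/gid/cred
 * here are assumed from the caller.
 */
#if 0	/* example sketch, not compiled */
	error = vaccess(VREG, 0640, uid, gid, VREAD, cred);	   /* 0 */
	error = vaccess(VREG, 0640, uid, gid, VREAD|VWRITE, cred); /* EACCES */
#endif
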
2636 /*
2637  * Unmount all file systems.
2638  * We traverse the list in reverse order under the assumption that doing so
2639  * will avoid needing to worry about dependencies.
2640  */
2641 void
2642 vfs_unmountall(p)
2643 	struct proc *p;
2644 {
2645 	struct mount *mp, *nmp;
2646 	int allerror, error;
2647 
	for (allerror = 0, mp = CIRCLEQ_LAST(&mountlist);
	     mp != (void *)&mountlist; mp = nmp) {
		nmp = CIRCLEQ_PREV(mp, mnt_list);
2651 #ifdef DEBUG
2652 		printf("unmounting %s (%s)...\n",
2653 		    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
2654 #endif
2655 		/*
2656 		 * XXX Freeze syncer.  Must do this before locking the
2657 		 * mount point.  See dounmount() for details.
2658 		 */
2659 		lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
2660 		if (vfs_busy(mp, 0, 0)) {
2661 			lockmgr(&syncer_lock, LK_RELEASE, NULL);
2662 			continue;
2663 		}
2664 		if ((error = dounmount(mp, MNT_FORCE, p)) != 0) {
2665 			printf("unmount of %s failed with error %d\n",
2666 			    mp->mnt_stat.f_mntonname, error);
2667 			allerror = 1;
2668 		}
2669 	}
2670 	if (allerror)
2671 		printf("WARNING: some file systems would not unmount\n");
2672 }
2673 
2674 extern struct simplelock bqueue_slock; /* XXX */
2675 
2676 /*
2677  * Sync and unmount file systems before shutting down.
2678  */
2679 void
2680 vfs_shutdown()
2681 {
2682 	struct lwp *l = curlwp;
2683 	struct proc *p;
2684 
2685 	/* XXX we're certainly not running in proc0's context! */
2686 	if (l == NULL || (p = l->l_proc) == NULL)
2687 		p = &proc0;
2688 
2689 	printf("syncing disks... ");
2690 
2691 	/* remove user process from run queue */
2692 	suspendsched();
2693 	(void) spl0();
2694 
2695 	/* avoid coming back this way again if we panic. */
2696 	doing_shutdown = 1;
2697 
2698 	sys_sync(l, NULL, NULL);
2699 
2700 	/* Wait for sync to finish. */
2701 	if (buf_syncwait() != 0) {
2702 #if defined(DDB) && defined(DEBUG_HALT_BUSY)
2703 		Debugger();
2704 #endif
2705 		printf("giving up\n");
2706 		return;
2707 	} else
2708 		printf("done\n");
2709 
2710 	/*
2711 	 * If we've panic'd, don't make the situation potentially
2712 	 * worse by unmounting the file systems.
2713 	 */
2714 	if (panicstr != NULL)
2715 		return;
2716 
2717 	/* Release inodes held by texts before update. */
2718 #ifdef notdef
2719 	vnshutdown();
2720 #endif
2721 	/* Unmount file systems. */
2722 	vfs_unmountall(p);
2723 }
2724 
2725 /*
2726  * Mount the root file system.  If the operator didn't specify a
2727  * file system to use, try all possible file systems until one
2728  * succeeds.
2729  */
2730 int
2731 vfs_mountroot()
2732 {
2733 	struct vfsops *v;
2734 
2735 	if (root_device == NULL)
2736 		panic("vfs_mountroot: root device unknown");
2737 
2738 	switch (root_device->dv_class) {
2739 	case DV_IFNET:
2740 		if (rootdev != NODEV)
2741 			panic("vfs_mountroot: rootdev set for DV_IFNET "
2742 			    "(0x%08x -> %d,%d)", rootdev,
2743 			    major(rootdev), minor(rootdev));
2744 		break;
2745 
2746 	case DV_DISK:
2747 		if (rootdev == NODEV)
2748 			panic("vfs_mountroot: rootdev not set for DV_DISK");
2749 		break;
2750 
2751 	default:
2752 		printf("%s: inappropriate for root file system\n",
2753 		    root_device->dv_xname);
2754 		return (ENODEV);
2755 	}
2756 
2757 	/*
2758 	 * If user specified a file system, use it.
2759 	 */
2760 	if (mountroot != NULL)
2761 		return ((*mountroot)());
2762 
2763 	/*
2764 	 * Try each file system currently configured into the kernel.
2765 	 */
2766 	LIST_FOREACH(v, &vfs_list, vfs_list) {
2767 		if (v->vfs_mountroot == NULL)
2768 			continue;
2769 #ifdef DEBUG
2770 		aprint_normal("mountroot: trying %s...\n", v->vfs_name);
2771 #endif
2772 		if ((*v->vfs_mountroot)() == 0) {
2773 			aprint_normal("root file system type: %s\n",
2774 			    v->vfs_name);
2775 			break;
2776 		}
2777 	}
2778 
2779 	if (v == NULL) {
2780 		printf("no file system for %s", root_device->dv_xname);
2781 		if (root_device->dv_class == DV_DISK)
2782 			printf(" (dev 0x%x)", rootdev);
2783 		printf("\n");
2784 		return (EFTYPE);
2785 	}
2786 	return (0);
2787 }
2788 
2789 /*
2790  * Given a file system name, look up the vfsops for that
2791  * file system, or return NULL if file system isn't present
2792  * in the kernel.
2793  */
2794 struct vfsops *
2795 vfs_getopsbyname(name)
2796 	const char *name;
2797 {
2798 	struct vfsops *v;
2799 
2800 	LIST_FOREACH(v, &vfs_list, vfs_list) {
2801 		if (strcmp(v->vfs_name, name) == 0)
2802 			break;
2803 	}
2804 
2805 	return (v);
2806 }
2807 
2808 /*
2809  * Establish a file system and initialize it.
2810  */
2811 int
2812 vfs_attach(vfs)
2813 	struct vfsops *vfs;
2814 {
2815 	struct vfsops *v;
2816 	int error = 0;
2817 
2819 	/*
2820 	 * Make sure this file system doesn't already exist.
2821 	 */
2822 	LIST_FOREACH(v, &vfs_list, vfs_list) {
2823 		if (strcmp(vfs->vfs_name, v->vfs_name) == 0) {
2824 			error = EEXIST;
2825 			goto out;
2826 		}
2827 	}
2828 
2829 	/*
2830 	 * Initialize the vnode operations for this file system.
2831 	 */
2832 	vfs_opv_init(vfs->vfs_opv_descs);
2833 
2834 	/*
2835 	 * Now initialize the file system itself.
2836 	 */
2837 	(*vfs->vfs_init)();
2838 
2839 	/*
2840 	 * ...and link it into the kernel's list.
2841 	 */
2842 	LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list);
2843 
2844 	/*
2845 	 * Sanity: make sure the reference count is 0.
2846 	 */
2847 	vfs->vfs_refcount = 0;
2848 
2849  out:
2850 	return (error);
2851 }
2852 
2853 /*
2854  * Remove a file system from the kernel.
2855  */
2856 int
2857 vfs_detach(vfs)
2858 	struct vfsops *vfs;
2859 {
2860 	struct vfsops *v;
2861 
2862 	/*
2863 	 * Make sure no one is using the filesystem.
2864 	 */
2865 	if (vfs->vfs_refcount != 0)
2866 		return (EBUSY);
2867 
2868 	/*
2869 	 * ...and remove it from the kernel's list.
2870 	 */
2871 	LIST_FOREACH(v, &vfs_list, vfs_list) {
2872 		if (v == vfs) {
2873 			LIST_REMOVE(v, vfs_list);
2874 			break;
2875 		}
2876 	}
2877 
2878 	if (v == NULL)
2879 		return (ESRCH);
2880 
2881 	/*
2882 	 * Now run the file system-specific cleanups.
2883 	 */
2884 	(*vfs->vfs_done)();
2885 
2886 	/*
2887 	 * Free the vnode operations vector.
2888 	 */
2889 	vfs_opv_free(vfs->vfs_opv_descs);
2890 	return (0);
2891 }
2892 
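/*
 * Illustrative only: how a loadable file system would use
 * vfs_attach()/vfs_detach().  "xyzfs" and its vfsops are hypothetical.
 */
#if 0	/* example sketch, not compiled */
extern struct vfsops xyzfs_vfsops;

int
xyzfs_modload(void)
{

	/* Fails with EEXIST if an "xyzfs" is already attached. */
	return (vfs_attach(&xyzfs_vfsops));
}

int
xyzfs_modunload(void)
{

	/* Fails with EBUSY while any xyzfs instance is mounted. */
	return (vfs_detach(&xyzfs_vfsops));
}
#endif
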
2893 void
2894 vfs_reinit(void)
2895 {
2896 	struct vfsops *vfs;
2897 
2898 	LIST_FOREACH(vfs, &vfs_list, vfs_list) {
2899 		if (vfs->vfs_reinit) {
2900 			(*vfs->vfs_reinit)();
2901 		}
2902 	}
2903 }
2904 
2905 /*
2906  * Request a filesystem to suspend write operations.
2907  */
2908 int
2909 vfs_write_suspend(struct mount *mp, int slpflag, int slptimeo)
2910 {
2911 	struct proc *p = curproc;	/* XXX */
2912 	int error;
2913 
2914 	while ((mp->mnt_iflag & IMNT_SUSPEND)) {
2915 		if (slptimeo < 0)
2916 			return EWOULDBLOCK;
2917 		error = tsleep(&mp->mnt_flag, slpflag, "suspwt1", slptimeo);
2918 		if (error)
2919 			return error;
2920 	}
2921 	mp->mnt_iflag |= IMNT_SUSPEND;
2922 
2923 	simple_lock(&mp->mnt_slock);
2924 	if (mp->mnt_writeopcountupper > 0)
2925 		ltsleep(&mp->mnt_writeopcountupper, PUSER - 1, "suspwt",
2926 			0, &mp->mnt_slock);
2927 	simple_unlock(&mp->mnt_slock);
2928 
2929 	error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p);
2930 	if (error) {
2931 		vfs_write_resume(mp);
2932 		return error;
2933 	}
2934 	mp->mnt_iflag |= IMNT_SUSPENDLOW;
2935 
2936 	simple_lock(&mp->mnt_slock);
2937 	if (mp->mnt_writeopcountlower > 0)
2938 		ltsleep(&mp->mnt_writeopcountlower, PUSER - 1, "suspwt",
2939 			0, &mp->mnt_slock);
2940 	mp->mnt_iflag |= IMNT_SUSPENDED;
2941 	simple_unlock(&mp->mnt_slock);
2942 
2943 	return 0;
2944 }
2945 
2946 /*
2947  * Request a filesystem to resume write operations.
2948  */
2949 void
2950 vfs_write_resume(struct mount *mp)
2951 {
2952 
2953 	if ((mp->mnt_iflag & IMNT_SUSPEND) == 0)
2954 		return;
2955 	mp->mnt_iflag &= ~(IMNT_SUSPEND | IMNT_SUSPENDLOW | IMNT_SUSPENDED);
2956 	wakeup(&mp->mnt_flag);
2957 }
2958 
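/*
 * Illustrative only: the intended pairing of the two routines above,
 * freezing writes around an operation that needs a quiescent file
 * system.  xx_snapshot() is a hypothetical operation; mp is assumed
 * from the caller.
 */
#if 0	/* example sketch, not compiled */
	error = vfs_write_suspend(mp, 0, 0);
	if (error)
		return (error);
	error = xx_snapshot(mp);	/* file system is write-quiescent */
	vfs_write_resume(mp);
	return (error);
#endif
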
2959 void
2960 copy_statvfs_info(struct statvfs *sbp, const struct mount *mp)
2961 {
2962 	const struct statvfs *mbp;
2963 
2964 	if (sbp == (mbp = &mp->mnt_stat))
2965 		return;
2966 
2967 	(void)memcpy(&sbp->f_fsidx, &mbp->f_fsidx, sizeof(sbp->f_fsidx));
2968 	sbp->f_fsid = mbp->f_fsid;
2969 	sbp->f_owner = mbp->f_owner;
2970 	sbp->f_flag = mbp->f_flag;
2971 	sbp->f_syncwrites = mbp->f_syncwrites;
2972 	sbp->f_asyncwrites = mbp->f_asyncwrites;
2973 	sbp->f_syncreads = mbp->f_syncreads;
2974 	sbp->f_asyncreads = mbp->f_asyncreads;
2975 	(void)memcpy(sbp->f_spare, mbp->f_spare, sizeof(mbp->f_spare));
2976 	(void)memcpy(sbp->f_fstypename, mbp->f_fstypename,
2977 	    sizeof(sbp->f_fstypename));
2978 	(void)memcpy(sbp->f_mntonname, mbp->f_mntonname,
2979 	    sizeof(sbp->f_mntonname));
2980 	(void)memcpy(sbp->f_mntfromname, mp->mnt_stat.f_mntfromname,
2981 	    sizeof(sbp->f_mntfromname));
2982 }
2983 
2984 int
2985 set_statvfs_info(const char *onp, int ukon, const char *fromp, int ukfrom,
2986     struct mount *mp, struct proc *p)
2987 {
2988 	int error;
2989 	size_t size;
2990 	struct statvfs *sfs = &mp->mnt_stat;
2991 	int (*fun)(const void *, void *, size_t, size_t *);
2992 
2993 	(void)strncpy(mp->mnt_stat.f_fstypename, mp->mnt_op->vfs_name,
2994 	    sizeof(mp->mnt_stat.f_fstypename));
2995 
2996 	if (onp) {
2997 		struct cwdinfo *cwdi = p->p_cwdi;
2998 		fun = (ukon == UIO_SYSSPACE) ? copystr : copyinstr;
2999 		if (cwdi->cwdi_rdir != NULL) {
3000 			size_t len;
3001 			char *bp;
3002 			char *path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
3003 
3004 			if (!path) /* XXX can't happen with M_WAITOK */
3005 				return ENOMEM;
3006 
3007 			bp = path + MAXPATHLEN;
3008 			*--bp = '\0';
3009 			error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp,
3010 			    path, MAXPATHLEN / 2, 0, p);
3011 			if (error) {
3012 				free(path, M_TEMP);
3013 				return error;
3014 			}
3015 
3016 			len = strlen(bp);
3017 			if (len > sizeof(sfs->f_mntonname) - 1)
3018 				len = sizeof(sfs->f_mntonname) - 1;
3019 			(void)strncpy(sfs->f_mntonname, bp, len);
3020 			free(path, M_TEMP);
3021 
3022 			if (len < sizeof(sfs->f_mntonname) - 1) {
3023 				error = (*fun)(onp, &sfs->f_mntonname[len],
3024 				    sizeof(sfs->f_mntonname) - len - 1, &size);
3025 				if (error)
3026 					return error;
3027 				size += len;
3028 			} else {
3029 				size = len;
3030 			}
3031 		} else {
3032 			error = (*fun)(onp, &sfs->f_mntonname,
3033 			    sizeof(sfs->f_mntonname) - 1, &size);
3034 			if (error)
3035 				return error;
3036 		}
3037 		(void)memset(sfs->f_mntonname + size, 0,
3038 		    sizeof(sfs->f_mntonname) - size);
3039 	}
3040 
3041 	if (fromp) {
3042 		fun = (ukfrom == UIO_SYSSPACE) ? copystr : copyinstr;
3043 		error = (*fun)(fromp, sfs->f_mntfromname,
3044 		    sizeof(sfs->f_mntfromname) - 1, &size);
3045 		if (error)
3046 			return error;
3047 		(void)memset(sfs->f_mntfromname + size, 0,
3048 		    sizeof(sfs->f_mntfromname) - size);
3049 	}
3050 	return 0;
3051 }
3052 
3053 #ifdef DDB
3054 const char buf_flagbits[] =
3055 	"\20\1AGE\2NEEDCOMMIT\3ASYNC\4BAD\5BUSY\6SCANNED\7CALL\10DELWRI"
3056 	"\11DIRTY\12DONE\13EINTR\14ERROR\15GATHERED\16INVAL\17LOCKED\20NOCACHE"
3057 	"\21ORDERED\22CACHE\23PHYS\24RAW\25READ\26TAPE\30WANTED"
3058 	"\32XXX\33VFLUSH";
3059 
3060 void
3061 vfs_buf_print(bp, full, pr)
3062 	struct buf *bp;
3063 	int full;
3064 	void (*pr)(const char *, ...);
3065 {
3066 	char buf[1024];
3067 
3068 	(*pr)("  vp %p lblkno 0x%"PRIx64" blkno 0x%"PRIx64" dev 0x%x\n",
3069 		  bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_dev);
3070 
3071 	bitmask_snprintf(bp->b_flags, buf_flagbits, buf, sizeof(buf));
3072 	(*pr)("  error %d flags 0x%s\n", bp->b_error, buf);
3073 
3074 	(*pr)("  bufsize 0x%lx bcount 0x%lx resid 0x%lx\n",
3075 		  bp->b_bufsize, bp->b_bcount, bp->b_resid);
3076 	(*pr)("  data %p saveaddr %p dep %p\n",
3077 		  bp->b_data, bp->b_saveaddr, LIST_FIRST(&bp->b_dep));
3078 	(*pr)("  iodone %p\n", bp->b_iodone);
3079 }
3080 
3081 
3082 const char vnode_flagbits[] =
3083 	"\20\1ROOT\2TEXT\3SYSTEM\4ISTTY\5EXECMAP"
3084 	"\11XLOCK\12XWANT\13BWAIT\14ALIASED"
3085 	"\15DIROP\16LAYER\17ONWORKLIST\20DIRTY";
3086 
3087 const char * const vnode_tags[] = {
3088 	"VT_NON",
3089 	"VT_UFS",
3090 	"VT_NFS",
3091 	"VT_MFS",
3092 	"VT_MSDOSFS",
3093 	"VT_LFS",
3094 	"VT_LOFS",
3095 	"VT_FDESC",
3096 	"VT_PORTAL",
3097 	"VT_NULL",
3098 	"VT_UMAP",
3099 	"VT_KERNFS",
3100 	"VT_PROCFS",
3101 	"VT_AFS",
3102 	"VT_ISOFS",
3103 	"VT_UNION",
3104 	"VT_ADOSFS",
3105 	"VT_EXT2FS",
3106 	"VT_CODA",
3107 	"VT_FILECORE",
3108 	"VT_NTFS",
3109 	"VT_VFS",
3110 	"VT_OVERLAY",
3111 	"VT_SMBFS"
3112 };
3113 
3114 void
3115 vfs_vnode_print(vp, full, pr)
3116 	struct vnode *vp;
3117 	int full;
3118 	void (*pr)(const char *, ...);
3119 {
3120 	char buf[256];
3121 	const char *vtype, *vtag;
3122 
3123 	uvm_object_printit(&vp->v_uobj, full, pr);
3124 	bitmask_snprintf(vp->v_flag, vnode_flagbits, buf, sizeof(buf));
3125 	(*pr)("\nVNODE flags %s\n", buf);
3126 	(*pr)("mp %p numoutput %d size 0x%llx\n",
3127 	      vp->v_mount, vp->v_numoutput, vp->v_size);
3128 
3129 	(*pr)("data %p usecount %d writecount %ld holdcnt %ld numoutput %d\n",
3130 	      vp->v_data, vp->v_usecount, vp->v_writecount,
3131 	      vp->v_holdcnt, vp->v_numoutput);
3132 
3133 	vtype = (vp->v_type >= 0 &&
3134 		 vp->v_type < sizeof(vnode_types) / sizeof(vnode_types[0])) ?
3135 		vnode_types[vp->v_type] : "UNKNOWN";
3136 	vtag = (vp->v_tag >= 0 &&
3137 		vp->v_tag < sizeof(vnode_tags) / sizeof(vnode_tags[0])) ?
3138 		vnode_tags[vp->v_tag] : "UNKNOWN";
3139 
3140 	(*pr)("type %s(%d) tag %s(%d) mount %p typedata %p\n",
3141 	      vtype, vp->v_type, vtag, vp->v_tag,
3142 	      vp->v_mount, vp->v_mountedhere);
3143 
3144 	if (full) {
3145 		struct buf *bp;
3146 
3147 		(*pr)("clean bufs:\n");
3148 		LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
3149 			(*pr)(" bp %p\n", bp);
3150 			vfs_buf_print(bp, full, pr);
3151 		}
3152 
3153 		(*pr)("dirty bufs:\n");
3154 		LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
3155 			(*pr)(" bp %p\n", bp);
3156 			vfs_buf_print(bp, full, pr);
3157 		}
3158 	}
3159 }
3160 
3161 void
3162 vfs_mount_print(mp, full, pr)
3163 	struct mount *mp;
3164 	int full;
3165 	void (*pr)(const char *, ...);
3166 {
3167 	char sbuf[256];
3168 
3169 	(*pr)("vnodecovered = %p syncer = %p data = %p\n",
3170 			mp->mnt_vnodecovered,mp->mnt_syncer,mp->mnt_data);
3171 
3172 	(*pr)("fs_bshift %d dev_bshift = %d maxsymlinklen = %d\n",
3173 			mp->mnt_fs_bshift,mp->mnt_dev_bshift,mp->mnt_maxsymlinklen);
3174 
3175 	bitmask_snprintf(mp->mnt_flag, __MNT_FLAG_BITS, sbuf, sizeof(sbuf));
3176 	(*pr)("flag = %s\n", sbuf);
3177 
3178 	bitmask_snprintf(mp->mnt_iflag, __IMNT_FLAG_BITS, sbuf, sizeof(sbuf));
3179 	(*pr)("iflag = %s\n", sbuf);
3180 
3181 	/* XXX use lockmgr_printinfo */
3182 	if (mp->mnt_lock.lk_sharecount)
3183 		(*pr)(" lock type %s: SHARED (count %d)", mp->mnt_lock.lk_wmesg,
3184 		    mp->mnt_lock.lk_sharecount);
3185 	else if (mp->mnt_lock.lk_flags & LK_HAVE_EXCL) {
3186 		(*pr)(" lock type %s: EXCL (count %d) by ",
3187 		    mp->mnt_lock.lk_wmesg, mp->mnt_lock.lk_exclusivecount);
3188 		if (mp->mnt_lock.lk_flags & LK_SPIN)
3189 			(*pr)("processor %lu", mp->mnt_lock.lk_cpu);
3190 		else
3191 			(*pr)("pid %d.%d", mp->mnt_lock.lk_lockholder,
3192 			    mp->mnt_lock.lk_locklwp);
3193 	} else
3194 		(*pr)(" not locked");
3195 	if ((mp->mnt_lock.lk_flags & LK_SPIN) == 0 && mp->mnt_lock.lk_waitcount > 0)
3196 		(*pr)(" with %d pending", mp->mnt_lock.lk_waitcount);
3197 
3198 	(*pr)("\n");
3199 
3200 	if (mp->mnt_unmounter) {
3201 		(*pr)("unmounter pid = %d ",mp->mnt_unmounter->p_pid);
3202 	}
	(*pr)("wcnt = %d, writeopcountupper = %d, writeopcountlower = %d\n",
		mp->mnt_wcnt,mp->mnt_writeopcountupper,mp->mnt_writeopcountlower);
3205 
3206 	(*pr)("statvfs cache:\n");
3207 	(*pr)("\tbsize = %lu\n",mp->mnt_stat.f_bsize);
3208 	(*pr)("\tfrsize = %lu\n",mp->mnt_stat.f_frsize);
3209 	(*pr)("\tiosize = %lu\n",mp->mnt_stat.f_iosize);
3210 
	(*pr)("\tblocks = %"PRIu64"\n",mp->mnt_stat.f_blocks);
	(*pr)("\tbfree = %"PRIu64"\n",mp->mnt_stat.f_bfree);
	(*pr)("\tbavail = %"PRIu64"\n",mp->mnt_stat.f_bavail);
	(*pr)("\tbresvd = %"PRIu64"\n",mp->mnt_stat.f_bresvd);
3215 
	(*pr)("\tfiles = %"PRIu64"\n",mp->mnt_stat.f_files);
	(*pr)("\tffree = %"PRIu64"\n",mp->mnt_stat.f_ffree);
	(*pr)("\tfavail = %"PRIu64"\n",mp->mnt_stat.f_favail);
	(*pr)("\tfresvd = %"PRIu64"\n",mp->mnt_stat.f_fresvd);
3220 
3221 	(*pr)("\tf_fsidx = { 0x%"PRIx32", 0x%"PRIx32" }\n",
3222 			mp->mnt_stat.f_fsidx.__fsid_val[0],
3223 			mp->mnt_stat.f_fsidx.__fsid_val[1]);
3224 
3225 	(*pr)("\towner = %"PRIu32"\n",mp->mnt_stat.f_owner);
3226 	(*pr)("\tnamemax = %lu\n",mp->mnt_stat.f_namemax);
3227 
3228 	bitmask_snprintf(mp->mnt_stat.f_flag, __MNT_FLAG_BITS, sbuf,
3229 	    sizeof(sbuf));
3230 	(*pr)("\tflag = %s\n",sbuf);
	(*pr)("\tsyncwrites = %" PRIu64 "\n",mp->mnt_stat.f_syncwrites);
	(*pr)("\tasyncwrites = %" PRIu64 "\n",mp->mnt_stat.f_asyncwrites);
	(*pr)("\tsyncreads = %" PRIu64 "\n",mp->mnt_stat.f_syncreads);
	(*pr)("\tasyncreads = %" PRIu64 "\n",mp->mnt_stat.f_asyncreads);
3235 	(*pr)("\tfstypename = %s\n",mp->mnt_stat.f_fstypename);
3236 	(*pr)("\tmntonname = %s\n",mp->mnt_stat.f_mntonname);
3237 	(*pr)("\tmntfromname = %s\n",mp->mnt_stat.f_mntfromname);
3238 
3239 	{
3240 		int cnt = 0;
3241 		struct vnode *vp;
3242 		(*pr)("locked vnodes =");
3243 		/* XXX would take mountlist lock, except ddb may not have context */
3244 		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
3245 			if (VOP_ISLOCKED(vp)) {
3246 				if ((++cnt % 6) == 0) {
3247 					(*pr)(" %p,\n\t", vp);
3248 				} else {
3249 					(*pr)(" %p,", vp);
3250 				}
3251 			}
3252 		}
3253 		(*pr)("\n");
3254 	}
3255 
3256 	if (full) {
3257 		int cnt = 0;
3258 		struct vnode *vp;
3259 		(*pr)("all vnodes =");
3260 		/* XXX would take mountlist lock, except ddb may not have context */
3261 		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
3262 			if (!LIST_NEXT(vp, v_mntvnodes)) {
3263 				(*pr)(" %p", vp);
3264 			} else if ((++cnt % 6) == 0) {
3265 				(*pr)(" %p,\n\t", vp);
3266 			} else {
3267 				(*pr)(" %p,", vp);
3268 			}
3269 		}
		(*pr)("\n");
3271 	}
3272 }
3273 
3274 #endif
3275