1 /*	$NetBSD: vfs_subr.c,v 1.243 2005/03/02 11:05:34 mycroft Exp $	*/
2 
3 /*-
4  * Copyright (c) 1997, 1998, 2004, 2005 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9  * NASA Ames Research Center.
10  * This code is derived from software contributed to The NetBSD Foundation
11  * by Charles M. Hannum.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. All advertising materials mentioning features or use of this software
22  *    must display the following acknowledgement:
23  *	This product includes software developed by the NetBSD
24  *	Foundation, Inc. and its contributors.
25  * 4. Neither the name of The NetBSD Foundation nor the names of its
26  *    contributors may be used to endorse or promote products derived
27  *    from this software without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
30  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
31  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
32  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
33  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39  * POSSIBILITY OF SUCH DAMAGE.
40  */
41 
42 /*
43  * Copyright (c) 1989, 1993
44  *	The Regents of the University of California.  All rights reserved.
45  * (c) UNIX System Laboratories, Inc.
46  * All or some portions of this file are derived from material licensed
47  * to the University of California by American Telephone and Telegraph
48  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
49  * the permission of UNIX System Laboratories, Inc.
50  *
51  * Redistribution and use in source and binary forms, with or without
52  * modification, are permitted provided that the following conditions
53  * are met:
54  * 1. Redistributions of source code must retain the above copyright
55  *    notice, this list of conditions and the following disclaimer.
56  * 2. Redistributions in binary form must reproduce the above copyright
57  *    notice, this list of conditions and the following disclaimer in the
58  *    documentation and/or other materials provided with the distribution.
59  * 3. Neither the name of the University nor the names of its contributors
60  *    may be used to endorse or promote products derived from this software
61  *    without specific prior written permission.
62  *
63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73  * SUCH DAMAGE.
74  *
75  *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
76  */
77 
78 /*
79  * External virtual filesystem routines
80  */
81 
82 #include <sys/cdefs.h>
83 __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.243 2005/03/02 11:05:34 mycroft Exp $");
84 
85 #include "opt_inet.h"
86 #include "opt_ddb.h"
87 #include "opt_compat_netbsd.h"
88 #include "opt_compat_43.h"
89 
90 #include <sys/param.h>
91 #include <sys/systm.h>
92 #include <sys/proc.h>
93 #include <sys/kernel.h>
94 #include <sys/mount.h>
95 #include <sys/time.h>
96 #include <sys/event.h>
97 #include <sys/fcntl.h>
98 #include <sys/vnode.h>
99 #include <sys/stat.h>
100 #include <sys/namei.h>
101 #include <sys/ucred.h>
102 #include <sys/buf.h>
103 #include <sys/errno.h>
104 #include <sys/malloc.h>
105 #include <sys/domain.h>
106 #include <sys/mbuf.h>
107 #include <sys/sa.h>
108 #include <sys/syscallargs.h>
109 #include <sys/device.h>
110 #include <sys/extattr.h>
111 #include <sys/dirent.h>
112 #include <sys/filedesc.h>
113 
114 #include <miscfs/specfs/specdev.h>
115 #include <miscfs/genfs/genfs.h>
116 #include <miscfs/syncfs/syncfs.h>
117 
118 #include <netinet/in.h>
119 
120 #include <uvm/uvm.h>
121 #include <uvm/uvm_ddb.h>
122 
125 #include <sys/sysctl.h>
126 
127 const enum vtype iftovt_tab[16] = {
128 	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
129 	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
130 };
131 const int	vttoif_tab[9] = {
132 	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
133 	S_IFSOCK, S_IFIFO, S_IFMT,
134 };
135 
136 int doforce = 1;		/* 1 => permit forcible unmounting */
137 int prtactive = 0;		/* 1 => print out reclaim of active vnodes */
138 
139 extern int dovfsusermount;	/* 1 => permit any user to mount filesystems */
140 
141 /*
142  * Insq/Remq for the vnode usage lists.
143  */
144 #define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
145 #define	bufremvn(bp) {							\
146 	LIST_REMOVE(bp, b_vnbufs);					\
147 	(bp)->b_vnbufs.le_next = NOLIST;				\
148 }
149 /* TAILQ_HEAD(freelst, vnode) vnode_free_list =	vnode free list (in vnode.h) */
150 struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
151 struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);
152 
153 struct mntlist mountlist =			/* mounted filesystem list */
154     CIRCLEQ_HEAD_INITIALIZER(mountlist);
155 struct vfs_list_head vfs_list =			/* vfs list */
156     LIST_HEAD_INITIALIZER(vfs_list);
157 
158 struct nfs_public nfs_pub;			/* publicly exported FS */
159 
160 struct simplelock mountlist_slock = SIMPLELOCK_INITIALIZER;
161 static struct simplelock mntid_slock = SIMPLELOCK_INITIALIZER;
162 struct simplelock mntvnode_slock = SIMPLELOCK_INITIALIZER;
163 struct simplelock vnode_free_list_slock = SIMPLELOCK_INITIALIZER;
164 struct simplelock spechash_slock = SIMPLELOCK_INITIALIZER;
165 
166 /* XXX - gross; single global lock to protect v_numoutput */
167 struct simplelock global_v_numoutput_slock = SIMPLELOCK_INITIALIZER;
168 
169 /*
170  * These define the root filesystem and device.
171  */
172 struct mount *rootfs;
173 struct vnode *rootvnode;
174 struct device *root_device;			/* root device */
175 
176 POOL_INIT(vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
177     &pool_allocator_nointr);
178 
179 MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes");
180 
181 /*
182  * Local declarations.
183  */
184 void insmntque(struct vnode *, struct mount *);
185 int getdevvp(dev_t, struct vnode **, enum vtype);
186 
187 void vclean(struct vnode *, int, struct proc *);
188 
189 static int vfs_hang_addrlist(struct mount *, struct netexport *,
190 			     struct export_args *);
191 static int vfs_free_netcred(struct radix_node *, void *);
192 static void vfs_free_addrlist(struct netexport *);
193 static struct vnode *getcleanvnode(struct proc *);
194 
195 #ifdef DEBUG
196 void printlockedvnodes(void);
197 #endif
198 
199 /*
200  * Initialize the vnode management data structures.
201  */
202 void
203 vntblinit()
204 {
205 
206 	/*
207 	 * Initialize the filesystem syncer.
208 	 */
209 	vn_initialize_syncerd();
210 }
211 
212 int
213 vfs_drainvnodes(long target, struct proc *p)
214 {
215 
216 	simple_lock(&vnode_free_list_slock);
217 	while (numvnodes > target) {
218 		struct vnode *vp;
219 
220 		vp = getcleanvnode(p);
221 		if (vp == NULL)
222 			return EBUSY; /* give up */
223 		pool_put(&vnode_pool, vp);
224 		simple_lock(&vnode_free_list_slock);
225 		numvnodes--;
226 	}
227 	simple_unlock(&vnode_free_list_slock);
228 
229 	return 0;
230 }
231 
232 /*
233  * Grab a vnode from the freelist and clean it.
234  */
235 struct vnode *
236 getcleanvnode(p)
237 	struct proc *p;
238 {
239 	struct vnode *vp;
240 	struct mount *mp;
241 	struct freelst *listhd;
242 
243 	LOCK_ASSERT(simple_lock_held(&vnode_free_list_slock));
244 
245 	listhd = &vnode_free_list;
246 try_nextlist:
247 	TAILQ_FOREACH(vp, listhd, v_freelist) {
248 		if (!simple_lock_try(&vp->v_interlock))
249 			continue;
250 		/*
251 		 * As our LWP might hold the underlying vnode locked,
252 		 * don't try to reclaim a VLAYER vnode if it is locked.
253 		 */
254 		if ((vp->v_flag & VXLOCK) == 0 &&
255 		    ((vp->v_flag & VLAYER) == 0 || VOP_ISLOCKED(vp) == 0)) {
256 			if (vn_start_write(vp, &mp, V_NOWAIT) == 0)
257 				break;
258 		}
259 		mp = NULL;
260 		simple_unlock(&vp->v_interlock);
261 	}
262 
263 	if (vp == NULLVP) {
264 		if (listhd == &vnode_free_list) {
265 			listhd = &vnode_hold_list;
266 			goto try_nextlist;
267 		}
268 		simple_unlock(&vnode_free_list_slock);
269 		return NULLVP;
270 	}
271 
272 	if (vp->v_usecount)
273 		panic("free vnode isn't, vp %p", vp);
274 	TAILQ_REMOVE(listhd, vp, v_freelist);
275 	/* see comment on why 0xdeadb is set at end of vgone (below) */
276 	vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
277 	simple_unlock(&vnode_free_list_slock);
278 	vp->v_lease = NULL;
279 
280 	if (vp->v_type != VBAD)
281 		vgonel(vp, p);
282 	else
283 		simple_unlock(&vp->v_interlock);
284 	vn_finished_write(mp, 0);
285 #ifdef DIAGNOSTIC
286 	if (vp->v_data || vp->v_uobj.uo_npages ||
287 	    TAILQ_FIRST(&vp->v_uobj.memq))
288 		panic("cleaned vnode isn't, vp %p", vp);
289 	if (vp->v_numoutput)
290 		panic("clean vnode has pending I/O's, vp %p", vp);
291 #endif
292 	KASSERT((vp->v_flag & VONWORKLST) == 0);
293 
294 	return vp;
295 }
296 
297 /*
298  * Mark a mount point as busy. Used to synchronize access and to delay
299  * unmounting. Interlock is not released on failure.
300  */
301 int
302 vfs_busy(mp, flags, interlkp)
303 	struct mount *mp;
304 	int flags;
305 	struct simplelock *interlkp;
306 {
307 	int lkflags;
308 
309 	while (mp->mnt_iflag & IMNT_UNMOUNT) {
310 		int gone, n;
311 
312 		if (flags & LK_NOWAIT)
313 			return (ENOENT);
314 		if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL
315 		    && mp->mnt_unmounter == curproc)
316 			return (EDEADLK);
317 		if (interlkp)
318 			simple_unlock(interlkp);
319 		/*
320 		 * Since all busy locks are shared except the exclusive
321 		 * lock granted when unmounting, the only place that a
322 		 * wakeup needs to be done is at the release of the
323 		 * exclusive lock at the end of dounmount.
324 		 */
325 		simple_lock(&mp->mnt_slock);
326 		mp->mnt_wcnt++;
327 		ltsleep((caddr_t)mp, PVFS, "vfs_busy", 0, &mp->mnt_slock);
328 		n = --mp->mnt_wcnt;
329 		simple_unlock(&mp->mnt_slock);
330 		gone = mp->mnt_iflag & IMNT_GONE;
331 
332 		if (n == 0)
333 			wakeup(&mp->mnt_wcnt);
334 		if (interlkp)
335 			simple_lock(interlkp);
336 		if (gone)
337 			return (ENOENT);
338 	}
339 	lkflags = LK_SHARED;
340 	if (interlkp)
341 		lkflags |= LK_INTERLOCK;
342 	if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
343 		panic("vfs_busy: unexpected lock failure");
344 	return (0);
345 }
346 
347 /*
348  * Free a busy filesystem.
349  */
350 void
351 vfs_unbusy(mp)
352 	struct mount *mp;
353 {
354 
355 	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
356 }
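
/*
 * Illustrative sketch (not part of the original file): the usual
 * vfs_busy()/vfs_unbusy() pairing when walking the mount list, as
 * printlockedvnodes() below also does.  Note the interlock handling:
 * on success, lockmgr() has released the passed interlock; on
 * failure it is still held.
 *
 *	simple_lock(&mountlist_slock);
 *	if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock) != 0) {
 *		...			mountlist_slock is still held
 *	} else {
 *		...			work on mp; it cannot be unmounted
 *		vfs_unbusy(mp);
 *	}
 */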
357 
358 /*
359  * Lookup a filesystem type, and if found allocate and initialize
360  * a mount structure for it.
361  *
362  * Devname is usually updated by mount(8) after booting.
363  */
364 int
365 vfs_rootmountalloc(fstypename, devname, mpp)
366 	char *fstypename;
367 	char *devname;
368 	struct mount **mpp;
369 {
370 	struct vfsops *vfsp = NULL;
371 	struct mount *mp;
372 
373 	LIST_FOREACH(vfsp, &vfs_list, vfs_list)
374 		if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
375 			break;
376 
377 	if (vfsp == NULL)
378 		return (ENODEV);
379 	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
380 	memset((char *)mp, 0, (u_long)sizeof(struct mount));
381 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
382 	simple_lock_init(&mp->mnt_slock);
383 	(void)vfs_busy(mp, LK_NOWAIT, 0);
384 	LIST_INIT(&mp->mnt_vnodelist);
385 	mp->mnt_op = vfsp;
386 	mp->mnt_flag = MNT_RDONLY;
387 	mp->mnt_vnodecovered = NULLVP;
388 	mp->mnt_leaf = mp;
389 	vfsp->vfs_refcount++;
390 	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN);
391 	mp->mnt_stat.f_mntonname[0] = '/';
392 	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
393 	*mpp = mp;
394 	return (0);
395 }
396 
397 /*
398  * Lookup a mount point by filesystem identifier.
399  */
400 struct mount *
401 vfs_getvfs(fsid)
402 	fsid_t *fsid;
403 {
404 	struct mount *mp;
405 
406 	simple_lock(&mountlist_slock);
407 	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
408 		if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] &&
409 		    mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) {
410 			simple_unlock(&mountlist_slock);
411 			return (mp);
412 		}
413 	}
414 	simple_unlock(&mountlist_slock);
415 	return ((struct mount *)0);
416 }
417 
418 /*
419  * Get a new unique fsid
420  */
421 void
422 vfs_getnewfsid(mp)
423 	struct mount *mp;
424 {
425 	static u_short xxxfs_mntid;
426 	fsid_t tfsid;
427 	int mtype;
428 
429 	simple_lock(&mntid_slock);
430 	mtype = makefstype(mp->mnt_op->vfs_name);
431 	mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0);
432 	mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype;
433 	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
434 	if (xxxfs_mntid == 0)
435 		++xxxfs_mntid;
436 	tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid);
437 	tfsid.__fsid_val[1] = mtype;
438 	if (!CIRCLEQ_EMPTY(&mountlist)) {
439 		while (vfs_getvfs(&tfsid)) {
440 			tfsid.__fsid_val[0]++;
441 			xxxfs_mntid++;
442 		}
443 	}
444 	mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0];
445 	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
446 	simple_unlock(&mntid_slock);
447 }
448 
449 /*
450  * Make a 'unique' number from a mount type name.
451  */
452 long
453 makefstype(type)
454 	const char *type;
455 {
456 	long rv;
457 
458 	for (rv = 0; *type; type++) {
459 		rv <<= 2;
460 		rv ^= *type;
461 	}
462 	return rv;
463 }
464 
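/*
 * Worked example (added for illustration): makefstype() folds the name
 * with a shift-and-xor hash.  For "ffs":
 *
 *	rv = 0
 *	rv = (rv << 2) ^ 'f'		-> 0x066
 *	rv = (rv << 2) ^ 'f'		-> 0x1fe
 *	rv = (rv << 2) ^ 's'		-> 0x78b
 *
 * The result is only "unique" enough for vfs_getnewfsid() above, which
 * resolves any remaining collisions by probing with vfs_getvfs().
 */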
465 
466 /*
467  * Set vnode attributes to VNOVAL
468  */
469 void
470 vattr_null(vap)
471 	struct vattr *vap;
472 {
473 
474 	vap->va_type = VNON;
475 
476 	/*
477 	 * Assign each member individually, so that this is safe even
478 	 * if the size and signedness of the members vary.
479 	 */
480 	vap->va_mode = VNOVAL;
481 	vap->va_nlink = VNOVAL;
482 	vap->va_uid = VNOVAL;
483 	vap->va_gid = VNOVAL;
484 	vap->va_fsid = VNOVAL;
485 	vap->va_fileid = VNOVAL;
486 	vap->va_size = VNOVAL;
487 	vap->va_blocksize = VNOVAL;
488 	vap->va_atime.tv_sec =
489 	    vap->va_mtime.tv_sec =
490 	    vap->va_ctime.tv_sec =
491 	    vap->va_birthtime.tv_sec = VNOVAL;
492 	vap->va_atime.tv_nsec =
493 	    vap->va_mtime.tv_nsec =
494 	    vap->va_ctime.tv_nsec =
495 	    vap->va_birthtime.tv_nsec = VNOVAL;
496 	vap->va_gen = VNOVAL;
497 	vap->va_flags = VNOVAL;
498 	vap->va_rdev = VNOVAL;
499 	vap->va_bytes = VNOVAL;
500 	vap->va_vaflags = 0;
501 }
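
/*
 * Illustrative sketch (not part of the original file): callers fill a
 * struct vattr with vattr_null() and then set only the fields they
 * mean to change, so that VOP_SETATTR() treats VNOVAL fields as
 * "leave alone".  A chmod-like operation looks roughly like:
 *
 *	struct vattr va;
 *
 *	vattr_null(&va);
 *	va.va_mode = newmode;
 *	error = VOP_SETATTR(vp, &va, cred, p);
 */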
502 
503 /*
504  * Routines having to do with the management of the vnode table.
505  */
506 extern int (**dead_vnodeop_p)(void *);
507 long numvnodes;
508 
509 /*
510  * Return the next vnode from the free list.
511  */
512 int
513 getnewvnode(tag, mp, vops, vpp)
514 	enum vtagtype tag;
515 	struct mount *mp;
516 	int (**vops)(void *);
517 	struct vnode **vpp;
518 {
519 	extern struct uvm_pagerops uvm_vnodeops;
520 	struct uvm_object *uobj;
521 	struct proc *p = curproc;	/* XXX */
522 	static int toggle;
523 	struct vnode *vp;
524 	int error = 0, tryalloc;
525 
526  try_again:
527 	if (mp) {
528 		/*
529 		 * Mark filesystem busy while we're creating a vnode.
530 		 * If unmount is in progress, this will wait; if the
531 		 * unmount succeeds (only if umount -f), this will
532 		 * return an error.  If the unmount fails, we'll keep
533 		 * going afterwards.
534 		 * (This puts the per-mount vnode list logically under
535 		 * the protection of the vfs_busy lock).
536 		 */
537 		error = vfs_busy(mp, LK_RECURSEFAIL, 0);
538 		if (error && error != EDEADLK)
539 			return error;
540 	}
541 
542 	/*
543 	 * We must choose whether to allocate a new vnode or recycle an
544 	 * existing one. The criterion for allocating a new one is that
545 	 * the total number of vnodes is less than the number desired or
546 	 * there are no vnodes on either free list. Generally we only
547 	 * want to recycle vnodes that have no buffers associated with
548 	 * them, so we look first on the vnode_free_list. If it is empty,
549 	 * we next consider vnodes with referencing buffers on the
550 	 * vnode_hold_list. The toggle ensures that half the time we
551 	 * will recycle a vnode from the vnode_hold_list, and half the
552 	 * time we will allocate a new one unless the list has grown to
553 	 * twice the desired size. We are reluctant to recycle vnodes
554 	 * from the vnode_hold_list because we then lose the identity of
555 	 * all their referencing buffers.
556 	 */
557 
558 	vp = NULL;
559 
560 	simple_lock(&vnode_free_list_slock);
561 
562 	toggle ^= 1;
563 	if (numvnodes > 2 * desiredvnodes)
564 		toggle = 0;
565 
566 	tryalloc = numvnodes < desiredvnodes ||
567 	    (TAILQ_FIRST(&vnode_free_list) == NULL &&
568 	     (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));
569 
570 	if (tryalloc &&
571 	    (vp = pool_get(&vnode_pool, PR_NOWAIT)) != NULL) {
572 		numvnodes++;
573 		simple_unlock(&vnode_free_list_slock);
574 		memset(vp, 0, sizeof(*vp));
575 		simple_lock_init(&vp->v_interlock);
576 		uobj = &vp->v_uobj;
577 		uobj->pgops = &uvm_vnodeops;
578 		TAILQ_INIT(&uobj->memq);
579 		/*
580 		 * done by memset() above.
581 		 *	uobj->uo_npages = 0;
582 		 *	LIST_INIT(&vp->v_nclist);
583 		 *	LIST_INIT(&vp->v_dnclist);
584 		 */
585 	} else {
586 		vp = getcleanvnode(p);
587 		/*
588 		 * Unless this is a bad time of the month, at most
589 		 * the first NCPUS items on the free list are
590 		 * locked, so this is close enough to being empty.
591 		 */
592 		if (vp == NULLVP) {
593 			if (mp && error != EDEADLK)
594 				vfs_unbusy(mp);
595 			if (tryalloc) {
596 				printf("WARNING: unable to allocate new "
597 				    "vnode, retrying...\n");
598 				(void) tsleep(&lbolt, PRIBIO, "newvn", hz);
599 				goto try_again;
600 			}
601 			tablefull("vnode", "increase kern.maxvnodes or NVNODE");
602 			*vpp = 0;
603 			return (ENFILE);
604 		}
605 		vp->v_flag = 0;
606 		vp->v_socket = NULL;
607 #ifdef VERIFIED_EXEC
608 		vp->fp_status = FINGERPRINT_INVALID;
609 #endif
610 	}
611 	vp->v_type = VNON;
612 	vp->v_vnlock = &vp->v_lock;
613 	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
614 	KASSERT(LIST_EMPTY(&vp->v_nclist));
615 	KASSERT(LIST_EMPTY(&vp->v_dnclist));
616 	vp->v_tag = tag;
617 	vp->v_op = vops;
618 	insmntque(vp, mp);
619 	*vpp = vp;
620 	vp->v_usecount = 1;
621 	vp->v_data = 0;
622 	simple_lock_init(&vp->v_interlock);
623 
624 	/*
625 	 * initialize uvm_object within vnode.
626 	 */
627 
628 	uobj = &vp->v_uobj;
629 	KASSERT(uobj->pgops == &uvm_vnodeops);
630 	KASSERT(uobj->uo_npages == 0);
631 	KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
632 	vp->v_size = VSIZENOTSET;
633 
634 	if (mp && error != EDEADLK)
635 		vfs_unbusy(mp);
636 	return (0);
637 }
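
/*
 * Illustrative sketch (not part of the original file): a file system
 * typically calls getnewvnode() from its vget routine and then fills
 * in the type-specific fields, e.g. (VT_UFS and ffs_vnodeop_p are just
 * one possible tag and op vector):
 *
 *	error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp);
 *	if (error)
 *		return (error);
 *	vp->v_data = ip;		private per-file data
 *	vp->v_type = ...;		taken from the on-disk inode
 */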
638 
639 /*
640  * This is really just the reverse of getnewvnode(). Needed for
641  * VFS_VGET functions that may need to push back a vnode in case
642  * of a locking race.
643  */
644 void
645 ungetnewvnode(vp)
646 	struct vnode *vp;
647 {
648 #ifdef DIAGNOSTIC
649 	if (vp->v_usecount != 1)
650 		panic("ungetnewvnode: busy vnode");
651 #endif
652 	vp->v_usecount--;
653 	insmntque(vp, NULL);
654 	vp->v_type = VBAD;
655 
656 	simple_lock(&vp->v_interlock);
657 	/*
658 	 * Insert at head of LRU list
659 	 */
660 	simple_lock(&vnode_free_list_slock);
661 	if (vp->v_holdcnt > 0)
662 		TAILQ_INSERT_HEAD(&vnode_hold_list, vp, v_freelist);
663 	else
664 		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
665 	simple_unlock(&vnode_free_list_slock);
666 	simple_unlock(&vp->v_interlock);
667 }
668 
669 /*
670  * Move a vnode from one mount queue to another.
671  */
672 void
673 insmntque(vp, mp)
674 	struct vnode *vp;
675 	struct mount *mp;
676 {
677 
678 #ifdef DIAGNOSTIC
679 	if ((mp != NULL) &&
680 	    (mp->mnt_iflag & IMNT_UNMOUNT) &&
681 	    !(mp->mnt_flag & MNT_SOFTDEP) &&
682 	    vp->v_tag != VT_VFS) {
683 		panic("insmntque into dying filesystem");
684 	}
685 #endif
686 
687 	simple_lock(&mntvnode_slock);
688 	/*
689 	 * Delete from old mount point vnode list, if on one.
690 	 */
691 	if (vp->v_mount != NULL)
692 		LIST_REMOVE(vp, v_mntvnodes);
693 	/*
694 	 * Insert into list of vnodes for the new mount point, if available.
695 	 */
696 	if ((vp->v_mount = mp) != NULL)
697 		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
698 	simple_unlock(&mntvnode_slock);
699 }
700 
701 /*
702  * Update outstanding I/O count and do wakeup if requested.
703  */
704 void
705 vwakeup(bp)
706 	struct buf *bp;
707 {
708 	struct vnode *vp;
709 
710 	if ((vp = bp->b_vp) != NULL) {
711 		/*
712 		 * XXX Global lock hack: we can't use v_interlock here
713 		 * since this is called in interrupt context from biodone().
714 		 */
715 		simple_lock(&global_v_numoutput_slock);
716 		if (--vp->v_numoutput < 0)
717 			panic("vwakeup: neg numoutput, vp %p", vp);
718 		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
719 			vp->v_flag &= ~VBWAIT;
720 			wakeup((caddr_t)&vp->v_numoutput);
721 		}
722 		simple_unlock(&global_v_numoutput_slock);
723 	}
724 }
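
/*
 * Illustrative sketch (not part of the original file): the producer
 * side of the v_numoutput protocol.  Code that starts asynchronous
 * output bumps the counter under the same global lock before issuing
 * the I/O; biodone() then calls vwakeup() above to decrement it:
 *
 *	s = splbio();
 *	simple_lock(&global_v_numoutput_slock);
 *	vp->v_numoutput++;
 *	simple_unlock(&global_v_numoutput_slock);
 *	splx(s);
 *	... start the write ...
 */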
725 
726 /*
727  * Flush out and invalidate all buffers associated with a vnode.
728  * Called with the underlying vnode locked, which should prevent new dirty
729  * buffers from being queued.
730  */
731 int
732 vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
733 	struct vnode *vp;
734 	int flags;
735 	struct ucred *cred;
736 	struct proc *p;
737 	int slpflag, slptimeo;
738 {
739 	struct buf *bp, *nbp;
740 	int s, error;
741 	int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO |
742 		(flags & V_SAVE ? PGO_CLEANIT : 0);
743 
744 	/* XXXUBC this doesn't look at flags or slp* */
745 	simple_lock(&vp->v_interlock);
746 	error = VOP_PUTPAGES(vp, 0, 0, flushflags);
747 	if (error) {
748 		return error;
749 	}
750 
751 	if (flags & V_SAVE) {
752 		error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0, p);
753 		if (error)
754 		        return (error);
755 #ifdef DIAGNOSTIC
756 		s = splbio();
757 		if (vp->v_numoutput > 0 || !LIST_EMPTY(&vp->v_dirtyblkhd))
758 		        panic("vinvalbuf: dirty bufs, vp %p", vp);
759 		splx(s);
760 #endif
761 	}
762 
763 	s = splbio();
764 
765 restart:
766 	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
767 		nbp = LIST_NEXT(bp, b_vnbufs);
768 		simple_lock(&bp->b_interlock);
769 		if (bp->b_flags & B_BUSY) {
770 			bp->b_flags |= B_WANTED;
771 			error = ltsleep((caddr_t)bp,
772 				    slpflag | (PRIBIO + 1) | PNORELOCK,
773 				    "vinvalbuf", slptimeo, &bp->b_interlock);
774 			if (error) {
775 				splx(s);
776 				return (error);
777 			}
778 			goto restart;
779 		}
780 		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
781 		simple_unlock(&bp->b_interlock);
782 		brelse(bp);
783 	}
784 
785 	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
786 		nbp = LIST_NEXT(bp, b_vnbufs);
787 		simple_lock(&bp->b_interlock);
788 		if (bp->b_flags & B_BUSY) {
789 			bp->b_flags |= B_WANTED;
790 			error = ltsleep((caddr_t)bp,
791 				    slpflag | (PRIBIO + 1) | PNORELOCK,
792 				    "vinvalbuf", slptimeo, &bp->b_interlock);
793 			if (error) {
794 				splx(s);
795 				return (error);
796 			}
797 			goto restart;
798 		}
799 		/*
800 		 * XXX Since there are no node locks for NFS, I believe
801 		 * there is a slight chance that a delayed write will
802 		 * occur while sleeping just above, so check for it.
803 		 */
804 		if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
805 #ifdef DEBUG
806 			printf("buffer still DELWRI\n");
807 #endif
808 			bp->b_flags |= B_BUSY | B_VFLUSH;
809 			simple_unlock(&bp->b_interlock);
810 			VOP_BWRITE(bp);
811 			goto restart;
812 		}
813 		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
814 		simple_unlock(&bp->b_interlock);
815 		brelse(bp);
816 	}
817 
818 #ifdef DIAGNOSTIC
819 	if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
820 		panic("vinvalbuf: flush failed, vp %p", vp);
821 #endif
822 
823 	splx(s);
824 
825 	return (0);
826 }
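
/*
 * Illustrative sketch (not part of the original file): V_SAVE asks
 * that dirty data be written back before the buffers are invalidated;
 * flags of 0 simply discards everything.  A typical last-close path:
 *
 *	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 *	error = vinvalbuf(vp, V_SAVE, cred, p, 0, 0);
 *	VOP_UNLOCK(vp, 0);
 */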
827 
828 /*
829  * Destroy any in-core blocks past the truncation length.
830  * Called with the underlying vnode locked, which should prevent new dirty
831  * buffers from being queued.
832  */
833 int
834 vtruncbuf(vp, lbn, slpflag, slptimeo)
835 	struct vnode *vp;
836 	daddr_t lbn;
837 	int slpflag, slptimeo;
838 {
839 	struct buf *bp, *nbp;
840 	int s, error;
841 	voff_t off;
842 
843 	off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
844 	simple_lock(&vp->v_interlock);
845 	error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
846 	if (error) {
847 		return error;
848 	}
849 
850 	s = splbio();
851 
852 restart:
853 	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
854 		nbp = LIST_NEXT(bp, b_vnbufs);
855 		if (bp->b_lblkno < lbn)
856 			continue;
857 		simple_lock(&bp->b_interlock);
858 		if (bp->b_flags & B_BUSY) {
859 			bp->b_flags |= B_WANTED;
860 			error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK,
861 			    "vtruncbuf", slptimeo, &bp->b_interlock);
862 			if (error) {
863 				splx(s);
864 				return (error);
865 			}
866 			goto restart;
867 		}
868 		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
869 		simple_unlock(&bp->b_interlock);
870 		brelse(bp);
871 	}
872 
873 	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
874 		nbp = LIST_NEXT(bp, b_vnbufs);
875 		if (bp->b_lblkno < lbn)
876 			continue;
877 		simple_lock(&bp->b_interlock);
878 		if (bp->b_flags & B_BUSY) {
879 			bp->b_flags |= B_WANTED;
880 			error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK,
881 			    "vtruncbuf", slptimeo, &bp->b_interlock);
882 			if (error) {
883 				splx(s);
884 				return (error);
885 			}
886 			goto restart;
887 		}
888 		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
889 		simple_unlock(&bp->b_interlock);
890 		brelse(bp);
891 	}
892 
893 	splx(s);
894 
895 	return (0);
896 }
897 
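/*
 * Flush all dirty buffers associated with a vnode; if "sync" is set,
 * wait for the writes to complete.
 */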
898 void
899 vflushbuf(vp, sync)
900 	struct vnode *vp;
901 	int sync;
902 {
903 	struct buf *bp, *nbp;
904 	int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0);
905 	int s;
906 
907 	simple_lock(&vp->v_interlock);
908 	(void) VOP_PUTPAGES(vp, 0, 0, flags);
909 
910 loop:
911 	s = splbio();
912 	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
913 		nbp = LIST_NEXT(bp, b_vnbufs);
914 		simple_lock(&bp->b_interlock);
915 		if ((bp->b_flags & B_BUSY)) {
916 			simple_unlock(&bp->b_interlock);
917 			continue;
918 		}
919 		if ((bp->b_flags & B_DELWRI) == 0)
920 			panic("vflushbuf: not dirty, bp %p", bp);
921 		bp->b_flags |= B_BUSY | B_VFLUSH;
922 		simple_unlock(&bp->b_interlock);
923 		splx(s);
924 		/*
925 		 * Wait for I/O associated with indirect blocks to complete,
926 		 * since there is no way to quickly wait for them below.
927 		 */
928 		if (bp->b_vp == vp || sync == 0)
929 			(void) bawrite(bp);
930 		else
931 			(void) bwrite(bp);
932 		goto loop;
933 	}
934 	if (sync == 0) {
935 		splx(s);
936 		return;
937 	}
938 	simple_lock(&global_v_numoutput_slock);
939 	while (vp->v_numoutput) {
940 		vp->v_flag |= VBWAIT;
941 		ltsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0,
942 			&global_v_numoutput_slock);
943 	}
944 	simple_unlock(&global_v_numoutput_slock);
945 	splx(s);
946 	if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
947 		vprint("vflushbuf: dirty", vp);
948 		goto loop;
949 	}
950 }
951 
952 /*
953  * Associate a buffer with a vnode.
954  */
955 void
956 bgetvp(vp, bp)
957 	struct vnode *vp;
958 	struct buf *bp;
959 {
960 	int s;
961 
962 	if (bp->b_vp)
963 		panic("bgetvp: not free, bp %p", bp);
964 	VHOLD(vp);
965 	s = splbio();
966 	bp->b_vp = vp;
967 	if (vp->v_type == VBLK || vp->v_type == VCHR)
968 		bp->b_dev = vp->v_rdev;
969 	else
970 		bp->b_dev = NODEV;
971 	/*
972 	 * Insert onto list for new vnode.
973 	 */
974 	bufinsvn(bp, &vp->v_cleanblkhd);
975 	splx(s);
976 }
977 
978 /*
979  * Disassociate a buffer from a vnode.
980  */
981 void
982 brelvp(bp)
983 	struct buf *bp;
984 {
985 	struct vnode *vp;
986 	int s;
987 
988 	if (bp->b_vp == NULL)
989 		panic("brelvp: vp NULL, bp %p", bp);
990 
991 	s = splbio();
992 	vp = bp->b_vp;
993 	/*
994 	 * Delete from old vnode list, if on one.
995 	 */
996 	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
997 		bufremvn(bp);
998 
999 	if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_flag & VONWORKLST) &&
1000 	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
1001 		vp->v_flag &= ~VONWORKLST;
1002 		LIST_REMOVE(vp, v_synclist);
1003 	}
1004 
1005 	bp->b_vp = NULL;
1006 	HOLDRELE(vp);
1007 	splx(s);
1008 }
1009 
1010 /*
1011  * Reassign a buffer from one vnode to another.
1012  * Used to assign file specific control information
1013  * (indirect blocks) to the vnode to which they belong.
1014  *
1015  * This function must be called at splbio().
1016  */
1017 void
1018 reassignbuf(bp, newvp)
1019 	struct buf *bp;
1020 	struct vnode *newvp;
1021 {
1022 	struct buflists *listheadp;
1023 	int delay;
1024 
1025 	/*
1026 	 * Delete from old vnode list, if on one.
1027 	 */
1028 	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
1029 		bufremvn(bp);
1030 	/*
1031 	 * If dirty, put on list of dirty buffers;
1032 	 * otherwise insert onto list of clean buffers.
1033 	 */
1034 	if ((bp->b_flags & B_DELWRI) == 0) {
1035 		listheadp = &newvp->v_cleanblkhd;
1036 		if (TAILQ_EMPTY(&newvp->v_uobj.memq) &&
1037 		    (newvp->v_flag & VONWORKLST) &&
1038 		    LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) {
1039 			newvp->v_flag &= ~VONWORKLST;
1040 			LIST_REMOVE(newvp, v_synclist);
1041 		}
1042 	} else {
1043 		listheadp = &newvp->v_dirtyblkhd;
1044 		if ((newvp->v_flag & VONWORKLST) == 0) {
1045 			switch (newvp->v_type) {
1046 			case VDIR:
1047 				delay = dirdelay;
1048 				break;
1049 			case VBLK:
1050 				if (newvp->v_specmountpoint != NULL) {
1051 					delay = metadelay;
1052 					break;
1053 				}
1054 				/* fall through */
1055 			default:
1056 				delay = filedelay;
1057 				break;
1058 			}
1059 			if (!newvp->v_mount ||
1060 			    (newvp->v_mount->mnt_flag & MNT_ASYNC) == 0)
1061 				vn_syncer_add_to_worklist(newvp, delay);
1062 		}
1063 	}
1064 	bufinsvn(bp, listheadp);
1065 }
1066 
1067 /*
1068  * Create a vnode for a block device.
1069  * Used for root filesystem and swap areas.
1070  * Also used for memory file system special devices.
1071  */
1072 int
1073 bdevvp(dev, vpp)
1074 	dev_t dev;
1075 	struct vnode **vpp;
1076 {
1077 
1078 	return (getdevvp(dev, vpp, VBLK));
1079 }
1080 
1081 /*
1082  * Create a vnode for a character device.
1083  * Used for kernfs and some console handling.
1084  */
1085 int
1086 cdevvp(dev, vpp)
1087 	dev_t dev;
1088 	struct vnode **vpp;
1089 {
1090 
1091 	return (getdevvp(dev, vpp, VCHR));
1092 }
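
/*
 * Illustrative sketch (not part of the original file): bdevvp() is how
 * early kernel code obtains a vnode for a disk before any file system
 * is mounted, e.g. while setting up the root file system:
 *
 *	if (bdevvp(rootdev, &rootvp))
 *		panic("can't set up root vnode");
 *
 * (rootdev and rootvp are the usual autoconfiguration globals.)
 */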
1093 
1094 /*
1095  * Create a vnode for a device.
1096  * Used by bdevvp (block device) for root file system etc.,
1097  * and by cdevvp (character device) for console and kernfs.
1098  */
1099 int
1100 getdevvp(dev, vpp, type)
1101 	dev_t dev;
1102 	struct vnode **vpp;
1103 	enum vtype type;
1104 {
1105 	struct vnode *vp;
1106 	struct vnode *nvp;
1107 	int error;
1108 
1109 	if (dev == NODEV) {
1110 		*vpp = NULLVP;
1111 		return (0);
1112 	}
1113 	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
1114 	if (error) {
1115 		*vpp = NULLVP;
1116 		return (error);
1117 	}
1118 	vp = nvp;
1119 	vp->v_type = type;
1120 	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
1121 		vput(vp);
1122 		vp = nvp;
1123 	}
1124 	*vpp = vp;
1125 	return (0);
1126 }
1127 
1128 /*
1129  * Check to see if the new vnode represents a special device
1130  * for which we already have a vnode (either because of
1131  * bdevvp() or because of a different vnode representing
1132  * the same block device). If such an alias exists, deallocate
1133  * the existing contents and return the aliased vnode. The
1134  * caller is responsible for filling it with its new contents.
1135  */
1136 struct vnode *
1137 checkalias(nvp, nvp_rdev, mp)
1138 	struct vnode *nvp;
1139 	dev_t nvp_rdev;
1140 	struct mount *mp;
1141 {
1142 	struct proc *p = curproc;       /* XXX */
1143 	struct vnode *vp;
1144 	struct vnode **vpp;
1145 
1146 	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
1147 		return (NULLVP);
1148 
1149 	vpp = &speclisth[SPECHASH(nvp_rdev)];
1150 loop:
1151 	simple_lock(&spechash_slock);
1152 	for (vp = *vpp; vp; vp = vp->v_specnext) {
1153 		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
1154 			continue;
1155 		/*
1156 		 * Alias, but not in use, so flush it out.
1157 		 */
1158 		simple_lock(&vp->v_interlock);
1159 		simple_unlock(&spechash_slock);
1160 		if (vp->v_usecount == 0) {
1161 			vgonel(vp, p);
1162 			goto loop;
1163 		}
1164 		/*
1165 		 * What we want to know here is whether someone else has
1166 		 * removed this vnode from the device hash list while we were
1167 		 * waiting.  This can only happen if vclean() did it, and
1168 		 * this requires the vnode to be locked.  Therefore, we use
1169 		 * LK_SLEEPFAIL and retry.
1170 		 */
1171 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_SLEEPFAIL))
1172 			goto loop;
1173 		simple_lock(&spechash_slock);
1174 		break;
1175 	}
1176 	if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
1177 		MALLOC(nvp->v_specinfo, struct specinfo *,
1178 			sizeof(struct specinfo), M_VNODE, M_NOWAIT);
1179 		/* XXX Erg. */
1180 		if (nvp->v_specinfo == NULL) {
1181 			simple_unlock(&spechash_slock);
1182 			uvm_wait("checkalias");
1183 			goto loop;
1184 		}
1185 
1186 		nvp->v_rdev = nvp_rdev;
1187 		nvp->v_hashchain = vpp;
1188 		nvp->v_specnext = *vpp;
1189 		nvp->v_specmountpoint = NULL;
1190 		simple_unlock(&spechash_slock);
1191 		nvp->v_speclockf = NULL;
1192 		simple_lock_init(&nvp->v_spec_cow_slock);
1193 		SLIST_INIT(&nvp->v_spec_cow_head);
1194 		nvp->v_spec_cow_req = 0;
1195 		nvp->v_spec_cow_count = 0;
1196 
1197 		*vpp = nvp;
1198 		if (vp != NULLVP) {
1199 			nvp->v_flag |= VALIASED;
1200 			vp->v_flag |= VALIASED;
1201 			vput(vp);
1202 		}
1203 		return (NULLVP);
1204 	}
1205 	simple_unlock(&spechash_slock);
1206 	VOP_UNLOCK(vp, 0);
1207 	simple_lock(&vp->v_interlock);
1208 	vclean(vp, 0, p);
1209 	vp->v_op = nvp->v_op;
1210 	vp->v_tag = nvp->v_tag;
1211 	vp->v_vnlock = &vp->v_lock;
1212 	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
1213 	nvp->v_type = VNON;
1214 	insmntque(vp, mp);
1215 	return (vp);
1216 }
1217 
1218 /*
1219  * Grab a particular vnode from the free list, increment its
1220  * reference count and lock it. If the vnode lock bit is set the
1221  * vnode is being eliminated in vgone. In that case, we cannot
1222  * grab the vnode, so the process is awakened when the transition is
1223  * completed, and an error returned to indicate that the vnode is no
1224  * longer usable (possibly having been changed to a new file system type).
1225  */
1226 int
1227 vget(vp, flags)
1228 	struct vnode *vp;
1229 	int flags;
1230 {
1231 	int error;
1232 
1233 	/*
1234 	 * If the vnode is in the process of being cleaned out for
1235 	 * another use, we wait for the cleaning to finish and then
1236 	 * return failure. Cleaning is determined by checking that
1237 	 * the VXLOCK flag is set.
1238 	 */
1239 
1240 	if ((flags & LK_INTERLOCK) == 0)
1241 		simple_lock(&vp->v_interlock);
1242 	if (vp->v_flag & VXLOCK) {
1243 		if (flags & LK_NOWAIT) {
1244 			simple_unlock(&vp->v_interlock);
1245 			return EBUSY;
1246 		}
1247 		vp->v_flag |= VXWANT;
1248 		ltsleep(vp, PINOD|PNORELOCK, "vget", 0, &vp->v_interlock);
1249 		return (ENOENT);
1250 	}
1251 	if (vp->v_usecount == 0) {
1252 		simple_lock(&vnode_free_list_slock);
1253 		if (vp->v_holdcnt > 0)
1254 			TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
1255 		else
1256 			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1257 		simple_unlock(&vnode_free_list_slock);
1258 	}
1259 	vp->v_usecount++;
1260 #ifdef DIAGNOSTIC
1261 	if (vp->v_usecount == 0) {
1262 		vprint("vget", vp);
1263 		panic("vget: usecount overflow, vp %p", vp);
1264 	}
1265 #endif
1266 	if (flags & LK_TYPE_MASK) {
1267 		if ((error = vn_lock(vp, flags | LK_INTERLOCK))) {
1268 			/*
1269 			 * must expand vrele here because we do not want
1270 			 * to call VOP_INACTIVE if the reference count
1271 			 * drops back to zero since it was never really
1272 			 * active. We must remove it from the free list
1273 			 * before sleeping so that multiple processes do
1274 			 * not try to recycle it.
1275 			 */
1276 			simple_lock(&vp->v_interlock);
1277 			vp->v_usecount--;
1278 			if (vp->v_usecount > 0) {
1279 				simple_unlock(&vp->v_interlock);
1280 				return (error);
1281 			}
1282 			/*
1283 			 * insert at tail of LRU list
1284 			 */
1285 			simple_lock(&vnode_free_list_slock);
1286 			if (vp->v_holdcnt > 0)
1287 				TAILQ_INSERT_TAIL(&vnode_hold_list, vp,
1288 				    v_freelist);
1289 			else
1290 				TAILQ_INSERT_TAIL(&vnode_free_list, vp,
1291 				    v_freelist);
1292 			simple_unlock(&vnode_free_list_slock);
1293 			simple_unlock(&vp->v_interlock);
1294 		}
1295 		return (error);
1296 	}
1297 	simple_unlock(&vp->v_interlock);
1298 	return (0);
1299 }
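
/*
 * Illustrative sketch (not part of the original file): the classic
 * hash-lookup pattern built on vget().  Since vget() may sleep and
 * fail while the vnode is being cleaned (VXLOCK), callers retry the
 * whole lookup instead of trusting a possibly stale pointer
 * (hash_slock and the chain stand in for e.g. the ufs inode hash):
 *
 *	loop:
 *		simple_lock(&hash_slock);
 *		for each vp on the hash chain matching the key {
 *			simple_lock(&vp->v_interlock);
 *			simple_unlock(&hash_slock);
 *			if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK))
 *				goto loop;
 *			return (vp);
 *		}
 *		simple_unlock(&hash_slock);
 */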
1300 
1301 /*
1302  * vput(), just unlock and vrele()
1303  */
1304 void
1305 vput(vp)
1306 	struct vnode *vp;
1307 {
1308 	struct proc *p = curproc;	/* XXX */
1309 
1310 #ifdef DIAGNOSTIC
1311 	if (vp == NULL)
1312 		panic("vput: null vp");
1313 #endif
1314 	simple_lock(&vp->v_interlock);
1315 	vp->v_usecount--;
1316 	if (vp->v_usecount > 0) {
1317 		simple_unlock(&vp->v_interlock);
1318 		VOP_UNLOCK(vp, 0);
1319 		return;
1320 	}
1321 #ifdef DIAGNOSTIC
1322 	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1323 		vprint("vput: bad ref count", vp);
1324 		panic("vput: ref cnt");
1325 	}
1326 #endif
1327 	/*
1328 	 * Insert at tail of LRU list.
1329 	 */
1330 	simple_lock(&vnode_free_list_slock);
1331 	if (vp->v_holdcnt > 0)
1332 		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
1333 	else
1334 		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1335 	simple_unlock(&vnode_free_list_slock);
1336 	if (vp->v_flag & VEXECMAP) {
1337 		uvmexp.execpages -= vp->v_uobj.uo_npages;
1338 		uvmexp.filepages += vp->v_uobj.uo_npages;
1339 	}
1340 	vp->v_flag &= ~(VTEXT|VEXECMAP);
1341 	simple_unlock(&vp->v_interlock);
1342 	VOP_INACTIVE(vp, p);
1343 }
1344 
1345 /*
1346  * Vnode release.
1347  * If count drops to zero, call inactive routine and return to freelist.
1348  */
1349 void
1350 vrele(vp)
1351 	struct vnode *vp;
1352 {
1353 	struct proc *p = curproc;	/* XXX */
1354 
1355 #ifdef DIAGNOSTIC
1356 	if (vp == NULL)
1357 		panic("vrele: null vp");
1358 #endif
1359 	simple_lock(&vp->v_interlock);
1360 	vp->v_usecount--;
1361 	if (vp->v_usecount > 0) {
1362 		simple_unlock(&vp->v_interlock);
1363 		return;
1364 	}
1365 #ifdef DIAGNOSTIC
1366 	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1367 		vprint("vrele: bad ref count", vp);
1368 		panic("vrele: ref cnt vp %p", vp);
1369 	}
1370 #endif
1371 	/*
1372 	 * Insert at tail of LRU list.
1373 	 */
1374 	simple_lock(&vnode_free_list_slock);
1375 	if (vp->v_holdcnt > 0)
1376 		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
1377 	else
1378 		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1379 	simple_unlock(&vnode_free_list_slock);
1380 	if (vp->v_flag & VEXECMAP) {
1381 		uvmexp.execpages -= vp->v_uobj.uo_npages;
1382 		uvmexp.filepages += vp->v_uobj.uo_npages;
1383 	}
1384 	vp->v_flag &= ~(VTEXT|VEXECMAP);
1385 	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
1386 		VOP_INACTIVE(vp, p);
1387 }
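
/*
 * Illustrative sketch (not part of the original file): vput() and
 * vrele() differ only in whether the caller still holds the vnode
 * lock when giving up its reference:
 *
 *	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 *	... use the locked vnode ...
 *	vput(vp);			unlock and release in one call
 *
 * versus
 *
 *	... use an unlocked reference ...
 *	vrele(vp);			release only
 */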
1388 
1389 #ifdef DIAGNOSTIC
1390 /*
1391  * Page or buffer structure gets a reference.
1392  */
1393 void
1394 vholdl(vp)
1395 	struct vnode *vp;
1396 {
1397 
1398 	/*
1399 	 * If it is on the freelist and the hold count is currently
1400 	 * zero, move it to the hold list. The test of the back
1401 	 * pointer and the use reference count of zero is because
1402 	 * it will be removed from a free list by getnewvnode,
1403 	 * but will not have its reference count incremented until
1404 	 * after calling vgone. If the reference count were
1405 	 * incremented first, vgone would (incorrectly) try to
1406 	 * close the previous instance of the underlying object.
1407 	 * So, the back pointer is explicitly set to `0xdeadb' in
1408 	 * getcleanvnode after removing it from a freelist to ensure
1409 	 * that we do not try to move it here.
1410 	 */
1411 	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
1412 	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
1413 		simple_lock(&vnode_free_list_slock);
1414 		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1415 		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
1416 		simple_unlock(&vnode_free_list_slock);
1417 	}
1418 	vp->v_holdcnt++;
1419 }
1420 
1421 /*
1422  * Page or buffer structure frees a reference.
1423  */
1424 void
1425 holdrelel(vp)
1426 	struct vnode *vp;
1427 {
1428 
1429 	if (vp->v_holdcnt <= 0)
1430 		panic("holdrelel: holdcnt vp %p", vp);
1431 	vp->v_holdcnt--;
1432 
1433 	/*
1434 	 * If it is on the holdlist and the hold count drops to
1435 	 * zero, move it to the free list. The test of the back
1436 	 * pointer and the use reference count of zero is because
1437 	 * it will be removed from a free list by getnewvnode,
1438 	 * but will not have its reference count incremented until
1439 	 * after calling vgone. If the reference count were
1440 	 * incremented first, vgone would (incorrectly) try to
1441 	 * close the previous instance of the underlying object.
1442 	 * So, the back pointer is explicitly set to `0xdeadb' in
1443 	 * getcleanvnode after removing it from a freelist to ensure
1444 	 * that we do not try to move it here.
1445 	 */
1446 
1447 	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
1448 	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
1449 		simple_lock(&vnode_free_list_slock);
1450 		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
1451 		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1452 		simple_unlock(&vnode_free_list_slock);
1453 	}
1454 }
1455 
1456 /*
1457  * Vnode reference.
1458  */
1459 void
1460 vref(vp)
1461 	struct vnode *vp;
1462 {
1463 
1464 	simple_lock(&vp->v_interlock);
1465 	if (vp->v_usecount <= 0)
1466 		panic("vref used where vget required, vp %p", vp);
1467 	vp->v_usecount++;
1468 #ifdef DIAGNOSTIC
1469 	if (vp->v_usecount == 0) {
1470 		vprint("vref", vp);
1471 		panic("vref: usecount overflow, vp %p", vp);
1472 	}
1473 #endif
1474 	simple_unlock(&vp->v_interlock);
1475 }
1476 #endif /* DIAGNOSTIC */
1477 
1478 /*
1479  * Remove any vnodes in the vnode table belonging to mount point mp.
1480  *
1481  * If FORCECLOSE is not specified, there should not be any active ones,
1482  * return error if any are found (nb: this is a user error, not a
1483  * system error). If FORCECLOSE is specified, detach any active vnodes
1484  * that are found.
1485  *
1486  * If WRITECLOSE is set, only flush out regular file vnodes open for
1487  * writing.
1488  *
1489  * SKIPSYSTEM causes any vnodes marked V_SYSTEM to be skipped.
1490  */
1491 #ifdef DEBUG
1492 int busyprt = 0;	/* print out busy vnodes */
1493 struct ctldebug debug1 = { "busyprt", &busyprt };
1494 #endif
1495 
1496 int
1497 vflush(mp, skipvp, flags)
1498 	struct mount *mp;
1499 	struct vnode *skipvp;
1500 	int flags;
1501 {
1502 	struct proc *p = curproc;	/* XXX */
1503 	struct vnode *vp, *nvp;
1504 	int busy = 0;
1505 
1506 	simple_lock(&mntvnode_slock);
1507 loop:
1508 	for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
1509 		if (vp->v_mount != mp)
1510 			goto loop;
1511 		nvp = LIST_NEXT(vp, v_mntvnodes);
1512 		/*
1513 		 * Skip over a selected vnode.
1514 		 */
1515 		if (vp == skipvp)
1516 			continue;
1517 		simple_lock(&vp->v_interlock);
1518 		/*
1519 		 * Skip over a vnodes marked VSYSTEM.
1520 		 * Skip over vnodes marked VSYSTEM.
1521 		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
1522 			simple_unlock(&vp->v_interlock);
1523 			continue;
1524 		}
1525 		/*
1526 		 * If WRITECLOSE is set, only flush out regular file
1527 		 * vnodes open for writing.
1528 		 */
1529 		if ((flags & WRITECLOSE) &&
1530 		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
1531 			simple_unlock(&vp->v_interlock);
1532 			continue;
1533 		}
1534 		/*
1535 		 * With v_usecount == 0, all we need to do is clear
1536 		 * out the vnode data structures and we are done.
1537 		 */
1538 		if (vp->v_usecount == 0) {
1539 			simple_unlock(&mntvnode_slock);
1540 			vgonel(vp, p);
1541 			simple_lock(&mntvnode_slock);
1542 			continue;
1543 		}
1544 		/*
1545 		 * If FORCECLOSE is set, forcibly close the vnode.
1546 		 * For block or character devices, revert to an
1547 		 * anonymous device. For all other files, just kill them.
1548 		 */
1549 		if (flags & FORCECLOSE) {
1550 			simple_unlock(&mntvnode_slock);
1551 			if (vp->v_type != VBLK && vp->v_type != VCHR) {
1552 				vgonel(vp, p);
1553 			} else {
1554 				vclean(vp, 0, p);
1555 				vp->v_op = spec_vnodeop_p;
1556 				insmntque(vp, (struct mount *)0);
1557 			}
1558 			simple_lock(&mntvnode_slock);
1559 			continue;
1560 		}
1561 #ifdef DEBUG
1562 		if (busyprt)
1563 			vprint("vflush: busy vnode", vp);
1564 #endif
1565 		simple_unlock(&vp->v_interlock);
1566 		busy++;
1567 	}
1568 	simple_unlock(&mntvnode_slock);
1569 	if (busy)
1570 		return (EBUSY);
1571 	return (0);
1572 }
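
/*
 * Illustrative sketch (not part of the original file): a file system's
 * unmount routine is the main consumer of vflush():
 *
 *	flags = (mntflags & MNT_FORCE) ? FORCECLOSE : 0;
 *	error = vflush(mp, NULLVP, flags);
 *	if (error)
 *		return (error);		some vnode is still busy
 */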
1573 
1574 /*
1575  * Disassociate the underlying file system from a vnode.
1576  */
1577 void
1578 vclean(vp, flags, p)
1579 	struct vnode *vp;
1580 	int flags;
1581 	struct proc *p;
1582 {
1583 	struct mount *mp;
1584 	int active;
1585 
1586 	LOCK_ASSERT(simple_lock_held(&vp->v_interlock));
1587 
1588 	/*
1589 	 * Check to see if the vnode is in use.
1590 	 * If so we have to reference it before we clean it out
1591 	 * so that its count cannot fall to zero and generate a
1592 	 * race against ourselves to recycle it.
1593 	 */
1594 
1595 	if ((active = vp->v_usecount) != 0) {
1596 		vp->v_usecount++;
1597 #ifdef DIAGNOSTIC
1598 		if (vp->v_usecount == 0) {
1599 			vprint("vclean", vp);
1600 			panic("vclean: usecount overflow");
1601 		}
1602 #endif
1603 	}
1604 
1605 	/*
1606 	 * Prevent the vnode from being recycled or
1607 	 * brought into use while we clean it out.
1608 	 */
1609 	if (vp->v_flag & VXLOCK)
1610 		panic("vclean: deadlock, vp %p", vp);
1611 	vp->v_flag |= VXLOCK;
1612 	if (vp->v_flag & VEXECMAP) {
1613 		uvmexp.execpages -= vp->v_uobj.uo_npages;
1614 		uvmexp.filepages += vp->v_uobj.uo_npages;
1615 	}
1616 	vp->v_flag &= ~(VTEXT|VEXECMAP);
1617 
1618 	/*
1619 	 * Even if the count is zero, the VOP_INACTIVE routine may still
1620 	 * have the object locked while it cleans it out. The VOP_LOCK
1621 	 * ensures that the VOP_INACTIVE routine is done with its work.
1622 	 * For active vnodes, it ensures that no other activity can
1623 	 * occur while the underlying object is being cleaned out.
1624 	 */
1625 	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK);
1626 
1627 	/*
1628 	 * Clean out any cached data associated with the vnode.
1629 	 * If special device, remove it from special device alias list.
1630 	 * If it is a special device, remove it from the special
1631 	 * device alias list if it is on one.
1632 	if (flags & DOCLOSE) {
1633 		int error;
1634 		struct vnode *vq, *vx;
1635 
1636 		vn_start_write(vp, &mp, V_WAIT | V_LOWER);
1637 		error = vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
1638 		vn_finished_write(mp, V_LOWER);
1639 		if (error)
1640 			error = vinvalbuf(vp, 0, NOCRED, p, 0, 0);
1641 		KASSERT(error == 0);
1642 		KASSERT((vp->v_flag & VONWORKLST) == 0);
1643 
1644 		if (active)
1645 			VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
1646 
1647 		if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
1648 		    vp->v_specinfo != 0) {
1649 			simple_lock(&spechash_slock);
1650 			if (vp->v_hashchain != NULL) {
1651 				if (*vp->v_hashchain == vp) {
1652 					*vp->v_hashchain = vp->v_specnext;
1653 				} else {
1654 					for (vq = *vp->v_hashchain; vq;
1655 					     vq = vq->v_specnext) {
1656 						if (vq->v_specnext != vp)
1657 							continue;
1658 						vq->v_specnext = vp->v_specnext;
1659 						break;
1660 					}
1661 					if (vq == NULL)
1662 						panic("missing bdev");
1663 				}
1664 				if (vp->v_flag & VALIASED) {
1665 					vx = NULL;
1666 					for (vq = *vp->v_hashchain; vq;
1667 					     vq = vq->v_specnext) {
1668 						if (vq->v_rdev != vp->v_rdev ||
1669 						    vq->v_type != vp->v_type)
1670 							continue;
1671 						if (vx)
1672 							break;
1673 						vx = vq;
1674 					}
1675 					if (vx == NULL)
1676 						panic("missing alias");
1677 					if (vq == NULL)
1678 						vx->v_flag &= ~VALIASED;
1679 					vp->v_flag &= ~VALIASED;
1680 				}
1681 			}
1682 			simple_unlock(&spechash_slock);
1683 			FREE(vp->v_specinfo, M_VNODE);
1684 			vp->v_specinfo = NULL;
1685 		}
1686 	}
1687 	LOCK_ASSERT(!simple_lock_held(&vp->v_interlock));
1688 
1689 	/*
1690 	 * If purging an active vnode, it must be closed and
1691 	 * deactivated before being reclaimed. Note that the
1692 	 * VOP_INACTIVE will unlock the vnode.
1693 	 */
1694 	if (active) {
1695 		VOP_INACTIVE(vp, p);
1696 	} else {
1697 		/*
1698 		 * Any other processes trying to obtain this lock must first
1699 		 * wait for VXLOCK to clear, then call the new lock operation.
1700 		 */
1701 		VOP_UNLOCK(vp, 0);
1702 	}
1703 	/*
1704 	 * Reclaim the vnode.
1705 	 */
1706 	if (VOP_RECLAIM(vp, p))
1707 		panic("vclean: cannot reclaim, vp %p", vp);
1708 	if (active) {
1709 		/*
1710 		 * Inline copy of vrele() since VOP_INACTIVE
1711 		 * has already been called.
1712 		 */
1713 		simple_lock(&vp->v_interlock);
1714 		if (--vp->v_usecount <= 0) {
1715 #ifdef DIAGNOSTIC
1716 			if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1717 				vprint("vclean: bad ref count", vp);
1718 				panic("vclean: ref cnt");
1719 			}
1720 #endif
1721 			/*
1722 			 * Insert at tail of LRU list.
1723 			 */
1724 
1725 			simple_unlock(&vp->v_interlock);
1726 			simple_lock(&vnode_free_list_slock);
1727 #ifdef DIAGNOSTIC
1728 			if (vp->v_holdcnt > 0)
1729 				panic("vclean: not clean, vp %p", vp);
1730 #endif
1731 			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1732 			simple_unlock(&vnode_free_list_slock);
1733 		} else
1734 			simple_unlock(&vp->v_interlock);
1735 	}
1736 
1737 	KASSERT(vp->v_uobj.uo_npages == 0);
1738 	cache_purge(vp);
1739 
1740 	/*
1741 	 * Done with purge, notify sleepers of the grim news.
1742 	 */
1743 	vp->v_op = dead_vnodeop_p;
1744 	vp->v_tag = VT_NON;
1745 	simple_lock(&vp->v_interlock);
1746 	VN_KNOTE(vp, NOTE_REVOKE);	/* FreeBSD has this in vn_pollgone() */
1747 	vp->v_flag &= ~(VXLOCK|VLOCKSWORK);
1748 	if (vp->v_flag & VXWANT) {
1749 		vp->v_flag &= ~VXWANT;
1750 		simple_unlock(&vp->v_interlock);
1751 		wakeup((caddr_t)vp);
1752 	} else
1753 		simple_unlock(&vp->v_interlock);
1754 }
1755 
1756 /*
1757  * Recycle an unused vnode to the front of the free list.
1758  * Release the passed interlock if the vnode will be recycled.
1759  */
1760 int
1761 vrecycle(vp, inter_lkp, p)
1762 	struct vnode *vp;
1763 	struct simplelock *inter_lkp;
1764 	struct proc *p;
1765 {
1766 
1767 	simple_lock(&vp->v_interlock);
1768 	if (vp->v_usecount == 0) {
1769 		if (inter_lkp)
1770 			simple_unlock(inter_lkp);
1771 		vgonel(vp, p);
1772 		return (1);
1773 	}
1774 	simple_unlock(&vp->v_interlock);
1775 	return (0);
1776 }
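
/*
 * Illustrative sketch (not part of the original file): a file system's
 * VOP_INACTIVE routine may use vrecycle() to get rid of a vnode whose
 * backing object is gone, e.g. an unlinked file, instead of letting it
 * linger on the free list (ip/i_mode stand in for the fs's private
 * per-vnode state):
 *
 *	if (ip->i_mode == 0)
 *		vrecycle(vp, NULL, p);
 */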
1777 
1778 /*
1779  * Eliminate all activity associated with a vnode
1780  * in preparation for reuse.
1781  */
1782 void
1783 vgone(vp)
1784 	struct vnode *vp;
1785 {
1786 	struct proc *p = curproc;	/* XXX */
1787 
1788 	simple_lock(&vp->v_interlock);
1789 	vgonel(vp, p);
1790 }
1791 
1792 /*
1793  * vgone, with the vp interlock held.
1794  */
1795 void
1796 vgonel(vp, p)
1797 	struct vnode *vp;
1798 	struct proc *p;
1799 {
1800 
1801 	LOCK_ASSERT(simple_lock_held(&vp->v_interlock));
1802 
1803 	/*
1804 	 * If a vgone (or vclean) is already in progress,
1805 	 * wait until it is done and return.
1806 	 */
1807 
1808 	if (vp->v_flag & VXLOCK) {
1809 		vp->v_flag |= VXWANT;
1810 		ltsleep(vp, PINOD | PNORELOCK, "vgone", 0, &vp->v_interlock);
1811 		return;
1812 	}
1813 
1814 	/*
1815 	 * Clean out the filesystem specific data.
1816 	 */
1817 
1818 	vclean(vp, DOCLOSE, p);
1819 	KASSERT((vp->v_flag & VONWORKLST) == 0);
1820 
1821 	/*
1822 	 * Delete from old mount point vnode list, if on one.
1823 	 */
1824 
1825 	if (vp->v_mount != NULL)
1826 		insmntque(vp, (struct mount *)0);
1827 
1828 	/*
1829 	 * The test of the back pointer and the reference count of
1830 	 * zero is because it will be removed from the free list by
1831 	 * getcleanvnode, but will not have its reference count
1832 	 * incremented until after calling vgone. If the reference
1833 	 * count were incremented first, vgone would (incorrectly)
1834 	 * try to close the previous instance of the underlying object.
1835 	 * So, the back pointer is explicitly set to `0xdeadb' in
1836 	 * getcleanvnode after removing it from the freelist to ensure
1837 	 * that we do not try to move it here.
1838 	 */
1839 
1840 	vp->v_type = VBAD;
1841 	if (vp->v_usecount == 0) {
1842 		boolean_t dofree;
1843 
1844 		simple_lock(&vnode_free_list_slock);
1845 		if (vp->v_holdcnt > 0)
1846 			panic("vgonel: not clean, vp %p", vp);
1847 		/*
1848 		 * If it isn't on the freelist, we were called by getcleanvnode
1849 		 * and the vnode is being reused; otherwise, we free it.
1850 		 */
1851 		dofree = vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb;
1852 		if (dofree) {
1853 			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1854 			numvnodes--;
1855 		}
1856 		simple_unlock(&vnode_free_list_slock);
1857 		if (dofree)
1858 			pool_put(&vnode_pool, vp);
1859 	}
1860 }
1861 
1862 /*
1863  * Lookup a vnode by device number.
1864  */
1865 int
1866 vfinddev(dev, type, vpp)
1867 	dev_t dev;
1868 	enum vtype type;
1869 	struct vnode **vpp;
1870 {
1871 	struct vnode *vp;
1872 	int rc = 0;
1873 
1874 	simple_lock(&spechash_slock);
1875 	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1876 		if (dev != vp->v_rdev || type != vp->v_type)
1877 			continue;
1878 		*vpp = vp;
1879 		rc = 1;
1880 		break;
1881 	}
1882 	simple_unlock(&spechash_slock);
1883 	return (rc);
1884 }
1885 
1886 /*
1887  * Revoke all the vnodes corresponding to the specified minor number
1888  * range (endpoints inclusive) of the specified major.
1889  */
1890 void
1891 vdevgone(maj, minl, minh, type)
1892 	int maj, minl, minh;
1893 	enum vtype type;
1894 {
1895 	struct vnode *vp;
1896 	int mn;
1897 
1898 	for (mn = minl; mn <= minh; mn++)
1899 		if (vfinddev(makedev(maj, mn), type, &vp))
1900 			VOP_REVOKE(vp, REVOKEALL);
1901 }
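
/*
 * Illustrative sketch (not part of the original file): a device
 * driver's detach routine uses vdevgone() to revoke any vnodes still
 * referring to the departing device, once per major number:
 *
 *	vdevgone(bmaj, 0, nminors - 1, VBLK);
 *	vdevgone(cmaj, 0, nminors - 1, VCHR);
 *
 * (bmaj, cmaj and nminors are values the driver looked up for itself.)
 */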
1902 
1903 /*
1904  * Calculate the total number of references to a special device.
1905  */
1906 int
1907 vcount(vp)
1908 	struct vnode *vp;
1909 {
1910 	struct vnode *vq, *vnext;
1911 	int count;
1912 
1913 loop:
1914 	if ((vp->v_flag & VALIASED) == 0)
1915 		return (vp->v_usecount);
1916 	simple_lock(&spechash_slock);
1917 	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1918 		vnext = vq->v_specnext;
1919 		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1920 			continue;
1921 		/*
1922 		 * Alias, but not in use, so flush it out.
1923 		 */
1924 		if (vq->v_usecount == 0 && vq != vp &&
1925 		    (vq->v_flag & VXLOCK) == 0) {
1926 			simple_unlock(&spechash_slock);
1927 			vgone(vq);
1928 			goto loop;
1929 		}
1930 		count += vq->v_usecount;
1931 	}
1932 	simple_unlock(&spechash_slock);
1933 	return (count);
1934 }
1935 
1936 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
1937 #define ARRAY_PRINT(idx, arr) \
1938     ((idx) >= 0 && (idx) < ARRAY_SIZE(arr) ? (arr)[(idx)] : "UNKNOWN")
1939 
1940 const char * const vnode_tags[] = { VNODE_TAGS };
1941 const char * const vnode_types[] = { VNODE_TYPES };
1942 const char vnode_flagbits[] = VNODE_FLAGBITS;
1943 
1944 /*
1945  * Print out a description of a vnode.
1946  */
1947 void
1948 vprint(label, vp)
1949 	char *label;
1950 	struct vnode *vp;
1951 {
1952 	char buf[96];
1953 
1954 	if (label != NULL)
1955 		printf("%s: ", label);
1956 	printf("tag %s(%d) type %s(%d), usecount %d, writecount %ld, "
1957 	    "refcount %ld,", ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
1958 	    ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
1959 	    vp->v_usecount, vp->v_writecount, vp->v_holdcnt);
1960 	bitmask_snprintf(vp->v_flag, vnode_flagbits, buf, sizeof(buf));
1961 	if (buf[0] != '\0')
1962 		printf(" flags (%s)", &buf[1]);
1963 	if (vp->v_data == NULL) {
1964 		printf("\n");
1965 	} else {
1966 		printf("\n\t");
1967 		VOP_PRINT(vp);
1968 	}
1969 }
1970 
1971 #ifdef DEBUG
1972 /*
1973  * List all of the locked vnodes in the system.
1974  * Called when debugging the kernel.
1975  */
1976 void
1977 printlockedvnodes()
1978 {
1979 	struct mount *mp, *nmp;
1980 	struct vnode *vp;
1981 
1982 	printf("Locked vnodes\n");
1983 	simple_lock(&mountlist_slock);
1984 	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
1985 	     mp = nmp) {
1986 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
1987 			nmp = CIRCLEQ_NEXT(mp, mnt_list);
1988 			continue;
1989 		}
1990 		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
1991 			if (VOP_ISLOCKED(vp))
1992 				vprint(NULL, vp);
1993 		}
1994 		simple_lock(&mountlist_slock);
1995 		nmp = CIRCLEQ_NEXT(mp, mnt_list);
1996 		vfs_unbusy(mp);
1997 	}
1998 	simple_unlock(&mountlist_slock);
1999 }
2000 #endif
2001 
2002 /*
2003  * sysctl helper routine for vfs.generic.conf lookups.
2004  */
2005 #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
2006 static int
2007 sysctl_vfs_generic_conf(SYSCTLFN_ARGS)
2008 {
2009 	struct vfsconf vfc;
2010 	extern const char * const mountcompatnames[];
2011 	extern int nmountcompatnames;
2012 	struct sysctlnode node;
2013 	struct vfsops *vfsp;
2014 	u_int vfsnum;
2015 
2016 	if (namelen != 1)
2017 		return (ENOTDIR);
2018 	vfsnum = name[0];
2019 	if (vfsnum >= nmountcompatnames ||
2020 	    mountcompatnames[vfsnum] == NULL)
2021 		return (EOPNOTSUPP);
2022 	vfsp = vfs_getopsbyname(mountcompatnames[vfsnum]);
2023 	if (vfsp == NULL)
2024 		return (EOPNOTSUPP);
2025 
2026 	vfc.vfc_vfsops = vfsp;
2027 	strncpy(vfc.vfc_name, vfsp->vfs_name, MFSNAMELEN);
2028 	vfc.vfc_typenum = vfsnum;
2029 	vfc.vfc_refcount = vfsp->vfs_refcount;
2030 	vfc.vfc_flags = 0;
2031 	vfc.vfc_mountroot = vfsp->vfs_mountroot;
2032 	vfc.vfc_next = NULL;
2033 
2034 	node = *rnode;
2035 	node.sysctl_data = &vfc;
2036 	return (sysctl_lookup(SYSCTLFN_CALL(&node)));
2037 }
2038 #endif
2039 
2040 /*
2041  * sysctl helper routine to return list of supported fstypes
2042  */
2043 static int
2044 sysctl_vfs_generic_fstypes(SYSCTLFN_ARGS)
2045 {
2046 	char buf[MFSNAMELEN];
2047 	char *where = oldp;
2048 	struct vfsops *v;
2049 	size_t needed, left, slen;
2050 	int error, first;
2051 
2052 	if (newp != NULL)
2053 		return (EPERM);
2054 	if (namelen != 0)
2055 		return (EINVAL);
2056 
2057 	first = 1;
2058 	error = 0;
2059 	needed = 0;
2060 	left = *oldlenp;
2061 
2062 	LIST_FOREACH(v, &vfs_list, vfs_list) {
2063 		if (where == NULL)
2064 			needed += strlen(v->vfs_name) + 1;
2065 		else {
2066 			memset(buf, 0, sizeof(buf));
2067 			if (first) {
2068 				strncpy(buf, v->vfs_name, sizeof(buf));
2069 				first = 0;
2070 			} else {
2071 				buf[0] = ' ';
2072 				strncpy(buf + 1, v->vfs_name, sizeof(buf) - 1);
2073 			}
2074 			buf[sizeof(buf)-1] = '\0';
2075 			slen = strlen(buf);
2076 			if (left < slen + 1)
2077 				break;
2078 			/* +1 to copy out the trailing NUL byte */
2079 			error = copyout(buf, where, slen + 1);
2080 			if (error)
2081 				break;
2082 			where += slen;
2083 			needed += slen;
2084 			left -= slen;
2085 		}
2086 	}
2087 	*oldlenp = needed;
2088 	return (error);
2089 }
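
/*
 * Illustrative usage (not part of the original file): this helper
 * backs the vfs.generic.fstypes node created below and returns one
 * space-separated string.  Output shape (contents depend on the
 * kernel configuration):
 *
 *	$ sysctl vfs.generic.fstypes
 *	vfs.generic.fstypes = ffs nfs msdos cd9660 procfs
 */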
2090 
2091 /*
2092  * Top level filesystem related information gathering.
2093  */
2094 SYSCTL_SETUP(sysctl_vfs_setup, "sysctl vfs subtree setup")
2095 {
2096 #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
2097 	extern int nmountcompatnames;
2098 #endif
2099 
2100 	sysctl_createv(clog, 0, NULL, NULL,
2101 		       CTLFLAG_PERMANENT,
2102 		       CTLTYPE_NODE, "vfs", NULL,
2103 		       NULL, 0, NULL, 0,
2104 		       CTL_VFS, CTL_EOL);
2105 	sysctl_createv(clog, 0, NULL, NULL,
2106 		       CTLFLAG_PERMANENT,
2107 		       CTLTYPE_NODE, "generic",
2108 		       SYSCTL_DESCR("Non-specific vfs related information"),
2109 		       NULL, 0, NULL, 0,
2110 		       CTL_VFS, VFS_GENERIC, CTL_EOL);
2111 
2112 #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
2113 	sysctl_createv(clog, 0, NULL, NULL,
2114 		       CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
2115 		       CTLTYPE_INT, "maxtypenum",
2116 		       SYSCTL_DESCR("Highest valid filesystem type number"),
2117 		       NULL, nmountcompatnames, NULL, 0,
2118 		       CTL_VFS, VFS_GENERIC, VFS_MAXTYPENUM, CTL_EOL);
2119 #endif
2120 	sysctl_createv(clog, 0, NULL, NULL,
2121 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2122 		       CTLTYPE_INT, "usermount",
2123 		       SYSCTL_DESCR("Whether unprivileged users may mount "
2124 				    "filesystems"),
2125 		       NULL, 0, &dovfsusermount, 0,
2126 		       CTL_VFS, VFS_GENERIC, VFS_USERMOUNT, CTL_EOL);
2127 	sysctl_createv(clog, 0, NULL, NULL,
2128 		       CTLFLAG_PERMANENT,
2129 		       CTLTYPE_STRING, "fstypes",
2130 		       SYSCTL_DESCR("List of file systems present"),
2131 		       sysctl_vfs_generic_fstypes, 0, NULL, 0,
2132 		       CTL_VFS, VFS_GENERIC, CTL_CREATE, CTL_EOL);
2133 #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
2134 	sysctl_createv(clog, 0, NULL, NULL,
2135 		       CTLFLAG_PERMANENT,
2136 		       CTLTYPE_STRUCT, "conf",
2137 		       SYSCTL_DESCR("Filesystem configuration information"),
2138 		       sysctl_vfs_generic_conf, 0, NULL,
2139 		       sizeof(struct vfsconf),
2140 		       CTL_VFS, VFS_GENERIC, VFS_CONF, CTL_EOL);
2141 #endif
2142 }
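
/*
 * Illustrative usage (not part of the original file): the writable
 * usermount knob registered above is toggled from userland, e.g.:
 *
 *	# sysctl -w vfs.generic.usermount=1
 */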
2143 
2144 
2145 int kinfo_vdebug = 1;
2146 int kinfo_vgetfailed;
2147 #define KINFO_VNODESLOP	10
2148 /*
2149  * Dump vnode list (via sysctl).
2150  * Copyout address of vnode followed by vnode.
2151  */
2152 /* ARGSUSED */
2153 int
2154 sysctl_kern_vnode(SYSCTLFN_ARGS)
2155 {
2156 	char *where = oldp;
2157 	size_t *sizep = oldlenp;
2158 	struct mount *mp, *nmp;
2159 	struct vnode *nvp, *vp;
2160 	char *bp = where, *savebp;
2161 	char *ewhere;
2162 	int error;
2163 
2164 	if (namelen != 0)
2165 		return (EOPNOTSUPP);
2166 	if (newp != NULL)
2167 		return (EPERM);
2168 
2169 #define VPTRSZ	sizeof(struct vnode *)
2170 #define VNODESZ	sizeof(struct vnode)
2171 	if (where == NULL) {
2172 		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
2173 		return (0);
2174 	}
2175 	ewhere = where + *sizep;
2176 
2177 	simple_lock(&mountlist_slock);
2178 	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
2179 	     mp = nmp) {
2180 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
2181 			nmp = CIRCLEQ_NEXT(mp, mnt_list);
2182 			continue;
2183 		}
2184 		savebp = bp;
2185 again:
2186 		simple_lock(&mntvnode_slock);
2187 		for (vp = LIST_FIRST(&mp->mnt_vnodelist);
2188 		     vp != NULL;
2189 		     vp = nvp) {
2190 			/*
2191 			 * Check that the vp is still associated with
2192 			 * this filesystem.  RACE: could have been
2193 			 * recycled onto the same filesystem.
2194 			 */
2195 			if (vp->v_mount != mp) {
2196 				simple_unlock(&mntvnode_slock);
2197 				if (kinfo_vdebug)
2198 					printf("kinfo: vp changed\n");
2199 				bp = savebp;
2200 				goto again;
2201 			}
2202 			nvp = LIST_NEXT(vp, v_mntvnodes);
2203 			if (bp + VPTRSZ + VNODESZ > ewhere) {
2204 				simple_unlock(&mntvnode_slock);
2205 				*sizep = bp - where;
2206 				return (ENOMEM);
2207 			}
2208 			simple_unlock(&mntvnode_slock);
2209 			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
2210 			   (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
2211 				return (error);
2212 			bp += VPTRSZ + VNODESZ;
2213 			simple_lock(&mntvnode_slock);
2214 		}
2215 		simple_unlock(&mntvnode_slock);
2216 		simple_lock(&mountlist_slock);
2217 		nmp = CIRCLEQ_NEXT(mp, mnt_list);
2218 		vfs_unbusy(mp);
2219 	}
2220 	simple_unlock(&mountlist_slock);
2221 
2222 	*sizep = bp - where;
2223 	return (0);
2224 }
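
/*
 * Illustrative sketch (not part of the original file): a consumer of
 * the dump above walks the buffer as repeated (pointer, vnode) pairs,
 * mirroring the VPTRSZ/VNODESZ layout copied out by the kernel.  The
 * function and its buf/len arguments are hypothetical.
 */
#if 0
static void
scan_vnode_dump(char *buf, size_t len)
{
	char *p = buf, *end = buf + len;
	struct vnode *kaddr;	/* kernel address of the vnode */
	struct vnode vn;	/* snapshot of its contents */

	while (p + VPTRSZ + VNODESZ <= end) {
		memcpy(&kaddr, p, VPTRSZ);
		memcpy(&vn, p + VPTRSZ, VNODESZ);
		/* ... inspect vn, keyed by kaddr ... */
		p += VPTRSZ + VNODESZ;
	}
}
#endif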
2225 
2226 /*
2227  * Check to see if a filesystem is mounted on a block device.
2228  */
2229 int
2230 vfs_mountedon(vp)
2231 	struct vnode *vp;
2232 {
2233 	struct vnode *vq;
2234 	int error = 0;
2235 
2236 	if (vp->v_specmountpoint != NULL)
2237 		return (EBUSY);
2238 	if (vp->v_flag & VALIASED) {
2239 		simple_lock(&spechash_slock);
2240 		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
2241 			if (vq->v_rdev != vp->v_rdev ||
2242 			    vq->v_type != vp->v_type)
2243 				continue;
2244 			if (vq->v_specmountpoint != NULL) {
2245 				error = EBUSY;
2246 				break;
2247 			}
2248 		}
2249 		simple_unlock(&spechash_slock);
2250 	}
2251 	return (error);
2252 }
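
/*
 * Illustrative usage (not part of the original file): file systems
 * call this on the device vnode early in their mount path and fail
 * with the returned EBUSY if the device is already mounted:
 *
 *	if ((error = vfs_mountedon(devvp)) != 0)
 *		return (error);
 */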
2253 
2254 static int
2255 sacheck(struct sockaddr *sa)
2256 {
2257 	switch (sa->sa_family) {
2258 #ifdef INET
2259 	case AF_INET: {
2260 		struct sockaddr_in *sin = (struct sockaddr_in *)sa;
2261 		char *p = (char *)sin->sin_zero;
2262 		size_t i;
2263 
2264 		if (sin->sin_len != sizeof(*sin))
2265 			return -1;
2266 		if (sin->sin_port != 0)
2267 			return -1;
2268 		for (i = 0; i < sizeof(sin->sin_zero); i++)
2269 			if (*p++ != '\0')
2270 				return -1;
2271 		return 0;
2272 	}
2273 #endif
2274 #ifdef INET6
2275 	case AF_INET6: {
2276 		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
2277 
2278 		if (sin6->sin6_len != sizeof(*sin6))
2279 			return -1;
2280 		if (sin6->sin6_port != 0)
2281 			return -1;
2282 		return 0;
2283 	}
2284 #endif
2285 	default:
2286 		return -1;
2287 	}
2288 }
2289 
2290 /*
2291  * Build hash lists of net addresses and hang them off the mount point.
2292  * Called by vfs_export() to set up the lists of export addresses.
2293  */
2294 static int
2295 vfs_hang_addrlist(mp, nep, argp)
2296 	struct mount *mp;
2297 	struct netexport *nep;
2298 	struct export_args *argp;
2299 {
2300 	struct netcred *np, *enp;
2301 	struct radix_node_head *rnh;
2302 	int i;
2303 	struct sockaddr *saddr, *smask = 0;
2304 	struct domain *dom;
2305 	int error;
2306 
2307 	if (argp->ex_addrlen == 0) {
2308 		if (mp->mnt_flag & MNT_DEFEXPORTED)
2309 			return (EPERM);
2310 		np = &nep->ne_defexported;
2311 		np->netc_exflags = argp->ex_flags;
2312 		crcvt(&np->netc_anon, &argp->ex_anon);
2313 		np->netc_anon.cr_ref = 1;
2314 		mp->mnt_flag |= MNT_DEFEXPORTED;
2315 		return (0);
2316 	}
2317 
2318 	if (argp->ex_addrlen > MLEN || argp->ex_masklen > MLEN)
2319 		return (EINVAL);
2320 
2321 	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
2322 	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
2323 	memset((caddr_t)np, 0, i);
2324 	saddr = (struct sockaddr *)(np + 1);
2325 	error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
2326 	if (error)
2327 		goto out;
2328 	if (saddr->sa_len > argp->ex_addrlen)
2329 		saddr->sa_len = argp->ex_addrlen;
2330 	if (sacheck(saddr) == -1)
2331 		{ error = EINVAL; goto out; }	/* free np, don't leak it */
2332 	if (argp->ex_masklen) {
2333 		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
2334 		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
2335 		if (error)
2336 			goto out;
2337 		if (smask->sa_len > argp->ex_masklen)
2338 			smask->sa_len = argp->ex_masklen;
2339 		if (smask->sa_family != saddr->sa_family)
2340 			{ error = EINVAL; goto out; }
2341 		if (sacheck(smask) == -1)
2342 			{ error = EINVAL; goto out; }
2343 	}
2344 	i = saddr->sa_family;
2345 	if ((rnh = nep->ne_rtable[i]) == 0) {
2346 		/*
2347 		 * It seems silly to initialize every AF when most are not
2348 		 * used; do it on demand here.
2349 		 */
2350 		DOMAIN_FOREACH(dom) {
2351 			if (dom->dom_family == i && dom->dom_rtattach) {
2352 				dom->dom_rtattach((void **)&nep->ne_rtable[i],
2353 					dom->dom_rtoffset);
2354 				break;
2355 			}
2356 		}
2357 		if ((rnh = nep->ne_rtable[i]) == 0) {
2358 			error = ENOBUFS;
2359 			goto out;
2360 		}
2361 	}
2362 
2363 	enp = (struct netcred *)(*rnh->rnh_addaddr)(saddr, smask, rnh,
2364 	    np->netc_rnodes);
2365 	if (enp != np) {
2366 		if (enp == NULL) {
2367 			enp = (struct netcred *)(*rnh->rnh_lookup)(saddr,
2368 			    smask, rnh);
2369 			if (enp == NULL) {
2370 				error = EPERM;
2371 				goto out;
2372 			}
2373 		} else
2374 			enp->netc_refcnt++;
2375 
2376 		goto check;
2377 	} else
2378 		enp->netc_refcnt = 1;
2379 
2380 	np->netc_exflags = argp->ex_flags;
2381 	crcvt(&np->netc_anon, &argp->ex_anon);
2382 	np->netc_anon.cr_ref = 1;
2383 	return 0;
2384 check:
2385 	if (enp->netc_exflags != argp->ex_flags ||
2386 	    crcmp(&enp->netc_anon, &argp->ex_anon) != 0)
2387 		error = EPERM;
2388 	else
2389 		error = 0;
2390 out:
2391 	free(np, M_NETADDR);
2392 	return error;
2393 }
2394 
2395 /* ARGSUSED */
2396 static int
2397 vfs_free_netcred(rn, w)
2398 	struct radix_node *rn;
2399 	void *w;
2400 {
2401 	struct radix_node_head *rnh = (struct radix_node_head *)w;
2402 	struct netcred *np = (struct netcred *)(void *)rn;
2403 
2404 	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
2405 	if (--(np->netc_refcnt) <= 0)
2406 		free(np, M_NETADDR);
2407 	return (0);
2408 }
2409 
2410 /*
2411  * Free the net address hash lists that are hanging off the mount points.
2412  */
2413 static void
2414 vfs_free_addrlist(nep)
2415 	struct netexport *nep;
2416 {
2417 	int i;
2418 	struct radix_node_head *rnh;
2419 
2420 	for (i = 0; i <= AF_MAX; i++)
2421 		if ((rnh = nep->ne_rtable[i]) != NULL) {
2422 			(*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
2423 			free((caddr_t)rnh, M_RTABLE);
2424 			nep->ne_rtable[i] = 0;
2425 		}
2426 }
2427 
2428 int
2429 vfs_export(mp, nep, argp)
2430 	struct mount *mp;
2431 	struct netexport *nep;
2432 	struct export_args *argp;
2433 {
2434 	int error;
2435 
2436 	if (argp->ex_flags & MNT_DELEXPORT) {
2437 		if (mp->mnt_flag & MNT_EXPUBLIC) {
2438 			vfs_setpublicfs(NULL, NULL, NULL);
2439 			mp->mnt_flag &= ~MNT_EXPUBLIC;
2440 		}
2441 		vfs_free_addrlist(nep);
2442 		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
2443 	}
2444 	if (argp->ex_flags & MNT_EXPORTED) {
2445 		if (argp->ex_flags & MNT_EXPUBLIC) {
2446 			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
2447 				return (error);
2448 			mp->mnt_flag |= MNT_EXPUBLIC;
2449 		}
2450 		if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
2451 			return (error);
2452 		mp->mnt_flag |= MNT_EXPORTED;
2453 	}
2454 	return (0);
2455 }
2456 
2457 /*
2458  * Set the publicly exported filesystem (WebNFS). Currently, only
2459  * one public filesystem is possible in the spec (RFCs 2054 and 2055).
2460  */
2461 int
2462 vfs_setpublicfs(mp, nep, argp)
2463 	struct mount *mp;
2464 	struct netexport *nep;
2465 	struct export_args *argp;
2466 {
2467 	int error;
2468 	struct vnode *rvp;
2469 	char *cp;
2470 
2471 	/*
2472 	 * mp == NULL -> invalidate the current info, the FS is
2473 	 * no longer exported. May be called from either vfs_export
2474 	 * or unmount, so check if it hasn't already been done.
2475 	 */
2476 	if (mp == NULL) {
2477 		if (nfs_pub.np_valid) {
2478 			nfs_pub.np_valid = 0;
2479 			if (nfs_pub.np_index != NULL) {
2480 				FREE(nfs_pub.np_index, M_TEMP);
2481 				nfs_pub.np_index = NULL;
2482 			}
2483 		}
2484 		return (0);
2485 	}
2486 
2487 	/*
2488 	 * Only one allowed at a time.
2489 	 */
2490 	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
2491 		return (EBUSY);
2492 
2493 	/*
2494 	 * Get real filehandle for root of exported FS.
2495 	 */
2496 	memset((caddr_t)&nfs_pub.np_handle, 0, sizeof(nfs_pub.np_handle));
2497 	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsidx;
2498 
2499 	if ((error = VFS_ROOT(mp, &rvp)))
2500 		return (error);
2501 
2502 	if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
2503 		return (error);
2504 
2505 	vput(rvp);
2506 
2507 	/*
2508 	 * If an indexfile was specified, pull it in.
2509 	 */
2510 	if (argp->ex_indexfile != NULL) {
2511 		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
2512 		    M_WAITOK);
2513 		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
2514 		    MAXNAMLEN, (size_t *)0);
2515 		if (!error) {
2516 			/*
2517 			 * Check for illegal filenames.
2518 			 */
2519 			for (cp = nfs_pub.np_index; *cp; cp++) {
2520 				if (*cp == '/') {
2521 					error = EINVAL;
2522 					break;
2523 				}
2524 			}
2525 		}
2526 		if (error) {
2527 			FREE(nfs_pub.np_index, M_TEMP);
2528 			return (error);
2529 		}
2530 	}
2531 
2532 	nfs_pub.np_mount = mp;
2533 	nfs_pub.np_valid = 1;
2534 	return (0);
2535 }
2536 
2537 struct netcred *
2538 vfs_export_lookup(mp, nep, nam)
2539 	struct mount *mp;
2540 	struct netexport *nep;
2541 	struct mbuf *nam;
2542 {
2543 	struct netcred *np;
2544 	struct radix_node_head *rnh;
2545 	struct sockaddr *saddr;
2546 
2547 	np = NULL;
2548 	if (mp->mnt_flag & MNT_EXPORTED) {
2549 		/*
2550 		 * Lookup in the export list first.
2551 		 */
2552 		if (nam != NULL) {
2553 			saddr = mtod(nam, struct sockaddr *);
2554 			rnh = nep->ne_rtable[saddr->sa_family];
2555 			if (rnh != NULL) {
2556 				np = (struct netcred *)
2557 					(*rnh->rnh_matchaddr)((caddr_t)saddr,
2558 							      rnh);
2559 				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
2560 					np = NULL;
2561 			}
2562 		}
2563 		/*
2564 		 * If no address match, use the default if it exists.
2565 		 */
2566 		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
2567 			np = &nep->ne_defexported;
2568 	}
2569 	return (np);
2570 }
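
/*
 * Illustrative sketch (not part of the original file): an NFS-style
 * server maps the client's address to export credentials roughly as
 * follows ("ump" and "nd" are hypothetical names):
 *
 *	np = vfs_export_lookup(mp, &ump->um_export, nd->nd_nam);
 *	if (np == NULL)
 *		return (EACCES);
 *	(then apply np->netc_exflags and np->netc_anon to the request)
 */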
2571 
2572 /*
2573  * Do the usual access checking.
2574  * file_mode, uid and gid are from the vnode in question,
2575  * while acc_mode and cred are from the VOP_ACCESS parameter list.
2576  */
2577 int
2578 vaccess(type, file_mode, uid, gid, acc_mode, cred)
2579 	enum vtype type;
2580 	mode_t file_mode;
2581 	uid_t uid;
2582 	gid_t gid;
2583 	mode_t acc_mode;
2584 	struct ucred *cred;
2585 {
2586 	mode_t mask;
2587 
2588 	/*
2589 	 * Super-user always gets read/write access, but execute access depends
2590 	 * on at least one execute bit being set.
2591 	 */
2592 	if (cred->cr_uid == 0) {
2593 		if ((acc_mode & VEXEC) && type != VDIR &&
2594 		    (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
2595 			return (EACCES);
2596 		return (0);
2597 	}
2598 
2599 	mask = 0;
2600 
2601 	/* Otherwise, check the owner. */
2602 	if (cred->cr_uid == uid) {
2603 		if (acc_mode & VEXEC)
2604 			mask |= S_IXUSR;
2605 		if (acc_mode & VREAD)
2606 			mask |= S_IRUSR;
2607 		if (acc_mode & VWRITE)
2608 			mask |= S_IWUSR;
2609 		return ((file_mode & mask) == mask ? 0 : EACCES);
2610 	}
2611 
2612 	/* Otherwise, check the groups. */
2613 	if (cred->cr_gid == gid || groupmember(gid, cred)) {
2614 		if (acc_mode & VEXEC)
2615 			mask |= S_IXGRP;
2616 		if (acc_mode & VREAD)
2617 			mask |= S_IRGRP;
2618 		if (acc_mode & VWRITE)
2619 			mask |= S_IWGRP;
2620 		return ((file_mode & mask) == mask ? 0 : EACCES);
2621 	}
2622 
2623 	/* Otherwise, check everyone else. */
2624 	if (acc_mode & VEXEC)
2625 		mask |= S_IXOTH;
2626 	if (acc_mode & VREAD)
2627 		mask |= S_IROTH;
2628 	if (acc_mode & VWRITE)
2629 		mask |= S_IWOTH;
2630 	return ((file_mode & mask) == mask ? 0 : EACCES);
2631 }
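
/*
 * Illustrative sketch (not part of the original file): a file
 * system's VOP_ACCESS implementation typically ends by handing the
 * mode-bit check to vaccess() with its on-disk attributes; the
 * ufs-style inode fields here are assumptions:
 *
 *	return (vaccess(vp->v_type, ip->i_mode & ALLPERMS,
 *	    ip->i_uid, ip->i_gid, ap->a_mode, ap->a_cred));
 */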
2632 
2633 /*
2634  * Unmount all file systems.
2635  * We traverse the list in reverse order under the assumption that doing so
2636  * will avoid needing to worry about dependencies.
2637  */
2638 void
2639 vfs_unmountall(p)
2640 	struct proc *p;
2641 {
2642 	struct mount *mp, *nmp;
2643 	int allerror, error;
2644 
2645 	printf("unmounting file systems...");
2646 	for (allerror = 0,
2647 	     mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
2648 		nmp = mp->mnt_list.cqe_prev;
2649 #ifdef DEBUG
2650 		printf("\nunmounting %s (%s)...",
2651 		    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
2652 #endif
2653 		/*
2654 		 * XXX Freeze syncer.  Must do this before locking the
2655 		 * mount point.  See dounmount() for details.
2656 		 */
2657 		lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
2658 		if (vfs_busy(mp, 0, 0)) {
2659 			lockmgr(&syncer_lock, LK_RELEASE, NULL);
2660 			continue;
2661 		}
2662 		if ((error = dounmount(mp, MNT_FORCE, p)) != 0) {
2663 			printf("unmount of %s failed with error %d\n",
2664 			    mp->mnt_stat.f_mntonname, error);
2665 			allerror = 1;
2666 		}
2667 	}
2668 	printf(" done\n");
2669 	if (allerror)
2670 		printf("WARNING: some file systems would not unmount\n");
2671 }
2672 
2673 extern struct simplelock bqueue_slock; /* XXX */
2674 
2675 /*
2676  * Sync and unmount file systems before shutting down.
2677  */
2678 void
2679 vfs_shutdown()
2680 {
2681 	struct lwp *l = curlwp;
2682 	struct proc *p;
2683 
2684 	/* XXX we're certainly not running in proc0's context! */
2685 	if (l == NULL || (p = l->l_proc) == NULL)
2686 		p = &proc0;
2687 
2688 	printf("syncing disks... ");
2689 
2690 	/* remove user process from run queue */
2691 	suspendsched();
2692 	(void) spl0();
2693 
2694 	/* avoid coming back this way again if we panic. */
2695 	doing_shutdown = 1;
2696 
2697 	sys_sync(l, NULL, NULL);
2698 
2699 	/* Wait for sync to finish. */
2700 	if (buf_syncwait() != 0) {
2701 #if defined(DDB) && defined(DEBUG_HALT_BUSY)
2702 		Debugger();
2703 #endif
2704 		printf("giving up\n");
2705 		return;
2706 	} else
2707 		printf("done\n");
2708 
2709 	/*
2710 	 * If we've panic'd, don't make the situation potentially
2711 	 * worse by unmounting the file systems.
2712 	 */
2713 	if (panicstr != NULL)
2714 		return;
2715 
2716 	/* Release inodes held by texts before update. */
2717 #ifdef notdef
2718 	vnshutdown();
2719 #endif
2720 	/* Unmount file systems. */
2721 	vfs_unmountall(p);
2722 }
2723 
2724 /*
2725  * Mount the root file system.  If the operator didn't specify a
2726  * file system to use, try all possible file systems until one
2727  * succeeds.
2728  */
2729 int
2730 vfs_mountroot()
2731 {
2732 	struct vfsops *v;
2733 	int error = ENODEV;
2734 
2735 	if (root_device == NULL)
2736 		panic("vfs_mountroot: root device unknown");
2737 
2738 	switch (root_device->dv_class) {
2739 	case DV_IFNET:
2740 		if (rootdev != NODEV)
2741 			panic("vfs_mountroot: rootdev set for DV_IFNET "
2742 			    "(0x%08x -> %d,%d)", rootdev,
2743 			    major(rootdev), minor(rootdev));
2744 		break;
2745 
2746 	case DV_DISK:
2747 		if (rootdev == NODEV)
2748 			panic("vfs_mountroot: rootdev not set for DV_DISK");
2749 		if (bdevvp(rootdev, &rootvp))
2750 			panic("vfs_mountroot: can't get vnode for rootdev");
2751 		error = VOP_OPEN(rootvp, FREAD, FSCRED, curproc);
2752 		if (error) {
2753 			printf("vfs_mountroot: can't open root device\n");
2754 			return (error);
2755 		}
2756 		break;
2757 
2758 	default:
2759 		printf("%s: inappropriate for root file system\n",
2760 		    root_device->dv_xname);
2761 		return (ENODEV);
2762 	}
2763 
2764 	/*
2765 	 * If user specified a file system, use it.
2766 	 */
2767 	if (mountroot != NULL) {
2768 		error = (*mountroot)();
2769 		goto done;
2770 	}
2771 
2772 	/*
2773 	 * Try each file system currently configured into the kernel.
2774 	 */
2775 	LIST_FOREACH(v, &vfs_list, vfs_list) {
2776 		if (v->vfs_mountroot == NULL)
2777 			continue;
2778 #ifdef DEBUG
2779 		aprint_normal("mountroot: trying %s...\n", v->vfs_name);
2780 #endif
2781 		error = (*v->vfs_mountroot)();
2782 		if (!error) {
2783 			aprint_normal("root file system type: %s\n",
2784 			    v->vfs_name);
2785 			break;
2786 		}
2787 	}
2788 
2789 	if (v == NULL) {
2790 		printf("no file system for %s", root_device->dv_xname);
2791 		if (root_device->dv_class == DV_DISK)
2792 			printf(" (dev 0x%x)", rootdev);
2793 		printf("\n");
2794 		error = EFTYPE;
2795 	}
2796 
2797 done:
2798 	if (error && root_device->dv_class == DV_DISK) {
2799 		VOP_CLOSE(rootvp, FREAD, FSCRED, curproc);
2800 		vrele(rootvp);
2801 	}
2802 	return (error);
2803 }
2804 
2805 /*
2806  * Given a file system name, look up the vfsops for that
2807  * file system, or return NULL if file system isn't present
2808  * in the kernel.
2809  */
2810 struct vfsops *
2811 vfs_getopsbyname(name)
2812 	const char *name;
2813 {
2814 	struct vfsops *v;
2815 
2816 	LIST_FOREACH(v, &vfs_list, vfs_list) {
2817 		if (strcmp(v->vfs_name, name) == 0)
2818 			break;
2819 	}
2820 
2821 	return (v);
2822 }
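
/*
 * Illustrative usage (not part of the original file): callers must
 * handle the NULL result for file systems not in the kernel, e.g.:
 *
 *	if ((v = vfs_getopsbyname("nfs")) == NULL)
 *		return (ENODEV);
 */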
2823 
2824 /*
2825  * Establish a file system and initialize it.
2826  */
2827 int
2828 vfs_attach(vfs)
2829 	struct vfsops *vfs;
2830 {
2831 	struct vfsops *v;
2832 	int error = 0;
2833 
2835 	/*
2836 	 * Make sure this file system doesn't already exist.
2837 	 */
2838 	LIST_FOREACH(v, &vfs_list, vfs_list) {
2839 		if (strcmp(vfs->vfs_name, v->vfs_name) == 0) {
2840 			error = EEXIST;
2841 			goto out;
2842 		}
2843 	}
2844 
2845 	/*
2846 	 * Initialize the vnode operations for this file system.
2847 	 */
2848 	vfs_opv_init(vfs->vfs_opv_descs);
2849 
2850 	/*
2851 	 * Now initialize the file system itself.
2852 	 */
2853 	(*vfs->vfs_init)();
2854 
2855 	/*
2856 	 * ...and link it into the kernel's list.
2857 	 */
2858 	LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list);
2859 
2860 	/*
2861 	 * Sanity: make sure the reference count is 0.
2862 	 */
2863 	vfs->vfs_refcount = 0;
2864 
2865  out:
2866 	return (error);
2867 }
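
/*
 * Illustrative sketch (not part of the original file): a loadable
 * file system registers its vfsops on load and removes it on unload.
 * example_vfsops and the function names are hypothetical.
 */
#if 0
extern struct vfsops example_vfsops;

static int
example_modload(void)
{
	/* Initializes vnode ops and links example_vfsops into vfs_list. */
	return (vfs_attach(&example_vfsops));
}

static int
example_modunload(void)
{
	/* Fails with EBUSY while any instance remains mounted. */
	return (vfs_detach(&example_vfsops));
}
#endif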
2868 
2869 /*
2870  * Remove a file system from the kernel.
2871  */
2872 int
2873 vfs_detach(vfs)
2874 	struct vfsops *vfs;
2875 {
2876 	struct vfsops *v;
2877 
2878 	/*
2879 	 * Make sure no one is using the filesystem.
2880 	 */
2881 	if (vfs->vfs_refcount != 0)
2882 		return (EBUSY);
2883 
2884 	/*
2885 	 * ...and remove it from the kernel's list.
2886 	 */
2887 	LIST_FOREACH(v, &vfs_list, vfs_list) {
2888 		if (v == vfs) {
2889 			LIST_REMOVE(v, vfs_list);
2890 			break;
2891 		}
2892 	}
2893 
2894 	if (v == NULL)
2895 		return (ESRCH);
2896 
2897 	/*
2898 	 * Now run the file system-specific cleanups.
2899 	 */
2900 	(*vfs->vfs_done)();
2901 
2902 	/*
2903 	 * Free the vnode operations vector.
2904 	 */
2905 	vfs_opv_free(vfs->vfs_opv_descs);
2906 	return (0);
2907 }
2908 
2909 void
2910 vfs_reinit(void)
2911 {
2912 	struct vfsops *vfs;
2913 
2914 	LIST_FOREACH(vfs, &vfs_list, vfs_list) {
2915 		if (vfs->vfs_reinit) {
2916 			(*vfs->vfs_reinit)();
2917 		}
2918 	}
2919 }
2920 
2921 /*
2922  * Request a filesystem to suspend write operations.
2923  */
2924 int
2925 vfs_write_suspend(struct mount *mp, int slpflag, int slptimeo)
2926 {
2927 	struct proc *p = curproc;	/* XXX */
2928 	int error;
2929 
2930 	while ((mp->mnt_iflag & IMNT_SUSPEND)) {
2931 		if (slptimeo < 0)
2932 			return EWOULDBLOCK;
2933 		error = tsleep(&mp->mnt_flag, slpflag, "suspwt1", slptimeo);
2934 		if (error)
2935 			return error;
2936 	}
2937 	mp->mnt_iflag |= IMNT_SUSPEND;
2938 
2939 	simple_lock(&mp->mnt_slock);
2940 	if (mp->mnt_writeopcountupper > 0)
2941 		ltsleep(&mp->mnt_writeopcountupper, PUSER - 1, "suspwt",
2942 			0, &mp->mnt_slock);
2943 	simple_unlock(&mp->mnt_slock);
2944 
2945 	error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p);
2946 	if (error) {
2947 		vfs_write_resume(mp);
2948 		return error;
2949 	}
2950 	mp->mnt_iflag |= IMNT_SUSPENDLOW;
2951 
2952 	simple_lock(&mp->mnt_slock);
2953 	if (mp->mnt_writeopcountlower > 0)
2954 		ltsleep(&mp->mnt_writeopcountlower, PUSER - 1, "suspwt",
2955 			0, &mp->mnt_slock);
2956 	mp->mnt_iflag |= IMNT_SUSPENDED;
2957 	simple_unlock(&mp->mnt_slock);
2958 
2959 	return 0;
2960 }
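
/*
 * Illustrative usage (not part of the original file): callers such
 * as snapshot code bracket work that needs a quiescent file system
 * between suspend and resume:
 *
 *	if ((error = vfs_write_suspend(mp, 0, 0)) != 0)
 *		return (error);
 *	(all writers are drained here; do the work)
 *	vfs_write_resume(mp);
 */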
2961 
2962 /*
2963  * Request a filesystem to resume write operations.
2964  */
2965 void
2966 vfs_write_resume(struct mount *mp)
2967 {
2968 
2969 	if ((mp->mnt_iflag & IMNT_SUSPEND) == 0)
2970 		return;
2971 	mp->mnt_iflag &= ~(IMNT_SUSPEND | IMNT_SUSPENDLOW | IMNT_SUSPENDED);
2972 	wakeup(&mp->mnt_flag);
2973 }
2974 
2975 void
2976 copy_statvfs_info(struct statvfs *sbp, const struct mount *mp)
2977 {
2978 	const struct statvfs *mbp;
2979 
2980 	if (sbp == (mbp = &mp->mnt_stat))
2981 		return;
2982 
2983 	(void)memcpy(&sbp->f_fsidx, &mbp->f_fsidx, sizeof(sbp->f_fsidx));
2984 	sbp->f_fsid = mbp->f_fsid;
2985 	sbp->f_owner = mbp->f_owner;
2986 	sbp->f_flag = mbp->f_flag;
2987 	sbp->f_syncwrites = mbp->f_syncwrites;
2988 	sbp->f_asyncwrites = mbp->f_asyncwrites;
2989 	sbp->f_syncreads = mbp->f_syncreads;
2990 	sbp->f_asyncreads = mbp->f_asyncreads;
2991 	(void)memcpy(sbp->f_spare, mbp->f_spare, sizeof(mbp->f_spare));
2992 	(void)memcpy(sbp->f_fstypename, mbp->f_fstypename,
2993 	    sizeof(sbp->f_fstypename));
2994 	(void)memcpy(sbp->f_mntonname, mbp->f_mntonname,
2995 	    sizeof(sbp->f_mntonname));
2996 	(void)memcpy(sbp->f_mntfromname, mbp->f_mntfromname,
2997 	    sizeof(sbp->f_mntfromname));
2998 	sbp->f_namemax = mbp->f_namemax;
2999 }
3000 
3001 int
3002 set_statvfs_info(const char *onp, int ukon, const char *fromp, int ukfrom,
3003     struct mount *mp, struct proc *p)
3004 {
3005 	int error;
3006 	size_t size;
3007 	struct statvfs *sfs = &mp->mnt_stat;
3008 	int (*fun)(const void *, void *, size_t, size_t *);
3009 
3010 	(void)strncpy(mp->mnt_stat.f_fstypename, mp->mnt_op->vfs_name,
3011 	    sizeof(mp->mnt_stat.f_fstypename));
3012 
3013 	if (onp) {
3014 		struct cwdinfo *cwdi = p->p_cwdi;
3015 		fun = (ukon == UIO_SYSSPACE) ? copystr : copyinstr;
3016 		if (cwdi->cwdi_rdir != NULL) {
3017 			size_t len;
3018 			char *bp;
3019 			char *path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
3020 
3021 			if (!path) /* XXX can't happen with M_WAITOK */
3022 				return ENOMEM;
3023 
3024 			bp = path + MAXPATHLEN;
3025 			*--bp = '\0';
3026 			error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp,
3027 			    path, MAXPATHLEN / 2, 0, p);
3028 			if (error) {
3029 				free(path, M_TEMP);
3030 				return error;
3031 			}
3032 
3033 			len = strlen(bp);
3034 			if (len > sizeof(sfs->f_mntonname) - 1)
3035 				len = sizeof(sfs->f_mntonname) - 1;
3036 			(void)strncpy(sfs->f_mntonname, bp, len);
3037 			free(path, M_TEMP);
3038 
3039 			if (len < sizeof(sfs->f_mntonname) - 1) {
3040 				error = (*fun)(onp, &sfs->f_mntonname[len],
3041 				    sizeof(sfs->f_mntonname) - len - 1, &size);
3042 				if (error)
3043 					return error;
3044 				size += len;
3045 			} else {
3046 				size = len;
3047 			}
3048 		} else {
3049 			error = (*fun)(onp, &sfs->f_mntonname,
3050 			    sizeof(sfs->f_mntonname) - 1, &size);
3051 			if (error)
3052 				return error;
3053 		}
3054 		(void)memset(sfs->f_mntonname + size, 0,
3055 		    sizeof(sfs->f_mntonname) - size);
3056 	}
3057 
3058 	if (fromp) {
3059 		fun = (ukfrom == UIO_SYSSPACE) ? copystr : copyinstr;
3060 		error = (*fun)(fromp, sfs->f_mntfromname,
3061 		    sizeof(sfs->f_mntfromname) - 1, &size);
3062 		if (error)
3063 			return error;
3064 		(void)memset(sfs->f_mntfromname + size, 0,
3065 		    sizeof(sfs->f_mntfromname) - size);
3066 	}
3067 	return 0;
3068 }
3069 
3070 /*
3071  * Default vfs_extattrctl routine for file systems that do not support
3072  * it.
3073  */
3074 /*ARGSUSED*/
3075 int
3076 vfs_stdextattrctl(struct mount *mp, int cmt, struct vnode *vp,
3077     int attrnamespace, const char *attrname, struct proc *p)
3078 {
3079 
3080 	if (vp != NULL)
3081 		VOP_UNLOCK(vp, 0);
3082 	return (EOPNOTSUPP);
3083 }
3084 
3085 /*
3086  * Credential check based on process requesting service, and per-attribute
3087  * permissions.
3088  *
3089  * NOTE: Vnode must be locked.
3090  */
3091 int
3092 extattr_check_cred(struct vnode *vp, int attrnamespace,
3093     struct ucred *cred, struct proc *p, int access)
3094 {
3095 
3096 	if (cred == NOCRED)
3097 		return (0);
3098 
3099 	switch (attrnamespace) {
3100 	case EXTATTR_NAMESPACE_SYSTEM:
3101 		/*
3102 		 * Do we really want to allow this, or just require that
3103 		 * these requests come from kernel code (NOCRED case above)?
3104 		 */
3105 		return (suser(cred, &p->p_acflag));
3106 
3107 	case EXTATTR_NAMESPACE_USER:
3108 		return (VOP_ACCESS(vp, access, cred, p));
3109 
3110 	default:
3111 		return (EPERM);
3112 	}
3113 }
3114 
3115 #ifdef DDB
3116 const char buf_flagbits[] = BUF_FLAGBITS;
3117 
3118 void
3119 vfs_buf_print(bp, full, pr)
3120 	struct buf *bp;
3121 	int full;
3122 	void (*pr)(const char *, ...);
3123 {
3124 	char buf[1024];
3125 
3126 	(*pr)("  vp %p lblkno 0x%"PRIx64" blkno 0x%"PRIx64" dev 0x%x\n",
3127 		  bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_dev);
3128 
3129 	bitmask_snprintf(bp->b_flags, buf_flagbits, buf, sizeof(buf));
3130 	(*pr)("  error %d flags 0x%s\n", bp->b_error, buf);
3131 
3132 	(*pr)("  bufsize 0x%lx bcount 0x%lx resid 0x%lx\n",
3133 		  bp->b_bufsize, bp->b_bcount, bp->b_resid);
3134 	(*pr)("  data %p saveaddr %p dep %p\n",
3135 		  bp->b_data, bp->b_saveaddr, LIST_FIRST(&bp->b_dep));
3136 	(*pr)("  iodone %p\n", bp->b_iodone);
3137 }
3138 
3139 
3140 void
3141 vfs_vnode_print(vp, full, pr)
3142 	struct vnode *vp;
3143 	int full;
3144 	void (*pr)(const char *, ...);
3145 {
3146 	char buf[256];
3147 
3148 	uvm_object_printit(&vp->v_uobj, full, pr);
3149 	bitmask_snprintf(vp->v_flag, vnode_flagbits, buf, sizeof(buf));
3150 	(*pr)("\nVNODE flags %s\n", buf);
3151 	(*pr)("mp %p numoutput %d size 0x%llx\n",
3152 	      vp->v_mount, vp->v_numoutput, vp->v_size);
3153 
3154 	(*pr)("data %p usecount %d writecount %ld holdcnt %ld numoutput %d\n",
3155 	      vp->v_data, vp->v_usecount, vp->v_writecount,
3156 	      vp->v_holdcnt, vp->v_numoutput);
3157 
3158 	(*pr)("tag %s(%d) type %s(%d) mount %p typedata %p\n",
3159 	      ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
3160 	      ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
3161 	      vp->v_mount, vp->v_mountedhere);
3162 
3163 	if (full) {
3164 		struct buf *bp;
3165 
3166 		(*pr)("clean bufs:\n");
3167 		LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
3168 			(*pr)(" bp %p\n", bp);
3169 			vfs_buf_print(bp, full, pr);
3170 		}
3171 
3172 		(*pr)("dirty bufs:\n");
3173 		LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
3174 			(*pr)(" bp %p\n", bp);
3175 			vfs_buf_print(bp, full, pr);
3176 		}
3177 	}
3178 }
3179 
3180 void
3181 vfs_mount_print(mp, full, pr)
3182 	struct mount *mp;
3183 	int full;
3184 	void (*pr)(const char *, ...);
3185 {
3186 	char sbuf[256];
3187 
3188 	(*pr)("vnodecovered = %p syncer = %p data = %p\n",
3189 			mp->mnt_vnodecovered,mp->mnt_syncer,mp->mnt_data);
3190 
3191 	(*pr)("fs_bshift %d dev_bshift = %d\n",
3192 			mp->mnt_fs_bshift,mp->mnt_dev_bshift);
3193 
3194 	bitmask_snprintf(mp->mnt_flag, __MNT_FLAG_BITS, sbuf, sizeof(sbuf));
3195 	(*pr)("flag = %s\n", sbuf);
3196 
3197 	bitmask_snprintf(mp->mnt_iflag, __IMNT_FLAG_BITS, sbuf, sizeof(sbuf));
3198 	(*pr)("iflag = %s\n", sbuf);
3199 
3200 	/* XXX use lockmgr_printinfo */
3201 	if (mp->mnt_lock.lk_sharecount)
3202 		(*pr)(" lock type %s: SHARED (count %d)", mp->mnt_lock.lk_wmesg,
3203 		    mp->mnt_lock.lk_sharecount);
3204 	else if (mp->mnt_lock.lk_flags & LK_HAVE_EXCL) {
3205 		(*pr)(" lock type %s: EXCL (count %d) by ",
3206 		    mp->mnt_lock.lk_wmesg, mp->mnt_lock.lk_exclusivecount);
3207 		if (mp->mnt_lock.lk_flags & LK_SPIN)
3208 			(*pr)("processor %lu", mp->mnt_lock.lk_cpu);
3209 		else
3210 			(*pr)("pid %d.%d", mp->mnt_lock.lk_lockholder,
3211 			    mp->mnt_lock.lk_locklwp);
3212 	} else
3213 		(*pr)(" not locked");
3214 	if ((mp->mnt_lock.lk_flags & LK_SPIN) == 0 && mp->mnt_lock.lk_waitcount > 0)
3215 		(*pr)(" with %d pending", mp->mnt_lock.lk_waitcount);
3216 
3217 	(*pr)("\n");
3218 
3219 	if (mp->mnt_unmounter) {
3220 		(*pr)("unmounter pid = %d ",mp->mnt_unmounter->p_pid);
3221 	}
3222 	(*pr)("wcnt = %d, writeopcountupper = %d, writeopcountlower = %d\n",
3223 		mp->mnt_wcnt,mp->mnt_writeopcountupper,mp->mnt_writeopcountlower);
3224 
3225 	(*pr)("statvfs cache:\n");
3226 	(*pr)("\tbsize = %lu\n",mp->mnt_stat.f_bsize);
3227 	(*pr)("\tfrsize = %lu\n",mp->mnt_stat.f_frsize);
3228 	(*pr)("\tiosize = %lu\n",mp->mnt_stat.f_iosize);
3229 
3230 	(*pr)("\tblocks = %" PRIu64 "\n",mp->mnt_stat.f_blocks);
3231 	(*pr)("\tbfree = %" PRIu64 "\n",mp->mnt_stat.f_bfree);
3232 	(*pr)("\tbavail = %" PRIu64 "\n",mp->mnt_stat.f_bavail);
3233 	(*pr)("\tbresvd = %" PRIu64 "\n",mp->mnt_stat.f_bresvd);
3234 
3235 	(*pr)("\tfiles = %" PRIu64 "\n",mp->mnt_stat.f_files);
3236 	(*pr)("\tffree = %" PRIu64 "\n",mp->mnt_stat.f_ffree);
3237 	(*pr)("\tfavail = %" PRIu64 "\n",mp->mnt_stat.f_favail);
3238 	(*pr)("\tfresvd = %" PRIu64 "\n",mp->mnt_stat.f_fresvd);
3239 
3240 	(*pr)("\tf_fsidx = { 0x%"PRIx32", 0x%"PRIx32" }\n",
3241 			mp->mnt_stat.f_fsidx.__fsid_val[0],
3242 			mp->mnt_stat.f_fsidx.__fsid_val[1]);
3243 
3244 	(*pr)("\towner = %"PRIu32"\n",mp->mnt_stat.f_owner);
3245 	(*pr)("\tnamemax = %lu\n",mp->mnt_stat.f_namemax);
3246 
3247 	bitmask_snprintf(mp->mnt_stat.f_flag, __MNT_FLAG_BITS, sbuf,
3248 	    sizeof(sbuf));
3249 	(*pr)("\tflag = %s\n",sbuf);
3250 	(*pr)("\tsyncwrites = %" PRIu64 "\n",mp->mnt_stat.f_syncwrites);
3251 	(*pr)("\tasyncwrites = %" PRIu64 "\n",mp->mnt_stat.f_asyncwrites);
3252 	(*pr)("\tsyncreads = %" PRIu64 "\n",mp->mnt_stat.f_syncreads);
3253 	(*pr)("\tasyncreads = %" PRIu64 "\n",mp->mnt_stat.f_asyncreads);
3254 	(*pr)("\tfstypename = %s\n",mp->mnt_stat.f_fstypename);
3255 	(*pr)("\tmntonname = %s\n",mp->mnt_stat.f_mntonname);
3256 	(*pr)("\tmntfromname = %s\n",mp->mnt_stat.f_mntfromname);
3257 
3258 	{
3259 		int cnt = 0;
3260 		struct vnode *vp;
3261 		(*pr)("locked vnodes =");
3262 		/* XXX would take mountlist lock, except ddb may not have context */
3263 		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
3264 			if (VOP_ISLOCKED(vp)) {
3265 				if ((++cnt % 6) == 0) {
3266 					(*pr)(" %p,\n\t", vp);
3267 				} else {
3268 					(*pr)(" %p,", vp);
3269 				}
3270 			}
3271 		}
3272 		(*pr)("\n");
3273 	}
3274 
3275 	if (full) {
3276 		int cnt = 0;
3277 		struct vnode *vp;
3278 		(*pr)("all vnodes =");
3279 		/* XXX would take mountlist lock, except ddb may not have context */
3280 		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
3281 			if (!LIST_NEXT(vp, v_mntvnodes)) {
3282 				(*pr)(" %p", vp);
3283 			} else if ((++cnt % 6) == 0) {
3284 				(*pr)(" %p,\n\t", vp);
3285 			} else {
3286 				(*pr)(" %p,", vp);
3287 			}
3288 		}
3289 		(*pr)("\n");
3290 	}
3291 }
3292 
3293 #endif
3294